本文整理了Java中org.apache.spark.api.java.JavaRDD.mapToPair()
方法的一些代码示例,展示了JavaRDD.mapToPair()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度帮忙到你。JavaRDD.mapToPair()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:mapToPair
暂无
代码示例来源:origin: OryxProject/oryx
private static JavaPairRDD<String,float[]> readFeaturesRDD(JavaSparkContext sparkContext, Path path) {
log.info("Loading features RDD from {}", path);
JavaRDD<String> featureLines = sparkContext.textFile(path.toString());
return featureLines.mapToPair(line -> {
List<?> update = TextUtils.readJSON(line, List.class);
String key = update.get(0).toString();
float[] vector = TextUtils.convertViaJSON(update.get(1), float[].class);
return new Tuple2<>(key, vector);
});
}
代码示例来源:origin: databricks/learning-spark
public void run(String master, String csv1, String csv2) throws Exception {
JavaSparkContext sc = new JavaSparkContext(
master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
JavaRDD<String> csvFile1 = sc.textFile(csv1);
JavaRDD<String> csvFile2 = sc.textFile(csv2);
JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile1.mapToPair(new ParseLine());
JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}
}
代码示例来源:origin: databricks/learning-spark
public static final JavaPairRDD<String, Long> ipAddressCount(
JavaRDD<ApacheAccessLog> accessLogRDD) {
return accessLogRDD
.mapToPair(new IpTuple())
.reduceByKey(new LongSumReducer());
}
代码示例来源:origin: databricks/learning-spark
public static final JavaPairRDD<Integer, Long> responseCodeCount(
JavaRDD<ApacheAccessLog> accessLogRDD) {
return accessLogRDD
.mapToPair(new ResponseCodeTuple())
.reduceByKey(new LongSumReducer());
}
代码示例来源:origin: databricks/learning-spark
public static final JavaPairRDD<String, Long> endpointCount(
JavaRDD<ApacheAccessLog> accessLogRDD) {
return accessLogRDD
.mapToPair(new EndPointTuple())
.reduceByKey(new LongSumReducer());
}
}
代码示例来源:origin: org.apache.spark/spark-core
@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4));
JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2));
JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1()));
assertEquals(Arrays.asList(
new Tuple2<>(1, 1),
new Tuple2<>(0, 2),
new Tuple2<>(1, 3),
new Tuple2<>(0, 4)), rdd3.collect());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4));
JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2));
JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1()));
assertEquals(Arrays.asList(
new Tuple2<>(1, 1),
new Tuple2<>(0, 2),
new Tuple2<>(1, 3),
new Tuple2<>(0, 4)), rdd3.collect());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4));
JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2));
JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1()));
assertEquals(Arrays.asList(
new Tuple2<>(1, 1),
new Tuple2<>(0, 2),
new Tuple2<>(1, 3),
new Tuple2<>(0, 4)), rdd3.collect());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD =
rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD =
rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD =
rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void collectAsMapWithIntArrayValues() {
// Regression test for SPARK-1040
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
pairRDD.collect(); // Works fine
pairRDD.collectAsMap(); // Used to crash with ClassCastException
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x))
.cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x))
.cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x))
.cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
内容来源于网络,如有侵权,请联系作者删除!