本文整理了Java中org.apache.spark.api.java.JavaPairRDD.collect()
方法的一些代码示例,展示了JavaPairRDD.collect()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.collect()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:collect
暂无
代码示例来源:origin: OryxProject/oryx
private static Collection<Tuple2<String,String>> collect(JavaPairRDD<String,String> rdd) {
    // Null-tolerant collect: a missing RDD is treated as an empty result set
    // rather than propagating an NPE to the caller.
    return rdd == null ? Collections.emptyList() : rdd.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void collectAsMapWithIntArrayValues() {
    // Regression check for SPARK-1040: collectAsMap over primitive-array values.
    JavaRDD<Integer> single = sc.parallelize(Arrays.asList(1));
    JavaPairRDD<Integer, int[]> arrayPairs =
        single.mapToPair(n -> new Tuple2<>(n, new int[]{n}));
    arrayPairs.collect();       // collect() was never affected by the bug
    arrayPairs.collectAsMap();  // formerly failed with ClassCastException
}
}
代码示例来源:origin: databricks/learning-spark
/**
 * Joins the records of two CSV files on the integer key produced by
 * {@code ParseLine} and materializes the joined pairs on the driver.
 *
 * @param master Spark master URL
 * @param csv1   path of the first CSV input
 * @param csv2   path of the second CSV input
 * @throws Exception if Spark job submission or execution fails
 */
public void run(String master, String csv1, String csv2) throws Exception {
    JavaSparkContext sc = new JavaSparkContext(
        master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    try {
        JavaRDD<String> csvFile1 = sc.textFile(csv1);
        JavaRDD<String> csvFile2 = sc.textFile(csv2);
        JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
        // BUG FIX: the second keyed RDD must come from csvFile2, not csvFile1 —
        // the original joined the first file with itself and never used csv2.
        JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
        JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
        List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
    } finally {
        // Release the context so repeated runs do not leak Spark resources.
        sc.stop();
    }
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void collectAsMapWithIntArrayValues() {
    // Exercises SPARK-1040: values of primitive-array type must survive collectAsMap.
    JavaRDD<Integer> oneElement = sc.parallelize(Arrays.asList(1));
    JavaPairRDD<Integer, int[]> pairs =
        oneElement.mapToPair(v -> new Tuple2<>(v, new int[]{v}));
    pairs.collect();       // plain collect() always worked
    pairs.collectAsMap();  // used to throw ClassCastException
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void keyBy() {
    // keyBy pairs each element with its toString() key, preserving element order.
    JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2));
    List<Tuple2<String, Integer>> keyed = nums.keyBy(Object::toString).collect();
    assertEquals(new Tuple2<>("1", 1), keyed.get(0));
    assertEquals(new Tuple2<>("2", 2), keyed.get(1));
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void collectAsMapWithIntArrayValues() {
    // SPARK-1040 regression: collectAsMap must handle int[] values.
    JavaRDD<Integer> source = sc.parallelize(Arrays.asList(1));
    JavaPairRDD<Integer, int[]> withArrays = source.mapToPair(k -> new Tuple2<>(k, new int[]{k}));
    withArrays.collect();       // unaffected by the bug
    withArrays.collectAsMap();  // previously crashed with ClassCastException
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void keyBy() {
    // Each element should be keyed by its string form, in input order.
    JavaRDD<Integer> values = sc.parallelize(Arrays.asList(1, 2));
    List<Tuple2<String, Integer>> result = values.keyBy(Object::toString).collect();
    Assert.assertEquals(new Tuple2<>("1", 1), result.get(0));
    Assert.assertEquals(new Tuple2<>("2", 2), result.get(1));
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void collectAsMapWithIntArrayValues() {
    // Guards against SPARK-1040: primitive-array values in collectAsMap.
    JavaRDD<Integer> input = sc.parallelize(Arrays.asList(1));
    JavaPairRDD<Integer, int[]> keyedArrays =
        input.mapToPair(e -> new Tuple2<>(e, new int[]{e}));
    keyedArrays.collect();       // sanity: collect() succeeds
    keyedArrays.collectAsMap();  // regression point: once threw ClassCastException
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void keyBy() {
    // Verifies keyBy(Object::toString) yields (string, original) tuples in order.
    JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2));
    List<Tuple2<String, Integer>> byString = ints.keyBy(Object::toString).collect();
    assertEquals(new Tuple2<>("1", 1), byString.get(0));
    assertEquals(new Tuple2<>("2", 2), byString.get(1));
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void collectAsMapWithIntArrayValues() {
    // Covers SPARK-1040: int[] values must round-trip through collectAsMap.
    JavaRDD<Integer> seed = sc.parallelize(Arrays.asList(1));
    JavaPairRDD<Integer, int[]> seeded = seed.mapToPair(i -> new Tuple2<>(i, new int[]{i}));
    seeded.collect();       // always succeeded
    seeded.collectAsMap();  // historically raised ClassCastException
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void keyBy() {
    // keyBy should attach each element's toString() as its key.
    JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2));
    List<Tuple2<String, Integer>> collected = data.keyBy(Object::toString).collect();
    Assert.assertEquals(new Tuple2<>("1", 1), collected.get(0));
    Assert.assertEquals(new Tuple2<>("2", 2), collected.get(1));
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void keyBy() {
    // Keys come from toString(); ordering follows the source RDD.
    JavaRDD<Integer> elems = sc.parallelize(Arrays.asList(1, 2));
    List<Tuple2<String, Integer>> pairs = elems.keyBy(Object::toString).collect();
    Assert.assertEquals(new Tuple2<>("1", 1), pairs.get(0));
    Assert.assertEquals(new Tuple2<>("2", 2), pairs.get(1));
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
    // Builds (n, n % 2) pairs, then swaps key and value with a second mapToPair.
    JavaRDD<Integer> base = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    JavaPairRDD<Integer, Integer> withParity = base.mapToPair(n -> new Tuple2<>(n, n % 2));
    JavaPairRDD<Integer, Integer> swapped =
        withParity.mapToPair(p -> new Tuple2<>(p._2(), p._1()));
    assertEquals(
        Arrays.asList(
            new Tuple2<>(1, 1),
            new Tuple2<>(0, 2),
            new Tuple2<>(1, 3),
            new Tuple2<>(0, 4)),
        swapped.collect());
}
代码示例来源:origin: org.apache.spark/spark-core
@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
    // Derives parity pairs from the source, then flips them to (parity, value).
    JavaRDD<Integer> source = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    JavaPairRDD<Integer, Integer> parityPairs = source.mapToPair(v -> new Tuple2<>(v, v % 2));
    JavaPairRDD<Integer, Integer> flipped =
        parityPairs.mapToPair(t -> new Tuple2<>(t._2(), t._1()));
    assertEquals(
        Arrays.asList(
            new Tuple2<>(1, 1),
            new Tuple2<>(0, 2),
            new Tuple2<>(1, 3),
            new Tuple2<>(0, 4)),
        flipped.collect());
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
    // Smoke-tests map variants (double, pair, string) with caching enabled.
    JavaRDD<Integer> source = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD asDoubles = source.mapToDouble(i -> i.doubleValue()).cache();
    asDoubles.collect();
    JavaPairRDD<Integer, Integer> asPairs = source.mapToPair(i -> new Tuple2<>(i, i)).cache();
    asPairs.collect();
    JavaRDD<String> asStrings = source.map(Object::toString).cache();
    asStrings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
    // Exercises mapToDouble, mapToPair and map, each followed by cache + collect.
    JavaRDD<Integer> nums = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD dbls = nums.mapToDouble(Integer::doubleValue).cache();
    dbls.collect();
    JavaPairRDD<Integer, Integer> selfPairs = nums.mapToPair(n -> new Tuple2<>(n, n)).cache();
    selfPairs.collect();
    JavaRDD<String> strs = nums.map(Object::toString).cache();
    strs.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
    // Sanity check for the three map flavours over a small cached RDD.
    JavaRDD<Integer> input = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD toDouble = input.mapToDouble(Integer::doubleValue).cache();
    toDouble.collect();
    JavaPairRDD<Integer, Integer> toPair = input.mapToPair(v -> new Tuple2<>(v, v)).cache();
    toPair.collect();
    JavaRDD<String> toString = input.map(Object::toString).cache();
    toString.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
    // Runs each map variant and materializes the cached result.
    JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD doubleView = ints.mapToDouble(x -> 1.0 * x).cache();
    doubleView.collect();
    JavaPairRDD<Integer, Integer> pairView = ints.mapToPair(x -> new Tuple2<>(x, x)).cache();
    pairView.collect();
    JavaRDD<String> stringView = ints.map(Object::toString).cache();
    stringView.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
    // Confirms mapToDouble / mapToPair / map all collect cleanly when cached.
    JavaRDD<Integer> seed = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD doubled = seed.mapToDouble(Integer::doubleValue).cache();
    doubled.collect();
    JavaPairRDD<Integer, Integer> paired = seed.mapToPair(e -> new Tuple2<>(e, e)).cache();
    paired.collect();
    JavaRDD<String> stringified = seed.map(Object::toString).cache();
    stringified.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
    // Covers the double, pair and string map projections of a cached RDD.
    JavaRDD<Integer> base = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
    JavaDoubleRDD numeric = base.mapToDouble(n -> 1.0 * n).cache();
    numeric.collect();
    JavaPairRDD<Integer, Integer> identityPairs = base.mapToPair(n -> new Tuple2<>(n, n)).cache();
    identityPairs.collect();
    JavaRDD<String> textual = base.map(Object::toString).cache();
    textual.collect();
}
内容来源于网络,如有侵权,请联系作者删除!