Usage of org.apache.spark.api.java.JavaPairRDD.collect() with code examples

This article collects Java code examples of the org.apache.spark.api.java.JavaPairRDD.collect() method and shows how JavaPairRDD.collect() is used in practice. The examples come from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the JavaPairRDD.collect() method:
Package: org.apache.spark.api.java
Class: JavaPairRDD
Method: collect

About JavaPairRDD.collect

JavaPairRDD.collect() is an action: it evaluates the RDD and returns all of its key-value pairs to the driver as a List<Tuple2<K, V>>. Because the whole dataset is materialized in driver memory, it should only be called on RDDs small enough to fit there.
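A minimal, self-contained sketch of the basic usage (the local master, application name, and sample data below are illustrative, not taken from the quoted projects):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class CollectExample {
 public static void main(String[] args) {
  SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("collect-example");
  try (JavaSparkContext sc = new JavaSparkContext(conf)) {
   // Build a small pair RDD of (word, length) tuples.
   JavaPairRDD<String, Integer> pairs = sc
     .parallelize(Arrays.asList("spark", "java", "rdd"))
     .mapToPair(w -> new Tuple2<>(w, w.length()));
   // collect() is an action: it ships every partition back to the driver.
   List<Tuple2<String, Integer>> result = pairs.collect();
   result.forEach(t -> System.out.println(t._1() + " -> " + t._2()));
  }
 }
}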

Code examples

Code example source: OryxProject/oryx

private static Collection<Tuple2<String,String>> collect(JavaPairRDD<String,String> rdd) {
 // Treat a missing RDD as an empty result rather than calling collect() on null.
 if (rdd == null) {
  return Collections.emptyList();
 } else {
  return rdd.collect();
 }
}

Code example source: org.apache.spark/spark-core_2.11 (the same test also appears in the spark-core and spark-core_2.10 artifacts)

@Test
public void collectAsMapWithIntArrayValues() {
 // Regression test for SPARK-1040
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
 JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
 pairRDD.collect();  // Works fine
 pairRDD.collectAsMap();  // Used to crash with ClassCastException
}
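For context on the test above: collect() returns the pairs as a List, while collectAsMap() materializes them into a java.util.Map on the driver. Per the test's comments, collectAsMap() used to throw a ClassCastException when the value type was a primitive array (SPARK-1040). A short sketch of the now-working call, assuming an existing JavaSparkContext named sc as in the test fixture:

// Assumes an existing JavaSparkContext sc and imports for
// java.util.Arrays, java.util.Map, and scala.Tuple2.
JavaPairRDD<Integer, int[]> pairRDD = sc
  .parallelize(Arrays.asList(1, 2))
  .mapToPair(x -> new Tuple2<>(x, new int[]{x}));
Map<Integer, int[]> asMap = pairRDD.collectAsMap();  // a java.util.Map on the driver
asMap.forEach((k, v) -> System.out.println(k + " -> " + Arrays.toString(v)));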

Code example source: databricks/learning-spark

public void run(String master, String csv1, String csv2) throws Exception {
 JavaSparkContext sc = new JavaSparkContext(
   master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
 JavaRDD<String> csvFile1 = sc.textFile(csv1);
 JavaRDD<String> csvFile2 = sc.textFile(csv2);
 // Key both files with the same parser, then join them on that key.
 JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
 JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
 JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
 List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}
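Note that result.collect() pulls the entire join output back to the driver. For inputs that may not fit in driver memory, prefer writing the result out (for example with saveAsTextFile) or inspecting a sample with take(n) instead.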

Code example source: org.apache.spark/spark-core (the same test also appears in the spark-core_2.10 and spark-core_2.11 artifacts)

@Test
public void keyBy() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2));
 List<Tuple2<String, Integer>> s = rdd.keyBy(Object::toString).collect();
 assertEquals(new Tuple2<>("1", 1), s.get(0));
 assertEquals(new Tuple2<>("2", 2), s.get(1));
}

Code example source: org.apache.spark/spark-core_2.10 (the same test also appears in the spark-core artifact)

@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
 JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4));
 JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2));
 JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1()));
 assertEquals(Arrays.asList(
  new Tuple2<>(1, 1),
  new Tuple2<>(0, 2),
  new Tuple2<>(1, 3),
  new Tuple2<>(0, 4)), rdd3.collect());
}

Code example source: org.apache.spark/spark-core (near-identical copies, differing only in the mapToDouble lambda, appear in the spark-core_2.10 and spark-core_2.11 artifacts)

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}
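Each collect() call in this test forces evaluation of the corresponding RDD; since cache() is lazy, it is these actions that actually populate the cache.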
