Usage and Code Examples of org.apache.spark.api.java.JavaRDD.collect()


This article collects code examples of the Java method org.apache.spark.api.java.JavaRDD.collect() and shows how JavaRDD.collect() is used in practice. The examples are taken from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as a useful reference. Details of the JavaRDD.collect() method:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: collect

About JavaRDD.collect

collect() is an action: it triggers evaluation of the RDD and returns all of its elements to the driver as a java.util.List. Because the entire dataset is materialized in driver memory, it should only be called when the result is known to be small, for example in tests or after filtering.
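A minimal, self-contained sketch of that behavior is shown below (the application name and the local[*] master URL are placeholders used only for illustration, not taken from the examples that follow):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class CollectExample {
  public static void main(String[] args) {
    // Run locally, using as many worker threads as there are cores.
    JavaSparkContext sc = new JavaSparkContext("local[*]", "collectExample");
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    // collect() is an action: it computes the RDD and copies every element
    // back to the driver as a java.util.List.
    List<Integer> values = rdd.collect();
    System.out.println(values); // prints [1, 2, 3, 4]
    sc.stop();
  }
}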

Code Examples

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
    master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> result = rdd.map(
    new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  System.out.println(StringUtils.join(result.collect(), ","));
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void mapPartitions() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
   int sum = 0;
   while (iter.hasNext()) {
    sum += iter.next();
   }
   return Collections.singletonList(sum).iterator();
  });
 assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: org.apache.spark/spark-core

@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
 rdd.saveAsObjectFile(outputDir);
 // Try reading the output back as an object file
 JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
 assertEquals(pairs, readRDD.collect());
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void binaryRecords() throws Exception {
 // Reusing the wholeText files example
 byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
 int numOfCopies = 10;
 String tempDirName = tempDir.getAbsolutePath();
 File file1 = new File(tempDirName + "/part-00000");
 FileOutputStream fos1 = new FileOutputStream(file1);
 FileChannel channel1 = fos1.getChannel();
 for (int i = 0; i < numOfCopies; i++) {
  ByteBuffer bbuf = ByteBuffer.wrap(content1);
  channel1.write(bbuf);
 }
 channel1.close();
 JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
 assertEquals(numOfCopies,readRDD.count());
 List<byte[]> result = readRDD.collect();
 for (byte[] res : result) {
  assertArrayEquals(content1, res);
 }
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x))
  .cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void mapsFromPairsToPairs() {
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
 // Regression test for SPARK-668:
 JavaPairRDD<String, Integer> swapped =
  pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
 swapped.collect();
 // There was never a bug here, but it's worth testing:
 pairRDD.map(Tuple2::swap).collect();
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void mapPartitionsWithIndex() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitionsWithIndex((index, iter) -> {
   int sum = 0;
   while (iter.hasNext()) {
    sum += iter.next();
   }
   return Collections.singletonList(sum).iterator();
  }, false);
 assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: org.apache.spark/spark-core_2.10

@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
 rdd.saveAsObjectFile(outputDir);
 // Try reading the output back as an object file
 JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
 assertEquals(pairs, readRDD.collect());
}

Code example source: org.apache.spark/spark-core

@Test
public void binaryRecords() throws Exception {
 // Reusing the wholeText files example
 byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
 int numOfCopies = 10;
 String tempDirName = tempDir.getAbsolutePath();
 File file1 = new File(tempDirName + "/part-00000");
 FileOutputStream fos1 = new FileOutputStream(file1);
 FileChannel channel1 = fos1.getChannel();
 for (int i = 0; i < numOfCopies; i++) {
  ByteBuffer bbuf = ByteBuffer.wrap(content1);
  channel1.write(bbuf);
 }
 channel1.close();
 JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
 assertEquals(numOfCopies,readRDD.count());
 List<byte[]> result = readRDD.collect();
 for (byte[] res : result) {
  assertArrayEquals(content1, res);
 }
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void mapsFromPairsToPairs() {
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
 // Regression test for SPARK-668:
 JavaPairRDD<String, Integer> swapped =
  pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
 swapped.collect();
 // There was never a bug here, but it's worth testing:
 pairRDD.map(Tuple2::swap).collect();
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
    master, "basicmapfilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> squared = rdd.map(
    new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  JavaRDD<Integer> result = squared.filter(
    new Function<Integer, Boolean>() { public Boolean call(Integer x) { return x != 1; } });
  System.out.println(StringUtils.join(result.collect(), ","));
}

Code example source: org.apache.spark/spark-core

@Test
public void mapPartitions() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
  int sum = 0;
  while (iter.hasNext()) {
   sum += iter.next();
  }
  return Collections.singletonList(sum).iterator();
 });
 Assert.assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: org.apache.spark/spark-core_2.11

@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
 rdd.saveAsObjectFile(outputDir);
 // Try reading the output back as an object file
 JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
 assertEquals(pairs, readRDD.collect());
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void binaryRecords() throws Exception {
 // Reusing the wholeText files example
 byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
 int numOfCopies = 10;
 String tempDirName = tempDir.getAbsolutePath();
 File file1 = new File(tempDirName + "/part-00000");
 FileOutputStream fos1 = new FileOutputStream(file1);
 FileChannel channel1 = fos1.getChannel();
 for (int i = 0; i < numOfCopies; i++) {
  ByteBuffer bbuf = ByteBuffer.wrap(content1);
  channel1.write(bbuf);
 }
 channel1.close();
 JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
 assertEquals(numOfCopies,readRDD.count());
 List<byte[]> result = readRDD.collect();
 for (byte[] res : result) {
  assertArrayEquals(content1, res);
 }
}

Code example source: org.apache.spark/spark-core

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}

Code example source: org.apache.spark/spark-core

@Test
public void mapsFromPairsToPairs() {
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
 // Regression test for SPARK-668:
 JavaPairRDD<String, Integer> swapped =
  pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
 swapped.collect();
 // There was never a bug here, but it's worth testing:
 pairRDD.map(Tuple2::swap).collect();
}

Code example source: databricks/learning-spark

JavaRDD<String> rdd = sc.parallelize(
  Arrays.asList("KK6JKQ", "Ve3UoW", "kk6jlk", "W6BB"));
// The mapPartitions body is truncated in the source; as a minimal stand-in
// (assuming the Spark 2.x API, where the partition function returns an
// Iterator), upper-case each call sign in the partition:
JavaRDD<String> result = rdd.mapPartitions(iter -> {
  List<String> out = new ArrayList<>();
  iter.forEachRemaining(s -> out.add(s.toUpperCase()));
  return out.iterator();
});
System.out.println(StringUtils.join(result.collect(), ","));

Code example source: org.apache.spark/spark-core

@Test
public void mapPartitions() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
   int sum = 0;
   while (iter.hasNext()) {
    sum += iter.next();
   }
   return Collections.singletonList(sum).iterator();
  });
 assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: org.apache.spark/spark-core

@SuppressWarnings("unchecked")
@Test
public void hadoopFile() {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 List<Tuple2<Integer, String>> pairs = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa")
 );
 JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
 rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
  .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class);
 JavaPairRDD<IntWritable, Text> output = sc.hadoopFile(outputDir,
  SequenceFileInputFormat.class, IntWritable.class, Text.class);
 assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
