本文整理了Java中org.apache.spark.api.java.JavaRDD.collect()
方法的一些代码示例,展示了JavaRDD.collect()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度帮忙到你。JavaRDD.collect()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:collect
暂无
代码示例来源:origin: databricks/learning-spark
public static void main(String[] args) throws Exception {
String master;
if (args.length > 0) {
master = args[0];
} else {
master = "local";
}
JavaSparkContext sc = new JavaSparkContext(
master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
JavaRDD<Integer> result = rdd.map(
new Function<Integer, Integer>() { public Integer call(Integer x) { return x*x;}});
System.out.println(StringUtils.join(result.collect(), ","));
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void mapPartitions() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
int sum = 0;
while (iter.hasNext()) {
sum += iter.next();
}
return Collections.singletonList(sum).iterator();
});
assertEquals("[3, 7]", partitionSums.collect().toString());
}
代码示例来源:origin: org.apache.spark/spark-core
@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
String outputDir = new File(tempDir, "output").getAbsolutePath();
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
rdd.saveAsObjectFile(outputDir);
// Try reading the output back as an object file
JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
assertEquals(pairs, readRDD.collect());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void binaryRecords() throws Exception {
// Reusing the wholeText files example
byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
int numOfCopies = 10;
String tempDirName = tempDir.getAbsolutePath();
File file1 = new File(tempDirName + "/part-00000");
FileOutputStream fos1 = new FileOutputStream(file1);
FileChannel channel1 = fos1.getChannel();
for (int i = 0; i < numOfCopies; i++) {
ByteBuffer bbuf = ByteBuffer.wrap(content1);
channel1.write(bbuf);
}
channel1.close();
JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
assertEquals(numOfCopies,readRDD.count());
List<byte[]> result = readRDD.collect();
for (byte[] res : result) {
assertArrayEquals(content1, res);
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x))
.cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void mapsFromPairsToPairs() {
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
// Regression test for SPARK-668:
JavaPairRDD<String, Integer> swapped =
pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
swapped.collect();
// There was never a bug here, but it's worth testing:
pairRDD.map(Tuple2::swap).collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void mapPartitionsWithIndex() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
JavaRDD<Integer> partitionSums = rdd.mapPartitionsWithIndex((index, iter) -> {
int sum = 0;
while (iter.hasNext()) {
sum += iter.next();
}
return Collections.singletonList(sum).iterator();
}, false);
assertEquals("[3, 7]", partitionSums.collect().toString());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
String outputDir = new File(tempDir, "output").getAbsolutePath();
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
rdd.saveAsObjectFile(outputDir);
// Try reading the output back as an object file
JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
assertEquals(pairs, readRDD.collect());
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void binaryRecords() throws Exception {
// Reusing the wholeText files example
byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
int numOfCopies = 10;
String tempDirName = tempDir.getAbsolutePath();
File file1 = new File(tempDirName + "/part-00000");
FileOutputStream fos1 = new FileOutputStream(file1);
FileChannel channel1 = fos1.getChannel();
for (int i = 0; i < numOfCopies; i++) {
ByteBuffer bbuf = ByteBuffer.wrap(content1);
channel1.write(bbuf);
}
channel1.close();
JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
assertEquals(numOfCopies,readRDD.count());
List<byte[]> result = readRDD.collect();
for (byte[] res : result) {
assertArrayEquals(content1, res);
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void mapsFromPairsToPairs() {
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
// Regression test for SPARK-668:
JavaPairRDD<String, Integer> swapped =
pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
swapped.collect();
// There was never a bug here, but it's worth testing:
pairRDD.map(Tuple2::swap).collect();
}
代码示例来源:origin: databricks/learning-spark
public static void main(String[] args) throws Exception {
String master;
if (args.length > 0) {
master = args[0];
} else {
master = "local";
}
JavaSparkContext sc = new JavaSparkContext(
master, "basicmapfilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
JavaRDD<Integer> squared = rdd.map(
new Function<Integer, Integer>() { public Integer call(Integer x) { return x*x;}});
JavaRDD<Integer> result = squared.filter(
new Function<Integer, Boolean>() { public Boolean call(Integer x) { return x != 1; }});
System.out.println(StringUtils.join(result.collect(), ","));
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void mapPartitions() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
int sum = 0;
while (iter.hasNext()) {
sum += iter.next();
}
return Collections.singletonList(sum).iterator();
});
Assert.assertEquals("[3, 7]", partitionSums.collect().toString());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@SuppressWarnings("unchecked")
@Test
public void objectFilesOfComplexTypes() {
String outputDir = new File(tempDir, "output").getAbsolutePath();
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
rdd.saveAsObjectFile(outputDir);
// Try reading the output back as an object file
JavaRDD<Tuple2<Integer, String>> readRDD = sc.objectFile(outputDir);
assertEquals(pairs, readRDD.collect());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void binaryRecords() throws Exception {
// Reusing the wholeText files example
byte[] content1 = "spark isn't always easy to use.\n".getBytes(StandardCharsets.UTF_8);
int numOfCopies = 10;
String tempDirName = tempDir.getAbsolutePath();
File file1 = new File(tempDirName + "/part-00000");
FileOutputStream fos1 = new FileOutputStream(file1);
FileChannel channel1 = fos1.getChannel();
for (int i = 0; i < numOfCopies; i++) {
ByteBuffer bbuf = ByteBuffer.wrap(content1);
channel1.write(bbuf);
}
channel1.close();
JavaRDD<byte[]> readRDD = sc.binaryRecords(tempDirName, content1.length);
assertEquals(numOfCopies,readRDD.count());
List<byte[]> result = readRDD.collect();
for (byte[] res : result) {
assertArrayEquals(content1, res);
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
doubles.collect();
JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
pairs.collect();
JavaRDD<String> strings = rdd.map(Object::toString).cache();
strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void mapsFromPairsToPairs() {
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
// Regression test for SPARK-668:
JavaPairRDD<String, Integer> swapped =
pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
swapped.collect();
// There was never a bug here, but it's worth testing:
pairRDD.map(Tuple2::swap).collect();
}
代码示例来源:origin: databricks/learning-spark
JavaRDD<String> rdd = sc.parallelize(
Arrays.asList("KK6JKQ", "Ve3UoW", "kk6jlk", "W6BB"));
JavaRDD<String> result = rdd.mapPartitions(
System.out.println(StringUtils.join(result.collect(), ","));
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void mapPartitions() {
JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
int sum = 0;
while (iter.hasNext()) {
sum += iter.next();
}
return Collections.singletonList(sum).iterator();
});
assertEquals("[3, 7]", partitionSums.collect().toString());
}
代码示例来源:origin: org.apache.spark/spark-core
@SuppressWarnings("unchecked")
@Test
public void hadoopFile() {
String outputDir = new File(tempDir, "output").getAbsolutePath();
List<Tuple2<Integer, String>> pairs = Arrays.asList(
new Tuple2<>(1, "a"),
new Tuple2<>(2, "aa"),
new Tuple2<>(3, "aaa")
);
JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
.saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class);
JavaPairRDD<IntWritable, Text> output = sc.hadoopFile(outputDir,
SequenceFileInputFormat.class, IntWritable.class, Text.class);
assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
内容来源于网络,如有侵权,请联系作者删除!