This article collects code examples of the Java method org.apache.spark.api.java.JavaPairRDD.map() and shows how JavaPairRDD.map() is used in practice. The examples are extracted from selected open-source projects published on platforms such as GitHub, Stack Overflow, and Maven, so they are reasonably representative references. Details of JavaPairRDD.map() are as follows:
Package path: org.apache.spark.api.java.JavaPairRDD
Class name: JavaPairRDD
Method name: map
Description: none available
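For orientation before the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the method's shape: JavaPairRDD.map() applies a function to each Tuple2 element and returns a plain JavaRDD of the results. The class name JavaPairRddMapSketch and the local[*] master are illustrative assumptions.

// Minimal sketch of JavaPairRDD.map(); names here are illustrative, not from the projects below.
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class JavaPairRddMapSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaPairRddMapSketch").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      List<Tuple2<Integer, String>> pairs = Arrays.asList(
          new Tuple2<>(1, "a"),
          new Tuple2<>(2, "aa"));
      JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
      // map() takes a Function<Tuple2<K,V>, R> and yields a JavaRDD<R>.
      JavaRDD<String> formatted = pairRDD.map(t -> t._1() + "=" + t._2());
      formatted.collect().forEach(System.out::println); // prints 1=a, 2=aa
    }
  }
}

Note that map() on a pair RDD yields a JavaRDD rather than a JavaPairRDD; when the result should stay keyed, the examples below use mapToPair() instead.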
Code example source: OryxProject/oryx
private static void saveFeaturesRDD(JavaPairRDD<Integer,float[]> features,
                                    Path path,
                                    Broadcast<? extends Map<Integer,String>> bIndexToID) {
  log.info("Saving features RDD to {}", path);
  features.map(keyAndVector -> {
    String id = bIndexToID.value().get(keyAndVector._1());
    float[] vector = keyAndVector._2();
    return TextUtils.joinJSON(Arrays.asList(id, vector));
  }).saveAsTextFile(path.toString(), GzipCodec.class);
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)
@SuppressWarnings("unchecked")
@Test
public void hadoopFile() {
  String outputDir = new File(tempDir, "output").getAbsolutePath();
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
    new Tuple2<>(1, "a"),
    new Tuple2<>(2, "aa"),
    new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
  rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
    .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class);
  JavaPairRDD<IntWritable, Text> output = sc.hadoopFile(outputDir,
    SequenceFileInputFormat.class, IntWritable.class, Text.class);
  assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
Code example source: OryxProject/oryx
/**
 * Combines {@link Rating}s with the same user/item into one, with score as the sum of
 * all of the scores.
 */
private JavaRDD<Rating> aggregateScores(JavaRDD<? extends Rating> original, double epsilon) {
  JavaPairRDD<Tuple2<Integer,Integer>,Double> tuples =
      original.mapToPair(rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
  JavaPairRDD<Tuple2<Integer,Integer>,Double> aggregated;
  if (implicit) {
    // TODO can we avoid groupByKey? reduce, combine, fold don't seem viable since
    // they don't guarantee the delete elements are properly handled
    aggregated = tuples.groupByKey().mapValues(MLFunctions.SUM_WITH_NAN);
  } else {
    // For non-implicit, last wins.
    aggregated = tuples.foldByKey(Double.NaN, (current, next) -> next);
  }
  JavaPairRDD<Tuple2<Integer,Integer>,Double> noNaN =
      aggregated.filter(kv -> !Double.isNaN(kv._2()));
  if (logStrength) {
    return noNaN.map(userProductScore -> new Rating(
        userProductScore._1()._1(),
        userProductScore._1()._2(),
        Math.log1p(userProductScore._2() / epsilon)));
  } else {
    return noNaN.map(userProductScore -> new Rating(
        userProductScore._1()._1(),
        userProductScore._1()._2(),
        userProductScore._2()));
  }
}
Code example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in spark-core and spark-core_2.10)
@SuppressWarnings("unchecked")
@Test
public void hadoopFileCompressed() {
  String outputDir = new File(tempDir, "output_compressed").getAbsolutePath();
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
    new Tuple2<>(1, "a"),
    new Tuple2<>(2, "aa"),
    new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
  rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
    .saveAsHadoopFile(outputDir, IntWritable.class, Text.class,
      SequenceFileOutputFormat.class, DefaultCodec.class);
  JavaPairRDD<IntWritable, Text> output = sc.hadoopFile(outputDir,
    SequenceFileInputFormat.class, IntWritable.class, Text.class);
  assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
Code example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in spark-core and spark-core_2.10)
@Test
public void mapsFromPairsToPairs() {
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
    new Tuple2<>(1, "a"),
    new Tuple2<>(2, "aa"),
    new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> pairRDD = sc.parallelizePairs(pairs);
  // Regression test for SPARK-668:
  JavaPairRDD<String, Integer> swapped =
    pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap()).iterator());
  swapped.collect();
  // There was never a bug here, but it's worth testing:
  pairRDD.map(Tuple2::swap).collect();
}
Code example source: OryxProject/oryx
// Fragment as published: model, noNaN, and inputRDD are defined in the surrounding code.
if (model.isLogStrength()) {
  double epsilon = model.getEpsilon();
  inputRDD = noNaN.map(tuple -> new UserItemStrength(tuple._1()._1(), tuple._1()._2(),
      (float) Math.log1p(tuple._2() / epsilon)));
} else {
  inputRDD = noNaN.map(tuple -> new UserItemStrength(tuple._1()._1(), tuple._1()._2(),
      tuple._2().floatValue()));
}
Code example source: org.apache.spark/spark-core_2.10 (the same test appears verbatim in spark-core and spark-core_2.11)
@SuppressWarnings("unchecked")
@Test
public void readWithNewAPIHadoopFile() throws IOException {
  String outputDir = new File(tempDir, "output").getAbsolutePath();
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
    new Tuple2<>(1, "a"),
    new Tuple2<>(2, "aa"),
    new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
  rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
    .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class);
  JavaPairRDD<IntWritable, Text> output = sc.newAPIHadoopFile(outputDir,
    org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class,
    IntWritable.class, Text.class, Job.getInstance().getConfiguration());
  assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
Code example source: deeplearning4j/dl4j-examples
JavaRDD<DataSet> data = sc.binaryFiles(testDir + "/*").map(new LoadDataFunction());
Code example source: org.apache.spark/spark-core_2.10 (the same test appears verbatim in spark-core and spark-core_2.11)
@SuppressWarnings("unchecked")
@Test
public void writeWithNewAPIHadoopFile() {
  String outputDir = new File(tempDir, "output").getAbsolutePath();
  List<Tuple2<Integer, String>> pairs = Arrays.asList(
    new Tuple2<>(1, "a"),
    new Tuple2<>(2, "aa"),
    new Tuple2<>(3, "aaa")
  );
  JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(pairs);
  rdd.mapToPair(pair -> new Tuple2<>(new IntWritable(pair._1()), new Text(pair._2())))
    .saveAsNewAPIHadoopFile(outputDir, IntWritable.class, Text.class,
      org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);
  JavaPairRDD<IntWritable, Text> output =
    sc.sequenceFile(outputDir, IntWritable.class, Text.class);
  assertEquals(pairs.toString(), output.map(Tuple2::toString).collect().toString());
}
Code example source: SeldonIO/seldon-server
Map<String, Integer> userIdLookupMap_wrapped = new HashMap<String, Integer>(userIdLookupMap);
final Broadcast<Map<String, Integer>> broadcastVar = jsc.broadcast(userIdLookupMap_wrapped);
JavaRDD<String> json_only_with_zeros =
    filtered_by_client.map(new Function<Tuple2<String, ActionData>, String>() {
      // ... (anonymous Function body truncated in the source snippet)