本文整理了Java中org.apache.spark.api.java.JavaRDD.flatMap()
方法的一些代码示例,展示了JavaRDD.flatMap()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.flatMap()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:flatMap
暂无
代码示例来源:origin: databricks/learning-spark
/**
 * Reads a text file and uses flatMap to split every line into words,
 * then prints each distinct word with its occurrence count.
 *
 * <p>Usage: BasicFlatMap sparkMaster inputFile
 *
 * @param args [0] = Spark master URL, [1] = input file path
 * @throws Exception if the argument count is wrong
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicFlatMap sparkMaster inputFile");
  }
  JavaSparkContext sc = new JavaSparkContext(
      args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  try {
    JavaRDD<String> rdd = sc.textFile(args[1]);
    // flatMap: each input line produces zero or more words, flattened into one RDD.
    JavaRDD<String> words = rdd.flatMap(
        new FlatMapFunction<String, String>() { public Iterable<String> call(String x) {
          return Arrays.asList(x.split(" "));
        }});
    Map<String, Long> result = words.countByValue();
    for (Entry<String, Long> entry: result.entrySet()) {
      System.out.println(entry.getKey() + ":" + entry.getValue());
    }
  } finally {
    // Fixed: stop the context even if the job throws, so resources are released.
    sc.stop();
  }
}
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairs.count(); the doubles RDD was never verified.
  Assert.assertEquals(11, doubles.count());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairs.count(); the doubles RDD was never verified.
  Assert.assertEquals(11, doubles.count());
}
代码示例来源:origin: databricks/learning-spark
/**
 * Classic word count: split each line into words, pair every word with 1,
 * sum the counts per word, and write the result as text.
 *
 * <p>Usage: WordCount sparkMaster inputFile outputDir
 *
 * @param args [0] = Spark master URL, [1] = input path, [2] = output path
 * @throws Exception if the argument count is wrong
 */
public static void main(String[] args) throws Exception {
  // Fixed: validate arguments instead of failing with ArrayIndexOutOfBoundsException.
  if (args.length != 3) {
    throw new Exception("Usage WordCount sparkMaster inputFile outputDir");
  }
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(
      master, "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  try {
    JavaRDD<String> rdd = sc.textFile(args[1]);
    JavaPairRDD<String, Integer> counts = rdd.flatMap(
        new FlatMapFunction<String, String>() {
          public Iterable<String> call(String x) {
            return Arrays.asList(x.split(" "));
          }}).mapToPair(new PairFunction<String, String, Integer>(){
          public Tuple2<String, Integer> call(String x){
            // Fixed: raw Tuple2 -> diamond operator, removes unchecked warning.
            return new Tuple2<>(x, 1);
          }}).reduceByKey(new Function2<Integer, Integer, Integer>(){
          public Integer call(Integer x, Integer y){ return x + y; }});
    counts.saveAsTextFile(args[2]);
  } finally {
    // Fixed: stop the context even if the job throws, so resources are released.
    sc.stop();
  }
}
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  }
  );
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairsRDD.count(); the doubles RDD was never verified.
  assertEquals(11, doubles.count());
}
代码示例来源:origin: databricks/learning-spark
JavaRDD<String> callSigns = rdd.flatMap(
new FlatMapFunction<String, String>() { public Iterable<String> call(String line) {
if (line.equals("")) {
String distScriptName = "finddistance.R";
sc.addFile(distScript);
JavaRDD<String> pipeInputs = contactsContactLists.values().map(new VerifyCallLogs()).flatMap(
new FlatMapFunction<CallLog[], String>() { public Iterable<String> call(CallLog[] calls) {
ArrayList<String> latLons = new ArrayList<String>();
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  }
  );
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairsRDD.count(); the doubles RDD was never verified.
  assertEquals(11, doubles.count());
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
// Split each input line into words via the project's Util helper.
// NOTE(review): presumably convertStringToWords tokenizes the line; N's role
// (word length? n-gram size?) is not visible here — confirm against Util.
// Spark 2.x flatMap requires an Iterator, hence the trailing .iterator().
JavaRDD<String> words = lines.flatMap((String s) -> {
return Util.convertStringToWords(s, N).iterator();
});
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  Assert.assertEquals("Hello", words.first());
  Assert.assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs2 = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs2.add(new Tuple2<>(word, word));
    }
    return pairs2.iterator();
  });
  Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
  Assert.assertEquals(11, pairs.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  Assert.assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairs.count(); the doubles RDD was never verified.
  Assert.assertEquals(11, doubles.count());
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
// Split each line into words via the project's Util helper; .iterator() is
// required by Spark 2.x flatMap. NOTE(review): N's meaning is not visible
// here — confirm against Util.convertLineToWords.
JavaRDD<String> words = lines.flatMap((String line) -> Util.convertLineToWords(line, N).iterator());
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void flatMap() {
  // flatMap: every element maps to zero-or-more outputs, flattened into one RDD.
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
    "The quick brown fox jumps over the lazy dog."));
  JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
  assertEquals("Hello", words.first());
  assertEquals(11, words.count());
  // flatMapToPair: flatten each line into (word, word) key/value pairs.
  JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
    List<Tuple2<String, String>> pairs = new LinkedList<>();
    for (String word : s.split(" ")) {
      pairs.add(new Tuple2<>(word, word));
    }
    return pairs.iterator();
  }
  );
  assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
  assertEquals(11, pairsRDD.count());
  // flatMapToDouble: flatten each line into the lengths of its words.
  JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
    List<Double> lengths = new LinkedList<>();
    for (String word : s.split(" ")) {
      lengths.add((double) word.length());
    }
    return lengths.iterator();
  });
  assertEquals(5.0, doubles.first(), 0.01);
  // Fixed: was re-asserting pairsRDD.count(); the doubles RDD was never verified.
  assertEquals(11, doubles.count());
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
代码示例来源:origin: mahmoudparsian/data-algorithms-book
lines.flatMap((String s) -> Util.convertToPairOfWordAndDocument(s, N).iterator()
);
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
代码示例来源:origin: mahmoudparsian/data-algorithms-book
lines.flatMap(new FlatMapFunction<
代码示例来源:origin: Impetus/Kundera
/**
 * Registers the MongoDB collection backing entity {@code m} as a temporary
 * Spark SQL table named after the entity's table name.
 *
 * <p>Builds a {@code mongo.input.uri} from the Spark configuration's
 * {@code hostname}/{@code portname} settings, loads the collection as an RDD
 * of BSON documents, converts each document into an instance of the entity
 * class, and exposes the result via {@code registerTempTable}.
 *
 * @param m           metadata of the entity (class, schema, table name)
 * @param sparkClient client holding the Spark context and SQL context
 */
public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
// NOTE(review): raw Class is used because the entity type is only known at
// runtime — confirm whether Class<?> would be acceptable here.
final Class clazz = m.getEntityClazz();
SparkContext sc = sparkClient.sparkContext;
Configuration config = new Configuration();
config.set(
"mongo.input.uri",
buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
m.getTableName()));
// Read the collection as (key, BSONObject) pairs via the Mongo Hadoop connector.
JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
Object.class, BSONObject.class).toJavaRDD();
// Convert each BSON document into a Java entity object; flatMap is used with a
// single-element list, so the output has one entity per input document.
JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
{
@Override
public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
{
BSONObject obj = arg._2();
Object javaObject = generateJavaObjectFromBSON(obj, clazz);
return Arrays.asList(javaObject);
}
});
// Expose the converted RDD to Spark SQL under the entity's table name.
sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
.flatMap(new FlatMapFunction<String, Row>() {
private static final long serialVersionUID = 5481855142090322683L;
代码示例来源:origin: org.datavec/datavec-spark
/**
 * Analyzes the data quality of sequence data by flattening every sequence
 * into its individual records and delegating to {@code analyzeQuality}.
 *
 * @param schema schema describing the columns of the data
 * @param data   sequence data: each RDD element is one sequence, i.e. a list
 *               of records (each record being a list of writables)
 * @return the data-quality analysis computed over all flattened records
 */
public static DataQualityAnalysis analyzeQualitySequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
// Flatten: one output element per record of every sequence.
JavaRDD<List<Writable>> fmSeq = data.flatMap(new SequenceFlatMapFunction());
return analyzeQuality(schema, fmSeq);
}
代码示例来源:origin: apache/tinkerpop
final JavaRDD<Traverser.Admin<Object>> nextRDD = inputRDD.values()
.flatMap(vertexWritable -> {
if (identityTraversal) // g.V.count()-style (identity)
return IteratorUtils.of(traversal.getTraverserGenerator().generate(vertexWritable.get(), (Step) graphStep, 1l));
代码示例来源:origin: com.davidbracewell/mango
/**
 * Flat-maps each element of this stream: {@code mapper} produces a stream of
 * results per element, and all results are flattened into one SparkStream.
 *
 * @param <R>    the element type of the resulting stream
 * @param mapper function producing a stream of outputs for each input element
 * @return a new SparkStream over the flattened results
 */
@Override
public <R> SparkStream<R> flatMap(@NonNull SerializableFunction<? super T, Stream<? extends R>> mapper) {
return new SparkStream<>(rdd.flatMap(t -> {
// Re-apply the broadcast configuration on the executor before invoking user
// code, so the mapper sees the driver's configuration remotely.
Configurator.INSTANCE.configure(configBroadcast.value());
return Cast.as(mapper.apply(t).iterator());
}));
}
内容来源于网络,如有侵权,请联系作者删除!