Usage of org.apache.spark.api.java.JavaRDD.flatMap() with code examples

This article collects Java code examples of the org.apache.spark.api.java.JavaRDD.flatMap() method and shows how it is used in practice. The examples were extracted from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should be of real reference value. Details of JavaRDD.flatMap() are as follows:

Package path: org.apache.spark.api.java.JavaRDD
Class name: JavaRDD
Method name: flatMap

About JavaRDD.flatMap

Returns a new RDD by first applying a function to every element of this RDD and then flattening the results.
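
To make the signature concrete before the collected examples, here is a minimal, self-contained sketch (the class name, app name, and sample data are illustrative, not taken from the projects below). It assumes the Spark 2.x+ API, in which the function passed to flatMap returns an Iterator:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class FlatMapSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("flatMapSketch").setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    JavaRDD<String> lines = sc.parallelize(Arrays.asList("hello world", "flat map"));
    // Each line maps to zero or more words; flatMap flattens them into one RDD.
    JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

    List<String> collected = words.collect();
    System.out.println(collected); // [hello, world, flat, map]
    sc.stop();
  }
}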

Code examples

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage: BasicFlatMap sparkMaster inputFile");
  }

  JavaSparkContext sc = new JavaSparkContext(
      args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  // Split each line into words; flatMap flattens the per-line lists into one RDD.
  JavaRDD<String> words = rdd.flatMap(
      new FlatMapFunction<String, String>() {
        public Iterable<String> call(String x) {
          return Arrays.asList(x.split(" "));
        }
      });
  Map<String, Long> result = words.countByValue();
  for (Entry<String, Long> entry : result.entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue());
  }
}
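
Note that this learning-spark example uses the pre-2.0 Java API, where FlatMapFunction.call() returns an Iterable. Since Spark 2.0, call() must return an Iterator instead, which is why the test examples below end the lambda with .iterator().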

Code example source: org.apache.spark/spark-core

@Test
public void flatMap() {
 JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
  "The quick brown fox jumps over the lazy dog."));
 JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
 Assert.assertEquals("Hello", words.first());
 Assert.assertEquals(11, words.count());
 JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
  List<Tuple2<String, String>> pairs2 = new LinkedList<>();
  for (String word : s.split(" ")) {
   pairs2.add(new Tuple2<>(word, word));
  }
  return pairs2.iterator();
 });
 Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
 Assert.assertEquals(11, pairs.count());
 JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
  List<Double> lengths = new LinkedList<>();
  for (String word : s.split(" ")) {
   lengths.add((double) word.length());
  }
  return lengths.iterator();
 });
 Assert.assertEquals(5.0, doubles.first(), 0.01);
 Assert.assertEquals(11, doubles.count());
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(
      master, "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  JavaPairRDD<String, Integer> counts = rdd
      .flatMap(new FlatMapFunction<String, String>() {
        public Iterable<String> call(String x) {
          return Arrays.asList(x.split(" "));
        }
      })
      .mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String x) {
          return new Tuple2<>(x, 1);
        }
      })
      .reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer x, Integer y) {
          return x + y;
        }
      });
  counts.saveAsTextFile(args[2]);
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void flatMap() {
 JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello World!",
  "The quick brown fox jumps over the lazy dog."));
 JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
 assertEquals("Hello", words.first());
 assertEquals(11, words.count());
 JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
   List<Tuple2<String, String>> pairs = new LinkedList<>();
   for (String word : s.split(" ")) {
    pairs.add(new Tuple2<>(word, word));
   }
   return pairs.iterator();
  }
 );
 assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
 assertEquals(11, pairsRDD.count());
 JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
  List<Double> lengths = new LinkedList<>();
  for (String word : s.split(" ")) {
   lengths.add((double) word.length());
  }
  return lengths.iterator();
 });
 assertEquals(5.0, doubles.first(), 0.01);
 assertEquals(11, doubles.count());
}
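
Besides flatMap itself, this test exercises the two typed variants: flatMapToPair, whose function returns an Iterator of Tuple2 and produces a JavaPairRDD, and flatMapToDouble, whose function returns an Iterator of Double and produces a JavaDoubleRDD.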

Code example source: databricks/learning-spark

JavaRDD<String> callSigns = rdd.flatMap(
    new FlatMapFunction<String, String>() {
      public Iterable<String> call(String line) {
        if (line.equals("")) {
          // ... (truncated in the source)

String distScriptName = "finddistance.R";
sc.addFile(distScript); // distScript is defined in code elided at the source
JavaRDD<String> pipeInputs = contactsContactLists.values().map(new VerifyCallLogs())
    .flatMap(new FlatMapFunction<CallLog[], String>() {
      public Iterable<String> call(CallLog[] calls) {
        ArrayList<String> latLons = new ArrayList<String>();
        // ... (truncated in the source)

Code example source: mahmoudparsian/data-algorithms-book

JavaRDD<String> words = lines.flatMap((String s) -> {
  return Util.convertStringToWords(s, N).iterator();
});

Code example source: mahmoudparsian/data-algorithms-book

JavaRDD<String> words = lines.flatMap((String line) -> Util.convertLineToWords(line, N).iterator());

Code example source: mahmoudparsian/data-algorithms-book

JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
  // ... (anonymous class body truncated in the source)

Code example source: mahmoudparsian/data-algorithms-book

lines.flatMap((String s) -> Util.convertToPairOfWordAndDocument(s, N).iterator());

Code example source: mahmoudparsian/data-algorithms-book

lines.flatMap(new FlatMapFunction<
  // ... (truncated in the source)

Code example source: Impetus/Kundera

public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
  final Class clazz = m.getEntityClazz();
  SparkContext sc = sparkClient.sparkContext;
  Configuration config = new Configuration();
  config.set(
      "mongo.input.uri",
      buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
          m.getTableName()));
  JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
      Object.class, BSONObject.class).toJavaRDD();
  JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
  {
    @Override
    public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
    {
      BSONObject obj = arg._2();
      Object javaObject = generateJavaObjectFromBSON(obj, clazz);
      return Arrays.asList(javaObject);
    }
  });
  sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
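
Here flatMap is used as a one-to-one mapping: each (key, BSONObject) pair read via MongoInputFormat is converted into a single entity instance and returned as a one-element list, and the resulting RDD is registered as a Spark SQL temp table.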

Code example source: mahmoudparsian/data-algorithms-book

.flatMap(new FlatMapFunction<String, Row>() {
  private static final long serialVersionUID = 5481855142090322683L;
  // ... (call() body truncated in the source)

Code example source: org.datavec/datavec-spark

/**
 * Analyzes the quality of sequence data against the given schema.
 *
 * @param schema schema describing the columns of the data
 * @param data   sequence data to analyze (each element is one sequence)
 * @return a DataQualityAnalysis report for the flattened data
 */
public static DataQualityAnalysis analyzeQualitySequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
  JavaRDD<List<Writable>> fmSeq = data.flatMap(new SequenceFlatMapFunction());
  return analyzeQuality(schema, fmSeq);
}
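
Judging by the names and signature, SequenceFlatMapFunction flattens each sequence (a List<List<Writable>>) into its individual List<Writable> steps, so that the non-sequence analyzeQuality() overload can be reused on the flattened data.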

Code example source: apache/tinkerpop

final JavaRDD<Traverser.Admin<Object>> nextRDD = inputRDD.values()
    .flatMap(vertexWritable -> {
      if (identityTraversal)                          // g.V.count()-style (identity)
        return IteratorUtils.of(traversal.getTraverserGenerator().generate(vertexWritable.get(), (Step) graphStep, 1L));
      // ... (remaining branches truncated in the source)

Code example source: com.davidbracewell/mango

@Override
public <R> SparkStream<R> flatMap(@NonNull SerializableFunction<? super T, Stream<? extends R>> mapper) {
 return new SparkStream<>(rdd.flatMap(t -> {
   Configurator.INSTANCE.configure(configBroadcast.value());
   return Cast.as(mapper.apply(t).iterator());
 }));
}
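
This wrapper adapts a java.util.stream.Stream-returning mapper to the Iterator that JavaRDD.flatMap expects; the Configurator call appears to restore the broadcast configuration on executors before the user function runs.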
