org.apache.spark.api.java.JavaPairRDD.flatMap()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(6.4k)|赞(0)|评价(0)|浏览(99)

本文整理了Java中org.apache.spark.api.java.JavaPairRDD.flatMap()方法的一些代码示例,展示了JavaPairRDD.flatMap()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.flatMap()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:flatMap

JavaPairRDD.flatMap介绍

暂无

代码示例

代码示例来源:origin: databricks/learning-spark

public static void main(String[] args) throws Exception {
    // BUG FIX: the original checked args.length != 3 but then read args[3]
    // (the 4th argument, "key"), which would always throw
    // ArrayIndexOutOfBoundsException. Four arguments are required, exactly
    // as the usage message itself lists.
    if (args.length != 4) {
        throw new Exception("Usage BasicLoadCsv sparkMaster csvInputFile csvOutputFile key");
    }
    String master = args[0];
    String csvInput = args[1];
    String outputFile = args[2];
    final String key = args[3];

    JavaSparkContext sc = new JavaSparkContext(
        master, "loadwholecsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    // wholeTextFiles yields (filename, fileContents) pairs; ParseLine flattens
    // each file into its parsed CSV rows (String[] per row).
    JavaPairRDD<String, String> csvData = sc.wholeTextFiles(csvInput);
    JavaRDD<String[]> keyedRDD = csvData.flatMap(new ParseLine());
    // Keep only rows whose first column equals the requested key.
    JavaRDD<String[]> result =
        keyedRDD.filter(new Function<String[], Boolean>() {
            public Boolean call(String[] input) { return input[0].equals(key); }});

    result.saveAsTextFile(outputFile);
}
}

代码示例来源:origin: mahmoudparsian/data-algorithms-book

triadsGrouped.flatMap((Tuple2<Tuple2<Long,Long>, Iterable<Long>> s) -> {

代码示例来源:origin: mahmoudparsian/data-algorithms-book

triadsGrouped.flatMap(new FlatMapFunction<

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@Override
public <T> SparkCollection<T> flatMap(FlatMapFunction<Tuple2<K, V>, T> function) {
 // Flatten the underlying pair RDD with the supplied function, then wrap
 // the resulting RDD in a new collection that shares this one's context.
 JavaRDD<T> flattened = pairRDD.flatMap(function);
 return new RDDCollection<>(sec, jsc, datasetContext, sinkFactory, flattened);
}

代码示例来源:origin: seznam/euphoria

.flatMap(
  new FlatMapFunctionWithCollector<>(
    (t, collector) -> {

代码示例来源:origin: cloudera-labs/envelope

@SuppressWarnings( {"rawtypes", "unchecked"})
private Dataset<Row> readInputFormat(String path) throws Exception {
 LOG.debug("Reading InputFormat[{}]: {}", inputType, path);
 // Resolve the configured InputFormat and its key/value classes by name.
 Class<? extends InputFormat> formatClass = Class.forName(inputType).asSubclass(InputFormat.class);
 Class<?> keyClass = Class.forName(keyType);
 Class<?> valueClass = Class.forName(valueType);
 TranslateFunction translateFunction = new TranslateFunction(translatorConfig);
 // Load the file as a (key, value) pair RDD, translate each pair into Rows,
 // and build a DataFrame using the translator's schema.
 @SuppressWarnings("resource")
 JavaSparkContext context = new JavaSparkContext(Contexts.getSparkSession().sparkContext());
 JavaPairRDD<?, ?> rdd = context.newAPIHadoopFile(path, formatClass, keyClass, valueClass, new Configuration());
 return Contexts.getSparkSession().createDataFrame(rdd.flatMap(translateFunction), translateFunction.getSchema());
}

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@Override
public JavaRDD<OUT> call(JavaPairRDD<JOIN_KEY, List<JoinElement<INPUT_RECORD>>> input,
             Time batchTime) throws Exception {
 // Fast path: the merge function has already been created on a previous batch.
 if (function != null) {
  return input.flatMap(function);
 }
 // Lazily build the join-merge function once and cache it for later batches.
 function = Compat.convert(
  new JoinMergeFunction<JOIN_KEY, INPUT_RECORD, OUT>(dynamicDriverContext.getPluginFunctionContext()));
 return input.flatMap(function);
}

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@Override
 public JavaRDD<RecordInfo<Object>> call(JavaPairRDD<GROUP_KEY, Iterable<GROUP_VAL>> input,
                        Time batchTime) throws Exception {
  // Fast path: the aggregate function was created on an earlier batch.
  if (function != null) {
   return input.flatMap(function);
  }
  // Lazily build the aggregator's aggregate function once and cache it.
  function = Compat.convert(
   new AggregatorAggregateFunction<GROUP_KEY, GROUP_VAL, OUT>(dynamicDriverContext.getPluginFunctionContext()));
  return input.flatMap(function);
 }
}

代码示例来源:origin: cloudera-labs/envelope

@SuppressWarnings({ "unchecked", "rawtypes" })
public JavaRDD<Row> translate(JavaRDD raw) {
 // Key each raw record using the stream input's prepare function, then
 // flat-map the keyed pairs through the configured translator to get Rows.
 StreamInput streamInput = (StreamInput) getInput(true);
 JavaPairRDD<?, ?> keyed = raw.mapToPair(streamInput.getPrepareFunction());
 TranslateFunction translator = getTranslateFunction(config);
 return keyed.flatMap(translator);
}

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@Override
protected SparkCollection<RecordInfo<Object>> getSource(StageSpec stageSpec, StageStatisticsCollector collector) {
 // Build the per-stage plugin context and the source function up front,
 // then wrap the stage's source RDD (flat-mapped through that function)
 // in an RDDCollection.
 PluginFunctionContext functionContext = new PluginFunctionContext(stageSpec, sec, collector);
 BatchSourceFunction sourceFunction = new BatchSourceFunction(functionContext, numOfRecordsPreview);
 return new RDDCollection<>(
   sec, jsc, datasetContext, sinkFactory,
   sourceFactory.createRDD(sec, jsc, stageSpec.getName(), Object.class, Object.class)
     .flatMap(Compat.convert(sourceFunction)));
}

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@Override
public SparkCollection<RecordInfo<Object>> aggregate(StageSpec stageSpec, @Nullable Integer partitions,
                           StageStatisticsCollector collector) {
 PluginFunctionContext functionContext = new PluginFunctionContext(stageSpec, sec, collector);
 // Phase 1: extract a grouping key from every record.
 PairFlatMapFunc<T, Object, T> groupBy = new AggregatorGroupByFunction<>(functionContext);
 PairFlatMapFunction<T, Object, T> sparkGroupBy = Compat.convert(groupBy);
 JavaPairRDD<Object, T> keyed = rdd.flatMapToPair(sparkGroupBy);
 // Phase 2: group by key, honoring an explicit partition count when given.
 JavaPairRDD<Object, Iterable<T>> grouped =
  partitions == null ? keyed.groupByKey() : keyed.groupByKey(partitions);
 // Phase 3: run the aggregator over each group and emit the results.
 FlatMapFunc<Tuple2<Object, Iterable<T>>, RecordInfo<Object>> aggregate =
  new AggregatorAggregateFunction<>(functionContext);
 FlatMapFunction<Tuple2<Object, Iterable<T>>, RecordInfo<Object>> sparkAggregate =
  Compat.convert(aggregate);
 return wrap(grouped.flatMap(sparkAggregate));
}

代码示例来源:origin: cloudera-labs/envelope

private JavaRDD<Row> planMutationsByKey(Dataset<Row> arriving, List<String> keyFieldNames,
                    Config plannerConfig, Config outputConfig) {
 // Key every arriving row by its extracted key fields.
 JavaPairRDD<Row, Row> keyed =
   arriving.javaRDD().keyBy(new ExtractKeyFunction(keyFieldNames, accumulators));
 // Group arriving rows per key, then join each group with its existing rows.
 JavaPairRDD<Row, Iterable<Row>> grouped = keyed.groupByKey(getPartitioner(keyed));
 JavaPairRDD<Row, Tuple2<Iterable<Row>, Iterable<Row>>> arrivingAndExisting =
   grouped.mapPartitionsToPair(new JoinExistingForKeysFunction(outputConfig, keyFieldNames, accumulators));
 // Plan the mutations for each key from its arriving/existing row pair.
 return arrivingAndExisting.flatMap(new PlanForKeyFunction(plannerConfig, accumulators));
}

代码示例来源:origin: sryza/simplesparkapp

JavaPairRDD<Character, Integer> charCounts = filtered.flatMap(
 new FlatMapFunction<Tuple2<String, Integer>, Character>() {
  @Override

代码示例来源:origin: org.datavec/datavec-spark

leftJV.cogroup(rightJV);
return cogroupedJV.flatMap(new ExecuteJoinFromCoGroupFlatMapFunction(join));

代码示例来源:origin: org.datavec/datavec-spark_2.11

leftJV.cogroup(rightJV);
return cogroupedJV.flatMap(new ExecuteJoinFromCoGroupFlatMapFunction(join));

相关文章

微信公众号

最新文章

更多

JavaPairRDD类方法