Usage and code examples of org.apache.spark.api.java.JavaRDD.mapPartitionsToPair()


This article collects Java code examples of the org.apache.spark.api.java.JavaRDD.mapPartitionsToPair() method, showing how it is used in practice. The examples were extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful reference material. The details of JavaRDD.mapPartitionsToPair() are as follows:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: mapPartitionsToPair

About JavaRDD.mapPartitionsToPair

mapPartitionsToPair applies a PairFlatMapFunction to each partition of the RDD and returns a new JavaPairRDD. Unlike mapToPair, the supplied function is invoked once per partition with an Iterator over all of that partition's elements, which makes it well suited to amortizing per-partition setup work (opening a connection, instantiating a heavy object) across many records. An overload accepts a second boolean argument, preservesPartitioning, which signals that the function does not change the keys so the existing partitioner can be kept.
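As a minimal, self-contained sketch of the basic usage (assuming Spark 2.x, where the pair function returns an Iterator; the class name, app name, and local master setting are made up for illustration), the following builds (word, 1) pairs one partition at a time and then counts them:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class MapPartitionsToPairDemo {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("MapPartitionsToPairDemo").setMaster("local[2]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    JavaRDD<String> words = sc.parallelize(Arrays.asList("spark", "rdd", "spark", "pair"), 2);

    // The function runs once per partition and receives an Iterator over its elements,
    // so any per-partition setup placed here is shared by all of the partition's records.
    JavaPairRDD<String, Integer> pairs = words.mapPartitionsToPair(it -> {
      List<Tuple2<String, Integer>> out = new ArrayList<>();
      while (it.hasNext()) {
        out.add(new Tuple2<>(it.next(), 1));
      }
      return out.iterator(); // Spark 2.x expects an Iterator, not an Iterable
    });

    System.out.println(pairs.reduceByKey(Integer::sum).collectAsMap());
    sc.close();
  }
}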

Code examples

Code example source: apache/kylin (the same snippet also appears in the org.apache.kylin/kylin-engine-spark artifact)

// FlatOutputFucntion (the spelling is Kylin's own) turns each partition of source
// records into (SelfDefineSortableKey, Text) pairs; samplingPercent controls how
// many records it samples along the way.
JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
    new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));

Code example source: databricks/learning-spark

System.out.println("Saved country contact counts as a file");
JavaPairRDD<String, CallLog[]> contactsContactLists = validCallSigns.mapPartitionsToPair(
 new PairFlatMapFunction<Iterator<String>, String, CallLog[]>() {
  public Iterable<Tuple2<String, CallLog[]>> call(Iterator<String> input) {
   // ... body truncated in the extracted snippet: it emits one
   // (callSign, CallLog[]) pair per call sign in the partition
  }
 });

Code example source: org.apache.crunch/crunch-spark (the same code also appears in apache/crunch)

private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaPairRDD[] rdds = new JavaPairRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
   if (parents.get(i) instanceof PTableBase) {
    rdds[i] = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
   } else {
    // A non-table parent yields a plain JavaRDD; lift it into a JavaPairRDD
    // with an identity DoFn so that every parent can be unioned below.
    JavaRDD rdd = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    rdds[i] = rdd.mapPartitionsToPair(new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
   }
  }
  return runtime.getSparkContext().union(rdds);
 }
}
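This identity-pairing trick is a common way to mix plain JavaRDDs and JavaPairRDDs in one union: SparkContext.union requires all of its inputs to have the same element type, so the non-pair parents are first lifted into Tuple2 form via mapPartitionsToPair.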

Code example source: datacleaner/DataCleaner

processedTuplesRdd.mapPartitionsToPair(new TuplesToTuplesFunction<>(), preservePartitions);
final JavaRDD<InputRow> coalescedInputRowsRDD = inputRowsRDD.coalesce(1);
namedAnalyzerResultsRDD =
    coalescedInputRowsRDD.mapPartitionsToPair(new RowProcessingFunction(_sparkJobContext));
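The first call above uses the two-argument overload of mapPartitionsToPair: the second boolean parameter is preservesPartitioning, a hint that the function does not change the keys, allowing Spark to carry the existing partitioner over to the result. The coalesce(1) call then forces all rows into a single partition so that RowProcessingFunction sees them in one invocation.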

Code example source: org.apache.crunch/crunch-spark (the same code also appears in apache/crunch)

private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  if (combineFn instanceof CombineFn && getOnlyParent() instanceof PGroupedTableImpl) {
   runtime.setCombineFn((CombineFn) combineFn);
  }
  JavaRDDLike<?, ?> parentRDD = ((SparkCollection) getOnlyParent()).getJavaRDDLike(runtime);
  fn.configure(runtime.getConfiguration());
  // Flat-map each partition (with its index), then re-pair the output into
  // Tuple2s so that downstream stages see a JavaPairRDD.
  return parentRDD
    .mapPartitionsWithIndex(
      new FlatMapIndexFn(fn, parentRDD instanceof JavaPairRDD, runtime.getRuntimeContext()),
      false)
    .mapPartitionsToPair(new CrunchPairTuple2());
 }
}

Code example source: cdapio/cdap

@GET
@Path("/plugin")
public void plugin(HttpServiceRequest request, HttpServiceResponder responder,
          @QueryParam("pluginType") String pluginType,
          @QueryParam("pluginName") String pluginName,
          @QueryParam("file") String file) {
 try (SparkHttpServicePluginContext pluginContext = getContext().getPluginContext()) {
  Class<?> cls = pluginContext.usePluginClass(pluginType, pluginName, "pluginId",
                        PluginProperties.builder().build());
  if (cls == null) {
   responder.sendError(404, "Plugin of type " + pluginType + " and name " + pluginName + " not found.");
   return;
  }
  JavaSparkContext jsc = getContext().getJavaSparkContext();
  Map<String, Integer> counts = jsc.textFile(file)
   .mapPartitionsToPair(iterator -> {
    // Instantiate the plugin once per partition rather than once per record.
    ToIntFunction<String> func = pluginContext.newPluginInstance("pluginId");
    List<Tuple2<String, Integer>> result = new ArrayList<>();
    while (iterator.hasNext()) {
     String element = iterator.next();
     result.add(new Tuple2<>(element, func.applyAsInt(element)));
    }
    return result;
   })
   .reduceByKey((v1, v2) -> v1 + v2)
   .collectAsMap();
  responder.sendJson(200, counts, new TypeToken<Map<String, Integer>>() { }.getType(), new Gson());
 }
}
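Note that the lambda above returns the List directly, which matches the Spark 1.x PairFlatMapFunction contract, where call returns an Iterable; when compiling against Spark 2.x the function must instead return an Iterator, e.g. result.iterator().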

Code example source: cdapio/cdap

.mapPartitionsToPair(iterator -> {
 // Same per-partition pattern as above, but the plugin is instantiated with a
 // macro evaluator; the rest of the lambda is truncated in the extracted snippet.
 ToIntFunction<String> func = pluginContext.newPluginInstance("pluggable",
                                new UDTNameMacroEvaluator(udtName));

Code example source: org.apache.beam/beam-runners-spark

 WindowingHelpers.unwindowFunction()),
  true)
// toPairFlatMapFunction re-pairs each partition's elements as Tuple2s; the
// trailing true again marks the function as partitioning-preserving.
.mapPartitionsToPair(TranslationUtils.toPairFlatMapFunction(), true)
.mapValues(
