本文整理了Java中org.apache.spark.api.java.JavaRDD.mapPartitionsToPair()
方法的一些代码示例,展示了JavaRDD.mapPartitionsToPair()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.mapPartitionsToPair()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:mapPartitionsToPair
暂无
代码示例来源:origin: apache/kylin
JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));
代码示例来源:origin: databricks/learning-spark
System.out.println("Saved country contact counts as a file");
JavaPairRDD<String, CallLog[]> contactsContactLists = validCallSigns.mapPartitionsToPair(
new PairFlatMapFunction<Iterator<String>, String, CallLog[]>() {
public Iterable<Tuple2<String, CallLog[]>> call(Iterator<String> input) {
代码示例来源:origin: org.apache.crunch/crunch-spark
/**
 * Builds the Spark representation of this union by materializing each parent
 * as a {@code JavaPairRDD} and unioning them through the Spark context.
 *
 * <p>Parents that are already tables are used as-is; plain collections are
 * converted to pair RDDs with an identity pair-flat-map.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaPairRDD[] parentRdds = new JavaPairRDD[parents.size()];
  int idx = 0;
  for (PCollectionImpl<?> parent : parents) {
    SparkCollection collection = (SparkCollection) parent;
    if (parent instanceof PTableBase) {
      // Table parents already produce key/value pairs.
      parentRdds[idx] = (JavaPairRDD) collection.getJavaRDDLike(runtime);
    } else {
      // Wrap a plain collection's elements into pairs via the identity function.
      JavaRDD plainRdd = (JavaRDD) collection.getJavaRDDLike(runtime);
      parentRdds[idx] = plainRdd.mapPartitionsToPair(
          new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    }
    idx++;
  }
  return runtime.getSparkContext().union(parentRdds);
}
}
代码示例来源:origin: apache/crunch
/**
 * Produces the unioned Spark RDD for this collection's parents.
 *
 * <p>Each parent becomes a {@code JavaPairRDD}: table parents directly, and
 * non-table parents through an identity pair-flat-map over their partitions.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parentCollections = getParents();
  int parentCount = parentCollections.size();
  JavaPairRDD[] pairRdds = new JavaPairRDD[parentCount];
  for (int pos = 0; pos < parentCount; pos++) {
    PCollectionImpl<?> parent = parentCollections.get(pos);
    JavaRDDLike<?, ?> parentRdd = ((SparkCollection) parent).getJavaRDDLike(runtime);
    if (parent instanceof PTableBase) {
      // Already keyed: no conversion needed.
      pairRdds[pos] = (JavaPairRDD) parentRdd;
    } else {
      // Convert element RDD to a pair RDD with the identity DoFn.
      pairRdds[pos] = ((JavaRDD) parentRdd).mapPartitionsToPair(
          new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    }
  }
  return runtime.getSparkContext().union(pairRdds);
}
}
代码示例来源:origin: datacleaner/DataCleaner
processedTuplesRdd.mapPartitionsToPair(new TuplesToTuplesFunction<>(), preservePartitions);
final JavaRDD<InputRow> coalescedInputRowsRDD = inputRowsRDD.coalesce(1);
namedAnalyzerResultsRDD =
coalescedInputRowsRDD.mapPartitionsToPair(new RowProcessingFunction(_sparkJobContext));
代码示例来源:origin: org.apache.crunch/crunch-spark
/**
 * Applies this node's function to the single parent's RDD and converts the
 * resulting tuples back into a pair RDD.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  // When combining over a grouped table, register the combiner with the runtime.
  if (combineFn instanceof CombineFn && getOnlyParent() instanceof PGroupedTableImpl) {
    runtime.setCombineFn((CombineFn) combineFn);
  }
  JavaRDDLike<?, ?> parent = ((SparkCollection) getOnlyParent()).getJavaRDDLike(runtime);
  fn.configure(runtime.getConfiguration());
  boolean parentIsPaired = parent instanceof JavaPairRDD;
  FlatMapIndexFn indexFn =
      new FlatMapIndexFn(fn, parentIsPaired, runtime.getRuntimeContext());
  return parent
      .mapPartitionsWithIndex(indexFn, false)
      .mapPartitionsToPair(new CrunchPairTuple2());
}
}
代码示例来源:origin: apache/crunch
/**
 * Builds this node's RDD: runs the configured function over the parent's
 * partitions, then re-pairs the emitted tuples.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  JavaRDDLike<?, ?> source = ((SparkCollection) getOnlyParent()).getJavaRDDLike(runtime);
  // Hand the combine function to the runtime when the parent is a grouped table.
  if (combineFn instanceof CombineFn && getOnlyParent() instanceof PGroupedTableImpl) {
    runtime.setCombineFn((CombineFn) combineFn);
  }
  fn.configure(runtime.getConfiguration());
  FlatMapIndexFn partitionFn = new FlatMapIndexFn(
      fn, source instanceof JavaPairRDD, runtime.getRuntimeContext());
  JavaRDDLike<?, ?> mapped = source.mapPartitionsWithIndex(partitionFn, false);
  return mapped.mapPartitionsToPair(new CrunchPairTuple2());
}
}
代码示例来源:origin: org.apache.kylin/kylin-engine-spark
JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));
代码示例来源:origin: cdapio/cdap
@GET
@Path("/plugin")
public void plugin(HttpServiceRequest request, HttpServiceResponder responder,
                   @QueryParam("pluginType") String pluginType,
                   @QueryParam("pluginName") String pluginName,
                   @QueryParam("file") String file) {
  // Resolve the requested plugin first; respond 404 before doing any Spark work
  // if it cannot be found.
  try (SparkHttpServicePluginContext pluginContext = getContext().getPluginContext()) {
    Class<?> pluginClass = pluginContext.usePluginClass(pluginType, pluginName, "pluginId",
                                                        PluginProperties.builder().build());
    if (pluginClass == null) {
      responder.sendError(404, "Plugin of type " + pluginType + " and name " + pluginName + " not found.");
      return;
    }
    JavaSparkContext sparkContext = getContext().getJavaSparkContext();
    // Per partition: instantiate the plugin once, then pair every line with the
    // int the plugin computes for it; finally sum the values per distinct line.
    Map<String, Integer> counts = sparkContext.textFile(file)
        .mapPartitionsToPair(lines -> {
          ToIntFunction<String> scorer = pluginContext.newPluginInstance("pluginId");
          List<Tuple2<String, Integer>> pairs = new ArrayList<>();
          while (lines.hasNext()) {
            String line = lines.next();
            pairs.add(new Tuple2<>(line, scorer.applyAsInt(line)));
          }
          return pairs;
        })
        .reduceByKey(Integer::sum)
        .collectAsMap();
    responder.sendJson(200, counts, new TypeToken<Map<String, Integer>>() { }.getType(), new Gson());
  }
}
代码示例来源:origin: cdapio/cdap
.mapPartitionsToPair(iterator -> {
ToIntFunction<String> func = pluginContext.newPluginInstance("pluggable",
new UDTNameMacroEvaluator(udtName));
代码示例来源:origin: org.apache.beam/beam-runners-spark
WindowingHelpers.unwindowFunction()),
true)
.mapPartitionsToPair(TranslationUtils.toPairFlatMapFunction(), true)
.mapValues(
内容来源于网络,如有侵权,请联系作者删除!