本文整理了Java中org.apache.spark.api.java.JavaPairRDD.mapPartitions()
方法的一些代码示例,展示了JavaPairRDD.mapPartitions()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.mapPartitions()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:mapPartitions
暂无
代码示例来源:origin: mahmoudparsian/data-algorithms-book
pairs.mapPartitions((Iterator<Tuple2<String,Integer>> iter) -> {
SortedMap<Integer, String> top10 = new TreeMap<Integer, String>();
while (iter.hasNext()) {
代码示例来源:origin: org.apache.beam/beam-runners-spark
/**
 * An implementation of {@link
 * org.apache.beam.runners.core.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly} for the Spark runner.
 */
public static <K, V> JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupByKeyOnly(
    JavaRDD<WindowedValue<KV<K, V>>> rdd,
    Coder<K> keyCoder,
    WindowedValueCoder<V> wvCoder,
    @Nullable Partitioner partitioner) {
  // Encode keys and values as byte arrays via their coders so that records
  // can be serialized and moved across the network during the shuffle.
  JavaPairRDD<ByteArray, byte[]> encoded =
      rdd.map(new ReifyTimestampsAndWindowsFunction<>())
          .map(WindowingHelpers.unwindowFunction())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder));

  // Group with the caller-supplied partitioner when one was given; otherwise
  // fall back to Spark's default group-by-key behavior.
  final JavaPairRDD<ByteArray, Iterable<byte[]>> grouped;
  if (partitioner == null) {
    grouped = encoded.groupByKey();
  } else {
    grouped = encoded.groupByKey(partitioner);
  }

  // Decode back into typed values. Passing preservesPartitioning=true to
  // mapPartitions keeps the partitioner and avoids an extra shuffle downstream.
  return grouped
      .mapPartitionsToPair(
          TranslationUtils.pairFunctionToPairFlatMapFunction(
              CoderHelpers.fromByteFunctionIterable(keyCoder, wvCoder)),
          true)
      .mapPartitions(TranslationUtils.fromPairFlatMapFunction(), true)
      .mapPartitions(
          TranslationUtils.functionToFlatMapFunction(WindowingHelpers.windowFunction()), true);
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<SortedMap<Integer, String>> partitions = pairs.mapPartitions(
new FlatMapFunction<Iterator<Tuple2<String,Integer>>, SortedMap<Integer, String>>() {
@Override
代码示例来源:origin: mahmoudparsian/data-algorithms-book
uniqueKeys.mapPartitions((Iterator<Tuple2<String,Integer>> iter) -> {
final int N1 = topN.value();
SortedMap<Integer, String> localTopN = new TreeMap<Integer, String>();
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<Map<Character, Long>> partitions = fastqRDD.mapPartitions(
new FlatMapFunction<Iterator<Tuple2<LongWritable ,Text>>, Map<Character,Long>>() {
@Override
代码示例来源:origin: mahmoudparsian/data-algorithms-book
kmersGrouped.mapPartitions((Iterator<Tuple2<String,Integer>> iter) -> {
int N1 = broadcastN.value();
SortedMap<Integer, String> topN = new TreeMap<Integer, String>();
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<SortedMap<Integer, String>> partitions = uniqueKeys.mapPartitions(
new FlatMapFunction<Iterator<Tuple2<String,Integer>>, SortedMap<Integer, String>>() {
@Override
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<SortedMap<Integer, String>> partitions = kmersGrouped.mapPartitions(
new FlatMapFunction<Iterator<Tuple2<String,Integer>>, SortedMap<Integer, String>>() {
@Override
代码示例来源:origin: scipr-lab/dizk
/**
 * Runs a sorted multi-scalar multiplication on each partition's scalar/group
 * pairs, then adds the per-partition results into a single group element.
 */
static <GroupT extends AbstractGroup<GroupT>> GroupT distributedSortedMSM(
        final JavaPairRDD<BigInteger, GroupT> input) {
    return input.mapPartitions(elements -> {
        // Drain the partition into a local buffer so sortedMSM can work on it in bulk.
        final List<Tuple2<BigInteger, GroupT>> buffer = new ArrayList<>();
        elements.forEachRemaining(buffer::add);
        // Each partition contributes exactly one partial result.
        return Collections.singletonList(sortedMSM(buffer)).iterator();
    }).reduce(GroupT::add);
}
代码示例来源:origin: scipr-lab/dizk
/**
 * Runs a Bos-Coster multi-scalar multiplication on each partition's scalar/group
 * pairs, then adds the per-partition results into a single group element.
 */
static <GroupT extends AbstractGroup<GroupT>> GroupT distributedBosCosterMSM(
        final JavaPairRDD<BigInteger, GroupT> input) {
    return input.mapPartitions(elements -> {
        // Collect the whole partition locally; bosCosterMSM needs all pairs at once.
        final List<Tuple2<BigInteger, GroupT>> buffer = new ArrayList<>();
        elements.forEachRemaining(buffer::add);
        // One partial result per partition, combined below via group addition.
        return Collections.singletonList(bosCosterMSM(buffer)).iterator();
    }).reduce(GroupT::add);
}
代码示例来源:origin: Stratio/Decision
/**
 * Handles one micro-batch of grouped streaming messages. mapPartitions is used
 * purely for its side effect (forwarding each group to {@code process}); the
 * terminal count() forces evaluation of the otherwise-lazy transformation.
 */
@Override
public Void call(JavaPairRDD<StreamAction, Iterable<StratioStreamingMessage>> rdd) throws Exception {
    if (rdd.isEmpty()) {
        // Nothing arrived in this batch; avoid launching a Spark job at all.
        return null;
    }
    rdd.mapPartitions(
            new FlatMapFunction<Iterator<Tuple2<StreamAction, Iterable<StratioStreamingMessage>>>, Object>() {
                @Override public Iterable<Object> call(
                        Iterator<Tuple2<StreamAction, Iterable<StratioStreamingMessage>>> partition)
                        throws Exception {
                    // Dispatch every group of messages; nothing is emitted downstream.
                    while (partition.hasNext()) {
                        process(partition.next()._2());
                    }
                    return new ArrayList<Object>();
                }
            }).count();
    return null;
}
代码示例来源:origin: ypriverol/spark-java8
JavaRDD<SortedMap<Integer, String>> partitions = pairs.mapPartitions((Iterator<Tuple2<String,Integer>> iter) -> {
SortedMap<Integer, String> top10 = new TreeMap<>();
while (iter.hasNext()) {
代码示例来源:origin: DataSystemsLab/GeoSpark
.mapPartitions(new FlatMapFunction<Iterator<Tuple2<Integer, T>>, T>()
代码示例来源:origin: org.apache.crunch/crunch-spark
/**
 * Materializes each parent PCollection as a JavaRDD and unions them into one RDD.
 * Table-backed parents arrive as pair RDDs and are flattened back to single-record
 * form with an identity FlatMapPairDoFn; plain collections are used directly.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaRDD[] parentRDDs = new JavaRDD[parents.size()];
  for (int idx = 0; idx < parentRDDs.length; idx++) {
    PCollectionImpl<?> parent = parents.get(idx);
    if (parent instanceof PTableBase) {
      // Pair RDDs must be converted to element RDDs before they can be unioned.
      JavaPairRDD pairs = (JavaPairRDD) ((SparkCollection) parent).getJavaRDDLike(runtime);
      parentRDDs[idx] =
          pairs.mapPartitions(new FlatMapPairDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    } else {
      parentRDDs[idx] = (JavaRDD) ((SparkCollection) parent).getJavaRDDLike(runtime);
    }
  }
  return runtime.getSparkContext().union(parentRDDs);
}
}
代码示例来源:origin: org.datasyslab/geospark
.mapPartitions(new FlatMapFunction<Iterator<Tuple2<Integer, T>>, T>()
代码示例来源:origin: apache/crunch
/**
 * Builds the union of all parent collections as a single RDD. Parents backed by a
 * table (PTableBase) come back as JavaPairRDDs and are first flattened through an
 * identity FlatMapPairDoFn; non-table parents are cast straight to JavaRDD.
 */
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parentList = getParents();
  int count = parentList.size();
  JavaRDD[] unionInputs = new JavaRDD[count];
  for (int i = 0; i < count; i++) {
    SparkCollection source = (SparkCollection) parentList.get(i);
    if (!(parentList.get(i) instanceof PTableBase)) {
      unionInputs[i] = (JavaRDD) source.getJavaRDDLike(runtime);
      continue;
    }
    // Table parents are key/value pairs; flatten them into plain records.
    JavaPairRDD tableRDD = (JavaPairRDD) source.getJavaRDDLike(runtime);
    unionInputs[i] =
        tableRDD.mapPartitions(new FlatMapPairDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
  }
  return runtime.getSparkContext().union(unionInputs);
}
}
代码示例来源:origin: org.apache.pig/pig
join(streamIndexedJavaPairRDD, partitioner);
return resultKeyValue.mapPartitions(toValueFun);
} else if (innerFlags[0] && !innerFlags[1]) {
leftOuterJoin(streamIndexedJavaPairRDD, partitioner);
return resultKeyValue.mapPartitions(toValueFun);
} else if (!innerFlags[0] && innerFlags[1]) {
rightOuterJoin(streamIndexedJavaPairRDD, partitioner);
return resultKeyValue.mapPartitions(toValueFun);
} else {
fullOuterJoin(streamIndexedJavaPairRDD, partitioner);
return resultKeyValue.mapPartitions(toValueFun);
代码示例来源:origin: org.apache.pig/pig
/**
 * Implements secondary sort: routes all tuples sharing an IndexedKey to the same
 * partition (sorted within it), then packages each key's tuples into the result
 * shape (key,(val1,val2,val3,...)) via AccumulateByKey.
 */
public static RDD<Tuple> handleSecondarySort(
        RDD<Tuple2<IndexedKey, Tuple>> rdd, POPackage pkgOp) {
    JavaPairRDD<IndexedKey, Tuple> keyed = JavaPairRDD.fromRDD(rdd,
            SparkUtil.getManifest(IndexedKey.class), SparkUtil.getManifest(Tuple.class));
    // Keep the current level of parallelism when repartitioning.
    int numPartitions = keyed.partitions().size();
    // Repartition so equal indexed keys land together, sorted inside each partition.
    JavaPairRDD<IndexedKey, Tuple> partitionedAndSorted =
            keyed.repartitionAndSortWithinPartitions(new IndexedKeyPartitioner(numPartitions));
    // preservesPartitioning=true: AccumulateByKey never moves records between keys.
    return partitionedAndSorted.mapPartitions(new AccumulateByKey(pkgOp), true).rdd();
}
代码示例来源:origin: org.apache.pig/pig
/**
 * Converts a POSort operator into a sorted RDD: maps each tuple to a (key, value)
 * pair, performs a total sortByKey with the operator's comparator at the computed
 * parallelism, then strips the keys back off with TO_VALUE_FUNCTION.
 */
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors, POSort sortOperator)
        throws IOException {
    // A sort has exactly one input.
    SparkUtil.assertPredecessorSize(predecessors, sortOperator, 1);
    RDD<Tuple> input = predecessors.get(0);
    int parallelism = SparkPigContext.get().getParallelism(predecessors, sortOperator);
    // Lift each tuple into a (sort-key, value) pair for sortByKey.
    RDD<Tuple2<Tuple, Object>> keyed =
            input.map(new ToKeyValueFunction(), SparkUtil.<Tuple, Object> getTuple2Manifest());
    JavaPairRDD<Tuple, Object> pairRDD = new JavaPairRDD<Tuple, Object>(keyed,
            SparkUtil.getManifest(Tuple.class), SparkUtil.getManifest(Object.class));
    // Ascending total sort using the operator-supplied comparator.
    JavaPairRDD<Tuple, Object> sortedPairs =
            pairRDD.sortByKey(sortOperator.getMComparator(), true, parallelism);
    // Drop the synthetic keys; partition-wise to avoid an extra shuffle.
    return sortedPairs.mapPartitions(TO_VALUE_FUNCTION).rdd();
}
代码示例来源:origin: seznam/euphoria
.mapPartitions(
new StateReducer(
windowing,
内容来源于网络,如有侵权,请联系作者删除!