本文整理了Java中org.apache.spark.api.java.JavaPairRDD.repartition()
方法的一些代码示例,展示了JavaPairRDD.repartition()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.repartition()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:repartition
暂无
代码示例来源:origin: apache/tinkerpop
loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
else if (loadedGraphRDD.partitions().size() < this.workers) // ensures that the loaded graphRDD does not have less partitions than workers
loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
代码示例来源:origin: com.davidbracewell/mango
@Override
public MPairStream<T, U> repartition(int partitions) {
  // Delegate to Spark's repartition, then wrap the resulting RDD back into
  // the MPairStream abstraction so callers stay framework-agnostic.
  return new SparkPairStream<>(this.rdd.repartition(partitions));
}
代码示例来源:origin: org.rcsb/mmtf-spark
/**
 * Get a half cartesian - using the first part of the input RDD as a key.
 * Pairs each element with every other element exactly once (one ordering per pair),
 * by keeping only combinations where the first key strictly exceeds the second.
 * Note: pairs whose keys compare equal (including self-pairs) are dropped by the
 * strict comparison — TODO confirm equal keys are not expected in the input.
 * @param <K> the type of the key (must be comparable so duplicate orderings can be filtered)
 * @param <V> the type of the value
 * @param inputRDD the input rdd - keys are used to order/filter the pairs
 * @param numPartitions the number of partitions to repartition to; 0 means leave partitioning unchanged
 * @return the RDD of all non-repeated comparisons
 */
public static <K extends Comparable<K>,V> JavaPairRDD<Tuple2<K, V>, Tuple2<K, V>> getHalfCartesian(JavaPairRDD<K, V> inputRDD, int numPartitions) {
JavaPairRDD<Tuple2<K, V>, Tuple2<K, V>> filteredRDD = inputRDD
.cartesian(inputRDD)
.filter(t -> t._1._1.compareTo(t._2._1)>0);
// A non-zero value requests an explicit repartition of the filtered result.
if (numPartitions!=0) {
return filteredRDD.repartition(numPartitions);
}
// numPartitions == 0: return the result without re-partitioning.
return filteredRDD;
}
代码示例来源:origin: uber/hudi
/**
 * Deletes cleanable files under each of the given partition paths and returns a
 * {@link HoodieCleanStat} per requested partition (including partitions where
 * nothing was deleted).
 *
 * @param partitionsToClean partition paths to run the cleaner over
 * @param jsc Spark context used to parallelize the per-partition work
 * @return one clean-stat per entry of {@code partitionsToClean}, in the same order
 */
private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean,
    JavaSparkContext jsc) {
  // Never use more tasks than there are partitions to clean.
  int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
  logger.info("Using cleanerParallelism: " + cleanerParallelism);
  List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
      .parallelize(partitionsToClean, cleanerParallelism)
      .flatMapToPair(getFilesToDeleteFunc(this, config))
      .repartition(cleanerParallelism) // repartition to remove skews
      .mapPartitionsToPair(deleteFilesFunc(this))
      .reduceByKey(
          // merge partition level clean stats below
          (Function2<PartitionCleanStat, PartitionCleanStat, PartitionCleanStat>) (e1, e2) -> e1
              .merge(e2))
      .collect();
  Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats.stream()
      .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
  HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config);
  // Return PartitionCleanStat for each partition passed; partitions with no
  // deletions have no Spark-side entry, so fall back to an empty stat.
  return partitionsToClean.stream().map(partitionPath -> {
    PartitionCleanStat partitionCleanStat = partitionCleanStatsMap
        .getOrDefault(partitionPath, new PartitionCleanStat(partitionPath));
    return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy())
        .withPartitionPath(partitionPath)
        .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain())
        .withDeletePathPattern(partitionCleanStat.deletePathPatterns)
        .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
        .withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
  }).collect(Collectors.toList());
}
代码示例来源:origin: com.uber.hoodie/hoodie-client
/**
 * Deletes cleanable files under each of the given partition paths and returns a
 * {@link HoodieCleanStat} per requested partition (including partitions where
 * nothing was deleted).
 *
 * @param partitionsToClean partition paths to run the cleaner over
 * @param jsc Spark context used to parallelize the per-partition work
 * @return one clean-stat per entry of {@code partitionsToClean}, in the same order
 */
private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean,
    JavaSparkContext jsc) {
  // Never use more tasks than there are partitions to clean.
  int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
  logger.info("Using cleanerParallelism: " + cleanerParallelism);
  List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
      .parallelize(partitionsToClean, cleanerParallelism)
      .flatMapToPair(getFilesToDeleteFunc(this, config))
      .repartition(cleanerParallelism) // repartition to remove skews
      .mapPartitionsToPair(deleteFilesFunc(this))
      .reduceByKey(
          // merge partition level clean stats below
          (Function2<PartitionCleanStat, PartitionCleanStat, PartitionCleanStat>) (e1, e2) -> e1
              .merge(e2))
      .collect();
  // Method references instead of `e -> e._1()` lambdas, matching the sibling variant.
  Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats.stream()
      .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
  HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config);
  // Return PartitionCleanStat for each partition passed; partitions with no
  // deletions have no Spark-side entry, so fall back to an empty stat.
  return partitionsToClean.stream().map(partitionPath -> {
    PartitionCleanStat partitionCleanStat = partitionCleanStatsMap
        .getOrDefault(partitionPath, new PartitionCleanStat(partitionPath));
    return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy())
        .withPartitionPath(partitionPath)
        .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain())
        .withDeletePathPattern(partitionCleanStat.deletePathPatterns)
        .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
        .withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
  }).collect(Collectors.toList());
}
代码示例来源:origin: org.apache.tinkerpop/spark-gremlin
loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
else if (loadedGraphRDD.partitions().size() < this.workers) // ensures that the loaded graphRDD does not have less partitions than workers
loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
代码示例来源:origin: ai.grakn/grakn-kb
loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a small pair RDD of {@code numPartitions} rows — each row keyed by itself —
 * spread across {@code numPartitions} partitions, for use as a dummy fixture.
 */
private JavaPairRDD<Row, Row> getDummyRDD(int numPartitions) {
  return Contexts.getSparkSession()
      .range(numPartitions)
      .javaRDD()
      .map(new LongToRowFunction())
      .keyBy(new ItselfFunction<Row>())
      .repartition(numPartitions);
}
代码示例来源:origin: org.apache.beam/beam-runners-spark
/**
 * An implementation of {@link Reshuffle} for the Spark runner.
 *
 * <p>Elements are serialized to byte arrays with the supplied coders so the shuffle
 * can move raw bytes across the network, repartitioned (the actual reshuffle), and
 * then deserialized back into windowed values.
 */
public static <K, V> JavaRDD<WindowedValue<KV<K, V>>> reshuffle(
    JavaRDD<WindowedValue<KV<K, V>>> rdd, Coder<K> keyCoder, WindowedValueCoder<V> wvCoder) {
  // Preserve the input's partition count across the round-trip.
  int numPartitions = rdd.getNumPartitions();
  return rdd
      .map(new ReifyTimestampsAndWindowsFunction<>())
      .map(WindowingHelpers.unwindowFunction())
      .mapToPair(TranslationUtils.toPairFunction())
      .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder))
      .repartition(numPartitions)
      .mapToPair(CoderHelpers.fromByteFunction(keyCoder, wvCoder))
      .map(TranslationUtils.fromPairFunction())
      .map(TranslationUtils.toKVByWindowInValue());
}
}
内容来源于网络,如有侵权,请联系作者删除!