This article collects a number of code examples for the Java method org.apache.spark.api.java.JavaPairRDD.combineByKey(), illustrating how JavaPairRDD.combineByKey() is used in practice. The examples are drawn mainly from GitHub, Stack Overflow, Maven, and similar sources, extracted from a selection of open-source projects, and should serve as useful references. The details of the JavaPairRDD.combineByKey() method are as follows:
Package: org.apache.spark.api.java
Class: JavaPairRDD
Method: combineByKey
Description: none was given in the original source. In short, combineByKey aggregates the values of each key using three user-supplied functions: createCombiner turns the first value seen for a key into an accumulator, mergeValue folds further values into that accumulator within a partition, and mergeCombiners merges accumulators across partitions.
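Before the extracted snippets, a minimal, self-contained sketch of this three-function form may help orient the reader (the class name, sample data, and averaging step below are illustrative, not taken from any of the projects cited later):

import java.util.Arrays;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class CombineByKeyDemo {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("combineByKey-demo");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaPairRDD<String, Integer> scores = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("a", 1), new Tuple2<>("a", 3), new Tuple2<>("b", 4)));

      // Accumulate a (sum, count) pair per key, then derive the average.
      JavaPairRDD<String, Tuple2<Integer, Integer>> sumCounts = scores.combineByKey(
          v -> new Tuple2<>(v, 1),                                   // createCombiner
          (acc, v) -> new Tuple2<>(acc._1() + v, acc._2() + 1),      // mergeValue
          (a, b) -> new Tuple2<>(a._1() + b._1(), a._2() + b._2())); // mergeCombiners

      Map<String, Double> averages = sumCounts
          .mapValues(t -> (double) t._1() / t._2())
          .collectAsMap();
      System.out.println(averages); // {a=2.0, b=4.0}
    }
  }
}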
Code example source: origin: databricks/learning-spark
JavaPairRDD<String, AvgCount> avgCounts = rdd.combineByKey(createAcc, addAndCount, combine);
Map<String, AvgCount> countMap = avgCounts.collectAsMap();
for (Entry<String, AvgCount> entry : countMap.entrySet()) {
  System.out.println(entry.getKey() + " " + entry.getValue().avg()); // body restored; it was cut off in the original extract
}
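The snippet above depends on helper definitions that the extract omits. In the learning-spark examples they plausibly look like the following (a reconstruction for readability, not the repository's verbatim code):

import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

// Serializable (sum, count) accumulator.
public static class AvgCount implements java.io.Serializable {
  public int total_;
  public int num_;
  public AvgCount(int total, int num) { total_ = total; num_ = num; }
  public float avg() { return total_ / (float) num_; }
}

Function<Integer, AvgCount> createAcc = x -> new AvgCount(x, 1);

Function2<AvgCount, Integer, AvgCount> addAndCount = (a, x) -> {
  a.total_ += x; a.num_ += 1; return a;
};

Function2<AvgCount, AvgCount, AvgCount> combine = (a, b) -> {
  a.total_ += b.total_; a.num_ += b.num_; return a;
};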
Code example source: origin: org.apache.spark/spark-core_2.11
@Test
public void combineByKey() {
  JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
  Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
  Function<Integer, Integer> createCombinerFunction = v1 -> v1;
  Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;
  JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
  Map<Integer, Integer> results = combinedRDD.collectAsMap();
  ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
  assertEquals(expected, results);

  Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
      combinedRDD.rdd(),
      JavaConverters.collectionAsScalaIterableConverter(
          Collections.<RDD<?>>emptyList()).asScala().toSeq());
  combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(
          createCombinerFunction,
          mergeValueFunction,
          mergeValueFunction,
          defaultPartitioner,
          false,
          new KryoSerializer(new SparkConf()));
  results = combinedRDD.collectAsMap();
  assertEquals(expected, results);
}
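The expected map follows directly from keyBy(v1 -> v1 % 3): values 3 and 6 map to key 0 (sum 9), 1 and 4 to key 1 (sum 5), and 2 and 5 to key 2 (sum 7). Because createCombinerFunction is the identity and both merge roles reuse the same addition function, this combineByKey call reduces to a per-key sum. The second half of the test exercises the extended overload that additionally takes a Partitioner, a map-side-combine flag, and a custom Serializer.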
Code example source: origin: org.apache.spark/spark-core_2.10 (same test as the spark-core_2.11 example above)
Code example source: origin: org.apache.spark/spark-core (same test as the spark-core_2.11 example above)
Code example source: origin: mahmoudparsian/data-algorithms-book
unionRDD.combineByKey(createCombiner, addAndCount, mergeCombiners);
Code example source: origin: mahmoudparsian/data-algorithms-book
JavaPairRDD<String, SortedMap<Integer, Integer>> combined = pairs.combineByKey(
    createCombiner,
    mergeValue,
    mergeCombiners); // trailing argument and parenthesis restored; they were cut off in the original extract
Code example source: origin: mahmoudparsian/data-algorithms-book
genes.combineByKey(createCombiner, addAndCount, mergeCombiners);
Code example source: origin: mahmoudparsian/data-algorithms-book
JavaPairRDD<String, Map<String, Integer>> anagrams = rdd.combineByKey(
    createCombiner,
    mergeValue,
    mergeCombiners); // trailing argument and parenthesis restored; they were cut off in the original extract
Code example source: origin: mahmoudparsian/data-algorithms-book
JavaPairRDD<String, AvgCount> avgCounts = rdd.combineByKey(
    createAcc,
    addAndCount,
    combine); // trailing argument and parenthesis were cut off in the original extract; `combine` follows the learning-spark example above
Code example source: origin: scipr-lab/dizk
protected static <FieldT extends AbstractFieldElementExpanded<FieldT>>
JavaPairRDD<Long, ArrayList<Tuple2<Long, FieldT>>> getInputColumns(
        final Circuit<FieldT> circuit,
        final JavaPairRDD<Long, FieldT> input,
        final boolean transpose) {
    final long inputSize = circuit.inputSize;
    final Combiner<FieldT> combine = new Combiner<>();
    // Re-key each element by its column (or row, when transposed), then use
    // combineByKey to gather each group's (index, value) pairs into a list.
    return input.mapToPair(element -> {
        final long group = transpose ? element._1 / inputSize : element._1 % inputSize;
        final long index = transpose ? element._1 % inputSize : element._1 / inputSize;
        return new Tuple2<>(group, new Tuple2<>(index, element._2));
    }).combineByKey(combine.createGroup, combine.mergeElement, combine.mergeCombiner);
}
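The Combiner helper used here is defined elsewhere in the dizk repository. Judging from the return type JavaPairRDD<Long, ArrayList<Tuple2<Long, FieldT>>>, it plausibly looks like the following reconstruction (a sketch, not the repository's verbatim code; the field names createGroup, mergeElement, and mergeCombiner are taken from the usage above):

import java.io.Serializable;
import java.util.ArrayList;

import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

import scala.Tuple2;

public class Combiner<FieldT> implements Serializable {
  // createCombiner: start a new group from the first (index, value) pair.
  public final Function<Tuple2<Long, FieldT>, ArrayList<Tuple2<Long, FieldT>>> createGroup =
      item -> { ArrayList<Tuple2<Long, FieldT>> list = new ArrayList<>(); list.add(item); return list; };
  // mergeValue: append a pair to an existing group within a partition.
  public final Function2<ArrayList<Tuple2<Long, FieldT>>, Tuple2<Long, FieldT>, ArrayList<Tuple2<Long, FieldT>>> mergeElement =
      (list, item) -> { list.add(item); return list; };
  // mergeCombiners: concatenate two partial groups from different partitions.
  public final Function2<ArrayList<Tuple2<Long, FieldT>>, ArrayList<Tuple2<Long, FieldT>>, ArrayList<Tuple2<Long, FieldT>>> mergeCombiner =
      (a, b) -> { a.addAll(b); return a; };
}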
Code example source: origin: scipr-lab/dizk
public static <FieldT extends AbstractFieldElementExpanded<FieldT>> FieldT
parallelEvaluatePolynomial(
        final JavaPairRDD<Long, FieldT> input,
        final FieldT element,
        final int partitionSize) {
    final FieldT groupPartitionSize = element.pow(partitionSize);
    final Combiner<FieldT> combine = new Combiner<>();
    // Split the coefficients into consecutive blocks of partitionSize, gather each
    // block with combineByKey, evaluate it as a small polynomial at `element`,
    // scale by (element^partitionSize)^group, and sum the per-block results.
    return input.mapToPair(item -> {
        final long group = item._1 / partitionSize;
        final long index = item._1 % partitionSize;
        return new Tuple2<>(group, new Tuple2<>(index, item._2));
    }).combineByKey(combine.createGroup, combine.mergeElement, combine.mergeCombiner).map(group -> {
        final List<FieldT> subPolynomial = Utils.convertFromPairs(group._2, group._2.size());
        return evaluatePolynomial(subPolynomial, element).mul(groupPartitionSize.pow(group._1));
    }).reduce(FieldT::add);
}
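The algebra behind this: writing s for partitionSize, the polynomial splits as P(x) = Σ_g (x^s)^g · P_g(x), where P_g collects the s coefficients with indices g·s through g·s + s − 1. Each block P_g can therefore be evaluated independently at `element`, scaled by groupPartitionSize.pow(g), and the scaled results summed with reduce.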
Code example source: origin: spirom/learning-spark-with-java
pairsRDD.combineByKey(
    value -> new Tuple2<>(value, 1),
    (sumAndCount, value) -> new Tuple2<>(sumAndCount._1() + value, sumAndCount._2() + 1),
    // mergeCombiners was cut off in the original extract; the natural completion is:
    (sc1, sc2) -> new Tuple2<>(sc1._1() + sc2._1(), sc1._2() + sc2._2()));
Code example source: origin: org.datavec/datavec-spark
grouped.combineByKey(createCombiner, mergeValue, mergeCombiners).values();
Code example source: origin: org.datavec/datavec-spark_2.11 (same snippet as the datavec-spark example above)
The content above was collected from the public web; if it infringes your rights, please contact the author to have it removed.