Usage of the org.apache.spark.api.java.JavaPairRDD.combineByKey() method, with code examples


This article collects code examples of the Java method org.apache.spark.api.java.JavaPairRDD.combineByKey() and shows how it is used in practice. The snippets come from selected open-source projects hosted on GitHub, Stack Overflow, Maven and similar platforms, so they serve as a reasonably representative reference. Details of JavaPairRDD.combineByKey() are as follows:
Package path: org.apache.spark.api.java.JavaPairRDD
Class name: JavaPairRDD
Method name: combineByKey

JavaPairRDD.combineByKey overview

combineByKey is the generic per-key aggregation primitive of a pair RDD. It takes three functions: createCombiner, which turns the first value seen for a key within a partition into an accumulator; mergeValue, which folds each further value for that key into that accumulator; and mergeCombiners, which merges the accumulators produced for the same key in different partitions. Overloads additionally accept a partition count or a Partitioner, and the most general form also takes a mapSideCombine flag and a custom Serializer. Higher-level operations such as reduceByKey and aggregateByKey are special cases of this primitive.
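Before the collected snippets, a minimal self-contained sketch may help as orientation. It computes a per-key average with the three functions described above; it is not taken from any of the projects below, and the class and variable names are purely illustrative.

import java.util.Arrays;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class CombineByKeyAverageExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("combineByKey-example");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaPairRDD<String, Integer> scores = sc.parallelizePairs(Arrays.asList(
          new Tuple2<>("a", 1), new Tuple2<>("a", 3), new Tuple2<>("b", 10)));

      // The accumulator ("combiner") is a (sum, count) pair per key.
      JavaPairRDD<String, Tuple2<Integer, Integer>> sumAndCount = scores.combineByKey(
          v -> new Tuple2<>(v, 1),                                   // createCombiner
          (acc, v) -> new Tuple2<>(acc._1() + v, acc._2() + 1),      // mergeValue
          (a, b) -> new Tuple2<>(a._1() + b._1(), a._2() + b._2())); // mergeCombiners

      JavaPairRDD<String, Double> averages =
          sumAndCount.mapValues(p -> p._1() / (double) p._2());
      Map<String, Double> result = averages.collectAsMap();
      System.out.println(result); // e.g. {a=2.0, b=10.0}
    }
  }
}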

Code examples

Code example source: databricks/learning-spark

JavaPairRDD<String, AvgCount> avgCounts = rdd.combineByKey(createAcc, addAndCount, combine);
Map<String, AvgCount> countMap = avgCounts.collectAsMap();
for (Entry<String, AvgCount> entry : countMap.entrySet()) {
  System.out.println(entry.getKey() + ":" + entry.getValue().avg()); // print each key's average
}
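The snippet relies on an AvgCount accumulator and the createAcc, addAndCount and combine functions that are defined elsewhere in the learning-spark example. The following is a sketch of what those helpers look like, reconstructed from the call pattern rather than copied from the repository:

import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

// Accumulator holding a running total and a count, so the average can be derived at the end.
public static class AvgCount implements java.io.Serializable {
  public int total;
  public int num;
  public AvgCount(int total, int num) { this.total = total; this.num = num; }
  public float avg() { return total / (float) num; }
}

Function<Integer, AvgCount> createAcc = v -> new AvgCount(v, 1);
Function2<AvgCount, Integer, AvgCount> addAndCount =
    (acc, v) -> new AvgCount(acc.total + v, acc.num + 1);
Function2<AvgCount, AvgCount, AvgCount> combine =
    (a, b) -> new AvgCount(a.total + b.total, a.num + b.num);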

Code example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in the org.apache.spark/spark-core_2.10 and org.apache.spark/spark-core artifacts, so it is shown only once here)

@Test
public void combineByKey() {
 JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
 Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
 Function<Integer, Integer> createCombinerFunction = v1 -> v1;
 Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;
 JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
  .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
 Map<Integer, Integer> results = combinedRDD.collectAsMap();
 ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
 assertEquals(expected, results);
 Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
  combinedRDD.rdd(),
  JavaConverters.collectionAsScalaIterableConverter(
   Collections.<RDD<?>>emptyList()).asScala().toSeq());
 combinedRDD = originalRDD.keyBy(keyFunction)
  .combineByKey(
   createCombinerFunction,
   mergeValueFunction,
   mergeValueFunction,
   defaultPartitioner,
   false,
   new KryoSerializer(new SparkConf()));
 results = combinedRDD.collectAsMap();
 assertEquals(expected, results);
}
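The test exercises two overloads: the three-function form and the six-argument form that additionally fixes a Partitioner, a mapSideCombine flag and a custom Serializer. JavaPairRDD also provides intermediate overloads that take only a partition count or a Partitioner; a minimal sketch reusing the functions defined in the test above (the partition count of 4 is illustrative):

import org.apache.spark.HashPartitioner;

// Same aggregation, but hash-partitioning the result into 4 partitions.
JavaPairRDD<Integer, Integer> combinedWithPartitioner = originalRDD.keyBy(keyFunction)
    .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction,
        new HashPartitioner(4));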

Code example source: mahmoudparsian/data-algorithms-book

unionRDD.combineByKey(createCombiner, addAndCount, mergeCombiners);

Code example source: mahmoudparsian/data-algorithms-book

JavaPairRDD<String, SortedMap<Integer, Integer>> combined = pairs.combineByKey(
    createCombiner,
    mergeValue,
    mergeCombiners);
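The createCombiner, mergeValue and mergeCombiners functions are defined elsewhere in the data-algorithms-book sources. A sketch of functions matching the result type shown above, assuming each value arrives as a Tuple2<Integer, Integer> of (timestamp, measurement) (an assumption about the surrounding example, not the book's exact code):

import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

// Collect each key's (timestamp, measurement) pairs into a map sorted by timestamp.
Function<Tuple2<Integer, Integer>, SortedMap<Integer, Integer>> createCombiner = v -> {
  SortedMap<Integer, Integer> map = new TreeMap<>();
  map.put(v._1(), v._2());
  return map;
};

Function2<SortedMap<Integer, Integer>, Tuple2<Integer, Integer>, SortedMap<Integer, Integer>> mergeValue =
    (map, v) -> { map.put(v._1(), v._2()); return map; };

Function2<SortedMap<Integer, Integer>, SortedMap<Integer, Integer>, SortedMap<Integer, Integer>> mergeCombiners =
    (m1, m2) -> { m1.putAll(m2); return m1; };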

Code example source: mahmoudparsian/data-algorithms-book

genes.combineByKey(createCombiner, addAndCount, mergeCombiners);

Code example source: mahmoudparsian/data-algorithms-book

JavaPairRDD<String, Map<String, Integer>> anagrams = rdd.combineByKey(
    createCombiner,
    mergeValue,
    mergeCombiners);
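Here each key (typically the sorted letters of a word) accumulates a Map<String, Integer> of word counts. A sketch of combiner functions with that shape, assuming each value is a single word (an assumption, not the book's exact code):

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;

Function<String, Map<String, Integer>> createCombiner = word -> {
  Map<String, Integer> counts = new HashMap<>();
  counts.put(word, 1);
  return counts;
};

Function2<Map<String, Integer>, String, Map<String, Integer>> mergeValue = (counts, word) -> {
  counts.merge(word, 1, Integer::sum);   // increment this word's count
  return counts;
};

Function2<Map<String, Integer>, Map<String, Integer>, Map<String, Integer>> mergeCombiners = (m1, m2) -> {
  m2.forEach((w, c) -> m1.merge(w, c, Integer::sum));  // add the counts from the other partition
  return m1;
};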

Code example source: mahmoudparsian/data-algorithms-book

JavaPairRDD<String, AvgCount> avgCounts = rdd.combineByKey(
    createAcc,
    addAndCount,
    combine);

Code example source: scipr-lab/dizk

protected static <FieldT extends AbstractFieldElementExpanded<FieldT>>
JavaPairRDD<Long, ArrayList<Tuple2<Long, FieldT>>> getInputColumns(
    final Circuit<FieldT> circuit,
    final JavaPairRDD<Long, FieldT> input,
    final boolean transpose) {
    final long inputSize = circuit.inputSize;
    final Combiner<FieldT> combine = new Combiner<>();

    return input.mapToPair(element -> {
      final long group = transpose ? element._1 / inputSize : element._1 % inputSize;
      final long index = transpose ? element._1 % inputSize : element._1 / inputSize;

      return new Tuple2<>(group, new Tuple2<>(index, element._2));
    }).combineByKey(combine.createGroup, combine.mergeElement, combine.mergeCombiner);
  }
}
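Combiner<FieldT> is a helper class in the dizk codebase; from the way its members createGroup, mergeElement and mergeCombiner are used here, it collects (index, element) tuples into one ArrayList per group key. A rough sketch of such a helper under that assumption (not the actual dizk implementation):

import java.io.Serializable;
import java.util.ArrayList;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

public class Combiner<T> implements Serializable {
  // createCombiner: start a list with the first (index, element) pair of a group.
  public final Function<Tuple2<Long, T>, ArrayList<Tuple2<Long, T>>> createGroup = e -> {
    ArrayList<Tuple2<Long, T>> list = new ArrayList<>();
    list.add(e);
    return list;
  };

  // mergeValue: append a further pair seen in the same partition.
  public final Function2<ArrayList<Tuple2<Long, T>>, Tuple2<Long, T>, ArrayList<Tuple2<Long, T>>> mergeElement =
      (list, e) -> { list.add(e); return list; };

  // mergeCombiners: concatenate lists built in different partitions.
  public final Function2<ArrayList<Tuple2<Long, T>>, ArrayList<Tuple2<Long, T>>, ArrayList<Tuple2<Long, T>>> mergeCombiner =
      (a, b) -> { a.addAll(b); return a; };
}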

Code example source: scipr-lab/dizk

public static <FieldT extends AbstractFieldElementExpanded<FieldT>> FieldT
parallelEvaluatePolynomial(
    final JavaPairRDD<Long, FieldT> input,
    final FieldT element,
    final int partitionSize) {
  final FieldT groupPartitionSize = element.pow(partitionSize);
  final Combiner<FieldT> combine = new Combiner<>();
  return input.mapToPair(item -> {
    final long group = item._1 / partitionSize;
    final long index = item._1 % partitionSize;
    return new Tuple2<>(group, new Tuple2<>(index, item._2));
  }).combineByKey(combine.createGroup, combine.mergeElement, combine.mergeCombiner).map(group -> {
    final List<FieldT> subPolynomial = Utils.convertFromPairs(group._2, group._2.size());
    return evaluatePolynomial(subPolynomial, element).mul(groupPartitionSize.pow(group._1));
  }).reduce(FieldT::add);
}
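parallelEvaluatePolynomial evaluates the coefficient RDD blockwise: each group of partitionSize consecutive coefficients is evaluated as a sub-polynomial at element and then scaled by element raised to partitionSize times the group index. A tiny arithmetic check of that decomposition with illustrative values (plain Java, no Spark):

// P(x) = 1 + 2x + 3x^2 + 4x^3, partitionSize = 2, evaluated at x = 5.
long x = 5;
long p0 = 1 + 2 * x;                           // sub-polynomial of group 0 (coefficients 1, 2)
long p1 = 3 + 4 * x;                           // sub-polynomial of group 1 (coefficients 3, 4)
long direct  = 1 + 2 * x + 3 * x * x + 4 * x * x * x;
long grouped = p0 + p1 * (x * x);              // each p_g scaled by x^(g * partitionSize)
System.out.println(direct + " == " + grouped); // both are 586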

Code example source: scipr-lab/dizk

}).combineByKey(combine.createGroup, combine.mergeElement, combine.mergeCombiner);

Code example source: spirom/learning-spark-with-java

pairsRDD.combineByKey(
  value -> new Tuple2<>(value, 1),
  (sumAndCount, value) -> new Tuple2<>(sumAndCount._1() + value, sumAndCount._2() + 1),
  (sumAndCount1, sumAndCount2) ->
    new Tuple2<>(sumAndCount1._1() + sumAndCount2._1(), sumAndCount1._2() + sumAndCount2._2()));
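The accumulator here is a (sum, count) tuple, so the per-key average is usually obtained with a following mapValues step. A sketch, assuming pairsRDD is a JavaPairRDD<String, Integer>:

JavaPairRDD<String, Tuple2<Integer, Integer>> sumAndCount = pairsRDD.combineByKey(
    value -> new Tuple2<>(value, 1),
    (acc, value) -> new Tuple2<>(acc._1() + value, acc._2() + 1),
    (acc1, acc2) -> new Tuple2<>(acc1._1() + acc2._1(), acc1._2() + acc2._2()));

JavaPairRDD<String, Double> averages =
    sumAndCount.mapValues(acc -> acc._1() / (double) acc._2());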

Code example source: org.datavec/datavec-spark (the identical snippet also appears in the org.datavec/datavec-spark_2.11 artifact)

grouped.combineByKey(createCombiner, mergeValue, mergeCombiners).values();
