Usage of the org.apache.spark.api.java.JavaPairRDD.foldByKey() method, with code examples


This article collects code examples of the org.apache.spark.api.java.JavaPairRDD.foldByKey() method in Java and shows how JavaPairRDD.foldByKey() is used in practice. The examples are taken from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should serve as useful references. Details of JavaPairRDD.foldByKey() are as follows:
Package path: org.apache.spark.api.java.JavaPairRDD
Class name: JavaPairRDD
Method name: foldByKey

About JavaPairRDD.foldByKey

foldByKey merges the values of each key using an associative function and a neutral "zero value". Because the zero value may be folded into the result more than once (typically once per partition), it must not change the result: for example 0 for addition, 1 for multiplication, or an empty list for concatenation.
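
The unit-test snippets below come from Spark's own test suite and rely on a JavaSparkContext field named sc that the test harness sets up. For orientation, a minimal self-contained sketch (hypothetical class name, local master, and made-up data) might look like this:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class FoldByKeyExample {
 public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("foldByKey-example").setMaster("local[*]");
  try (JavaSparkContext sc = new JavaSparkContext(conf)) {
   JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("a", 1),
    new Tuple2<>("a", 2),
    new Tuple2<>("b", 5)));
   // 0 is the neutral element for addition, so the values of each key are summed:
   // a -> 3, b -> 5
   JavaPairRDD<String, Integer> sums = pairs.foldByKey(0, (x, y) -> x + y);
   sums.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
  }
 }
}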

Code examples

Code example source: org.apache.spark/spark-core_2.11

@Test
public void foldByKey() {
 List<Tuple2<Integer, Integer>> pairs = Arrays.asList(
  new Tuple2<>(2, 1),
  new Tuple2<>(2, 1),
  new Tuple2<>(1, 1),
  new Tuple2<>(3, 2),
  new Tuple2<>(3, 1)
 );
 JavaPairRDD<Integer, Integer> rdd = sc.parallelizePairs(pairs);
 JavaPairRDD<Integer, Integer> sums = rdd.foldByKey(0, (a, b) -> a + b);
 Assert.assertEquals(1, sums.lookup(1).get(0).intValue());
 Assert.assertEquals(2, sums.lookup(2).get(0).intValue());
 Assert.assertEquals(3, sums.lookup(3).get(0).intValue());
}

Code example source: org.apache.spark/spark-core_2.10, org.apache.spark/spark-core_2.11, and org.apache.spark/spark-core

The same foldByKey test appears verbatim in each of these modules, both in the form shown above and in a variant annotated with @SuppressWarnings("unchecked") that uses a statically imported assertEquals.

Code example source: OryxProject/oryx

/**
 * Combines {@link Rating}s with the same user/item into one, with score as the sum of
 * all of the scores.
 */
private JavaRDD<Rating> aggregateScores(JavaRDD<? extends Rating> original, double epsilon) {
 JavaPairRDD<Tuple2<Integer,Integer>,Double> tuples =
   original.mapToPair(rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
 JavaPairRDD<Tuple2<Integer,Integer>,Double> aggregated;
 if (implicit) {
  // TODO can we avoid groupByKey? reduce, combine, fold don't seem viable since
  // they don't guarantee the delete elements are properly handled
  aggregated = tuples.groupByKey().mapValues(MLFunctions.SUM_WITH_NAN);
 } else {
  // For non-implicit, last wins.
  aggregated = tuples.foldByKey(Double.NaN, (current, next) -> next);
 }
 JavaPairRDD<Tuple2<Integer,Integer>,Double> noNaN =
   aggregated.filter(kv -> !Double.isNaN(kv._2()));
 if (logStrength) {
  return noNaN.map(userProductScore -> new Rating(
    userProductScore._1()._1(),
    userProductScore._1()._2(),
    Math.log1p(userProductScore._2() / epsilon)));
 } else {
  return noNaN.map(userProductScore -> new Rating(
    userProductScore._1()._1(),
    userProductScore._1()._2(),
    userProductScore._2()));
 }
}
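
The foldByKey call in the non-implicit branch above implements the "last wins" rule: Double.NaN is the zero value, and the fold function discards the accumulator and returns the newest value, so keys whose final score is NaN (a deletion marker) can be filtered out afterwards. A minimal standalone sketch of that pattern, assuming an existing JavaSparkContext sc, hypothetical (user, item) keys, and a single partition so the fold order is deterministic:

// Hypothetical scores keyed by (user, item); NaN marks a deleted score.
List<Tuple2<Tuple2<Integer,Integer>,Double>> scores = Arrays.asList(
 new Tuple2<>(new Tuple2<>(1, 10), 3.0),
 new Tuple2<>(new Tuple2<>(1, 10), 5.0),   // later score for the same user/item
 new Tuple2<>(new Tuple2<>(2, 20), Double.NaN));
JavaPairRDD<Tuple2<Integer,Integer>,Double> tuples = sc.parallelizePairs(scores, 1);

// The NaN zero value is replaced as soon as a real value arrives, because the fold
// function ignores the accumulator and keeps the newest value; NaN survives only
// when it is the last value seen for a key.
JavaPairRDD<Tuple2<Integer,Integer>,Double> lastWins =
 tuples.foldByKey(Double.NaN, (current, next) -> next)
    .filter(kv -> !Double.isNaN(kv._2()));
// Result: (1,10) -> 5.0; the (2,20) entry is dropped.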

Code example source: spirom/learning-spark-with-java

pairsRDD.foldByKey(1, (x, y) -> x * y);
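
The one-liner above multiplies all the values of a key together; 1 is used as the zero value because it is the multiplicative identity and may be folded in once per partition without changing the result. A short sketch with hypothetical data (pairsRDD itself is defined elsewhere in that project), again assuming an existing JavaSparkContext sc:

JavaPairRDD<String, Integer> pairsRDD = sc.parallelizePairs(Arrays.asList(
 new Tuple2<>("a", 2),
 new Tuple2<>("a", 3),
 new Tuple2<>("b", 4)));

// Per-key products: a -> 6, b -> 4
JavaPairRDD<String, Integer> products = pairsRDD.foldByKey(1, (x, y) -> x * y);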

Code example source: com.cloudera.oryx/oryx-app

The same aggregateScores code as in the OryxProject/oryx example above.
