本文整理了Java中org.apache.spark.api.java.JavaPairRDD.foldByKey()
方法的一些代码示例,展示了JavaPairRDD.foldByKey()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.foldByKey()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:foldByKey
暂无
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void foldByKey() {
// Duplicate keys on purpose: 2 appears twice, 3 twice, 1 once.
JavaPairRDD<Integer, Integer> pairRdd = sc.parallelizePairs(Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
));
// Fold each key's values starting from the zero element 0, i.e. a per-key sum.
JavaPairRDD<Integer, Integer> totals = pairRdd.foldByKey(0, Integer::sum);
// Expected sums: key 1 -> 1, key 2 -> 1+1, key 3 -> 2+1.
Assert.assertEquals(1, totals.lookup(1).get(0).intValue());
Assert.assertEquals(2, totals.lookup(2).get(0).intValue());
Assert.assertEquals(3, totals.lookup(3).get(0).intValue());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void foldByKey() {
// Input deliberately contains repeated keys so the fold has work to do.
JavaPairRDD<Integer, Integer> input = sc.parallelizePairs(Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
));
// foldByKey with zero value 0 and addition computes a per-key sum.
JavaPairRDD<Integer, Integer> perKeySums = input.foldByKey(0, Integer::sum);
// key 1 -> 1; key 2 -> 1+1 = 2; key 3 -> 2+1 = 3.
Assert.assertEquals(1, perKeySums.lookup(1).get(0).intValue());
Assert.assertEquals(2, perKeySums.lookup(2).get(0).intValue());
Assert.assertEquals(3, perKeySums.lookup(3).get(0).intValue());
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void foldByKey() {
// Pairs with duplicated keys 2 and 3; key 1 occurs once.
List<Tuple2<Integer, Integer>> data = Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
);
// Summing fold: zero value 0 plus all values observed for each key.
JavaPairRDD<Integer, Integer> folded =
sc.parallelizePairs(data).foldByKey(0, Integer::sum);
Assert.assertEquals(1, folded.lookup(1).get(0).intValue());
Assert.assertEquals(2, folded.lookup(2).get(0).intValue());
Assert.assertEquals(3, folded.lookup(3).get(0).intValue());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
// Build a small pair RDD with repeated keys so per-key folding aggregates values.
JavaPairRDD<Integer, Integer> pairRdd = sc.parallelizePairs(Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
));
// Zero value 0 with addition yields the sum of each key's values.
JavaPairRDD<Integer, Integer> totals = pairRdd.foldByKey(0, Integer::sum);
assertEquals(1, totals.lookup(1).get(0).intValue());
assertEquals(2, totals.lookup(2).get(0).intValue());
assertEquals(3, totals.lookup(3).get(0).intValue());
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
// Keys 2 and 3 repeat; key 1 is unique.
List<Tuple2<Integer, Integer>> data = Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
);
JavaPairRDD<Integer, Integer> input = sc.parallelizePairs(data);
// Per-key sum via fold with zero element 0.
JavaPairRDD<Integer, Integer> perKeySums = input.foldByKey(0, Integer::sum);
// Expected: 1 -> 1, 2 -> 2, 3 -> 3.
assertEquals(1, perKeySums.lookup(1).get(0).intValue());
assertEquals(2, perKeySums.lookup(2).get(0).intValue());
assertEquals(3, perKeySums.lookup(3).get(0).intValue());
}
代码示例来源:origin: org.apache.spark/spark-core
@SuppressWarnings("unchecked")
@Test
public void foldByKey() {
// Fold the values of each key, starting from 0, using addition;
// this is equivalent to a per-key sum of the input pairs below.
JavaPairRDD<Integer, Integer> folded = sc.parallelizePairs(Arrays.asList(
new Tuple2<>(2, 1),
new Tuple2<>(2, 1),
new Tuple2<>(1, 1),
new Tuple2<>(3, 2),
new Tuple2<>(3, 1)
)).foldByKey(0, Integer::sum);
assertEquals(1, folded.lookup(1).get(0).intValue());
assertEquals(2, folded.lookup(2).get(0).intValue());
assertEquals(3, folded.lookup(3).get(0).intValue());
}
代码示例来源:origin: OryxProject/oryx
} else {
aggregated = tuples.foldByKey(Double.NaN, (current, next) -> next);
代码示例来源:origin: OryxProject/oryx
/**
 * Combines {@link Rating}s with the same user/item into one, with score as the sum of
 * all of the scores.
 *
 * @param original ratings, possibly containing several entries per user/item pair
 * @param epsilon divisor applied to the score before {@code log1p} when strengths are logged
 * @return one {@link Rating} per user/item pair, with NaN-scored pairs dropped
 */
private JavaRDD<Rating> aggregateScores(JavaRDD<? extends Rating> original, double epsilon) {
  // Key every rating by its (user, product) pair.
  JavaPairRDD<Tuple2<Integer,Integer>,Double> keyedScores = original.mapToPair(
      r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));
  JavaPairRDD<Tuple2<Integer,Integer>,Double> combined;
  if (implicit) {
    // TODO can we avoid groupByKey? reduce, combine, fold don't seem viable since
    // they don't guarantee the delete elements are properly handled
    combined = keyedScores.groupByKey().mapValues(MLFunctions.SUM_WITH_NAN);
  } else {
    // For non-implicit, last wins.
    combined = keyedScores.foldByKey(Double.NaN, (prev, next) -> next);
  }
  // Drop pairs whose combined score is NaN (e.g. untouched fold zero values).
  JavaPairRDD<Tuple2<Integer,Integer>,Double> defined =
      combined.filter(kv -> !Double.isNaN(kv._2()));
  return defined.map(userProductScore -> {
    double score = userProductScore._2();
    // When logging strengths, transform the score; otherwise pass it through unchanged.
    double finalScore = logStrength ? Math.log1p(score / epsilon) : score;
    return new Rating(userProductScore._1()._1(), userProductScore._1()._2(), finalScore);
  });
}
代码示例来源:origin: spirom/learning-spark-with-java
pairsRDD.foldByKey(1, (x, y) -> x * y);
代码示例来源:origin: com.cloudera.oryx/oryx-app
} else {
aggregated = tuples.foldByKey(Double.NaN, (current, next) -> next);
内容来源于网络,如有侵权,请联系作者删除!