This article collects code examples of the org.apache.spark.api.java.JavaRDD.keyBy() method in Java, showing how JavaRDD.keyBy() is used in practice. The examples are extracted from selected open-source projects found on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of JavaRDD.keyBy() are as follows:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: keyBy
Description: keyBy(f) applies the function f to each element of the RDD and returns a JavaPairRDD in which each element is paired with the resulting key.
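Before the extracted examples, here is a minimal, self-contained sketch of keyBy() usage. It is illustrative only: the application name, local master, and sample data are assumptions, not taken from the projects below.

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class KeyByExample {
  public static void main(String[] args) {
    // Local Spark context, for demonstration only.
    SparkConf conf = new SparkConf().setAppName("KeyByExample").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<String> words = sc.parallelize(Arrays.asList("apple", "banana", "cherry"));
      // keyBy applies the function to each element and uses its result as the key.
      JavaPairRDD<Integer, String> byLength = words.keyBy(String::length);
      // Prints (5,apple), (6,banana), (6,cherry)
      byLength.collect().forEach(System.out::println);
    }
  }
}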
Code example source: org.apache.spark/spark-core (the same test appears in spark-core_2.10 and spark-core_2.11)
@Test
public void keyBy() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2));
  List<Tuple2<String, Integer>> s = rdd.keyBy(Object::toString).collect();
  assertEquals(new Tuple2<>("1", 1), s.get(0));
  assertEquals(new Tuple2<>("2", 2), s.get(1));
}
Code example source: org.apache.spark/spark-core_2.10 (the same test appears in spark-core and spark-core_2.11)
@Test
public void combineByKey() {
  JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
  Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
  Function<Integer, Integer> createCombinerFunction = v1 -> v1;
  Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;
  JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
  Map<Integer, Integer> results = combinedRDD.collectAsMap();
  ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
  assertEquals(expected, results);
  Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
      combinedRDD.rdd(),
      JavaConverters.collectionAsScalaIterableConverter(
          Collections.<RDD<?>>emptyList()).asScala().toSeq());
  combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(
          createCombinerFunction,
          mergeValueFunction,
          mergeValueFunction,
          defaultPartitioner,
          false,
          new KryoSerializer(new SparkConf()));
  results = combinedRDD.collectAsMap();
  assertEquals(expected, results);
}
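In this test, keyBy(v1 -> v1 % 3) converts the JavaRDD<Integer> into a JavaPairRDD<Integer, Integer>, which is what makes the pair-RDD aggregation API (combineByKey, reduceByKey, and so on) available. When the combiner is this simple (identity creation, plus-merge), the same result can be obtained more compactly with reduceByKey; a minimal sketch over the same data:

JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
// Sum the values that share a key; yields {0=9, 1=5, 2=7} as above.
Map<Integer, Integer> sums = numbers
    .keyBy(v -> v % 3)
    .reduceByKey((a, b) -> a + b)
    .collectAsMap();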
Code example source: cloudera-labs/envelope
@Override
public Dataset<Row> derive(Map<String, Dataset<Row>> dependencies) throws Exception {
  if (!dependencies.containsKey(intoDependency)) {
    throw new RuntimeException("Nest deriver points to non-existent nest-into dependency");
  }
  Dataset<Row> into = dependencies.get(intoDependency);
  if (!dependencies.containsKey(fromDependency)) {
    throw new RuntimeException("Nest deriver points to non-existent nest-from dependency");
  }
  Dataset<Row> from = dependencies.get(fromDependency);
  ExtractFieldsFunction extractFieldsFunction = new ExtractFieldsFunction(keyFieldNames);
  // Key both datasets by the same extracted field values so they can be cogrouped.
  JavaPairRDD<List<Object>, Row> keyedIntoRDD = into.javaRDD().keyBy(extractFieldsFunction);
  JavaPairRDD<List<Object>, Row> keyedFromRDD = from.javaRDD().keyBy(extractFieldsFunction);
  NestFunction nestFunction = new NestFunction();
  JavaRDD<Row> nestedRDD = keyedIntoRDD.cogroup(keyedFromRDD).values().map(nestFunction);
  StructType nestedSchema = into.schema().add(nestedFieldName, DataTypes.createArrayType(from.schema()));
  Dataset<Row> nested = into.sqlContext().createDataFrame(nestedRDD, nestedSchema);
  return nested;
}
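Keying two datasets with the same extractor and then cogrouping them, as above, is a common way to correlate records that share a key. A minimal sketch of the same shape with plain strings (the data and the first-letter key function are illustrative assumptions):

JavaRDD<String> left = sc.parallelize(Arrays.asList("apple", "avocado", "banana"));
JavaRDD<String> right = sc.parallelize(Arrays.asList("apricot", "blueberry"));
// cogroup yields, per key, the values from both sides as a pair of Iterables.
JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<String>>> grouped =
    left.keyBy(s -> s.substring(0, 1))
        .cogroup(right.keyBy(s -> s.substring(0, 1)));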
Code example source: uk.gov.gchq.gaffer/parquet-store
// Fragment from the original source: the enclosing method and the construction of
// keyExtractor and aggregator (which take isEntity, groupByColumns, columnToPaths,
// aggregatorJson, gafferGroupObjectConverter) are truncated in the extracted snippet.
final JavaRDD<Row> aggregatedRDD = data.javaRDD()
    .keyBy(keyExtractor)
    .reduceByKey(aggregator)
    .values();
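keyBy followed by reduceByKey and values is a round trip that merges the elements sharing a key and then discards the keys again. A minimal sketch with plain strings (the data and key function are illustrative assumptions):

JavaRDD<String> rows = sc.parallelize(Arrays.asList("a:1", "a:2", "b:3"));
// Key each row by the part before the colon, merge rows per key, keep only the merged rows.
JavaRDD<String> merged = rows
    .keyBy(s -> s.split(":")[0])
    .reduceByKey((x, y) -> x + "," + y)
    .values();  // "a:1,a:2" and "b:3"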
Code example source: cloudera-labs/envelope
private JavaRDD<Row> planMutationsByKey(Dataset<Row> arriving, List<String> keyFieldNames,
    Config plannerConfig, Config outputConfig) {
  JavaPairRDD<Row, Row> keyedArriving =
      arriving.javaRDD().keyBy(new ExtractKeyFunction(keyFieldNames, accumulators));
  JavaPairRDD<Row, Iterable<Row>> arrivingByKey =
      keyedArriving.groupByKey(getPartitioner(keyedArriving));
  JavaPairRDD<Row, Tuple2<Iterable<Row>, Iterable<Row>>> arrivingAndExistingByKey =
      arrivingByKey.mapPartitionsToPair(new JoinExistingForKeysFunction(outputConfig, keyFieldNames, accumulators));
  JavaRDD<Row> planned =
      arrivingAndExistingByKey.flatMap(new PlanForKeyFunction(plannerConfig, accumulators));
  return planned;
}
Code example source: cloudera-labs/envelope
private JavaPairRDD<Row, Row> getDummyRDD(int numPartitions) {
  return Contexts.getSparkSession().range(numPartitions).javaRDD()
      .map(new LongToRowFunction())
      .keyBy(new ItselfFunction<Row>())
      .repartition(numPartitions);
}
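ItselfFunction<Row> here is evidently an identity function, so keyBy pairs each row with itself. With a lambda, the same identity keying can be written directly (a minimal sketch, assuming an existing JavaRDD<String> named lines):

JavaPairRDD<String, String> selfKeyed = lines.keyBy(s -> s);  // each element becomes its own key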
Code example source: org.qcri.rheem/rheem-iejoin
// Fragment from the original source: the receivers of the first and third keyBy calls
// and several declarations are truncated in the extracted snippet.
    .keyBy(new extractData<Type0, Type1, Input>(get0Pivot_, get0Ref_))
    .sortByKey(new Data.Comparator<Type0, Type1>(list1ASC, list1ASCSec));
JavaPairRDD<Data<Type0, Type1>, Tuple2<Long, Input>> keyedDataRDD2 = inputRDD2UID
    .keyBy(new extractData<Type0, Type1, Input>(get1Pivot_, get1Ref_))
    .sortByKey(new Data.Comparator<Type0, Type1>(list2ASC, list2ASCSec));
    .keyBy(in -> in.getPartitionID());
JavaPairRDD<Long, List2AttributesObjectSkinny<Type0, Type1>> listObjectDataRDD2Indexd = listObjectDataRDD2
    .keyBy(in -> in.getPartitionID());
r1RowIDS = inputRDD1UID.keyBy(in -> in._1());
r2RowIDS = inputRDD2UID.keyBy(in -> in._1());
Code example source: org.qcri.rheem/rheem-iejoin
// Fragment from the original source: the receivers of the first two keyBy calls
// are truncated in the extracted snippet.
    .keyBy(new extractData<Type0, Type1, Input>(get0Pivot_, get0Ref_))
    .sortByKey(new Data.Comparator<Type0, Type1>(list1ASC, list1ASCSec));
    .keyBy(input -> input.getPartitionID());
r1RowIDS = inputRDD1UID.keyBy(in -> in._1());
r2RowIDS = r1RowIDS;
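keyBy(in -> in._1()) in these fragments re-keys an RDD of tuples by each tuple's first element. A minimal sketch of that pattern (the data is an illustrative assumption):

JavaRDD<Tuple2<Long, String>> records = sc.parallelize(Arrays.asList(
    new Tuple2<>(1L, "x"), new Tuple2<>(2L, "y")));
// Each tuple is keyed by its first field: (1,(1,x)) and (2,(2,y)).
JavaPairRDD<Long, Tuple2<Long, String>> byId = records.keyBy(t -> t._1());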