This article collects code examples of the Java method org.apache.spark.api.java.JavaPairRDD.rdd(), showing how JavaPairRDD.rdd() is used in practice. The examples were extracted from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as useful references. Details of the JavaPairRDD.rdd() method:

Package: org.apache.spark.api.java
Class: JavaPairRDD
Method: rdd
Description: none given in the original; rdd() returns the underlying Scala RDD<Tuple2<K,V>> that the JavaPairRDD wraps.
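
Before the collected examples, here is a minimal, self-contained sketch of what rdd() gives you. It is an illustration only: the class name, the local[*] master, and the sample data are assumptions made for this demo, not taken from the original article.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

import scala.Tuple2;

public class RddMethodDemo {  // illustrative class name
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("rdd-demo").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(
          Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2)));

      // rdd() unwraps the Java-friendly facade and returns the Scala RDD
      // that backs it; nothing is copied or recomputed.
      RDD<Tuple2<String, Integer>> scalaRdd = pairs.rdd();
      System.out.println(scalaRdd.count()); // use the Scala API directly

      // JavaPairRDD.fromRDD(...) wraps a Scala RDD back into the Java API;
      // it needs ClassTags for the key and value types.
      JavaPairRDD<String, Integer> wrapped = JavaPairRDD.fromRDD(
          scalaRdd,
          scala.reflect.ClassTag$.MODULE$.apply(String.class),
          scala.reflect.ClassTag$.MODULE$.apply(Integer.class));
      System.out.println(wrapped.count());
    }
  }
}

Most of the examples below follow the same pattern: stay in the Java API and unwrap with rdd() at the moment a Scala-only API is needed.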
Code example source: origin: apache/drill

public static String rddGraphToString(JavaPairRDD rdd) {
  StringBuilder sb = new StringBuilder();
  rddToString(rdd.rdd(), sb, "");
  return sb.toString();
}
Code example source: origin: OryxProject/oryx

@Override
public void runUpdate(JavaSparkContext sparkContext,
                      long timestamp,
                      JavaPairRDD<K,M> newData,
                      JavaPairRDD<K,M> pastData,
                      String modelDirString,
                      TopicProducer<String,U> modelUpdateTopic) {
  scalaUpdate.configureUpdate(sparkContext.sc(),
                              timestamp,
                              newData.rdd(),
                              pastData == null ? null : pastData.rdd(),
                              modelDirString,
                              modelUpdateTopic);
}
Code example source: origin: OryxProject/oryx

@Override
public Iterable<U> buildUpdates(JavaPairRDD<K, M> newData) {
  return JavaConversions.asJavaIterable(scalaManager.buildUpdates(newData.rdd()));
}
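
Note that JavaConversions is the older implicit-conversion bridge between Scala and Java collections; it has been deprecated since Scala 2.12 in favor of the explicit JavaConverters/asScala style used in the combineByKey test further down.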
Code example source: origin: OryxProject/oryx

private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
    MatrixFactorizationModel mfModel,
    JavaRDD<Rating> data,
    JavaPairRDD<Integer,Integer> userProducts) {
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProductsRDD =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();
  return data.wrapRDD(mfModel.predict(userProductsRDD)).groupBy(Rating::user);
}
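
The double cast through RDD<?> is needed because mfModel.predict is a Scala method whose RDD[(Int, Int)] parameter appears to Java as RDD<Tuple2<Object,Object>>; the Java wrapper's RDD<Tuple2<Integer,Integer>> cannot be cast to it directly, so the code widens to the wildcard type first and suppresses the resulting unchecked warning.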
Code example source: origin: OryxProject/oryx

private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
    JavaPairRDD<String,float[]> javaRDD,
    Broadcast<? extends Map<String,Integer>> bIdToIndex) {
  RDD<Tuple2<Integer,double[]>> scalaRDD = javaRDD.mapToPair(t ->
      new Tuple2<>(bIdToIndex.value().get(t._1()), t._2())
  ).mapValues(f -> {
    // widen the float[] feature vector to double[]
    double[] d = new double[f.length];
    for (int i = 0; i < d.length; i++) {
      d[i] = f[i];
    }
    return d;
  }).rdd();
  // This mimics the persistence level established by ALS training methods
  scalaRDD.persist(StorageLevel.MEMORY_AND_DISK());
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,double[]>> objKeyRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) scalaRDD;
  return objKeyRDD;
}
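
Here rdd() is called once, at the end of the mapToPair/mapValues chain, so the whole transformation stays in the Java API until a Scala RDD is actually required; the same Object-key cast as in predictAll then matches the signature the Scala ALS code expects.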
Code example source: origin: apache/hive

@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
    JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
  JavaPairRDD<HiveKey, BytesWritable> rdd;
  if (totalOrder) {
    if (numPartitions > 0) {
      if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
        input.persist(StorageLevel.DISK_ONLY());
        sparkPlan.addCachedRDDId(input.id());
      }
      rdd = input.sortByKey(true, numPartitions);
    } else {
      rdd = input.sortByKey(true);
    }
  } else {
    Partitioner partitioner = new HashPartitioner(numPartitions);
    rdd = input.repartitionAndSortWithinPartitions(partitioner);
  }
  if (shuffleSerializer != null) {
    if (rdd.rdd() instanceof ShuffledRDD) {
      ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
    }
  }
  return rdd;
}
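
Here rdd() is used to reach an implementation detail that the Java wrapper hides: whether the shuffled result is a ShuffledRDD, and if so, which serializer it should use. Neither the instanceof check nor setSerializer(...) is available through JavaPairRDD itself.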
Code example source: origin: org.apache.spark/spark-core_2.10 (the same test appears verbatim in org.apache.spark/spark-core_2.11 and org.apache.spark/spark-core; the duplicates are omitted here)
@Test
public void combineByKey() {
  JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
  Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
  Function<Integer, Integer> createCombinerFunction = v1 -> v1;
  Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;
  JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
  Map<Integer, Integer> results = combinedRDD.collectAsMap();
  ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
  assertEquals(expected, results);

  Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
      combinedRDD.rdd(),
      JavaConverters.collectionAsScalaIterableConverter(
          Collections.<RDD<?>>emptyList()).asScala().toSeq());
  combinedRDD = originalRDD.keyBy(keyFunction)
      .combineByKey(
          createCombinerFunction,
          mergeValueFunction,
          mergeValueFunction,
          defaultPartitioner,
          false,
          new KryoSerializer(new SparkConf()));
  results = combinedRDD.collectAsMap();
  assertEquals(expected, results);
}
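
Partitioner.defaultPartitioner is a Scala API that takes the raw RDD plus a Scala Seq of further RDDs, which is why the test unwraps combinedRDD with rdd() and converts an empty Java list through JavaConverters.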
代码示例来源:origin: apache/hive
null);
SparkPlan sparkPlan = sparkPlanGenerator.generate(sparkTask.getWork());
RDD<Tuple2<HiveKey, BytesWritable>> reducerRdd = sparkPlan.generateGraph().rdd();
Code example source: origin: com.basho.riak/spark-riak-connector

@Override
public RiakRDD<Tuple2<K, V>> rdd() {
  return (RiakRDD<Tuple2<K, V>>) super.rdd();
}
Code example source: origin: com.datastax.spark/spark-cassandra-connector-unshaded (identical in spark-cassandra-connector-java_2.10; the duplicate is omitted here)

@Override
public CassandraRDD<Tuple2<K, V>> rdd() {
  return (CassandraRDD<Tuple2<K, V>>) super.rdd();
}
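
Both connectors override rdd() with a covariant return type: narrowing RDD<Tuple2<K,V>> to the connector's own RDD subclass lets callers keep using connector-specific operations without casting, while the cast to the subtype happens once, inside the override.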
Code example source: origin: com.datastax.spark/spark-cassandra-connector-java_2.10 (the same method appears verbatim in spark-cassandra-connector-unshaded, spark-cassandra-connector-java, spark-cassandra-connector, and spark-cassandra-connector_2.10; the duplicates are omitted here)

/**
 * A static factory method to create a {@link PairRDDJavaFunctions} based on an
 * existing {@link JavaPairRDD} instance.
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
  return new PairRDDJavaFunctions<>(rdd.rdd());
}
Code example source: origin: apache/crunch (identical in org.apache.crunch/crunch-spark; the duplicate is omitted here)

@Override
public JavaRDDLike<?, ?> getJavaRDDLike(SparkRuntime runtime) {
  if (!runtime.isValid(rdd)) {
    rdd = getJavaRDDLikeInternal(runtime);
    rdd.rdd().setName(getName());
    StorageLevel sl = runtime.getStorageLevel(this);
    if (sl != null) {
      rdd.rdd().persist(sl);
    }
  }
  return rdd;
}
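
Because the field is typed as JavaRDDLike, which does not expose setName or persist (those live on the concrete JavaRDD/JavaPairRDD classes), Crunch reaches through rdd() and applies both to the underlying Scala RDD instead.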