Usage of the org.apache.spark.api.java.JavaPairRDD.rdd() method, with code examples


This article collects a number of Java code examples of the org.apache.spark.api.java.JavaPairRDD.rdd() method and shows how JavaPairRDD.rdd() is used in practice. The examples come mainly from platforms such as GitHub, Stack Overflow, and Maven, and were extracted from selected open-source projects, so they should be a useful reference. Details of JavaPairRDD.rdd() are as follows:
Package path: org.apache.spark.api.java.JavaPairRDD
Class name: JavaPairRDD
Method name: rdd

About JavaPairRDD.rdd

JavaPairRDD is the Java-friendly wrapper around a Scala RDD of key/value pairs. Its rdd() method returns the underlying Scala RDD<Tuple2<K, V>>, which is what you need when passing data from Java code to APIs that expect a plain Scala RDD (for example MLlib or third-party Scala connectors).
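As a minimal, self-contained sketch (not taken from any of the projects below; the class and variable names are illustrative only), converting a JavaPairRDD to its Scala representation looks roughly like this:

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

import scala.Tuple2;

public class RddConversionExample {
 public static void main(String[] args) {
  JavaSparkContext sc = new JavaSparkContext("local[*]", "rdd-example");

  // Build a small JavaPairRDD<String, Integer> from local data.
  JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(Arrays.asList(
    new Tuple2<>("a", 1),
    new Tuple2<>("b", 2)));

  // rdd() exposes the underlying Scala RDD of Tuple2 values, which is
  // what Scala-facing APIs (MLlib, connectors, ...) usually expect.
  RDD<Tuple2<String, Integer>> scalaRdd = pairs.rdd();
  System.out.println(scalaRdd.count());

  // JavaPairRDD.fromRDD(...) converts back the other way; note that it
  // additionally requires ClassTags for the key and value types.
  sc.stop();
 }
}

The examples below follow the same pattern: the Java-side wrapper is unwrapped with rdd() at the boundary where a Scala API is called.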

Code examples

Code example source: apache/drill

public static String rddGraphToString(JavaPairRDD rdd) {
 StringBuilder sb = new StringBuilder();
 rddToString(rdd.rdd(), sb, "");
 return sb.toString();
}

Code example source: OryxProject/oryx

@Override
public void runUpdate(JavaSparkContext sparkContext,
           long timestamp,
           JavaPairRDD<K,M> newData,
           JavaPairRDD<K,M> pastData,
           String modelDirString,
           TopicProducer<String,U> modelUpdateTopic) {
 scalaUpdate.configureUpdate(sparkContext.sc(),
               timestamp,
               newData.rdd(),
               pastData == null ? null : pastData.rdd(),
               modelDirString,
               modelUpdateTopic);
}

Code example source: OryxProject/oryx

@Override
public Iterable<U> buildUpdates(JavaPairRDD<K, M> newData) {
 return JavaConversions.asJavaIterable(scalaManager.buildUpdates(newData.rdd()));
}

Code example source: OryxProject/oryx

private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
  MatrixFactorizationModel mfModel,
  JavaRDD<Rating> data,
  JavaPairRDD<Integer,Integer> userProducts) {
 @SuppressWarnings("unchecked")
 RDD<Tuple2<Object,Object>> userProductsRDD =
   (RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();
 return data.wrapRDD(mfModel.predict(userProductsRDD)).groupBy(Rating::user);
}

Code example source: OryxProject/oryx

private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
  JavaPairRDD<String,float[]> javaRDD,
  Broadcast<? extends Map<String,Integer>> bIdToIndex) {
 RDD<Tuple2<Integer,double[]>> scalaRDD = javaRDD.mapToPair(t ->
   new Tuple2<>(bIdToIndex.value().get(t._1()), t._2())
 ).mapValues(f -> {
   double[] d = new double[f.length];
   for (int i = 0; i < d.length; i++) {
    d[i] = f[i];
   }
   return d;
  }
 ).rdd();
 // This mimics the persistence level established by the ALS training methods
 scalaRDD.persist(StorageLevel.MEMORY_AND_DISK());
 @SuppressWarnings("unchecked")
 RDD<Tuple2<Object,double[]>> objKeyRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) scalaRDD;
 return objKeyRDD;
}

Code example source: apache/hive

@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
  JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
 JavaPairRDD<HiveKey, BytesWritable> rdd;
 if (totalOrder) {
  if (numPartitions > 0) {
   if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
    input.persist(StorageLevel.DISK_ONLY());
    sparkPlan.addCachedRDDId(input.id());
   }
   rdd = input.sortByKey(true, numPartitions);
  } else {
   rdd = input.sortByKey(true);
  }
 } else {
  Partitioner partitioner = new HashPartitioner(numPartitions);
  rdd = input.repartitionAndSortWithinPartitions(partitioner);
 }
 if (shuffleSerializer != null) {
  if (rdd.rdd() instanceof ShuffledRDD) {
   ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
  }
 }
 return rdd;
}

Code example source: org.apache.spark/spark-core_2.10 (the same test also appears verbatim in the spark-core_2.11 and spark-core artifacts)

@Test
public void combineByKey() {
 JavaRDD<Integer> originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6));
 Function<Integer, Integer> keyFunction = v1 -> v1 % 3;
 Function<Integer, Integer> createCombinerFunction = v1 -> v1;
 Function2<Integer, Integer, Integer> mergeValueFunction = (v1, v2) -> v1 + v2;
 JavaPairRDD<Integer, Integer> combinedRDD = originalRDD.keyBy(keyFunction)
  .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction);
 Map<Integer, Integer> results = combinedRDD.collectAsMap();
 ImmutableMap<Integer, Integer> expected = ImmutableMap.of(0, 9, 1, 5, 2, 7);
 assertEquals(expected, results);
 Partitioner defaultPartitioner = Partitioner.defaultPartitioner(
  combinedRDD.rdd(),
  JavaConverters.collectionAsScalaIterableConverter(
   Collections.<RDD<?>>emptyList()).asScala().toSeq());
 combinedRDD = originalRDD.keyBy(keyFunction)
  .combineByKey(
   createCombinerFunction,
   mergeValueFunction,
   mergeValueFunction,
   defaultPartitioner,
   false,
   new KryoSerializer(new SparkConf()));
 results = combinedRDD.collectAsMap();
 assertEquals(expected, results);
}

Code example source: apache/hive

// Truncated fragment: the statement ending in "null);" is cut off in the source snippet.
null);
SparkPlan sparkPlan = sparkPlanGenerator.generate(sparkTask.getWork());
RDD<Tuple2<HiveKey, BytesWritable>> reducerRdd = sparkPlan.generateGraph().rdd();

Code example source: com.basho.riak/spark-riak-connector

@Override
public RiakRDD<Tuple2<K, V>> rdd() {
  return (RiakRDD<Tuple2<K, V>>) super.rdd();
}

Code example source: com.datastax.spark/spark-cassandra-connector-unshaded (an identical override also appears in spark-cassandra-connector-java_2.10)

@Override
public CassandraRDD<Tuple2<K, V>> rdd() {
  return (CassandraRDD<Tuple2<K, V>>) super.rdd();
}

Code example source: com.datastax.spark/spark-cassandra-connector-java_2.10 (the same factory method appears verbatim in the connector, connector_2.10, connector-java, and connector-unshaded artifacts)

/**
 * A static factory method to create a {@link PairRDDJavaFunctions} based on an existing {@link
 * JavaPairRDD} instance.
 */
public static <K, V> PairRDDJavaFunctions<K, V> javaFunctions(JavaPairRDD<K, V> rdd) {
  return new PairRDDJavaFunctions<>(rdd.rdd());
}

Code example source: apache/crunch (also published as org.apache.crunch/crunch-spark)

@Override
public JavaRDDLike<?, ?> getJavaRDDLike(SparkRuntime runtime) {
 if (!runtime.isValid(rdd)) {
  rdd = getJavaRDDLikeInternal(runtime);
  rdd.rdd().setName(getName());
  StorageLevel sl = runtime.getStorageLevel(this);
  if (sl != null) {
   rdd.rdd().persist(sl);
  }
 }
 return rdd;
}
