Usage of the org.apache.spark.api.java.JavaSparkContext.emptyRDD() method, with code examples


This article collects Java code examples for the org.apache.spark.api.java.JavaSparkContext.emptyRDD() method and shows how it is used in practice. The examples were extracted from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as reasonably representative references. Details of JavaSparkContext.emptyRDD() are as follows:
Package: org.apache.spark.api.java
Class: JavaSparkContext
Method: emptyRDD

Introduction to JavaSparkContext.emptyRDD

JavaSparkContext.emptyRDD() returns a JavaRDD that has no partitions and no elements. It is typically used as a neutral starting value, for example as the seed of a chain of union() calls, or as a safe return value when a data source has nothing to read, as the examples below illustrate.
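
Before the collected examples, here is a minimal, self-contained sketch of the method in action. It is not taken from any of the projects below; the application name and the local master URL are illustrative assumptions. It builds an empty JavaRDD and also shows the JavaPairRDD.fromJavaRDD(sc.<Tuple2<K, V>>emptyRDD()) pattern that several of the examples use to obtain an empty pair RDD.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class EmptyRDDExample {
  public static void main(String[] args) {
    // Local master and app name are placeholders for a quick demo.
    SparkConf conf = new SparkConf().setAppName("emptyRDD-demo").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // An empty RDD has no partitions and no elements.
      JavaRDD<String> empty = sc.emptyRDD();
      System.out.println(empty.isEmpty()); // true
      System.out.println(empty.count());   // 0

      // The same idea for key/value data: an empty JavaPairRDD built from an
      // empty RDD of Tuple2, a convenient neutral starting point for union().
      JavaPairRDD<String, Integer> emptyPairs =
          JavaPairRDD.fromJavaRDD(sc.<Tuple2<String, Integer>>emptyRDD());
      System.out.println(emptyPairs.count()); // 0
    }
  }
}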

Code examples

Example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in spark-core_2.10 and spark-core)

@Test
public void emptyRDD() {
 JavaRDD<String> rdd = sc.emptyRDD();
 assertEquals("Empty RDD shouldn't have any values", 0, rdd.count());
}

Example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in spark-core_2.10 and spark-core)

@Test
public void isEmpty() {
 assertTrue(sc.emptyRDD().isEmpty());
 assertTrue(sc.parallelize(new ArrayList<Integer>()).isEmpty());
 assertFalse(sc.parallelize(Arrays.asList(1)).isEmpty());
 assertTrue(sc.parallelize(Arrays.asList(1, 2, 3), 3).filter(i -> i < 0).isEmpty());
 assertFalse(sc.parallelize(Arrays.asList(1, 2, 3)).filter(i -> i > 1).isEmpty());
}

Example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in spark-core_2.10 and spark-core)

@SuppressWarnings("unchecked")
@Test
public void intersection() {
 List<Integer> ints1 = Arrays.asList(1, 10, 2, 3, 4, 5);
 List<Integer> ints2 = Arrays.asList(1, 6, 2, 3, 7, 8);
 JavaRDD<Integer> s1 = sc.parallelize(ints1);
 JavaRDD<Integer> s2 = sc.parallelize(ints2);
 JavaRDD<Integer> intersections = s1.intersection(s2);
 assertEquals(3, intersections.count());
 JavaRDD<Integer> empty = sc.emptyRDD();
 JavaRDD<Integer> emptyIntersection = empty.intersection(s2);
 assertEquals(0, emptyIntersection.count());
 List<Double> doubles = Arrays.asList(1.0, 2.0);
 JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles);
 JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles);
 JavaDoubleRDD dIntersection = d1.intersection(d2);
 assertEquals(2, dIntersection.count());
 List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
 pairs.add(new Tuple2<>(1, 2));
 pairs.add(new Tuple2<>(3, 4));
 JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs);
 JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs);
 JavaPairRDD<Integer, Integer> pIntersection = p1.intersection(p2);
 assertEquals(2, pIntersection.count());
}

Example source: apache/tinkerpop (the same snippet also ships as org.apache.tinkerpop/spark-gremlin)

/**
   * Read a memoryRDD from the storage location.
   * The default implementation returns an empty RDD.
   *
   * @param configuration the configuration for the {@link org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer}
   * @param memoryKey     the memory key of the memoryRDD
   * @param sparkContext  the Spark context with the requisite methods for generating a {@link JavaPairRDD}
   * @param <K>           the key class of the memoryRDD
   * @param <V>           the value class of the memoryRDD
   * @return the memoryRDD with respective key/value pairs.
   */
  public default <K, V> JavaPairRDD<K, V> readMemoryRDD(final Configuration configuration, final String memoryKey, final JavaSparkContext sparkContext) {
    return sparkContext.<Tuple2<K, V>>emptyRDD().mapToPair(t -> t);
  }

Example source: apache/tinkerpop (the same snippet also ships as org.apache.tinkerpop/spark-gremlin)

@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext) {
  if (!configuration.containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
    throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_INPUT_LOCATION + " to read the persisted RDD from");
  Spark.create(sparkContext.sc());
  final Optional<String> graphLocation = Constants.getSearchGraphLocation(configuration.getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), SparkContextStorage.open());
  return graphLocation.isPresent() ? JavaPairRDD.fromJavaRDD((JavaRDD) Spark.getRDD(graphLocation.get()).toJavaRDD()) : JavaPairRDD.fromJavaRDD(sparkContext.emptyRDD());
}

Example source: nerdammer/spash

@Override
public JavaRDD<T> toRDD(JavaSparkContext sc) {
  return sc.emptyRDD();
}

Example source: co.cask.cdap/hydrator-spark-core2

public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String sourceName,
                     Class<K> keyClass, Class<V> valueClass) {
 Set<String> inputNames = sourceInputs.get(sourceName);
 if (inputNames == null || inputNames.isEmpty()) {
  // should never happen if validation happened correctly at pipeline configure time
  throw new IllegalArgumentException(
   sourceName + " has no input. Please check that the source calls setInput at some input.");
 }
 JavaPairRDD<K, V> inputRDD = JavaPairRDD.fromJavaRDD(jsc.<Tuple2<K, V>>emptyRDD());
 for (String inputName : inputNames) {
  inputRDD = inputRDD.union(createInputRDD(sec, jsc, inputName, keyClass, valueClass));
 }
 return inputRDD;
}

Example source: com.davidbracewell/mango

@Override
public <T> SparkStream<T> empty() {
 return new SparkStream<>(sparkContext().emptyRDD());
}

Example source: com.uber.hoodie/hoodie-client

@Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc,
  HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config,
  String compactionInstantTime) throws IOException {
 if (compactionPlan == null || (compactionPlan.getOperations() == null)
   || (compactionPlan.getOperations().isEmpty())) {
  return jsc.emptyRDD();
 }
 HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
 // Compacting is very similar to applying updates to existing file
 HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
 List<CompactionOperation> operations = compactionPlan.getOperations().stream().map(
     CompactionOperation::convertFromAvroRecordInstance).collect(toList());
 log.info("Compactor compacting " + operations + " files");
 return jsc.parallelize(operations, operations.size())
   .map(s -> compact(table, metaClient, config, s, compactionInstantTime))
   .flatMap(writeStatusesItr -> writeStatusesItr.iterator());
}

Example source: uber/hudi

@Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc,
  HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config,
  String compactionInstantTime) throws IOException {
 if (compactionPlan == null || (compactionPlan.getOperations() == null)
   || (compactionPlan.getOperations().isEmpty())) {
  return jsc.emptyRDD();
 }
 HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
 // Compacting is very similar to applying updates to existing file
 HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
 List<CompactionOperation> operations = compactionPlan.getOperations().stream().map(
     CompactionOperation::convertFromAvroRecordInstance).collect(toList());
 log.info("Compactor compacting " + operations + " files");
 return jsc.parallelize(operations, operations.size())
   .map(s -> compact(table, metaClient, config, s, compactionInstantTime))
   .flatMap(List::iterator);
}

Example source: uber/marmaray

public JavaRDD<DI> getRDD(final int filterKey) {
  final long count = getCount(filterKey);
  log.info("#records for :{} = {}", filterKey, count);
  if (count > 0) {
    return getRDD(new FilterFunction<>(filterKey));
  } else {
    return (new JavaSparkContext(inputRDD.rdd().sparkContext())).emptyRDD();
  }
}

Example source: uber/hudi

@Test
public void testTagLocationWithEmptyRDD() throws Exception {
 // We have some records to be tagged (two different partitions)
 JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
 // Also create the metadata and config
 HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
 HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
 HoodieTable table = HoodieTable.getHoodieTable(metadata, config, jsc);
 // Let's tag
 HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
 try {
  bloomIndex.tagLocation(recordRDD, jsc, table);
 } catch (IllegalArgumentException e) {
  fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices " + "required");
 }
}
