Usage of the org.apache.spark.api.java.JavaRDD.setName() method, with code examples


This article collects code examples of the Java method org.apache.spark.api.java.JavaRDD.setName(), showing how JavaRDD.setName() is used in practice. The examples are taken from selected open-source projects published on GitHub, Stack Overflow, Maven, and similar platforms, and should serve as useful references. The method details are as follows:

Package: org.apache.spark.api.java
Class: JavaRDD
Method: setName

About JavaRDD.setName

setName(String name) assigns a human-readable name to the RDD. The name is what appears for the RDD in the Spark web UI and is returned by name(), which makes cached RDDs easier to identify. The method returns the RDD itself, so it can be chained with calls such as cache() or persist().
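
Before the project examples, here is a minimal, self-contained sketch of the typical pattern; the application name, sample data, and RDD name are illustrative assumptions, not taken from any of the projects below:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SetNameExample {
  public static void main(String[] args) {
    // Local SparkContext; app name and master are illustrative values.
    SparkConf conf = new SparkConf().setAppName("setName-example").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      // setName() returns the RDD itself, so it chains naturally with cache().
      JavaRDD<String> words = sc.parallelize(Arrays.asList("a", "b", "c"))
          .setName("words")   // label shown in the Spark UI and returned by name()
          .cache();
      System.out.println(words.name() + " has " + words.count() + " elements");
    }
  }
}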

Code Examples

Code example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in org.apache.spark/spark-core_2.10 and org.apache.spark/spark-core)

@Test
public void testGetPersistentRDDs() {
 java.util.Map<Integer, JavaRDD<?>> cachedRddsMap = sc.getPersistentRDDs();
 assertTrue(cachedRddsMap.isEmpty());
 JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("a", "b")).setName("RDD1").cache();
 JavaRDD<String> rdd2 = sc.parallelize(Arrays.asList("c", "d")).setName("RDD2").cache();
 cachedRddsMap = sc.getPersistentRDDs();
 assertEquals(2, cachedRddsMap.size());
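 // getPersistentRDDs() is keyed by RDD id; these are the first RDDs created in this context, so their ids are 0 and 1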
 assertEquals("RDD1", cachedRddsMap.get(0).name());
 assertEquals("RDD2", cachedRddsMap.get(1).name());
}

Code example source: apache/tinkerpop (the identical method also appears in org.apache.tinkerpop/spark-gremlin)

@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
  // collect the names of all cached RDDs whose name starts with the source location
  final List<String> rdds = Spark.getRDDs().stream()
      .filter(r -> r.name().startsWith(sourceLocation))
      .map(RDD::name)
      .collect(Collectors.toList());
  if (rdds.size() == 0)
    return false;
  for (final String rdd : rdds) {
    // filter(a -> true) yields a new RDD; it is renamed to the target location, cached, and materialized by count()
    Spark.getRDD(rdd).toJavaRDD()
        .filter(a -> true)
        .setName(rdd.equals(sourceLocation) ? targetLocation : rdd.replace(sourceLocation, targetLocation))
        .cache()
        .count();
    // TODO: this should use the original storage level
  }
  return true;
}

Code example source: org.apache.beam/beam-runners-spark

@Override
 public void setName(String name) {
  rdd.setName(name);
 }
}

Code example source: org.qcri.rheem/rheem-spark

/**
 * Utility method to name an RDD according to this instance's name.
 *
 * @param rdd that should be renamed
 * @see #getName()
 */
default void name(JavaRDD<?> rdd) {
  if (this.getName() != null) {
    rdd.setName(this.getName());
  } else {
    rdd.setName(this.toString());
  }
}

Code example source: seznam/euphoria

@Override
 @SuppressWarnings("unchecked")
 public JavaRDD<?> translate(Union operator, SparkExecutorContext context) {
  final List<JavaRDD<?>> inputs = context.getInputs(operator);
  if (inputs.size() < 2) {
   throw new IllegalStateException("Union operator needs at least 2 inputs");
  }
  return inputs
    .stream()
    .reduce(
      (l, r) ->
        ((JavaRDD<Object>) l)
          .union((JavaRDD<Object>) r)
          .setName(operator.getName()))
    .orElseThrow(() -> new IllegalArgumentException("Unable to reduce inputs."));
 }
}

Code example source: uber/marmaray

public final void execute() {
  this.forkFunction.registerAccumulators(this.inputRDD.rdd().sparkContext());
  // Converts JavaRDD<T> -> JavaRDD<List<Integer>, T>
  JavaRDD<ForkData<DI>> forkedData = this.inputRDD.flatMap(this.forkFunction)
    .persist(this.persistLevel);
  final String jobName = SparkJobTracker.getJobName(this.inputRDD.rdd().sparkContext());
  forkedData.setName(String.format("%s-%s", jobName, forkedData.id()));
  // deliberately calling count so that DAG gets executed.
  final long processedRecords = forkedData.count();
  final Optional<RDDInfo> rddInfo = SparkUtil.getRddInfo(forkedData.context(), forkedData.id());
  log.info("#processed records :{} name:{}", processedRecords, forkedData.name());
  if (rddInfo.isPresent()) {
    final long size = rddInfo.get().diskSize() + rddInfo.get().memSize();
    log.info("rddInfo -> name:{} partitions:{} size:{}", forkedData.name(), rddInfo.get().numPartitions(),
      size);
  }
  this.groupRDD = Optional.of(forkedData);
}

Code example source: seznam/euphoria

out.setName(op.getName() + "-persisted").persist(storageLevel);

Code example source: seznam/euphoria

@Override
 @SuppressWarnings("unchecked")
 public JavaRDD<?> translate(FlatMap operator, SparkExecutorContext context) {

  final JavaRDD<?> input = context.getSingleInput(operator);
  final UnaryFunctor<?, ?> mapper = operator.getFunctor();
  final ExtractEventTime<?> evtTimeFn = operator.getEventTimeExtractor();

  LazyAccumulatorProvider accumulators =
    new LazyAccumulatorProvider(context.getAccumulatorFactory(), context.getSettings());
  if (evtTimeFn != null) {
   return input
     .flatMap(new EventTimeAssigningUnaryFunctor(mapper, evtTimeFn, accumulators))
     .setName(operator.getName() + "::event-time-and-apply-udf");
  } else {
   return input
     .flatMap(new UnaryFunctorWrapper(mapper, accumulators))
     .setName(operator.getName() + "::apply-udf");
  }
 }
}

Code example source: seznam/euphoria

@Override
 public JavaRDD<?> translate(FlowUnfolder.InputOperator operator, SparkExecutorContext context) {

  // get original datasource from operator
  DataSource<?> ds = operator.output().getSource();

  try {
   final long desiredSplitSize = context.getSettings()
     .getLong(DESIRED_SPLIT_SIZE, DEFAULT_DESIRED_SPLIT_SIZE);
   final Configuration conf = DataSourceInputFormat.configure(
     new Configuration(), ds, desiredSplitSize);

   @SuppressWarnings("unchecked")
   JavaPairRDD<Object, Object> pairs =
     context
       .getExecutionEnvironment()
       .newAPIHadoopRDD(conf, DataSourceInputFormat.class, Object.class, Object.class)
       .setName(operator.getName() + "::read");

   // map values to WindowedElement
   return pairs
     .values()
     .map(v -> new SparkElement<>(GlobalWindowing.Window.get(), 0L,  v))
     .setName(operator.getName() + "::windowed-element");

  } catch (IOException e) {
   throw new RuntimeException(e);
  }
 }
}

Code example source: seznam/euphoria

new LazyAccumulatorProvider(
      context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");

Code example source: seznam/euphoria

.mapPartitions(
  iterator -> new JoinIterator<>(new BatchJoinIterator<>(iterator), operator.getType()))
.setName(operator.getName() + "::create-iterator")
.flatMap(
  new FlatMapFunctionWithCollector<>(
    new LazyAccumulatorProvider(
      context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");

Code example source: seznam/euphoria

new LazyAccumulatorProvider(
      context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf");

Code example source: seznam/euphoria

    return new SparkElement<>(kw.window(), kw.timestamp(), Pair.of(kw.key(), el));
  })
  .setName(operator.getName() + "::wrap-in-spark-element");
