本文整理了Java中org.apache.spark.api.java.JavaRDD.setName()
方法的一些代码示例,展示了JavaRDD.setName()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.setName()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:setName
方法描述:暂无。
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void testGetPersistentRDDs() {
// Before anything is cached the persistent-RDD registry must be empty.
java.util.Map<Integer, JavaRDD<?>> persisted = sc.getPersistentRDDs();
assertTrue(persisted.isEmpty());
// Name and cache two small RDDs; the locals keep them referenced while we assert.
JavaRDD<String> first = sc.parallelize(Arrays.asList("a", "b")).setName("RDD1").cache();
JavaRDD<String> second = sc.parallelize(Arrays.asList("c", "d")).setName("RDD2").cache();
persisted = sc.getPersistentRDDs();
assertEquals(2, persisted.size());
// NOTE(review): assumes these are the first RDDs created in this context, so ids are 0 and 1.
assertEquals("RDD1", persisted.get(0).name());
assertEquals("RDD2", persisted.get(1).name());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void testGetPersistentRDDs() {
// Before anything is cached the persistent-RDD registry must be empty.
java.util.Map<Integer, JavaRDD<?>> persisted = sc.getPersistentRDDs();
assertTrue(persisted.isEmpty());
// Name and cache two small RDDs; the locals keep them referenced while we assert.
JavaRDD<String> first = sc.parallelize(Arrays.asList("a", "b")).setName("RDD1").cache();
JavaRDD<String> second = sc.parallelize(Arrays.asList("c", "d")).setName("RDD2").cache();
persisted = sc.getPersistentRDDs();
assertEquals(2, persisted.size());
// NOTE(review): assumes these are the first RDDs created in this context, so ids are 0 and 1.
assertEquals("RDD1", persisted.get(0).name());
assertEquals("RDD2", persisted.get(1).name());
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void testGetPersistentRDDs() {
// Before anything is cached the persistent-RDD registry must be empty.
java.util.Map<Integer, JavaRDD<?>> persisted = sc.getPersistentRDDs();
assertTrue(persisted.isEmpty());
// Name and cache two small RDDs; the locals keep them referenced while we assert.
JavaRDD<String> first = sc.parallelize(Arrays.asList("a", "b")).setName("RDD1").cache();
JavaRDD<String> second = sc.parallelize(Arrays.asList("c", "d")).setName("RDD2").cache();
persisted = sc.getPersistentRDDs();
assertEquals(2, persisted.size());
// NOTE(review): assumes these are the first RDDs created in this context, so ids are 0 and 1.
assertEquals("RDD1", persisted.get(0).name());
assertEquals("RDD2", persisted.get(1).name());
}
代码示例来源:origin: apache/tinkerpop
@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
// Collect the names of every registered RDD whose name starts with the source prefix.
final List<String> matching = Spark.getRDDs().stream()
        .filter(r -> r.name().startsWith(sourceLocation))
        .map(RDD::name)
        .collect(Collectors.toList());
if (matching.isEmpty())
    return false;
for (final String name : matching) {
    // Exact match takes the target name verbatim; otherwise rewrite the prefix.
    final String copyName = name.equals(sourceLocation)
            ? targetLocation
            : name.replace(sourceLocation, targetLocation);
    // The identity filter creates a fresh RDD; count() forces it to materialize in the cache.
    Spark.getRDD(name).toJavaRDD().filter(a -> true).setName(copyName).cache().count();
    // TODO: this should use the original storage level
}
return true;
}
代码示例来源:origin: org.apache.beam/beam-runners-spark
@Override
public void setName(String name) {
// Forward the display name straight to the wrapped Spark RDD.
rdd.setName(name);
}
}
代码示例来源:origin: org.qcri.rheem/rheem-spark
/**
 * Utility method to name an RDD according to this instance's name.
 * Falls back to {@code toString()} when no explicit name has been set.
 *
 * @param rdd that should be renamed
 * @see #getName()
 */
default void name(JavaRDD<?> rdd) {
    // Prefer the configured name; otherwise use the string representation as a label.
    rdd.setName(this.getName() != null ? this.getName() : this.toString());
}
代码示例来源:origin: seznam/euphoria
@Override
@SuppressWarnings("unchecked")
public JavaRDD<?> translate(Union operator, SparkExecutorContext context) {
  final List<JavaRDD<?>> inputs = context.getInputs(operator);
  // A union of fewer than two datasets is meaningless.
  if (inputs.size() < 2) {
    throw new IllegalStateException("Union operator needs at least 2 inputs");
  }
  // Fold the inputs pairwise; every intermediate union is tagged with the operator's name.
  return inputs.stream()
      .reduce((left, right) ->
          ((JavaRDD<Object>) left)
              .union((JavaRDD<Object>) right)
              .setName(operator.getName()))
      .orElseThrow(() -> new IllegalArgumentException("Unable to reduce inputs."));
}
}
代码示例来源:origin: org.apache.tinkerpop/spark-gremlin
@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
// Collect the names of every registered RDD whose name starts with the source prefix.
final List<String> matching = Spark.getRDDs().stream()
        .filter(r -> r.name().startsWith(sourceLocation))
        .map(RDD::name)
        .collect(Collectors.toList());
if (matching.isEmpty())
    return false;
for (final String name : matching) {
    // Exact match takes the target name verbatim; otherwise rewrite the prefix.
    final String copyName = name.equals(sourceLocation)
            ? targetLocation
            : name.replace(sourceLocation, targetLocation);
    // The identity filter creates a fresh RDD; count() forces it to materialize in the cache.
    Spark.getRDD(name).toJavaRDD().filter(a -> true).setName(copyName).cache().count();
    // TODO: this should use the original storage level
}
return true;
}
代码示例来源:origin: uber/marmaray
// Forks the input RDD, persists it, names it after the current job, and materializes it.
// Statement order matters here: persist() must precede the count() that triggers the DAG.
public final void execute() {
// Register accumulators before any task runs so fork bookkeeping is visible to executors.
this.forkFunction.registerAccumulators(this.inputRDD.rdd().sparkContext());
// Converts JavaRDD<T> -> JavaRDD<List<Integer>, T>
JavaRDD<ForkData<DI>> forkedData = this.inputRDD.flatMap(this.forkFunction)
.persist(this.persistLevel);
// Tag the persisted RDD with "<jobName>-<rddId>" so it is identifiable in the Spark UI.
final String jobName = SparkJobTracker.getJobName(this.inputRDD.rdd().sparkContext());
forkedData.setName(String.format("%s-%s", jobName, forkedData.id()));
// deliberately calling count so that DAG gets executed.
final long processedRecords = forkedData.count();
// RDDInfo is only available after materialization; it may still be absent.
final Optional<RDDInfo> rddInfo = SparkUtil.getRddInfo(forkedData.context(), forkedData.id());
log.info("#processed records :{} name:{}", processedRecords, forkedData.name());
if (rddInfo.isPresent()) {
// Total footprint = spilled-to-disk bytes plus in-memory bytes.
final long size = rddInfo.get().diskSize() + rddInfo.get().memSize();
log.info("rddInfo -> name:{} partitions:{} size:{}", forkedData.name(), rddInfo.get().numPartitions(),
size);
}
this.groupRDD = Optional.of(forkedData);
}
代码示例来源:origin: seznam/euphoria
out.setName(op.getName() + "-persisted").persist(storageLevel);
代码示例来源:origin: seznam/euphoria
@Override
@SuppressWarnings("unchecked")
public JavaRDD<?> translate(FlatMap operator, SparkExecutorContext context) {
  final JavaRDD<?> input = context.getSingleInput(operator);
  final UnaryFunctor<?, ?> mapper = operator.getFunctor();
  final ExtractEventTime<?> evtTimeFn = operator.getEventTimeExtractor();
  // Accumulators are resolved lazily so they are only created when the functor uses them.
  final LazyAccumulatorProvider accumulators =
      new LazyAccumulatorProvider(context.getAccumulatorFactory(), context.getSettings());
  // Without an event-time extractor, just apply the user-defined functor.
  if (evtTimeFn == null) {
    return input
        .flatMap(new UnaryFunctorWrapper(mapper, accumulators))
        .setName(operator.getName() + "::apply-udf");
  }
  // Otherwise assign event time and apply the functor in a single pass.
  return input
      .flatMap(new EventTimeAssigningUnaryFunctor(mapper, evtTimeFn, accumulators))
      .setName(operator.getName() + "::event-time-and-apply-udf");
}
}
代码示例来源:origin: seznam/euphoria
// Translates an InputOperator into a Spark RDD of windowed elements read from its data source.
@Override
public JavaRDD<?> translate(FlowUnfolder.InputOperator operator, SparkExecutorContext context) {
// get original datasource from operator
DataSource<?> ds = operator.output().getSource();
try {
// Split size is configurable; fall back to the compiled-in default.
final long desiredSplitSize = context.getSettings()
.getLong(DESIRED_SPLIT_SIZE, DEFAULT_DESIRED_SPLIT_SIZE);
// Wrap the data source in a Hadoop InputFormat configuration so Spark can read it.
final Configuration conf = DataSourceInputFormat.configure(
new Configuration(), ds, desiredSplitSize);
@SuppressWarnings("unchecked")
JavaPairRDD<Object, Object> pairs =
context
.getExecutionEnvironment()
.newAPIHadoopRDD(conf, DataSourceInputFormat.class, Object.class, Object.class)
.setName(operator.getName() + "::read");
// map values to WindowedElement
return pairs
.values()
.map(v -> new SparkElement<>(GlobalWindowing.Window.get(), 0L, v))
.setName(operator.getName() + "::windowed-element");
} catch (IOException e) {
// Configuration/IO failures are unrecoverable here; surface them as unchecked.
throw new RuntimeException(e);
}
}
}
代码示例来源:origin: seznam/euphoria
new LazyAccumulatorProvider(
context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");
代码示例来源:origin: seznam/euphoria
.mapPartitions(
iterator -> new JoinIterator<>(new BatchJoinIterator<>(iterator), operator.getType()))
.setName(operator.getName() + "::create-iterator")
.flatMap(
new FlatMapFunctionWithCollector<>(
new LazyAccumulatorProvider(
context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");
代码示例来源:origin: seznam/euphoria
new LazyAccumulatorProvider(
context.getAccumulatorFactory(), context.getSettings())))
.setName(operator.getName() + "::apply-udf");
代码示例来源:origin: seznam/euphoria
return new SparkElement<>(kw.window(), kw.timestamp(), Pair.of(kw.key(), el));
})
.setName(operator.getName() + "::wrap-in-spark-element");
return new SparkElement<>(kw.window(), kw.timestamp(), Pair.of(kw.key(), el));
})
.setName(operator.getName() + "::wrap-in-spark-element");
内容来源于网络,如有侵权,请联系作者删除!