本文整理了Java中org.apache.spark.api.java.JavaRDD.union()
方法的一些代码示例,展示了JavaRDD.union()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.union()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:union
暂无
代码示例来源:origin: OryxProject/oryx
/**
 * Splits input into (training, test) RDDs according to {@code testFraction}.
 * Past data, when present, always goes to the training side; only new data is
 * ever held out for testing. Either element of the returned pair may be null.
 */
private Pair<JavaRDD<M>,JavaRDD<M>> splitTrainTest(JavaRDD<M> newData, JavaRDD<M> pastData) {
  Objects.requireNonNull(newData);
  if (testFraction <= 0.0) {
    // No test split requested: all data (new plus any past) is training data.
    JavaRDD<M> train = (pastData == null) ? newData : newData.union(pastData);
    return new Pair<>(train, null);
  }
  if (testFraction >= 1.0) {
    // Everything new is held out for testing; only past data (possibly null) trains.
    return new Pair<>(pastData, newData);
  }
  if (empty(newData)) {
    // Nothing new to split; keep training on past data with no test set.
    return new Pair<>(pastData, null);
  }
  // Split just the new data, then fold any past data into the training portion.
  Pair<JavaRDD<M>,JavaRDD<M>> split = splitNewDataToTrainTest(newData);
  JavaRDD<M> newTrain = split.getFirst();
  JavaRDD<M> train = (pastData == null) ? newTrain : newTrain.union(pastData);
  return new Pair<>(train, split.getSecond());
}
代码示例来源:origin: OryxProject/oryx
KMeansPMMLUtils.validatePMMLVsSchema(model, inputSchema);
JavaRDD<Vector> evalData =
parsedToVectorRDD(trainData.union(testData).map(MLFunctions.PARSE_FN));
List<ClusterInfo> clusterInfoList = KMeansPMMLUtils.read(model);
代码示例来源:origin: databricks/learning-spark
JavaRDD<LabeledPoint> trainingData = positiveExamples.union(negativeExamples);
代码示例来源:origin: OryxProject/oryx
// Publishes the model's feature matrices as incremental updates to the model update topic.
// Y (item) features are sent before X (user) features; see the comment below for why.
@Override
public void publishAdditionalModelData(JavaSparkContext sparkContext,
PMML pmml,
JavaRDD<String> newData,
JavaRDD<String> pastData,
Path modelParentPath,
TopicProducer<String, String> modelUpdateTopic) {
// Send item updates first, before users. That way, user-based endpoints like /recommend
// may take longer to not return 404, but when they do, the result will be more complete.
log.info("Sending item / Y data as model updates");
// The PMML "Y" extension holds the location of the item-feature RDD, relative to modelParentPath.
String yPathString = AppPMMLUtils.getExtensionValue(pmml, "Y");
JavaPairRDD<String,float[]> productRDD = readFeaturesRDD(sparkContext, new Path(modelParentPath, yPathString));
String updateBroker = modelUpdateTopic.getUpdateBroker();
String topic = modelUpdateTopic.getTopic();
// For now, there is no use in sending known users for each item
productRDD.foreachPartition(new EnqueueFeatureVecsFn("Y", updateBroker, topic));
log.info("Sending user / X data as model updates");
// Likewise, the "X" extension locates the user-feature RDD.
String xPathString = AppPMMLUtils.getExtensionValue(pmml, "X");
JavaPairRDD<String,float[]> userRDD = readFeaturesRDD(sparkContext, new Path(modelParentPath, xPathString));
if (noKnownItems) {
// Plain user vectors only; per-user known-item lists are not being tracked.
userRDD.foreachPartition(new EnqueueFeatureVecsFn("X", updateBroker, topic));
} else {
log.info("Sending known item data with model updates");
// Combine new and past input (pastData may be null on a first run) to rebuild each
// user's known-item collection, then join it onto the user feature vectors.
JavaRDD<String[]> allData =
(pastData == null ? newData : newData.union(pastData)).map(MLFunctions.PARSE_FN);
JavaPairRDD<String,Collection<String>> knownItems = knownsRDD(allData, true);
userRDD.join(knownItems).foreachPartition(
new EnqueueFeatureVecsAndKnownItemsFn("X", updateBroker, topic));
}
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
JavaRDD<LabeledPoint> trainingData = spamTrainingData.union(nonSpamTrainingData);
代码示例来源:origin: seznam/euphoria
/**
 * Translates a {@code Union} operator by folding all of its input RDDs into a
 * single union, naming each intermediate result after the operator.
 */
@Override
@SuppressWarnings("unchecked")
public JavaRDD<?> translate(Union operator, SparkExecutorContext context) {
  final List<JavaRDD<?>> inputs = context.getInputs(operator);
  if (inputs.size() < 2) {
    throw new IllegalStateException("Union operator needs at least 2 inputs");
  }
  // Left-fold over the inputs, exactly as a stream reduce would pair them up.
  JavaRDD<Object> result = (JavaRDD<Object>) inputs.get(0);
  for (int i = 1; i < inputs.size(); i++) {
    result = result.union((JavaRDD<Object>) inputs.get(i)).setName(operator.getName());
  }
  return result;
}
}
代码示例来源:origin: co.cask.cdap/hydrator-spark-core2
@SuppressWarnings("unchecked")
@Override
public SparkCollection<T> union(SparkCollection<T> other) {
  // Concatenate the other collection's underlying RDD onto ours and re-wrap.
  JavaRDD<T> otherRdd = (JavaRDD<T>) other.getUnderlying();
  return wrap(rdd.union(otherRdd));
}
代码示例来源:origin: nerdammer/spash
@Override
public JavaRDD<T> toRDD(JavaSparkContext sc) {
  // Materialize both sides, then concatenate them into a single RDD.
  JavaRDD<T> first = this.one.toRDD(sc);
  JavaRDD<T> second = this.two.toRDD(sc);
  return first.union(second);
}
代码示例来源:origin: com.davidbracewell/mango
@Override
public SparkStream<T> union(@NonNull MStream<T> other) {
  // A reusable, empty "other" contributes nothing: return this stream unchanged.
  if (other.isReusable() && other.isEmpty()) {
    return this;
  }
  // Symmetrically, if this stream is reusable and empty, the union is just "other".
  if (isReusable() && this.isEmpty()) {
    return new SparkStream<>(other);
  }
  // Reuse the other stream's RDD directly when it is already Spark-backed;
  // otherwise convert it first.
  JavaRDD<T> otherRdd = (other instanceof SparkStream)
      ? Cast.<SparkStream<T>>as(other).rdd
      : new SparkStream<T>(other).rdd;
  return new SparkStream<>(rdd.union(otherRdd));
}
代码示例来源:origin: com.cloudera.oryx/oryx-ml
// Splits input into (training, test) RDDs according to testFraction. Past data, when
// present, is always folded into the training side; only new data is ever held out
// for testing. Either element of the returned Pair may be null.
private Pair<JavaRDD<M>,JavaRDD<M>> splitTrainTest(JavaRDD<M> newData, JavaRDD<M> pastData) {
Objects.requireNonNull(newData);
// testFraction <= 0: no test set; train on everything (new plus any past data).
if (testFraction <= 0.0) {
return new Pair<>(pastData == null ? newData : newData.union(pastData), null);
}
// testFraction >= 1: all new data is test; train only on past data (may be null).
if (testFraction >= 1.0) {
return new Pair<>(pastData, newData);
}
// Nothing new to split; keep training on past data with no test set.
if (empty(newData)) {
return new Pair<>(pastData, null);
}
// Split just the new data, then merge its training part with any past data.
Pair<JavaRDD<M>,JavaRDD<M>> newTrainTest = splitNewDataToTrainTest(newData);
JavaRDD<M> newTrainData = newTrainTest.getFirst();
return new Pair<>(pastData == null ? newTrainData : newTrainData.union(pastData),
newTrainTest.getSecond());
}
代码示例来源:origin: org.apache.spark/spark-streaming_2.11
PairFunction<Integer, Integer, Integer> mapToTuple =
(PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
代码示例来源:origin: org.apache.spark/spark-streaming_2.10
PairFunction<Integer, Integer, Integer> mapToTuple =
(PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
代码示例来源:origin: org.apache.spark/spark-streaming_2.10
PairFunction<Integer, Integer, Integer> mapToTuple =
(Integer i) -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
});
JavaTestUtils.attachTestOutputStream(transformed2);
代码示例来源:origin: org.apache.spark/spark-streaming_2.11
PairFunction<Integer, Integer, Integer> mapToTuple =
(Integer i) -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
});
JavaTestUtils.attachTestOutputStream(transformed2);
代码示例来源:origin: org.qcri.rheem/rheem-spark
/**
 * Evaluates this union operator: unwraps the two input RDD channels, unions their
 * RDDs into the single output channel, and reports lazy-execution lineage.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
ChannelInstance[] inputs,
ChannelInstance[] outputs,
SparkExecutor sparkExecutor,
OptimizationContext.OperatorContext operatorContext) {
  assert inputs.length == this.getNumInputs();
  assert outputs.length == this.getNumOutputs();
  // Unwrap the two input channels and the single output channel.
  final RddChannel.Instance left = (RddChannel.Instance) inputs[0];
  final RddChannel.Instance right = (RddChannel.Instance) inputs[1];
  final RddChannel.Instance result = (RddChannel.Instance) outputs[0];
  // Concatenate the two input RDDs and hand the (lazily modeled) result downstream.
  final JavaRDD<Type> leftRdd = left.provideRdd();
  final JavaRDD<Type> rightRdd = right.provideRdd();
  final JavaRDD<Type> unionRdd = leftRdd.union(rightRdd);
  this.name(unionRdd);
  result.accept(unionRdd, sparkExecutor);
  return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
内容来源于网络,如有侵权,请联系作者删除!