org.apache.spark.api.java.JavaRDD.union()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(6.9k)|赞(0)|评价(0)|浏览(96)

本文整理了Java中org.apache.spark.api.java.JavaRDD.union()方法的一些代码示例,展示了JavaRDD.union()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.union()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:union

JavaRDD.union介绍

暂无

代码示例

代码示例来源:origin: OryxProject/oryx

/**
 * Splits data into a (train, test) pair according to {@code testFraction}.
 * New data is split; past data (if any) is always folded into the train side.
 */
private Pair<JavaRDD<M>,JavaRDD<M>> splitTrainTest(JavaRDD<M> newData, JavaRDD<M> pastData) {
 Objects.requireNonNull(newData);
 // No test fraction requested: everything available is training data.
 if (testFraction <= 0.0) {
  JavaRDD<M> allTrain = pastData == null ? newData : newData.union(pastData);
  return new Pair<>(allTrain, null);
 }
 // Whole fraction requested: all new data becomes the test set.
 if (testFraction >= 1.0) {
  return new Pair<>(pastData, newData);
 }
 // Nothing new arrived; there is nothing to hold out for testing.
 if (empty(newData)) {
  return new Pair<>(pastData, null);
 }
 // Split the new data, then merge its training share with past data.
 Pair<JavaRDD<M>,JavaRDD<M>> split = splitNewDataToTrainTest(newData);
 JavaRDD<M> trainPart = split.getFirst();
 JavaRDD<M> allTrain = pastData == null ? trainPart : trainPart.union(pastData);
 return new Pair<>(allTrain, split.getSecond());
}

代码示例来源:origin: OryxProject/oryx

KMeansPMMLUtils.validatePMMLVsSchema(model, inputSchema);
JavaRDD<Vector> evalData =
  parsedToVectorRDD(trainData.union(testData).map(MLFunctions.PARSE_FN));
List<ClusterInfo> clusterInfoList = KMeansPMMLUtils.read(model);

代码示例来源:origin: databricks/learning-spark

JavaRDD<LabeledPoint> trainingData = positiveExamples.union(negativeExamples);

代码示例来源:origin: OryxProject/oryx

/**
 * Publishes feature-vector model updates to the update topic after a model build.
 * Item (Y) vectors are read from the path stored in the PMML "Y" extension and sent
 * first; user (X) vectors follow, optionally joined with each user's known items.
 *
 * Note: sends are side effects via foreachPartition; ordering of the two phases
 * is intentional (see comment below) and must not be reordered.
 */
@Override
public void publishAdditionalModelData(JavaSparkContext sparkContext,
                    PMML pmml,
                    JavaRDD<String> newData,
                    JavaRDD<String> pastData,
                    Path modelParentPath,
                    TopicProducer<String, String> modelUpdateTopic) {
 // Send item updates first, before users. That way, user-based endpoints like /recommend
 // may take longer to not return 404, but when they do, the result will be more complete.
 log.info("Sending item / Y data as model updates");
 // Location of the Y (item-feature) matrix, recorded as a PMML extension.
 String yPathString = AppPMMLUtils.getExtensionValue(pmml, "Y");
 JavaPairRDD<String,float[]> productRDD = readFeaturesRDD(sparkContext, new Path(modelParentPath, yPathString));
 String updateBroker = modelUpdateTopic.getUpdateBroker();
 String topic = modelUpdateTopic.getTopic();
 // For now, there is no use in sending known users for each item
 productRDD.foreachPartition(new EnqueueFeatureVecsFn("Y", updateBroker, topic));
 log.info("Sending user / X data as model updates");
 // Location of the X (user-feature) matrix, recorded as a PMML extension.
 String xPathString = AppPMMLUtils.getExtensionValue(pmml, "X");
 JavaPairRDD<String,float[]> userRDD = readFeaturesRDD(sparkContext, new Path(modelParentPath, xPathString));
 if (noKnownItems) {
  // Known-item tracking disabled: send bare user vectors.
  userRDD.foreachPartition(new EnqueueFeatureVecsFn("X", updateBroker, topic));
 } else {
  log.info("Sending known item data with model updates");
  // Combine new and past data (past may be absent) and parse into fields.
  JavaRDD<String[]> allData =
    (pastData == null ? newData : newData.union(pastData)).map(MLFunctions.PARSE_FN);
  JavaPairRDD<String,Collection<String>> knownItems = knownsRDD(allData, true);
  // Join each user's vector with their known items before enqueueing.
  userRDD.join(knownItems).foreachPartition(
    new EnqueueFeatureVecsAndKnownItemsFn("X", updateBroker, topic));
 }
}

代码示例来源:origin: mahmoudparsian/data-algorithms-book

JavaRDD<LabeledPoint> trainingData = spamTrainingData.union(nonSpamTrainingData);

代码示例来源:origin: seznam/euphoria

@Override
 @SuppressWarnings("unchecked")
 public JavaRDD<?> translate(Union operator, SparkExecutorContext context) {
  // Collect every upstream RDD feeding this Union operator.
  final List<JavaRDD<?>> inputs = context.getInputs(operator);
  if (inputs.size() < 2) {
   throw new IllegalStateException("Union operator needs at least 2 inputs");
  }
  // Pairwise-merge all inputs with RDD union, naming each intermediate
  // after the operator so it is identifiable in the Spark UI.
  return inputs
    .stream()
    .reduce((left, right) -> {
     JavaRDD<Object> merged = ((JavaRDD<Object>) left).union((JavaRDD<Object>) right);
     return merged.setName(operator.getName());
    })
    .orElseThrow(() -> new IllegalArgumentException("Unable to reduce inputs."));
 }
}

代码示例来源:origin: co.cask.cdap/hydrator-spark-core2

@SuppressWarnings("unchecked")
@Override
public SparkCollection<T> union(SparkCollection<T> other) {
 // Unwrap the other collection's RDD, merge it with ours, and re-wrap.
 JavaRDD<T> otherRdd = (JavaRDD<T>) other.getUnderlying();
 JavaRDD<T> combined = rdd.union(otherRdd);
 return wrap(combined);
}

代码示例来源:origin: nerdammer/spash

@Override
public JavaRDD<T> toRDD(JavaSparkContext sc) {
  // Materialize both sides, then concatenate them into a single RDD.
  JavaRDD<T> left = this.one.toRDD(sc);
  JavaRDD<T> right = this.two.toRDD(sc);
  return left.union(right);
}

代码示例来源:origin: com.davidbracewell/mango

@Override
public SparkStream<T> union(@NonNull MStream<T> other) {
 // A reusable empty operand can be dropped without consuming either stream.
 if (other.isReusable() && other.isEmpty()) {
   return this;
 }
 if (isReusable() && this.isEmpty()) {
   return new SparkStream<>(other);
 }
 // Reuse the other stream's RDD directly when it is already Spark-backed;
 // otherwise lift it into a SparkStream first.
 SparkStream<T> that = other instanceof SparkStream
   ? Cast.<SparkStream<T>>as(other)
   : new SparkStream<>(other);
 return new SparkStream<>(rdd.union(that.rdd));
}

代码示例来源:origin: com.cloudera.oryx/oryx-ml

/**
 * Partitions input into a (train, test) pair driven by {@code testFraction}.
 * Only new data is ever split; past data always stays on the training side.
 */
private Pair<JavaRDD<M>,JavaRDD<M>> splitTrainTest(JavaRDD<M> newData, JavaRDD<M> pastData) {
 Objects.requireNonNull(newData);
 // Fraction <= 0: no test set; train on everything we have.
 if (testFraction <= 0.0) {
  JavaRDD<M> training = pastData == null ? newData : newData.union(pastData);
  return new Pair<>(training, null);
 }
 // Fraction >= 1: all new data is held out as the test set.
 if (testFraction >= 1.0) {
  return new Pair<>(pastData, newData);
 }
 // No new records means there is nothing to hold out.
 if (empty(newData)) {
  return new Pair<>(pastData, null);
 }
 // Split the new data, then fold its training share into the past data.
 Pair<JavaRDD<M>,JavaRDD<M>> partitioned = splitNewDataToTrainTest(newData);
 JavaRDD<M> newTrain = partitioned.getFirst();
 JavaRDD<M> training = pastData == null ? newTrain : newTrain.union(pastData);
 return new Pair<>(training, partitioned.getSecond());
}

代码示例来源:origin: org.apache.spark/spark-streaming_2.11

PairFunction<Integer, Integer, Integer> mapToTuple =
  (PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);

代码示例来源:origin: org.apache.spark/spark-streaming_2.10

PairFunction<Integer, Integer, Integer> mapToTuple =
  (PairFunction<Integer, Integer, Integer>) i -> new Tuple2<>(i, i);
return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);

代码示例来源:origin: org.apache.spark/spark-streaming_2.10

PairFunction<Integer, Integer, Integer> mapToTuple =
  (Integer i) -> new Tuple2<>(i, i);
 return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
});
JavaTestUtils.attachTestOutputStream(transformed2);

代码示例来源:origin: org.apache.spark/spark-streaming_2.11

PairFunction<Integer, Integer, Integer> mapToTuple =
  (Integer i) -> new Tuple2<>(i, i);
 return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3);
});
JavaTestUtils.attachTestOutputStream(transformed2);

代码示例来源:origin: org.qcri.rheem/rheem-spark

@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
    ChannelInstance[] inputs,
    ChannelInstance[] outputs,
    SparkExecutor sparkExecutor,
    OptimizationContext.OperatorContext operatorContext) {
  assert inputs.length == this.getNumInputs();
  assert outputs.length == this.getNumOutputs();
  // Unwrap the channel instances carrying the two input RDDs and the output slot.
  final RddChannel.Instance firstInput = (RddChannel.Instance) inputs[0];
  final RddChannel.Instance secondInput = (RddChannel.Instance) inputs[1];
  final RddChannel.Instance result = (RddChannel.Instance) outputs[0];
  final JavaRDD<Type> leftRdd = firstInput.provideRdd();
  final JavaRDD<Type> rightRdd = secondInput.provideRdd();
  // Union the two inputs, tag the RDD with this operator's name, and publish it.
  final JavaRDD<Type> unioned = leftRdd.union(rightRdd);
  this.name(unioned);
  result.accept(unioned, sparkExecutor);
  return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}

相关文章

微信公众号

最新文章

更多