Usage of the org.apache.spark.api.java.JavaRDD.context() method, with code examples


This article collects a number of Java code examples for the org.apache.spark.api.java.JavaRDD.context() method and shows how JavaRDD.context() is used in practice. The examples are drawn from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the JavaRDD.context() method are as follows:
Package path: org.apache.spark.api.java.JavaRDD
Class name: JavaRDD
Method name: context

About JavaRDD.context

JavaRDD.context() is defined in JavaRDDLike and returns the org.apache.spark.SparkContext that the RDD was created on. Because it returns the Scala SparkContext rather than a JavaSparkContext, Java code typically wraps the result with JavaSparkContext.fromSparkContext(...) or new JavaSparkContext(...), as most of the examples below do.
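
Before the extracted examples, here is a minimal, self-contained sketch of that pattern (the class name, app name, and master URL are illustrative and are not taken from any of the projects cited below):

// Minimal sketch: get the SparkContext behind an existing JavaRDD and wrap it
// back into a JavaSparkContext so further RDDs can be created from the same context.
import java.util.Arrays;

import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaRddContextSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[*]", "context-sketch");
    JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3));

    // context() returns the underlying Scala SparkContext
    SparkContext sc = numbers.context();

    // Wrap it again when a Java-friendly API is needed
    JavaSparkContext wrapped = JavaSparkContext.fromSparkContext(sc);
    JavaRDD<String> letters = wrapped.parallelize(Arrays.asList("a", "b"));

    System.out.println(numbers.count() + letters.count());
    jsc.stop();
  }
}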

Code examples

Code example source: com.davidbracewell/mango

/**
* Gets the streaming context of a given spark stream
*
* @param stream the stream whose context we want
* @return the spark streaming context
*/
public static SparkStreamingContext contextOf(@NonNull SparkStream<?> stream) {
 return contextOf(stream.getRDD().context());
}

Code example source: co.cask.cdap/hydrator-spark-core2

@Override
public JavaRDD<U> transform(SparkExecutionPluginContext context, JavaRDD<T> input) throws Exception {
 lazyInit(JavaSparkContext.fromSparkContext(input.context()));
 return delegate.transform(context, input);
}

Code example source: com.davidbracewell/mango

@Override
public <U> SparkPairStream<T, U> zip(@NonNull MStream<U> other) {
 if (other instanceof SparkStream) {
   return new SparkPairStream<>(rdd.zip(Cast.<SparkStream<U>>as(other).rdd));
 }
 JavaSparkContext jsc = new JavaSparkContext(rdd.context());
 return new SparkPairStream<>(rdd.zip(jsc.parallelize(other.collect(), rdd.partitions().size())));
}

Code example source: uber/marmaray

public final void execute() {
  this.forkFunction.registerAccumulators(this.inputRDD.rdd().sparkContext());
  // Converts JavaRDD<DI> -> JavaRDD<ForkData<DI>>
  JavaRDD<ForkData<DI>> forkedData = this.inputRDD.flatMap(this.forkFunction)
    .persist(this.persistLevel);
  final String jobName = SparkJobTracker.getJobName(this.inputRDD.rdd().sparkContext());
  forkedData.setName(String.format("%s-%s", jobName, forkedData.id()));
  // deliberately calling count so that DAG gets executed.
  final long processedRecords = forkedData.count();
  final Optional<RDDInfo> rddInfo = SparkUtil.getRddInfo(forkedData.context(), forkedData.id());
  log.info("#processed records :{} name:{}", processedRecords, forkedData.name());
  if (rddInfo.isPresent()) {
    final long size = rddInfo.get().diskSize() + rddInfo.get().memSize();
    log.info("rddInfo -> name:{} partitions:{} size:{}", forkedData.name(), rddInfo.get().numPartitions(),
      size);
  }
  this.groupRDD = Optional.of(forkedData);
}

Code example source: org.qcri.rheem/rheem-spark

final SparkContext sparkContext = inputRdd.context();

Code example source: org.qcri.rheem/rheem-spark

final SparkContext sparkContext = inputRdd.context();

Code example source: org.datavec/datavec-spark

/**
 * Creates a DataFrame from an RDD of writables,
 * given a schema
 *
 * @param schema the schema to use
 * @param data   the data to convert
 * @return the dataframe object
 */
public static DataRowsFacade toDataFrame(Schema schema, JavaRDD<List<Writable>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.map(new ToRow(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchema(schema)));
}

Code example source: org.datavec/datavec-spark_2.11

/**
 * Creates a DataFrame from an RDD of writables,
 * given a schema
 *
 * @param schema the schema to use
 * @param data   the data to convert
 * @return the dataframe object
 */
public static DataRowsFacade toDataFrame(Schema schema, JavaRDD<List<Writable>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.map(new ToRow(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchema(schema)));
}

Code example source: uber/marmaray

public final RDDWrapper<OD> map(@NonNull final JavaRDD<ID> data) {
  final ForkOperator<IData> converter =
    new ForkOperator<>(data.map(r -> RawDataHelper.getRawData(r)),
      new DataConversionFunction(), this.conf);
  converter.execute();
  // Write error records.
  ErrorTableUtil.writeErrorRecordsToErrorTable(data.context(), this.conf, Optional.absent(),
    new RDDWrapper<>(converter.getRDD(ERROR_RECORD).map(r -> (ErrorData) r), converter.getCount(ERROR_RECORD)),
    errorExtractor);
  return new RDDWrapper<>(converter.getRDD(VALID_RECORD).map(r -> ((ValidData<OD>) r).getData()),
      converter.getCount(VALID_RECORD));
}

Code example source: org.datavec/datavec-spark

/**
 * Convert the given sequence data set to a DataFrame.<br>
 * <b>Note</b>: The resulting DataFrame has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 * of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}
 *
 * @param schema Schema for the data
 * @param data   Sequence data to convert to a DataFrame
 * @return The dataframe object
 */
public static DataRowsFacade toDataFrameSequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.flatMap(new SequenceToRows(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchemaSequence(schema)));
}

Code example source: jgperrin/net.jgp.labs.spark

@Override
public void call(JavaRDD<String> rdd) throws Exception {
 JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
  private static final long serialVersionUID = 5167089361335095997L;
  @Override
  public Row call(String msg) {
   Row row = RowFactory.create(msg);
   return row;
  }
 });
 // Create Schema
 StructType schema = DataTypes.createStructType(
   new StructField[] { DataTypes.createStructField("Message",
     DataTypes.StringType, true) });
 // Get Spark 2.0 session
 SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context()
   .getConf());
 Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
 msgDataFrame.show();
}

Code example source: org.datavec/datavec-spark_2.11

/**
 * Convert the given sequence data set to a DataFrame.<br>
 * <b>Note</b>: The resulting DataFrame has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 * of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}
 *
 * @param schema Schema for the data
 * @param data   Sequence data to convert to a DataFrame
 * @return The dataframe object
 */
public static DataRowsFacade toDataFrameSequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.flatMap(new SequenceToRows(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchemaSequence(schema)));
}

Code example source: jgperrin/net.jgp.labs.spark

@Override
 public void call(JavaRDD<String> rdd) {
  JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
   private static final long serialVersionUID = 5167089361335095997L;
   @Override
   public Row call(String msg) {
    Row row = RowFactory.create(msg);
    return row;
   }
  });
  // Create Schema
  StructType schema = DataTypes.createStructType(
    new StructField[] { DataTypes.createStructField("Message",
      DataTypes.StringType, true) });
  // Get Spark 2.0 session
  SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context()
    .getConf());
  Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
  msgDataFrame.show();
 }

Code example source: co.cask.cdap/hydrator-spark-core2

@Override
 public JavaRDD<U> call(JavaRDD<T> data, Time batchTime) throws Exception {
  SparkExecutionPluginContext sparkPluginContext =
   new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.context()),
                     batchTime.milliseconds(), stageSpec);
  String stageName = stageSpec.getName();
  data = data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null));
  return compute.transform(sparkPluginContext, data)
   .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out", sec.getDataTracer(stageName)));
 }

Code example source: spirom/learning-spark-with-java

// Look up the application's record-counting accumulator via the RDD's context and add this RDD's count
final LongAccumulator recordCounter = RecordCounter.getInstance(new JavaSparkContext(rdd.context()));
long records = rdd.count();
recordCounter.add(records);

Code example source: org.datasyslab/geospark

// Check that both spatial RDDs use matching partitioning, then create metrics on their shared SparkContext
verifyPartitioningMatch(leftRDD, rightRDD);
SparkContext sparkContext = leftRDD.spatialPartitionedRDD.context();
GeoSparkMetric buildCount = GeoSparkMetrics.createMetric(sparkContext, "buildCount");
GeoSparkMetric streamCount = GeoSparkMetrics.createMetric(sparkContext, "streamCount");

Code example source: DataSystemsLab/GeoSpark

// Check that both spatial RDDs use matching partitioning, then create metrics on their shared SparkContext
verifyPartitioningMatch(leftRDD, rightRDD);
SparkContext sparkContext = leftRDD.spatialPartitionedRDD.context();
GeoSparkMetric buildCount = GeoSparkMetrics.createMetric(sparkContext, "buildCount");
GeoSparkMetric streamCount = GeoSparkMetrics.createMetric(sparkContext, "streamCount");
