This article collects code examples of the org.apache.spark.api.java.JavaRDD.context() method in Java and shows how JavaRDD.context() is used in practice. The examples are extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, so they should be useful as references. Details of the JavaRDD.context() method are as follows:
Package path: org.apache.spark.api.java.JavaRDD
Class name: JavaRDD
Method name: context
Description: JavaRDD.context() returns the org.apache.spark.SparkContext that the RDD was created on. It returns the Scala SparkContext rather than a JavaSparkContext; as the examples below show, callers typically wrap the result with JavaSparkContext.fromSparkContext(...) or new JavaSparkContext(...) when a Java-friendly API is needed.
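Before looking at the project examples, here is a minimal, self-contained sketch of the typical pattern: call context() on a JavaRDD to get the underlying SparkContext, then wrap it back into a JavaSparkContext when needed. The class name JavaRddContextExample, the application name, and the sample data are made up for illustration; only the Spark API calls themselves are real.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaRddContextExample {
  public static void main(String[] args) {
    // Hypothetical local setup; any existing JavaSparkContext works the same way.
    SparkConf conf = new SparkConf().setAppName("context-example").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3));

    // context() returns the Scala SparkContext the RDD was created on.
    SparkContext sc = numbers.context();

    // Wrap it again when a Java-friendly API is needed, as several examples below do.
    JavaSparkContext wrapped = JavaSparkContext.fromSparkContext(sc);
    System.out.println("appName = " + sc.appName()
        + ", defaultParallelism = " + wrapped.defaultParallelism());

    jsc.stop();
  }
}

Both new JavaSparkContext(rdd.context()) and JavaSparkContext.fromSparkContext(rdd.context()) appear in the examples below; they are equivalent ways of wrapping the same underlying SparkContext.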
Code example source (origin): com.davidbracewell/mango
/**
 * Gets the streaming context of a given spark stream
 *
 * @param stream the stream whose context we want
 * @return the spark streaming context
 */
public static SparkStreamingContext contextOf(@NonNull SparkStream<?> stream) {
  return contextOf(stream.getRDD().context());
}
Code example source (origin): co.cask.cdap/hydrator-spark-core2
@Override
public JavaRDD<U> transform(SparkExecutionPluginContext context, JavaRDD<T> input) throws Exception {
  lazyInit(JavaSparkContext.fromSparkContext(input.context()));
  return delegate.transform(context, input);
}
Code example source (origin): com.davidbracewell/mango
@Override
public <U> SparkPairStream<T, U> zip(@NonNull MStream<U> other) {
  if (other instanceof SparkStream) {
    return new SparkPairStream<>(rdd.zip(Cast.<SparkStream<U>>as(other).rdd));
  }
  JavaSparkContext jsc = new JavaSparkContext(rdd.context());
  return new SparkPairStream<>(rdd.zip(jsc.parallelize(other.collect(), rdd.partitions().size())));
}
Code example source (origin): uber/marmaray
public final void execute() {
  this.forkFunction.registerAccumulators(this.inputRDD.rdd().sparkContext());
  // Converts JavaRDD<DI> -> JavaRDD<ForkData<DI>>
  JavaRDD<ForkData<DI>> forkedData = this.inputRDD.flatMap(this.forkFunction)
      .persist(this.persistLevel);
  final String jobName = SparkJobTracker.getJobName(this.inputRDD.rdd().sparkContext());
  forkedData.setName(String.format("%s-%s", jobName, forkedData.id()));
  // Deliberately calling count() so that the DAG gets executed.
  final long processedRecords = forkedData.count();
  final Optional<RDDInfo> rddInfo = SparkUtil.getRddInfo(forkedData.context(), forkedData.id());
  log.info("#processed records :{} name:{}", processedRecords, forkedData.name());
  if (rddInfo.isPresent()) {
    final long size = rddInfo.get().diskSize() + rddInfo.get().memSize();
    log.info("rddInfo -> name:{} partitions:{} size:{}", forkedData.name(), rddInfo.get().numPartitions(),
        size);
  }
  this.groupRDD = Optional.of(forkedData);
}
Code example source (origin): org.qcri.rheem/rheem-spark
final SparkContext sparkContext = inputRdd.context();
Code example source (origin): org.datavec/datavec-spark
/**
 * Creates a DataFrame from an RDD of writables, given a schema.
 *
 * @param schema the schema to use
 * @param data   the data to convert
 * @return the DataFrame object
 */
public static DataRowsFacade toDataFrame(Schema schema, JavaRDD<List<Writable>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.map(new ToRow(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchema(schema)));
}
Code example source (origin): org.datavec/datavec-spark_2.11
/**
 * Creates a DataFrame from an RDD of writables, given a schema.
 *
 * @param schema the schema to use
 * @param data   the data to convert
 * @return the DataFrame object
 */
public static DataRowsFacade toDataFrame(Schema schema, JavaRDD<List<Writable>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.map(new ToRow(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchema(schema)));
}
Code example source (origin): uber/marmaray
public final RDDWrapper<OD> map(@NonNull final JavaRDD<ID> data) {
  final ForkOperator<IData> converter =
      new ForkOperator<>(data.map(r -> RawDataHelper.getRawData(r)),
          new DataConversionFunction(), this.conf);
  converter.execute();
  // Write error records.
  ErrorTableUtil.writeErrorRecordsToErrorTable(data.context(), this.conf, Optional.absent(),
      new RDDWrapper<>(converter.getRDD(ERROR_RECORD).map(r -> (ErrorData) r), converter.getCount(ERROR_RECORD)),
      errorExtractor);
  return new RDDWrapper<>(converter.getRDD(VALID_RECORD).map(r -> ((ValidData<OD>) r).getData()),
      converter.getCount(VALID_RECORD));
}
Code example source (origin): org.datavec/datavec-spark
/**
 * Convert the given sequence data set to a DataFrame.<br>
 * <b>Note</b>: The resulting DataFrame has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the
 *   position of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}.
 *
 * @param schema Schema for the data
 * @param data   Sequence data to convert to a DataFrame
 * @return The DataFrame object
 */
public static DataRowsFacade toDataFrameSequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.flatMap(new SequenceToRows(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchemaSequence(schema)));
}
Code example source (origin): jgperrin/net.jgp.labs.spark
@Override
public void call(JavaRDD<String> rdd) throws Exception {
  JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
    private static final long serialVersionUID = 5167089361335095997L;

    @Override
    public Row call(String msg) {
      Row row = RowFactory.create(msg);
      return row;
    }
  });
  // Create Schema
  StructType schema = DataTypes.createStructType(
      new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
  // Get Spark 2.0 session
  SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
  Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
  msgDataFrame.show();
}
Code example source (origin): org.datavec/datavec-spark_2.11
/**
 * Convert the given sequence data set to a DataFrame.<br>
 * <b>Note</b>: The resulting DataFrame has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the
 *   position of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}.
 *
 * @param schema Schema for the data
 * @param data   Sequence data to convert to a DataFrame
 * @return The DataFrame object
 */
public static DataRowsFacade toDataFrameSequence(Schema schema, JavaRDD<List<List<Writable>>> data) {
  JavaSparkContext sc = new JavaSparkContext(data.context());
  SQLContext sqlContext = new SQLContext(sc);
  JavaRDD<Row> rows = data.flatMap(new SequenceToRows(schema));
  return dataRows(sqlContext.createDataFrame(rows, fromSchemaSequence(schema)));
}
Code example source (origin): jgperrin/net.jgp.labs.spark
@Override
public void call(JavaRDD<String> rdd) {
  JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
    private static final long serialVersionUID = 5167089361335095997L;

    @Override
    public Row call(String msg) {
      Row row = RowFactory.create(msg);
      return row;
    }
  });
  // Create Schema
  StructType schema = DataTypes.createStructType(
      new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
  // Get Spark 2.0 session
  SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
  Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
  msgDataFrame.show();
}
Code example source (origin): co.cask.cdap/hydrator-spark-core2
@Override
public JavaRDD<U> call(JavaRDD<T> data, Time batchTime) throws Exception {
  SparkExecutionPluginContext sparkPluginContext =
      new SparkStreamingExecutionContext(sec, JavaSparkContext.fromSparkContext(data.context()),
          batchTime.milliseconds(), stageSpec);
  String stageName = stageSpec.getName();
  data = data.map(new CountingFunction<T>(stageName, sec.getMetrics(), "records.in", null));
  return compute.transform(sparkPluginContext, data)
      .map(new CountingFunction<U>(stageName, sec.getMetrics(), "records.out", sec.getDataTracer(stageName)));
}
Code example source (origin): spirom/learning-spark-with-java
// Obtain the shared record-counter accumulator via the RDD's SparkContext, then add this RDD's record count.
final LongAccumulator recordCounter = RecordCounter.getInstance(new JavaSparkContext(rdd.context()));
long records = rdd.count();
recordCounter.add(records);
Code example source (origin): org.datasyslab/geospark
verifyPartitioningMatch(leftRDD, rightRDD);
// Create GeoSpark metrics on the SparkContext of the spatially partitioned RDD.
SparkContext sparkContext = leftRDD.spatialPartitionedRDD.context();
GeoSparkMetric buildCount = GeoSparkMetrics.createMetric(sparkContext, "buildCount");
GeoSparkMetric streamCount = GeoSparkMetrics.createMetric(sparkContext, "streamCount");
Code example source (origin): DataSystemsLab/GeoSpark
verifyPartitioningMatch(leftRDD, rightRDD);
// Create GeoSpark metrics on the SparkContext of the spatially partitioned RDD.
SparkContext sparkContext = leftRDD.spatialPartitionedRDD.context();
GeoSparkMetric buildCount = GeoSparkMetrics.createMetric(sparkContext, "buildCount");
GeoSparkMetric streamCount = GeoSparkMetrics.createMetric(sparkContext, "streamCount");