This article collects code examples of the Java method org.apache.spark.SparkContext.newAPIHadoopRDD(), showing how SparkContext.newAPIHadoopRDD() is used in practice. The examples are extracted from selected projects found on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the SparkContext.newAPIHadoopRDD() method are as follows:
Package: org.apache.spark
Class: SparkContext
Method: newAPIHadoopRDD
Description: returns an RDD of (key, value) pairs for a Hadoop-readable dataset, given a Hadoop Configuration and a new-API (org.apache.hadoop.mapreduce) InputFormat together with its key and value classes.
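For orientation, here is a minimal, self-contained sketch of the call shape before the project excerpts below. The input path and the use of Hadoop's TextInputFormat are illustrative assumptions, not taken from any of the examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class NewAPIHadoopRDDDemo
{
    public static void main(String[] args)
    {
        JavaSparkContext jsc = new JavaSparkContext(
                new SparkConf().setAppName("newAPIHadoopRDD-demo").setMaster("local[*]"));

        // The Configuration carries whatever settings the InputFormat needs;
        // for file-based formats that is the input directory.
        Configuration conf = new Configuration();
        conf.set("mapreduce.input.fileinputformat.inputdir", "hdfs:///tmp/input"); // hypothetical path

        // JavaSparkContext.newAPIHadoopRDD wraps SparkContext.newAPIHadoopRDD and
        // returns (key, value) pairs typed by the InputFormat's key/value classes.
        JavaPairRDD<LongWritable, Text> records = jsc.newAPIHadoopRDD(
                conf, TextInputFormat.class, LongWritable.class, Text.class);

        System.out.println("record count = " + records.count());
        jsc.stop();
    }
}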
Code example from: Impetus/Kundera

public void registerTable(EntityMetadata m, SparkClient sparkClient)
{
    final Class clazz = m.getEntityClazz();
    SparkContext sc = sparkClient.sparkContext;
    Configuration config = new Configuration();
    config.set(
            "mongo.input.uri",
            buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"), m.getSchema(),
                    m.getTableName()));
    JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD = sc.newAPIHadoopRDD(config, MongoInputFormat.class,
            Object.class, BSONObject.class).toJavaRDD();
    JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>()
    {
        @Override
        public Iterable<Object> call(Tuple2<Object, BSONObject> arg)
        {
            BSONObject obj = arg._2();
            Object javaObject = generateJavaObjectFromBSON(obj, clazz);
            return Arrays.asList(javaObject);
        }
    });
    sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
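This excerpt targets the Spark 1.x Java API, where FlatMapFunction.call returns an Iterable; from Spark 2.0 onward the method returns an Iterator, so on newer versions the same function would instead end with return Arrays.asList(javaObject).iterator();.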
Code example from: apache/incubator-rya
GraphXInputFormat.setScanAuthorizations(job, authorizations);
return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXInputFormat.class, Object.class, RyaTypeWritable.class);
Code example from: apache/incubator-rya
String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
InputFormatBase.setInputTableName(job, tableName);
return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
Code example from: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperationUsingElementInputFormat(final GetRDDOfAllElements operation,
                                                        final Context context,
                                                        final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    final String useBatchScannerRDD = operation.getOption(USE_BATCH_SCANNER_RDD);
    if (Boolean.parseBoolean(useBatchScannerRDD)) {
        InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    }
    final RDD<Tuple2<Element, NullWritable>> pairRDD = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext().newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ELEMENT_CLASS_TAG);
}
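Because Gaffer calls the Scala RDD API directly from Java here, RDD.map needs an explicit scala.reflect.ClassTag for the result element type; that is what the ELEMENT_CLASS_TAG constant supplies alongside the FirstElement mapping function.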
Code example from: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
Code example from: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperation(final GetRDDOfElementsInRanges operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRangesFromPairs(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
Code example from: locationtech/geowave

sc.newAPIHadoopRDD(
        conf,
        GeoWaveInputFormat.class,
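The excerpt breaks off before the key and value classes. Judging from GeoWave's mapreduce API, a complete call for vector data would plausibly continue as follows; GeoWaveInputKey and SimpleFeature are assumptions here, not shown in the excerpt:

RDD<Tuple2<GeoWaveInputKey, SimpleFeature>> rdd = sc.newAPIHadoopRDD(
        conf,
        GeoWaveInputFormat.class,
        GeoWaveInputKey.class,   // assumed key class from GeoWave's mapreduce module
        SimpleFeature.class);    // assumed value class for vector data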
Code example from: org.apache.pig/pig

RDD<Tuple2<Text, Tuple>> hadoopRDD = sparkContext.newAPIHadoopRDD(
        jobConf, PigInputFormatSpark.class, Text.class, Tuple.class);
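Note that calling SparkContext.newAPIHadoopRDD directly from Java, as the Kundera and Pig examples do, yields a Scala RDD of Tuple2 values; RDD.toJavaRDD() (used in the Kundera example) or JavaRDD.fromRDD(...) bridges the result back to the Java-friendly API when needed.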