This article collects code examples of the Java method org.apache.spark.SparkContext.hadoopConfiguration() and shows how SparkContext.hadoopConfiguration() is used in practice. The examples are taken from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as a useful reference. Details of the SparkContext.hadoopConfiguration() method:
Package path: org.apache.spark.SparkContext
Class name: SparkContext
Method name: hadoopConfiguration
Method description: none available
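Before the project excerpts, here is a minimal, self-contained sketch (not taken from any of the projects listed below) of the typical usage pattern: hadoopConfiguration() returns the mutable org.apache.hadoop.conf.Configuration that Spark hands to Hadoop InputFormats and OutputFormats, so properties set on it before reading or writing take effect for that context. The class name and the chosen property are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class HadoopConfigurationExample { // hypothetical example class
    public static void main(String[] args) {
        SparkContext sc = new SparkContext(
                new SparkConf().setAppName("hadoop-conf-example").setMaster("local[*]"));
        // The returned Configuration is shared by all Hadoop-based I/O done through this context
        Configuration hadoopConf = sc.hadoopConfiguration();
        // Set a property that Hadoop output formats will pick up (illustrative choice of key)
        hadoopConf.set("mapreduce.output.fileoutputformat.compress", "true");
        // Read a property back; get() returns null when the key is unset
        System.out.println(hadoopConf.get("mapreduce.output.fileoutputformat.compress"));
        sc.stop();
    }
}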
Code example source: apache/kylin (the same code also appears in org.apache.kylin/kylin-engine-spark)
public static void modifySparkHadoopConfiguration(SparkContext sc) throws Exception {
    sc.hadoopConfiguration().set("dfs.replication", "2"); // cuboid intermediate files, replication=2
    sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
    sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.DefaultCodec"); // or org.apache.hadoop.io.compress.SnappyCodec
}
Code example source: org.datavec/datavec-spark_2.11 (also in org.datavec/datavec-spark)
/**
 * Read a UTF-8 format String from HDFS (or local)
 *
 * @param path Path of the file to read the string from
 * @param sc   Spark context
 */
public static String readStringFromFile(String path, SparkContext sc) throws IOException {
    return readStringFromFile(path, sc.hadoopConfiguration());
}
Code example source: org.datavec/datavec-spark (also in org.datavec/datavec-spark_2.11)
/**
 * Read an object from HDFS (or local) using default Java object serialization
 *
 * @param path File to read
 * @param type Class of the object to read
 * @param sc   Spark context
 * @param <T>  Type of the object to read
 */
public static <T> T readObjectFromFile(String path, Class<T> type, SparkContext sc) throws IOException {
    return readObjectFromFile(path, type, sc.hadoopConfiguration());
}
Code example source: org.datavec/datavec-spark (also in org.datavec/datavec-spark_2.11)
/**
 * Write an object to HDFS (or local) using default Java object serialization
 *
 * @param path    Path to write the object to
 * @param toWrite Object to write
 * @param sc      Spark context
 */
public static void writeObjectToFile(String path, Object toWrite, SparkContext sc) throws IOException {
    writeObjectToFile(path, toWrite, sc.hadoopConfiguration());
}
Code example source: org.datavec/datavec-spark_2.11 (also in org.datavec/datavec-spark)
/**
 * Write a String to a file (on HDFS or local) in UTF-8 format
 *
 * @param path    Path to write to
 * @param toWrite String to write
 * @param sc      Spark context
 */
public static void writeStringToFile(String path, String toWrite, SparkContext sc) throws IOException {
    writeStringToFile(path, toWrite, sc.hadoopConfiguration());
}
Code example source: uber/marmaray
private void updateSparkContext(@NonNull final SparkArgs sparkArgs,
                                @NonNull final SparkContext sc) {
    for (SparkListener sparkListener : getSparkEventListeners()) {
        sc.addSparkListener(sparkListener);
    }
    sc.hadoopConfiguration().addResource(sparkArgs.getHadoopConfiguration());
}
Code example source: Netflix/iceberg
protected Configuration lazyConf() {
    if (lazyConf == null) {
        this.lazyConf = lazySparkSession().sparkContext().hadoopConfiguration();
    }
    return lazyConf;
}
Code example source: Netflix/iceberg
public static Seq<CatalogTablePartition> partitions(SparkSession spark, String name) {
    List<String> parts = Lists.newArrayList(Splitter.on('.').limit(2).split(name));
    String db = parts.size() == 1 ? "default" : parts.get(0);
    String table = parts.get(parts.size() == 1 ? 0 : 1);
    HiveClient client = HiveUtils$.MODULE$.newClientForMetadata(
            spark.sparkContext().conf(),
            spark.sparkContext().hadoopConfiguration());
    return client.getPartitions(db, table, Option.empty());
}
Code example source: Stratio/deep-spark (the same code also appears in com.stratio.deep/deep-core)
/**
 * Returns a Cells RDD from S3 fileSystem.
 * @param config Amazon S3 ExtractorConfig.
 * @return RDD of Cells.
 */
public RDD<Cells> createS3RDD(ExtractorConfig<Cells> config) {
    Serializable bucket = config.getValues().get(ExtractorConstants.S3_BUCKET);
    Serializable path = config.getValues().get(ExtractorConstants.FS_FILE_PATH);
    final TextFileDataTable textFileDataTable = UtilFS.createTextFileMetaDataFromConfig(config, this);
    String filePath = path.toString();
    if (config.getExtractorImplClassName().equals(ExtractorConstants.S3)) {
        filePath = ExtractorConstants.S3_PREFIX + bucket.toString() + path.toString();
    }
    Configuration hadoopConf = this.sc().hadoopConfiguration();
    hadoopConf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
    hadoopConf.set("fs.s3n.awsAccessKeyId", config.getString(ExtractorConstants.S3_ACCESS_KEY_ID));
    hadoopConf.set("fs.s3n.awsSecretAccessKey", config.getString(ExtractorConstants.S3_SECRET_ACCESS_KEY));
    return createRDDFromFilePath(filePath, textFileDataTable);
}
Code example source: uk.gov.gchq.gaffer/parquet-store
public static void configureSparkForAddElements(final SparkSession spark, final ParquetStoreProperties props) {
    final Integer numberOfOutputFiles = props.getAddElementsOutputFilesPerGroup();
    String shufflePartitions = spark.conf().getOption("spark.sql.shuffle.partitions").get();
    if (null == shufflePartitions) {
        shufflePartitions = SQLConf.SHUFFLE_PARTITIONS().defaultValueString();
    }
    if (numberOfOutputFiles > Integer.parseInt(shufflePartitions)) {
        LOGGER.debug("Setting the number of Spark shuffle partitions to {}", numberOfOutputFiles);
        spark.conf().set("spark.sql.shuffle.partitions", numberOfOutputFiles);
    }
    final Configuration hadoopConf = spark.sparkContext().hadoopConfiguration();
    configureSparkConfForAddElements(hadoopConf, props);
}
Code example source: uk.gov.gchq.gaffer/spark-accumulo-library
private RDD<Element> doOperation(final GetRDDOfAllElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    SparkSession sparkSession = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties());
    if (sparkSession == null) {
        throw new OperationException("This operation requires an active SparkSession.");
    }
    sparkSession.sparkContext().hadoopConfiguration().addResource(getConfiguration(operation));
    final String useRFileReaderRDD = operation.getOption(USE_RFILE_READER_RDD);
    if (Boolean.parseBoolean(useRFileReaderRDD)) {
        return doOperationUsingRFileReaderRDD(operation, context, accumuloStore);
    } else {
        return doOperationUsingElementInputFormat(operation, context, accumuloStore);
    }
}
Code example source: uber/marmaray
private void assertExpectationsOnSparkContext(
        @NonNull final SparkArgs sparkArgs,
        @NonNull final SparkContext sc) {
    final String registeredAvroSchemaStr = sc.conf().getAvroSchema().head()._2();
    final Schema expectedAvroSchema = sparkArgs.getAvroSchemas().get().get(0);
    Assert.assertEquals(expectedAvroSchema.toString(), registeredAvroSchemaStr);
    Assert.assertEquals("foo_bar", sc.appName());
    Assert.assertEquals("512", sc.hadoopConfiguration().get("mapreduce.map.memory.mb"));
}
Code example source: uk.gov.gchq.gaffer/spark-accumulo-library
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
Code example source: uk.gov.gchq.gaffer/spark-accumulo-library
private RDD<Element> doOperation(final GetRDDOfElementsInRanges operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRangesFromPairs(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}