本文整理了Java中org.apache.spark.api.java.JavaSparkContext.getConf()
方法的一些代码示例,展示了JavaSparkContext.getConf()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaSparkContext.getConf()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaSparkContext
类名称:JavaSparkContext
方法名:getConf
暂无
代码示例来源:origin: apache/hive
/**
 * Returns the process-wide shuffle {@code KryoSerializer}, creating it on first use by
 * loading {@code HIVE_SHUFFLE_KRYO_SERIALIZER} through the thread's context class loader
 * and invoking its {@code (SparkConf)} constructor with this context's configuration.
 *
 * NOTE(review): this is the double-checked-locking pattern on the static INSTANCE field;
 * it is only safe if INSTANCE is declared volatile — confirm at the field declaration.
 *
 * @param sc   Spark context whose SparkConf is passed to the serializer constructor
 * @param conf Hadoop configuration (not read inside this method)
 * @return the singleton serializer instance
 * @throws IllegalStateException if the serializer class cannot be loaded or constructed
 */
static org.apache.spark.serializer.KryoSerializer getInstance(JavaSparkContext sc,
Configuration conf) {
if (INSTANCE == null) {
synchronized (ShuffleKryoSerializer.class) {
if (INSTANCE == null) {
try {
// Load and construct the serializer reflectively via the context class loader.
INSTANCE = (org.apache.spark.serializer.KryoSerializer) Thread.currentThread().getContextClassLoader().loadClass(
HIVE_SHUFFLE_KRYO_SERIALIZER).getConstructor(SparkConf.class).newInstance(
sc.getConf());
return INSTANCE;
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | ClassNotFoundException e) {
throw new IllegalStateException(
"Unable to create kryo serializer for shuffle RDDs using " +
"class " + HIVE_SHUFFLE_KRYO_SERIALIZER, e);
}
} else {
// Another thread won the race while we waited on the monitor.
return INSTANCE;
}
}
}
return INSTANCE;
}
}
代码示例来源:origin: Alluxio/alluxio
/**
 * Implements Spark with Alluxio integration checker.
 *
 * First verifies that the Spark driver can recognize Alluxio classes and the Alluxio
 * filesystem, then verifies HA support, and finally runs the distributed Spark job.
 *
 * @param sc current JavaSparkContext
 * @param reportWriter save user-facing messages to a generated file
 * @param conf Alluxio configuration used by the HA support check
 * @return performIntegrationChecks results
 */
private Status run(JavaSparkContext sc, PrintWriter reportWriter, AlluxioConfiguration conf) {
// Check whether Spark driver can recognize Alluxio classes and filesystem
Status driverStatus = CheckerUtils.performIntegrationChecks();
String driverAddress = sc.getConf().get("spark.driver.host");
switch (driverStatus) {
case FAIL_TO_FIND_CLASS:
reportWriter.printf("Spark driver: %s failed to recognize Alluxio classes.%n%n",
driverAddress);
return driverStatus;
case FAIL_TO_FIND_FS:
reportWriter.printf("Spark driver: %s failed to recognize Alluxio filesystem.%n%n",
driverAddress);
return driverStatus;
default:
reportWriter.printf("Spark driver: %s can recognize Alluxio filesystem.%n%n",
driverAddress);
break;
}
if (!CheckerUtils.supportAlluxioHA(reportWriter, conf)) {
return Status.FAIL_TO_SUPPORT_HA;
}
return runSparkJob(sc, reportWriter);
}
代码示例来源:origin: apache/kylin
/**
 * Reads a Hive table through a Hive-enabled SparkSession (built from this context's
 * SparkConf) and converts each row into a String[] (null cells stay null).
 */
private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext sc, String hiveTable) {
    SparkSession session = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
    final Dataset hiveDataset = session.table(hiveTable);
    return hiveDataset.javaRDD().map(new Function<Row, String[]>() {
        @Override
        public String[] call(Row row) throws Exception {
            final int width = row.size();
            String[] fields = new String[width];
            for (int col = 0; col < width; col++) {
                final Object value = row.get(col);
                fields[col] = (value == null) ? null : value.toString();
            }
            return fields;
        }
    });
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
SparkConf sparkConf = context.getConf();
代码示例来源:origin: mahmoudparsian/data-algorithms-book
SparkConf sparkConf = context.getConf();
代码示例来源:origin: mahmoudparsian/data-algorithms-book
SparkConf sparkConf = context.getConf();
代码示例来源:origin: datacleaner/DataCleaner
/**
 * Returns the context's Hadoop configuration when running under a non-local master
 * (e.g. YARN); returns null for local mode or when "spark.master" is unset/empty.
 */
private static Configuration getHadoopConfigurationIfYarnMode(final JavaSparkContext sparkContext) {
    final String master = sparkContext.getConf().get("spark.master");
    if (!Strings.isNullOrEmpty(master) && !"local".equals(master)) {
        return sparkContext.hadoopConfiguration();
    }
    return null;
}
代码示例来源:origin: usc-isi-i2/Web-Karma
/**
 * Convenience overload: reduces the JSON RDD using the context's default
 * parallelism ("spark.default.parallelism", falling back to 1).
 */
public static JavaRDD<String> reduceJSON(JavaSparkContext jsc,
        JavaRDD<String> input, final Properties karmaSettings) {
    final int partitions = jsc.getConf().getInt("spark.default.parallelism", 1);
    return reduceJSON(jsc, input, partitions, karmaSettings);
}
代码示例来源:origin: usc-isi-i2/Web-Karma
/**
 * Convenience overload: reduces the JSON pair RDD using the context's default
 * parallelism ("spark.default.parallelism", falling back to 1).
 */
public static JavaPairRDD<String, String> reduceJSON(JavaSparkContext sc,
        JavaPairRDD<String, String> input, final Properties karmaSettings) {
    final int partitions = sc.getConf().getInt("spark.default.parallelism", 1);
    return reduceJSON(sc, input, partitions, karmaSettings);
}
public static JavaPairRDD<String, String> reduceJSON(JavaSparkContext sc,
代码示例来源:origin: uber/hudi
/**
 * Runs the compaction admin tool in VALIDATE mode for the given compaction instant.
 */
private static void doCompactValidate(JavaSparkContext jsc, String basePath, String compactionInstant,
        String outputPath, int parallelism, String sparkMaster, String sparkMemory) throws Exception {
    HoodieCompactionAdminTool.Config adminCfg = new HoodieCompactionAdminTool.Config();
    adminCfg.operation = Operation.VALIDATE;
    adminCfg.basePath = basePath;
    adminCfg.outputPath = outputPath;
    adminCfg.compactionInstantTime = compactionInstant;
    adminCfg.parallelism = parallelism;
    if (sparkMaster != null && !sparkMaster.isEmpty()) {
        jsc.getConf().setMaster(sparkMaster);
    }
    // NOTE(review): mutating the conf of an already-started context may have no effect — confirm.
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    new HoodieCompactionAdminTool(adminCfg).run(jsc);
}
代码示例来源:origin: uber/hudi
/**
 * Runs the compaction admin tool in REPAIR mode; dryRun previews without applying changes.
 */
private static void doCompactRepair(JavaSparkContext jsc, String basePath, String compactionInstant,
        String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean dryRun) throws Exception {
    HoodieCompactionAdminTool.Config adminCfg = new HoodieCompactionAdminTool.Config();
    adminCfg.operation = Operation.REPAIR;
    adminCfg.basePath = basePath;
    adminCfg.outputPath = outputPath;
    adminCfg.compactionInstantTime = compactionInstant;
    adminCfg.parallelism = parallelism;
    adminCfg.dryRun = dryRun;
    if (sparkMaster != null && !sparkMaster.isEmpty()) {
        jsc.getConf().setMaster(sparkMaster);
    }
    // NOTE(review): mutating the conf of an already-started context may have no effect — confirm.
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    new HoodieCompactionAdminTool(adminCfg).run(jsc);
}
代码示例来源:origin: uber/hudi
/**
 * Runs the compaction admin tool in UNSCHEDULE_PLAN mode for the given compaction instant.
 */
private static void doCompactUnschedule(JavaSparkContext jsc, String basePath, String compactionInstant,
        String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation,
        boolean dryRun) throws Exception {
    HoodieCompactionAdminTool.Config adminCfg = new HoodieCompactionAdminTool.Config();
    adminCfg.operation = Operation.UNSCHEDULE_PLAN;
    adminCfg.basePath = basePath;
    adminCfg.outputPath = outputPath;
    adminCfg.compactionInstantTime = compactionInstant;
    adminCfg.parallelism = parallelism;
    adminCfg.dryRun = dryRun;
    adminCfg.skipValidation = skipValidation;
    if (sparkMaster != null && !sparkMaster.isEmpty()) {
        jsc.getConf().setMaster(sparkMaster);
    }
    // NOTE(review): mutating the conf of an already-started context may have no effect — confirm.
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    new HoodieCompactionAdminTool(adminCfg).run(jsc);
}
代码示例来源:origin: uber/hudi
/**
 * Runs the compaction admin tool in UNSCHEDULE_FILE mode for a single file id.
 */
private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId,
        String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation,
        boolean dryRun) throws Exception {
    HoodieCompactionAdminTool.Config adminCfg = new HoodieCompactionAdminTool.Config();
    adminCfg.operation = Operation.UNSCHEDULE_FILE;
    adminCfg.basePath = basePath;
    adminCfg.outputPath = outputPath;
    adminCfg.fileId = fileId;
    adminCfg.parallelism = parallelism;
    adminCfg.dryRun = dryRun;
    adminCfg.skipValidation = skipValidation;
    if (sparkMaster != null && !sparkMaster.isEmpty()) {
        jsc.getConf().setMaster(sparkMaster);
    }
    // NOTE(review): mutating the conf of an already-started context may have no effect — confirm.
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    new HoodieCompactionAdminTool(adminCfg).run(jsc);
}
代码示例来源:origin: uber/hudi
/**
 * Configures and runs a Hoodie compaction (optionally scheduling it first),
 * returning the compactor's exit code.
 */
private static int compact(JavaSparkContext jsc, String basePath, String tableName, String compactionInstant,
        int parallelism, String schemaFile, String sparkMemory, int retry, boolean schedule) throws Exception {
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    HoodieCompactor.Config compactorCfg = new HoodieCompactor.Config();
    compactorCfg.basePath = basePath;
    compactorCfg.tableName = tableName;
    compactorCfg.compactionInstantTime = compactionInstant;
    compactorCfg.parallelism = parallelism;
    compactorCfg.schemaFile = schemaFile;
    compactorCfg.runSchedule = schedule;
    // TODO: Make this configurable along with strategy specific config - For now, this is a generic enough strategy
    compactorCfg.strategyClassName = UnBoundedCompactionStrategy.class.getCanonicalName();
    return new HoodieCompactor(compactorCfg).compact(jsc, retry);
}
代码示例来源:origin: uber/marmaray
/**
 * Returns the Spark application id read from the context's SparkConf.
 * NOTE(review): jsc appears to be a holder/lazy wrapper around JavaSparkContext
 * that is unwrapped via get() — confirm at the field declaration.
 */
protected String getAppId() {
return jsc.get().getConf().getAppId();
}
}
代码示例来源:origin: uber/hudi
/**
 * Configures and runs the HDFS Parquet import job, returning its exit code.
 */
private static int dataLoad(JavaSparkContext jsc, String command,
        String srcPath, String targetPath, String tableName,
        String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, String sparkMaster,
        String sparkMemory, int retry) throws Exception {
    jsc.getConf().set("spark.executor.memory", sparkMemory);
    // Populate the importer config straight from the CLI arguments.
    HDFSParquetImporter.Config importCfg = new HDFSParquetImporter.Config();
    importCfg.command = command;
    importCfg.srcPath = srcPath;
    importCfg.targetPath = targetPath;
    importCfg.tableName = tableName;
    importCfg.tableType = tableType;
    importCfg.rowKey = rowKey;
    importCfg.partitionKey = partitionKey;
    importCfg.parallelism = parallelism;
    importCfg.schemaFile = schemaFile;
    return new HDFSParquetImporter(importCfg).dataImport(jsc, retry);
}
代码示例来源:origin: org.qcri.rheem/rheem-spark
public SparkExecutor(SparkPlatform platform, Job job) {
super(job);
this.platform = platform;
this.sparkContextReference = this.platform.getSparkContext(job);
this.sparkContextReference.noteObtainedReference();
this.sc = this.sparkContextReference.get();
if (this.sc.getConf().contains("spark.executor.cores")) {
this.numDefaultPartitions = 2 * this.sc.getConf().getInt("spark.executor.cores", -1);
} else {
this.numDefaultPartitions =
(int) (2 * this.getConfiguration().getLongProperty("rheem.spark.machines")
* this.getConfiguration().getLongProperty("rheem.spark.cores-per-machine"));
}
}
代码示例来源:origin: uber/hudi
/**
 * Computes and stores the multi-put batch size for HBase writes when auto-compute
 * is enabled, based on executor count, region-server capacity, and put parallelism.
 */
private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD,
        final JavaSparkContext jsc) {
    if (!config.getHbaseIndexPutBatchSizeAutoCompute()) {
        return;
    }
    final SparkConf sparkConf = jsc.getConf();
    int executorCount = sparkConf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1);
    // Under dynamic allocation the max-executors bound may exceed the static instance count.
    if (sparkConf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) {
        final int dynamicMax =
            sparkConf.getInt(DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1);
        executorCount = Math.max(executorCount, dynamicMax);
    }
    /*
     Each writeStatus represents status information from a write done in one of the IOHandles.
     If a writeStatus has any insert, it implies that the corresponding task contacts HBase for
     doing puts, since we only do puts for inserts from HBaseIndex.
     */
    final int putTaskParallelism = getHBasePutAccessParallelism(writeStatusRDD);
    multiPutBatchSize = putBatchSizeCalculator.getBatchSize(
        getNumRegionServersAliveForTable(),
        maxQpsPerRegionServer,
        putTaskParallelism,
        executorCount,
        SLEEP_TIME_MILLISECONDS,
        qpsFraction);
}
代码示例来源:origin: com.uber.hoodie/hoodie-client
/**
 * When auto-compute is enabled, derives the HBase multi-put batch size from the
 * Spark executor configuration and the put-access parallelism of this write batch.
 */
private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD,
        final JavaSparkContext jsc) {
    if (config.getHbaseIndexPutBatchSizeAutoCompute()) {
        final SparkConf ctxConf = jsc.getConf();
        int effectiveExecutors = ctxConf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1);
        final boolean dynamicAllocation =
            ctxConf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false);
        if (dynamicAllocation) {
            // Dynamic allocation can scale beyond the static instance count.
            effectiveExecutors = Math.max(effectiveExecutors,
                ctxConf.getInt(DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1));
        }
        /*
         Each writeStatus represents status information from a write done in one of the IOHandles.
         If a writeStatus has any insert, it implies that the corresponding task contacts HBase for
         doing puts, since we only do puts for inserts from HBaseIndex.
         */
        final int putParallelism = getHBasePutAccessParallelism(writeStatusRDD);
        multiPutBatchSize = putBatchSizeCalculator.getBatchSize(
            getNumRegionServersAliveForTable(),
            maxQpsPerRegionServer,
            putParallelism,
            effectiveExecutors,
            SLEEP_TIME_MILLISECONDS,
            qpsFraction);
    }
}
代码示例来源:origin: org.apache.kylin/kylin-engine-spark
/**
 * Loads the given Hive table via a Hive-enabled SparkSession built on this
 * context's SparkConf and maps each Row to a String[] (nulls preserved).
 */
private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext sc, String hiveTable) {
    SparkSession hiveSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
    final Dataset tableData = hiveSession.table(hiveTable);
    return tableData.javaRDD().map(new Function<Row, String[]>() {
        @Override
        public String[] call(Row row) throws Exception {
            final String[] cells = new String[row.size()];
            int idx = 0;
            while (idx < cells.length) {
                final Object cell = row.get(idx);
                cells[idx] = (cell != null) ? cell.toString() : null;
                idx++;
            }
            return cells;
        }
    });
}
内容来源于网络,如有侵权,请联系作者删除!