Usage of the org.apache.spark.api.java.JavaSparkContext.getConf() method, with code examples

Reposted by x33g5p2x on 2022-01-21, category: Other

This article collects a number of Java code examples for the org.apache.spark.api.java.JavaSparkContext.getConf() method and shows how JavaSparkContext.getConf() is used in practice. The examples were extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and are intended as practical references. Details of the JavaSparkContext.getConf() method are as follows:
Package path: org.apache.spark.api.java.JavaSparkContext
Class name: JavaSparkContext
Method name: getConf

Introduction to JavaSparkContext.getConf

JavaSparkContext.getConf() returns a copy of the context's configuration as a SparkConf object. According to the Spark API documentation, the returned configuration is a snapshot and cannot be used to change the context's settings at runtime.
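
As a minimal sketch of typical usage (the class name, app name, and configuration keys read below are illustrative assumptions, not taken from the projects quoted in the examples that follow), the snippet creates a local context and reads a few values from the SparkConf returned by getConf():

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class GetConfExample {
  public static void main(String[] args) {
    // Illustrative local configuration; in a real job these values usually come from spark-submit.
    SparkConf conf = new SparkConf().setAppName("getconf-example").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    try {
      // getConf() hands back a copy of the context's configuration.
      SparkConf runtimeConf = jsc.getConf();
      String appName = runtimeConf.get("spark.app.name");
      int parallelism = runtimeConf.getInt("spark.default.parallelism", 1);
      System.out.println("app=" + appName + ", default parallelism=" + parallelism);
    } finally {
      jsc.stop();
    }
  }
}

Because the returned SparkConf is a copy, settings that must affect the running context are normally applied to the SparkConf before the JavaSparkContext is constructed.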

Code examples

Code example source: apache/hive

static org.apache.spark.serializer.KryoSerializer getInstance(JavaSparkContext sc,
                                Configuration conf) {
  if (INSTANCE == null) {
   synchronized (ShuffleKryoSerializer.class) {
    if (INSTANCE == null) {
     try {
      INSTANCE = (org.apache.spark.serializer.KryoSerializer) Thread.currentThread().getContextClassLoader().loadClass(
          HIVE_SHUFFLE_KRYO_SERIALIZER).getConstructor(SparkConf.class).newInstance(
          sc.getConf());
      return INSTANCE;
     } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | ClassNotFoundException e) {
      throw new IllegalStateException(
          "Unable to create kryo serializer for shuffle RDDs using " +
              "class " + HIVE_SHUFFLE_KRYO_SERIALIZER, e);
     }
    } else {
     return INSTANCE;
    }
   }
  }
  return INSTANCE;
 }
}

Code example source: Alluxio/alluxio

/**
 * Implements Spark with Alluxio integration checker.
 *
 * @param sc current JavaSparkContext
 * @param reportWriter save user-facing messages to a generated file
 * @return performIntegrationChecks results
 */
private Status run(JavaSparkContext sc, PrintWriter reportWriter, AlluxioConfiguration conf) {
 // Check whether Spark driver can recognize Alluxio classes and filesystem
 Status driverStatus = CheckerUtils.performIntegrationChecks();
 String driverAddress = sc.getConf().get("spark.driver.host");
 switch (driverStatus) {
  case FAIL_TO_FIND_CLASS:
   reportWriter.printf("Spark driver: %s failed to recognize Alluxio classes.%n%n",
     driverAddress);
   return driverStatus;
  case FAIL_TO_FIND_FS:
   reportWriter.printf("Spark driver: %s failed to recognize Alluxio filesystem.%n%n",
     driverAddress);
   return driverStatus;
  default:
   reportWriter.printf("Spark driver: %s can recognize Alluxio filesystem.%n%n",
     driverAddress);
   break;
 }
 if (!CheckerUtils.supportAlluxioHA(reportWriter, conf)) {
  return Status.FAIL_TO_SUPPORT_HA;
 }
 return runSparkJob(sc, reportWriter);
}

Code example source: apache/kylin

private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext sc, String hiveTable) {
  SparkSession sparkSession = SparkSession.builder().config(sc.getConf()).enableHiveSupport().getOrCreate();
  final Dataset intermediateTable = sparkSession.table(hiveTable);
  return intermediateTable.javaRDD().map(new Function<Row, String[]>() {
    @Override
    public String[] call(Row row) throws Exception {
      String[] result = new String[row.size()];
      for (int i = 0; i < row.size(); i++) {
        final Object o = row.get(i);
        if (o != null) {
          result[i] = o.toString();
        } else {
          result[i] = null;
        }
      }
      return result;
    }
  });
}

Code example source: mahmoudparsian/data-algorithms-book

SparkConf sparkConf = context.getConf();

Code example source: datacleaner/DataCleaner

private static Configuration getHadoopConfigurationIfYarnMode(final JavaSparkContext sparkContext) {
  final String sparkMaster = sparkContext.getConf().get("spark.master");
  if (Strings.isNullOrEmpty(sparkMaster) || "local".equals(sparkMaster)) {
    return null;
  }
  return sparkContext.hadoopConfiguration();
}

Code example source: usc-isi-i2/Web-Karma

public static JavaRDD<String> reduceJSON(JavaSparkContext jsc, 
    JavaRDD<String> input, final Properties karmaSettings) {
  return reduceJSON(jsc, input, jsc.getConf().getInt("spark.default.parallelism", 1), karmaSettings);
}

Code example source: usc-isi-i2/Web-Karma

public static JavaPairRDD<String, String> reduceJSON(JavaSparkContext sc, 
    JavaPairRDD<String, String> input, final Properties karmaSettings) {
  return reduceJSON(sc, input, sc.getConf().getInt("spark.default.parallelism", 1), karmaSettings);
}

Code example source: uber/hudi

private static void doCompactValidate(JavaSparkContext jsc, String basePath, String compactionInstant,
  String outputPath, int parallelism, String sparkMaster, String sparkMemory) throws Exception {
 HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
 cfg.basePath = basePath;
 cfg.operation = Operation.VALIDATE;
 cfg.outputPath = outputPath;
 cfg.compactionInstantTime = compactionInstant;
 cfg.parallelism = parallelism;
 if ((null != sparkMaster) && (!sparkMaster.isEmpty())) {
  jsc.getConf().setMaster(sparkMaster);
 }
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 new HoodieCompactionAdminTool(cfg).run(jsc);
}

Code example source: uber/hudi

private static void doCompactRepair(JavaSparkContext jsc, String basePath, String compactionInstant,
  String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean dryRun) throws Exception {
 HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
 cfg.basePath = basePath;
 cfg.operation = Operation.REPAIR;
 cfg.outputPath = outputPath;
 cfg.compactionInstantTime = compactionInstant;
 cfg.parallelism = parallelism;
 cfg.dryRun = dryRun;
 if ((null != sparkMaster) && (!sparkMaster.isEmpty())) {
  jsc.getConf().setMaster(sparkMaster);
 }
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 new HoodieCompactionAdminTool(cfg).run(jsc);
}

Code example source: uber/hudi

private static void doCompactUnschedule(JavaSparkContext jsc, String basePath, String compactionInstant,
  String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation,
  boolean dryRun) throws Exception {
 HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
 cfg.basePath = basePath;
 cfg.operation = Operation.UNSCHEDULE_PLAN;
 cfg.outputPath = outputPath;
 cfg.compactionInstantTime = compactionInstant;
 cfg.parallelism = parallelism;
 cfg.dryRun = dryRun;
 cfg.skipValidation = skipValidation;
 if ((null != sparkMaster) && (!sparkMaster.isEmpty())) {
  jsc.getConf().setMaster(sparkMaster);
 }
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 new HoodieCompactionAdminTool(cfg).run(jsc);
}

Code example source: uber/hudi

private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId,
  String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation,
  boolean dryRun) throws Exception {
 HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
 cfg.basePath = basePath;
 cfg.operation = Operation.UNSCHEDULE_FILE;
 cfg.outputPath = outputPath;
 cfg.fileId = fileId;
 cfg.parallelism = parallelism;
 cfg.dryRun = dryRun;
 cfg.skipValidation = skipValidation;
 if ((null != sparkMaster) && (!sparkMaster.isEmpty())) {
  jsc.getConf().setMaster(sparkMaster);
 }
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 new HoodieCompactionAdminTool(cfg).run(jsc);
}

Code example source: uber/hudi

private static int compact(JavaSparkContext jsc, String basePath, String tableName, String compactionInstant,
  int parallelism, String schemaFile, String sparkMemory, int retry, boolean schedule) throws Exception {
 HoodieCompactor.Config cfg = new HoodieCompactor.Config();
 cfg.basePath = basePath;
 cfg.tableName = tableName;
 cfg.compactionInstantTime = compactionInstant;
 // TODO: Make this configurable along with strategy specific config - For now, this is a generic enough strategy
 cfg.strategyClassName = UnBoundedCompactionStrategy.class.getCanonicalName();
 cfg.parallelism = parallelism;
 cfg.schemaFile = schemaFile;
 cfg.runSchedule = schedule;
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 return new HoodieCompactor(cfg).compact(jsc, retry);
}

Code example source: uber/marmaray

protected String getAppId() {
    return jsc.get().getConf().getAppId();
  }
}

Code example source: uber/hudi

private static int dataLoad(JavaSparkContext jsc, String command,
  String srcPath, String targetPath, String tableName,
  String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, String sparkMaster,
  String sparkMemory, int retry) throws Exception {
 HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config();
 cfg.command = command;
 cfg.srcPath = srcPath;
 cfg.targetPath = targetPath;
 cfg.tableName = tableName;
 cfg.tableType = tableType;
 cfg.rowKey = rowKey;
 cfg.partitionKey = partitionKey;
 cfg.parallelism = parallelism;
 cfg.schemaFile = schemaFile;
 jsc.getConf().set("spark.executor.memory", sparkMemory);
 return new HDFSParquetImporter(cfg).dataImport(jsc, retry);
}

Code example source: org.qcri.rheem/rheem-spark

public SparkExecutor(SparkPlatform platform, Job job) {
  super(job);
  this.platform = platform;
  this.sparkContextReference = this.platform.getSparkContext(job);
  this.sparkContextReference.noteObtainedReference();
  this.sc = this.sparkContextReference.get();
  if (this.sc.getConf().contains("spark.executor.cores")) {
    this.numDefaultPartitions = 2 * this.sc.getConf().getInt("spark.executor.cores", -1);
  } else {
    this.numDefaultPartitions =
        (int) (2 * this.getConfiguration().getLongProperty("rheem.spark.machines")
            * this.getConfiguration().getLongProperty("rheem.spark.cores-per-machine"));
  }
}

Code example source: uber/hudi

private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD,
  final JavaSparkContext jsc) {
 if (config.getHbaseIndexPutBatchSizeAutoCompute()) {
  SparkConf conf = jsc.getConf();
  int maxExecutors = conf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1);
  if (conf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) {
   maxExecutors = Math.max(maxExecutors, conf.getInt(
     DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1));
  }
  /*
   Each writeStatus represents status information from a write done in one of the IOHandles.
   If a writeStatus has any insert, it implies that the corresponding task contacts HBase for
   doing puts, since we only do puts for inserts from HBaseIndex.
   */
  int hbasePutAccessParallelism = getHBasePutAccessParallelism(writeStatusRDD);
  multiPutBatchSize = putBatchSizeCalculator
    .getBatchSize(
      getNumRegionServersAliveForTable(),
      maxQpsPerRegionServer,
      hbasePutAccessParallelism,
      maxExecutors,
      SLEEP_TIME_MILLISECONDS,
      qpsFraction);
 }
}
