Usage of the org.apache.spark.SparkContext.hadoopConfiguration() method, with code examples


This article collects a number of Java code examples of org.apache.spark.SparkContext.hadoopConfiguration() and shows how the method is used in practice. The examples were extracted from selected projects on GitHub, Stack Overflow and Maven, so they should serve as useful references. Details of SparkContext.hadoopConfiguration() follow:
Package: org.apache.spark
Class: SparkContext
Method: hadoopConfiguration

About SparkContext.hadoopConfiguration

SparkContext.hadoopConfiguration() returns the Hadoop Configuration object that Spark uses for Hadoop I/O, for example when reading from or writing to HDFS or S3, or when creating Hadoop-backed RDDs. The configuration is initialized from the Spark configuration (including any spark.hadoop.* properties) and is shared across the application, so properties set on it affect subsequent Hadoop reads and writes performed through that SparkContext.
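
As a quick illustration (independent of the projects cited below), here is a minimal sketch of reading and setting properties through hadoopConfiguration(); the application name, master URL and property keys are arbitrary values chosen for the demo:

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class HadoopConfigurationDemo {
  public static void main(String[] args) {
    // A throwaway local context; any existing SparkContext behaves the same way.
    SparkConf conf = new SparkConf().setAppName("hadoop-conf-demo").setMaster("local[*]");
    SparkContext sc = new SparkContext(conf);

    // hadoopConfiguration() exposes the shared Hadoop Configuration used for Hadoop I/O.
    Configuration hadoopConf = sc.hadoopConfiguration();

    // Read a property (may be null if it is not set anywhere)...
    String defaultFs = hadoopConf.get("fs.defaultFS");
    System.out.println("fs.defaultFS = " + defaultFs);

    // ...and set properties that later Hadoop reads/writes through this context will see.
    hadoopConf.set("mapreduce.output.fileoutputformat.compress", "true");

    sc.stop();
  }
}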

Code examples

Code example source: origin: apache/kylin (the same method is also published as org.apache.kylin/kylin-engine-spark)

public static void modifySparkHadoopConfiguration(SparkContext sc) throws Exception {
  sc.hadoopConfiguration().set("dfs.replication", "2"); // cuboid intermediate files, replication=2
  sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
  sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
  sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.DefaultCodec"); // or org.apache.hadoop.io.compress.SnappyCodec
}

Code example source: origin: org.datavec/datavec-spark_2.11 (the same code also appears in org.datavec/datavec-spark)

/**
 * Read a UTF-8 format String from HDFS (or local)
 *
 * @param path    Path of the file to read the String from
 * @param sc      Spark context
 */
public static String readStringFromFile(String path, SparkContext sc) throws IOException {
  return readStringFromFile(path, sc.hadoopConfiguration());
}

Code example source: origin: org.datavec/datavec-spark (the same code also appears in org.datavec/datavec-spark_2.11)

/**
 * Read an object from HDFS (or local) using default Java object serialization
 *
 * @param path    File to read
 * @param type    Class of the object to read
 * @param sc      Spark context
 * @param <T>     Type of the object to read
 */
public static <T> T readObjectFromFile(String path, Class<T> type, SparkContext sc) throws IOException {
  return readObjectFromFile(path, type, sc.hadoopConfiguration());
}

Code example source: origin: org.datavec/datavec-spark (the same code also appears in org.datavec/datavec-spark_2.11)

/**
 * Write an object to HDFS (or local) using default Java object serialization
 *
 * @param path       Path to write the object to
 * @param toWrite    Object to write
 * @param sc         Spark context
 */
public static void writeObjectToFile(String path, Object toWrite, SparkContext sc) throws IOException {
  writeObjectToFile(path, toWrite, sc.hadoopConfiguration());
}

Code example source: origin: org.datavec/datavec-spark_2.11 (the same code also appears in org.datavec/datavec-spark)

/**
 * Write a String to a file (on HDFS or local) in UTF-8 format
 *
 * @param path       Path to write to
 * @param toWrite    String to write
 * @param sc         Spark context
 */
public static void writeStringToFile(String path, String toWrite, SparkContext sc) throws IOException {
  writeStringToFile(path, toWrite, sc.hadoopConfiguration());
}

Code example source: origin: uber/marmaray

private void updateSparkContext(@NonNull final SparkArgs sparkArgs,
  @NonNull final SparkContext sc) {
  for (SparkListener sparkListener : getSparkEventListeners()) {
    sc.addSparkListener(sparkListener);
  }
  sc.hadoopConfiguration().addResource(sparkArgs.getHadoopConfiguration());
}

Code example source: origin: Netflix/iceberg

protected Configuration lazyConf() {
  if (lazyConf == null) {
    this.lazyConf = lazySparkSession().sparkContext().hadoopConfiguration();
  }
  return lazyConf;
}

Code example source: origin: Netflix/iceberg

public static Seq<CatalogTablePartition> partitions(SparkSession spark, String name) {
  List<String> parts = Lists.newArrayList(Splitter.on('.').limit(2).split(name));
  String db = parts.size() == 1 ? "default" : parts.get(0);
  String table = parts.get(parts.size() == 1 ? 0 : 1);

  HiveClient client = HiveUtils$.MODULE$.newClientForMetadata(
      spark.sparkContext().conf(),
      spark.sparkContext().hadoopConfiguration());

  return client.getPartitions(db, table, Option.empty());
}

Code example source: origin: Stratio/deep-spark (the same code also appears in com.stratio.deep/deep-core)

/**
 * Returns a Cells RDD from S3 fileSystem.
 * @param config Amazon S3 ExtractorConfig.
 * @return RDD of Cells.
 */
public RDD<Cells> createS3RDD(ExtractorConfig<Cells> config) {
  Serializable bucket = config.getValues().get(ExtractorConstants.S3_BUCKET);
  Serializable path = config.getValues().get(ExtractorConstants.FS_FILE_PATH);
  final TextFileDataTable textFileDataTable = UtilFS.createTextFileMetaDataFromConfig(config, this);
  String filePath = path.toString();
  if (config.getExtractorImplClassName().equals(ExtractorConstants.S3)) {
    filePath = ExtractorConstants.S3_PREFIX + bucket.toString() + path.toString();
  }
  Configuration hadoopConf = this.sc().hadoopConfiguration();
  hadoopConf.set("fs.s3n.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
  hadoopConf.set("fs.s3n.awsAccessKeyId", config.getString(ExtractorConstants.S3_ACCESS_KEY_ID));
  hadoopConf.set("fs.s3n.awsSecretAccessKey", config.getString(ExtractorConstants.S3_SECRET_ACCESS_KEY));
  return createRDDFromFilePath(filePath, textFileDataTable);
}

Code example source: origin: uk.gov.gchq.gaffer/parquet-store

public static void configureSparkForAddElements(final SparkSession spark, final ParquetStoreProperties props) {
  final Integer numberOfOutputFiles = props.getAddElementsOutputFilesPerGroup();
  // Fall back to the SQLConf default when spark.sql.shuffle.partitions is not explicitly set
  final Option<String> shufflePartitionsOption = spark.conf().getOption("spark.sql.shuffle.partitions");
  final String shufflePartitions = shufflePartitionsOption.isDefined()
      ? shufflePartitionsOption.get()
      : SQLConf.SHUFFLE_PARTITIONS().defaultValueString();
  if (numberOfOutputFiles > Integer.parseInt(shufflePartitions)) {
    LOGGER.debug("Setting the number of Spark shuffle partitions to {}", numberOfOutputFiles);
    spark.conf().set("spark.sql.shuffle.partitions", numberOfOutputFiles);
  }
  final Configuration hadoopConf = spark.sparkContext().hadoopConfiguration();
  configureSparkConfForAddElements(hadoopConf, props);
}

Code example source: origin: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperation(final GetRDDOfAllElements operation,
                 final Context context,
                 final AccumuloStore accumuloStore)
    throws OperationException {
  SparkSession sparkSession = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties());
  if (sparkSession == null) {
    throw new OperationException("This operation requires an active SparkSession.");
  }
  sparkSession.sparkContext().hadoopConfiguration().addResource(getConfiguration(operation));
  final String useRFileReaderRDD = operation.getOption(USE_RFILE_READER_RDD);
  if (Boolean.parseBoolean(useRFileReaderRDD)) {
    return doOperationUsingRFileReaderRDD(operation, context, accumuloStore);
  } else {
    return doOperationUsingElementInputFormat(operation, context, accumuloStore);
  }
}

Code example source: origin: uber/marmaray

private void assertExpectationsOnSparkContext(
  @NonNull final SparkArgs sparkArgs,
  @NonNull final SparkContext sc) {
  final String registeredAvroSchemaStr = sc.conf().getAvroSchema().head()._2();
  final Schema expectedAvroSchema = sparkArgs.getAvroSchemas().get().get(0);
  Assert.assertEquals(expectedAvroSchema.toString(), registeredAvroSchemaStr);
  Assert.assertEquals("foo_bar", sc.appName());
  Assert.assertEquals("512", sc.hadoopConfiguration().get("mapreduce.map.memory.mb"));
}

Code example source: origin: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperation(final GetRDDOfElements operation,
                 final Context context,
                 final AccumuloStore accumuloStore)
    throws OperationException {
  final Configuration conf = getConfiguration(operation);
  final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
  sparkContext.hadoopConfiguration().addResource(conf);
  // Use batch scan option when performing seeded operation
  InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
  addIterators(accumuloStore, conf, context.getUser(), operation);
  addRanges(accumuloStore, conf, operation);
  final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
      ElementInputFormat.class,
      Element.class,
      NullWritable.class);
  return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}

Code example source: origin: uk.gov.gchq.gaffer/spark-accumulo-library

private RDD<Element> doOperation(final GetRDDOfElementsInRanges operation,
                 final Context context,
                 final AccumuloStore accumuloStore)
    throws OperationException {
  final Configuration conf = getConfiguration(operation);
  final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
  sparkContext.hadoopConfiguration().addResource(conf);
  // Use batch scan option when performing seeded operation
  InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
  addIterators(accumuloStore, conf, context.getUser(), operation);
  addRangesFromPairs(accumuloStore, conf, operation);
  final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
      ElementInputFormat.class,
      Element.class,
      NullWritable.class);
  return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
