Usage of the org.apache.spark.sql.DataFrameWriter.save() method, with code examples

This article collects Java code examples for the org.apache.spark.sql.DataFrameWriter.save() method and shows how DataFrameWriter.save() is used in practice. The examples are drawn from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should be useful points of reference. Details of the DataFrameWriter.save() method follow:
Package path: org.apache.spark.sql.DataFrameWriter
Class: DataFrameWriter
Method: save

About DataFrameWriter.save

save() writes the content of the DataFrame to an external data source, using the format, options, and save mode previously configured on the DataFrameWriter. The save(path) overload additionally specifies the output path for file-based sources.
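
For orientation, here is a minimal, self-contained sketch of the typical call pattern, assuming a local SparkSession and an output path of /tmp/dataframewriter_save_example (both are illustrative choices, not taken from the projects quoted below):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SaveExample {
 public static void main(String[] args) {
  // Local session for the sketch; in a real job the SparkSession is usually provided.
  SparkSession spark = SparkSession.builder()
    .appName("DataFrameWriter.save example")
    .master("local[*]")
    .getOrCreate();

  // A tiny DataFrame: ten rows with a single "id" column.
  Dataset<Row> df = spark.range(10).toDF();

  // Configure format and save mode on the writer, then save() to the assumed path.
  df.write()
    .format("parquet")
    .mode(SaveMode.Overwrite)
    .save("/tmp/dataframewriter_save_example");

  spark.stop();
 }
}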

Code Examples

Code example origin: org.apache.spark/spark-sql_2.10

@Test
public void testSaveModeAPI() {
 spark
   .range(10)
   .write()
   .format("org.apache.spark.sql.test")
   .mode(SaveMode.ErrorIfExists)
   .save();
}

Code example origin: com.couchbase.client/spark-connector

public void couchbase() {
  // Save using the wrapped, preconfigured DataFrameWriter (dfw).
  dfw.save();
}

Code example origin: org.apache.spark/spark-sql

@Test
public void saveAndLoad() {
 Map<String, String> options = new HashMap<>();
 options.put("path", path.toString());
 df.write().mode(SaveMode.ErrorIfExists).format("json").options(options).save();
 Dataset<Row> loadedDF = spark.read().format("json").options(options).load();
 checkAnswer(loadedDF, df.collectAsList());
}

Code example origin: org.apache.spark/spark-sql

@Test
public void saveAndLoadWithSchema() {
 Map<String, String> options = new HashMap<>();
 options.put("path", path.toString());
 df.write().format("json").mode(SaveMode.ErrorIfExists).options(options).save();

 List<StructField> fields = new ArrayList<>();
 fields.add(DataTypes.createStructField("b", DataTypes.StringType, true));
 StructType schema = DataTypes.createStructType(fields);
 Dataset<Row> loadedDF = spark.read().format("json").schema(schema).options(options).load();

 checkAnswer(loadedDF, spark.sql("SELECT b FROM jsonTable").collectAsList());
}

Code example origin: org.apache.spark/spark-sql_2.10

@Test
public void testFormatAPI() {
 spark
   .read()
   .format("org.apache.spark.sql.test")
   .load()
   .write()
   .format("org.apache.spark.sql.test")
   .save();
}

Code example origin: com.couchbase.client/spark-connector

public void couchbase(Map<String, String> options) {
  // Apply the supplied options to the writer, then save with the wrapped DataFrameWriter (dfw).
  prepare(options);
  dfw.save();
}

Code example origin: org.apache.spark/spark-sql_2.10

@Test
public void testOptionsAPI() {
 HashMap<String, String> map = new HashMap<String, String>();
 map.put("e", "1");
 spark
   .read()
   .option("a", "1")
   .option("b", 1)
   .option("c", 1.0)
   .option("d", true)
   .options(map)
   .text()
   .write()
   .option("a", "1")
   .option("b", 1)
   .option("c", 1.0)
   .option("d", true)
   .options(map)
   .format("org.apache.spark.sql.test")
   .save();
}

Code example origin: amidst/toolbox

public static void writeDataToFolder(DataSpark data, String path, SQLContext sqlContext, String formatFile) throws Exception {
  data.getDataFrame(sqlContext).write().mode(SaveMode.Overwrite).format(formatFile).save(path);
}

Code example origin: KeithSSmith/spark-compaction

public void compact(String[] args) throws IOException {
  this.setCompressionAndSerializationOptions(this.parseCli(args));
  this.outputCompressionProperties(this.outputCompression);
  
  // Defining Spark Context with a generic Spark Configuration.
  SparkConf sparkConf = new SparkConf().setAppName("Spark Compaction");
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
      
  if (this.outputSerialization.equals(TEXT)) {
    JavaRDD<String> textFile = sc.textFile(this.concatInputPath(inputPath));
    textFile.coalesce(this.splitSize).saveAsTextFile(outputPath);
  } else if (this.outputSerialization.equals(PARQUET)) {
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame parquetFile = sqlContext.read().parquet(this.concatInputPath(inputPath));
    parquetFile.coalesce(this.splitSize).write().parquet(outputPath);
  } else if (this.outputSerialization.equals(AVRO)) {
    // For this to work the files must end in .avro
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame avroFile = sqlContext.read().format("com.databricks.spark.avro").load(this.concatInputPath(inputPath));
    avroFile.coalesce(this.splitSize).write().format("com.databricks.spark.avro").save(outputPath);
  } else {
    System.out.println("Did not match any serialization type: text, parquet, or avro.  Recieved: " +
        this.outputSerialization);
  }
}

Code example origin: Netflix/iceberg

private File buildPartitionedTable(String desc, PartitionSpec spec, String udf, String partitionColumn) {
 File location = new File(parent, desc);
 Table byId = TABLES.create(SCHEMA, spec, location.toString());
 // do not combine splits because the tests expect a split per partition
 byId.updateProperties().set("read.split.target-size", "1").commit();
 // copy the unpartitioned table into the partitioned table to produce the partitioned data
 Dataset<Row> allRows = spark.read()
   .format("iceberg")
   .load(unpartitioned.toString());
 allRows
   .coalesce(1) // ensure only 1 file per partition is written
   .withColumn("part", callUDF(udf, column(partitionColumn)))
   .sortWithinPartitions("part")
   .drop("part")
   .write()
   .format("iceberg")
   .mode("append")
   .save(byId.location());
 return location;
}
