Usage of the org.apache.spark.api.java.JavaSparkContext.textFile() method, with code examples


This article collects code examples of the org.apache.spark.api.java.JavaSparkContext.textFile() method in Java and shows how JavaSparkContext.textFile() is used in practice. The examples are drawn from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should serve as useful references. Details of the JavaSparkContext.textFile() method are as follows:
Package path: org.apache.spark.api.java.JavaSparkContext
Class name: JavaSparkContext
Method name: textFile

Introduction to JavaSparkContext.textFile

textFile(String path) reads a text file from HDFS, the local file system, or any other Hadoop-supported URI and returns it as a JavaRDD<String>, with one element per line. The overload textFile(String path, int minPartitions) additionally takes a suggested minimum number of partitions for the resulting RDD.
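Before the collected examples, here is a minimal, self-contained sketch of how the method is typically called. This is an illustrative sketch, not code from the projects below: it assumes Java 8+ with a reasonably recent Spark build, and the master URL, app name, and input path are placeholders.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TextFileSketch {
 public static void main(String[] args) {
  // Local-mode context; the master URL and app name here are placeholders.
  SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("textFile-sketch");
  JavaSparkContext sc = new JavaSparkContext(conf);

  // textFile() returns one String element per line; "input.txt" is a placeholder path.
  JavaRDD<String> lines = sc.textFile("input.txt");

  // The overload takes a suggested minimum number of partitions.
  JavaRDD<String> linesPartitioned = sc.textFile("input.txt", 4);
  System.out.println("Partitions: " + linesPartitioned.partitions().size());

  // Simple use of the resulting RDD: count lines containing "spark".
  long matching = lines.filter(line -> line.toLowerCase().contains("spark")).count();
  System.out.println("Total lines: " + lines.count() + ", lines mentioning spark: " + matching);

  sc.stop();
 }
}

Note that the second argument is only a hint: Spark may create more partitions than requested, depending on how the underlying Hadoop input splits fall.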

Code examples

Code example source: stackoverflow.com

JavaSparkContext sc;
JavaRDD<String> data = sc.textFile("path/input.csv");
JavaSQLContext sqlContext = new JavaSQLContext(sc);

// Parse each CSV line into a Record (data is already an RDD, so map over it directly)
JavaRDD<Record> rdd_records = data.map(
 new Function<String, Record>() {
   public Record call(String line) throws Exception {
     // Alternatively, parse the line as JSON:
     // Gson gson = new Gson();
     // gson.fromJson(line, Record.class);
     String[] fields = line.split(",");
     Record sd = new Record(fields[0], fields[1], fields[2].trim(), fields[3]);
     return sd;
   }
});

Code example source: stackoverflow.com

/*** SimpleApp.java ***/
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;

public class SimpleApp {
 public static void main(String[] args) {
  String logFile = "$YOUR_SPARK_HOME/README.md"; // Should be some file on your system
  JavaSparkContext sc = new JavaSparkContext("local", "Simple App",
   "$YOUR_SPARK_HOME", new String[]{"target/simple-project-1.0.jar"});
  JavaRDD<String> logData = sc.textFile(logFile).cache();

  long numAs = logData.filter(new Function<String, Boolean>() {
   public Boolean call(String s) { return s.contains("a"); }
  }).count();

  long numBs = logData.filter(new Function<String, Boolean>() {
   public Boolean call(String s) { return s.contains("b"); }
  }).count();

  System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
 }
}

Code example source: OryxProject/oryx

private static JavaPairRDD<String,float[]> readFeaturesRDD(JavaSparkContext sparkContext, Path path) {
 log.info("Loading features RDD from {}", path);
 JavaRDD<String> featureLines = sparkContext.textFile(path.toString());
 return featureLines.mapToPair(line -> {
  List<?> update = TextUtils.readJSON(line, List.class);
  String key = update.get(0).toString();
  float[] vector = TextUtils.convertViaJSON(update.get(1), float[].class);
  return new Tuple2<>(key, vector);
 });
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicFlatMap sparkMaster inputFile");
  }

  JavaSparkContext sc = new JavaSparkContext(
    args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  JavaRDD<String> words = rdd.flatMap(
    new FlatMapFunction<String, String>() {
      public Iterable<String> call(String x) {
        return Arrays.asList(x.split(" "));
      }
    });
  Map<String, Long> result = words.countByValue();
  for (Entry<String, Long> entry : result.entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue());
  }
}

Code example source: databricks/learning-spark

public void run(String master, String csv1, String csv2) throws Exception {
  JavaSparkContext sc = new JavaSparkContext(
    master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> csvFile1 = sc.textFile(csv1);
  JavaRDD<String> csvFile2 = sc.textFile(csv2);
  JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
  // Key the second file as well so the join actually combines both inputs
  JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
  JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
  List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage KeyValueMapFilter sparkMaster inputFile");
  }
  String master = args[0];
  String inputFile = args[1];

  JavaSparkContext sc = new JavaSparkContext(
    master, "KeyValueMapFilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> input = sc.textFile(inputFile);
  PairFunction<String, String, String> keyData = new PairFunction<String, String, String>() {
    @Override
    public Tuple2<String, String> call(String x) {
      return new Tuple2<>(x.split(" ")[0], x);
    }
  };
  Function<Tuple2<String, String>, Boolean> longWordFilter = new Function<Tuple2<String, String>, Boolean>() {
    @Override
    public Boolean call(Tuple2<String, String> input) {
      return (input._2().length() < 20);
    }
  };
  JavaPairRDD<String, String> rdd = input.mapToPair(keyData);
  JavaPairRDD<String, String> result = rdd.filter(longWordFilter);
  Map<String, String> resultMap = result.collectAsMap();
  for (Entry<String, String> entry : resultMap.entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue());
  }
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(
    master, "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  JavaPairRDD<String, Integer> counts = rdd
    .flatMap(new FlatMapFunction<String, String>() {
      public Iterable<String> call(String x) {
        return Arrays.asList(x.split(" "));
      }
    })
    .mapToPair(new PairFunction<String, String, Integer>() {
      public Tuple2<String, Integer> call(String x) {
        return new Tuple2<>(x, 1);
      }
    })
    .reduceByKey(new Function2<Integer, Integer, Integer>() {
      public Integer call(Integer x, Integer y) { return x + y; }
    });
  counts.saveAsTextFile(args[2]);
}

Code example source: databricks/learning-spark

public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    throw new Exception("Usage BasicLoadJson [sparkMaster] [jsoninput] [jsonoutput]");
  }
  String master = args[0];
  String fileName = args[1];
  String outfile = args[2];

  JavaSparkContext sc = new JavaSparkContext(
    master, "basicloadjson", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> input = sc.textFile(fileName);
  JavaRDD<Person> result = input.mapPartitions(new ParseJson()).filter(new LikesPandas());
  JavaRDD<String> formatted = result.mapPartitions(new WriteJson());
  formatted.saveAsTextFile(outfile);
}

Code example source: databricks/learning-spark

JavaRDD<String> spam = sc.textFile("files/spam.txt");
JavaRDD<String> ham = sc.textFile("files/ham.txt");

Code example source: org.apache.spark/spark-core_2.11

@Test
public void textFilesCompressed() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir, DefaultCodec.class);
 // Try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void textFilesCompressed() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir, DefaultCodec.class);
 // Try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: org.apache.spark/spark-core

@Test
public void textFilesCompressed() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir, DefaultCodec.class);
 // Try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: org.apache.spark/spark-core

@Test
public void textFiles() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir);
 // Read the plain text file and check it's OK
 File outputFile = new File(outputDir, "part-00000");
 String content = Files.toString(outputFile, StandardCharsets.UTF_8);
 assertEquals("1\n2\n3\n4\n", content);
 // Also try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void textFiles() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir);
 // Read the plain text file and check it's OK
 File outputFile = new File(outputDir, "part-00000");
 String content = Files.toString(outputFile, StandardCharsets.UTF_8);
 assertEquals("1\n2\n3\n4\n", content);
 // Also try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void textFiles() throws IOException {
 String outputDir = new File(tempDir, "output").getAbsolutePath();
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 rdd.saveAsTextFile(outputDir);
 // Read the plain text file and check it's OK
 File outputFile = new File(outputDir, "part-00000");
 String content = Files.toString(outputFile, StandardCharsets.UTF_8);
 assertEquals("1\n2\n3\n4\n", content);
 // Also try reading it in as a text file RDD
 List<String> expected = Arrays.asList("1", "2", "3", "4");
 JavaRDD<String> readRDD = sc.textFile(outputDir);
 assertEquals(expected, readRDD.collect());
}

Code example source: deeplearning4j/dl4j-examples

JavaRDD<String> stringData = sc.textFile(filename);

Code example source: deeplearning4j/dl4j-examples

JavaRDD<List<Writable>> customerInfo = sc.textFile(customerInfoPath).map(new StringToWritablesFunction(rr));
JavaRDD<List<Writable>> purchaseInfo = sc.textFile(purchaseInfoPath).map(new StringToWritablesFunction(rr));

Code example source: deeplearning4j/dl4j-examples

JavaRDD<String> stringData = sc.textFile(path);

Code example source: deeplearning4j/dl4j-examples

JavaRDD<String> stringData = sc.textFile(directory);

Code example source: deeplearning4j/dl4j-examples

JavaRDD<String> logLines = sc.textFile(EXTRACTED_PATH);
