This article collects code examples of the Java method org.apache.spark.api.java.JavaSparkContext.textFile(), showing how JavaSparkContext.textFile() is used in practice. The examples come from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as useful references. Details of JavaSparkContext.textFile():
Package: org.apache.spark.api.java
Class name: JavaSparkContext
Method name: textFile
Method description: none provided
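Before the collected examples, here is a minimal self-contained sketch of the two textFile overloads; the application name, master URL, and "data.txt" path are placeholders, not values taken from any example below:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

SparkConf conf = new SparkConf().setAppName("TextFileDemo").setMaster("local[*]");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> lines = sc.textFile("data.txt");      // single-argument overload: default partitioning
JavaRDD<String> lines8 = sc.textFile("data.txt", 8);  // two-argument overload: request at least 8 partitions
sc.stop();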
Example source: stackoverflow.com
JavaSparkContext sc;  // assume an initialized context
JavaRDD<String> data = sc.textFile("path/input.csv");
JavaSQLContext sqlContext = new JavaSQLContext(sc);
// textFile() takes a path string; to parse each line, map over the RDD it returned:
JavaRDD<Record> rdd_records = data.map(
    new Function<String, Record>() {
      public Record call(String line) throws Exception {
        // Alternatively, parse JSON lines here:
        // Gson gson = new Gson();
        // gson.fromJson(line, Record.class);
        String[] fields = line.split(",");
        return new Record(fields[0], fields[1], fields[2].trim(), fields[3]);
      }
    });
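With Java 8 lambdas the same parsing step is shorter; a sketch assuming the same Record class and the data RDD defined above:

JavaRDD<Record> rddRecords = data.map(line -> {
    String[] fields = line.split(",");
    return new Record(fields[0], fields[1], fields[2].trim(), fields[3]);
});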
Example source: stackoverflow.com
/*** SimpleApp.java ***/
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;

public class SimpleApp {
  public static void main(String[] args) {
    String logFile = "$YOUR_SPARK_HOME/README.md"; // should be some file on your system
    JavaSparkContext sc = new JavaSparkContext("local", "Simple App",
        "$YOUR_SPARK_HOME", new String[]{"target/simple-project-1.0.jar"});
    JavaRDD<String> logData = sc.textFile(logFile).cache();

    long numAs = logData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) { return s.contains("a"); }
    }).count();

    long numBs = logData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) { return s.contains("b"); }
    }).count();

    System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
  }
}
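The four-argument JavaSparkContext constructor used here is the old Spark 0.x/1.x form. On current Spark versions the context is normally built from a SparkConf; a minimal sketch of the same setup (app name and master are placeholders):

SparkConf conf = new SparkConf().setAppName("Simple App").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> logData = sc.textFile(logFile).cache();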
Example source: OryxProject/oryx
private static JavaPairRDD<String,float[]> readFeaturesRDD(JavaSparkContext sparkContext, Path path) {
  log.info("Loading features RDD from {}", path);
  JavaRDD<String> featureLines = sparkContext.textFile(path.toString());
  return featureLines.mapToPair(line -> {
    // Each line is a two-element JSON array: [key, featureVector]
    List<?> update = TextUtils.readJSON(line, List.class);
    String key = update.get(0).toString();
    float[] vector = TextUtils.convertViaJSON(update.get(1), float[].class);
    return new Tuple2<>(key, vector);
  });
}
Example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicFlatMap sparkMaster inputFile");
  }

  JavaSparkContext sc = new JavaSparkContext(
      args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  JavaRDD<String> words = rdd.flatMap(
      new FlatMapFunction<String, String>() {
        public Iterable<String> call(String x) {
          return Arrays.asList(x.split(" "));
        }
      });
  Map<String, Long> result = words.countByValue();
  for (Entry<String, Long> entry : result.entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue());
  }
}
}
Example source: databricks/learning-spark
public void run(String master, String csv1, String csv2) throws Exception {
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> csvFile1 = sc.textFile(csv1);
  JavaRDD<String> csvFile2 = sc.textFile(csv2);
  // Key each file with the same ParseLine function so the join pairs rows from both files
  JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
  JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
  JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
  List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}
}
Example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage KeyValueMapFilter sparkMaster inputFile");
  }
  String master = args[0];
  String inputFile = args[1];

  JavaSparkContext sc = new JavaSparkContext(
      master, "KeyValueMapFilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> input = sc.textFile(inputFile);

  // Key each line by its first word
  PairFunction<String, String, String> keyData = new PairFunction<String, String, String>() {
    @Override
    public Tuple2<String, String> call(String x) {
      return new Tuple2<>(x.split(" ")[0], x);
    }
  };

  // Keep only pairs whose value is shorter than 20 characters
  Function<Tuple2<String, String>, Boolean> longWordFilter =
      new Function<Tuple2<String, String>, Boolean>() {
        @Override
        public Boolean call(Tuple2<String, String> input) {
          return (input._2().length() < 20);
        }
      };

  JavaPairRDD<String, String> rdd = input.mapToPair(keyData);
  JavaPairRDD<String, String> result = rdd.filter(longWordFilter);
  Map<String, String> resultMap = result.collectAsMap();
  for (Entry<String, String> entry : resultMap.entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue());
  }
}
}
Example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(
      master, "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> rdd = sc.textFile(args[1]);
  JavaPairRDD<String, Integer> counts = rdd
      .flatMap(new FlatMapFunction<String, String>() {
        public Iterable<String> call(String x) {
          return Arrays.asList(x.split(" "));
        }
      })
      .mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String x) {
          return new Tuple2<>(x, 1);
        }
      })
      .reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer x, Integer y) { return x + y; }
      });
  counts.saveAsTextFile(args[2]);
}
}
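On Spark 2.x, where FlatMapFunction.call returns an Iterator instead of an Iterable, the same word count collapses to a few lambdas; a sketch under that assumption, reusing rdd and args from above:

JavaPairRDD<String, Integer> counts = rdd
    .flatMap(x -> Arrays.asList(x.split(" ")).iterator()) // Spark 2.x flatMap expects an Iterator
    .mapToPair(x -> new Tuple2<>(x, 1))
    .reduceByKey((x, y) -> x + y);
counts.saveAsTextFile(args[2]);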
Example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    throw new Exception("Usage BasicLoadJson [sparkMaster] [jsoninput] [jsonoutput]");
  }
  String master = args[0];
  String fileName = args[1];
  String outfile = args[2];

  JavaSparkContext sc = new JavaSparkContext(
      master, "basicloadjson", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<String> input = sc.textFile(fileName);
  JavaRDD<Person> result = input.mapPartitions(new ParseJson()).filter(new LikesPandas());
  JavaRDD<String> formatted = result.mapPartitions(new WriteJson());
  formatted.saveAsTextFile(outfile);
}
}
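The ParseJson and WriteJson classes are not shown in this snippet. For orientation, here is a plausible reconstruction of ParseJson using a Jackson ObjectMapper and a Person bean; this is a hypothetical sketch, not the project's exact code:

// Hypothetical sketch of ParseJson: mapPartitions lets one ObjectMapper
// be reused for all lines in a partition; malformed lines are skipped.
public static class ParseJson implements FlatMapFunction<Iterator<String>, Person> {
  public Iterable<Person> call(Iterator<String> lines) throws Exception {
    ArrayList<Person> people = new ArrayList<>();
    ObjectMapper mapper = new ObjectMapper();
    while (lines.hasNext()) {
      try {
        people.add(mapper.readValue(lines.next(), Person.class));
      } catch (Exception e) {
        // skip lines that fail to parse
      }
    }
    return people;
  }
}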
Example source: databricks/learning-spark
JavaRDD<String> spam = sc.textFile("files/spam.txt");
JavaRDD<String> ham = sc.textFile("files/ham.txt");
Example source: org.apache.spark/spark-core_2.11 (the identical test also appears in spark-core_2.10 and spark-core)
@Test
public void textFilesCompressed() throws IOException {
  String outputDir = new File(tempDir, "output").getAbsolutePath();
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  rdd.saveAsTextFile(outputDir, DefaultCodec.class);

  // Try reading it in as a text file RDD
  List<String> expected = Arrays.asList("1", "2", "3", "4");
  JavaRDD<String> readRDD = sc.textFile(outputDir);
  assertEquals(expected, readRDD.collect());
}
Example source: org.apache.spark/spark-core (the identical test also appears in spark-core_2.11 and spark-core_2.10)
@Test
public void textFiles() throws IOException {
  String outputDir = new File(tempDir, "output").getAbsolutePath();
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  rdd.saveAsTextFile(outputDir);

  // Read the plain text file and check it's OK
  File outputFile = new File(outputDir, "part-00000");
  String content = Files.toString(outputFile, StandardCharsets.UTF_8);
  assertEquals("1\n2\n3\n4\n", content);

  // Also try reading it in as a text file RDD
  List<String> expected = Arrays.asList("1", "2", "3", "4");
  JavaRDD<String> readRDD = sc.textFile(outputDir);
  assertEquals(expected, readRDD.collect());
}
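Both tests read an entire output directory of part-files back with a single textFile call, and the compressed variant is decoded transparently based on the codec's file extension. textFile also accepts Hadoop glob patterns and comma-separated lists of paths; a sketch with placeholder paths:

JavaRDD<String> logs = sc.textFile("hdfs://namenode/logs/2015-*/part-*");  // glob pattern
JavaRDD<String> both = sc.textFile("logs/a.txt,logs/b.txt");               // comma-separated paths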
Example source: deeplearning4j/dl4j-examples
JavaRDD<String> stringData = sc.textFile(filename);
Example source: deeplearning4j/dl4j-examples
JavaRDD<List<Writable>> customerInfo = sc.textFile(customerInfoPath).map(new StringToWritablesFunction(rr));
JavaRDD<List<Writable>> purchaseInfo = sc.textFile(purchaseInfoPath).map(new StringToWritablesFunction(rr));
Example source: deeplearning4j/dl4j-examples
JavaRDD<String> stringData = sc.textFile(path);
Example source: deeplearning4j/dl4j-examples
JavaRDD<String> stringData = sc.textFile(directory);
Example source: deeplearning4j/dl4j-examples
JavaRDD<String> logLines = sc.textFile(EXTRACTED_PATH);