This article collects Java code examples of the org.apache.spark.SparkContext.textFile() method and shows how SparkContext.textFile() is used in practice. The examples are drawn from selected open-source projects published on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the SparkContext.textFile() method:
Package path: org.apache.spark.SparkContext
Class name: SparkContext
Method name: textFile
Description: reads a text file from HDFS, the local file system, or any Hadoop-supported URI and returns it as an RDD of Strings; the second argument suggests a minimum number of partitions for the resulting RDD.
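Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern. The master URL and the input path are placeholders for illustration, not part of any of the projects below:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class TextFileDemo {
    public static void main(String[] args) {
        // Local-mode configuration; adjust the master URL for a real cluster.
        final SparkConf conf = new SparkConf().setAppName("textFileDemo").setMaster("local[*]");
        final SparkContext sc = SparkContext.getOrCreate(conf);
        // "input.txt" is a placeholder path; the second argument is minPartitions.
        final RDD<String> lines = sc.textFile("input.txt", 2);
        System.out.println("line count: " + lines.count());
        sc.stop();
    }
}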
Code example source: origin: apache/incubator-nemo
/**
 * Constructor.
 *
 * @param sparkContext  the spark context.
 * @param inputPath     the path of the target text file.
 * @param numPartitions the number of partitions.
 */
public SparkTextFileBoundedSourceVertex(final SparkContext sparkContext,
                                        final String inputPath,
                                        final int numPartitions) {
    super();
    this.readables = new ArrayList<>();
    final Partition[] partitions = sparkContext.textFile(inputPath, numPartitions).getPartitions();
    for (int i = 0; i < partitions.length; i++) {
        readables.add(new SparkTextFileBoundedSourceReadable(
            partitions[i],
            sparkContext.getConf(),
            i,
            inputPath,
            numPartitions));
    }
}
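The constructor above eagerly enumerates the partitions of the text-file RDD so that one readable can be created per partition. As a rough sketch under the same assumptions (placeholder path and partition count), the enumeration can be reproduced with the public partitions() accessor, which exposes the same split information as getPartitions():

import org.apache.spark.Partition;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;

public class PartitionInspection {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("partitions").setMaster("local[*]"));
        // Computes split metadata only; no file contents are read at this point.
        final Partition[] partitions = sc.textFile("input.txt", 4).partitions();
        for (final Partition p : partitions) {
            System.out.println("partition index: " + p.index());
        }
        sc.stop();
    }
}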
Code example source: origin: uber/marmaray
@Override
public JavaRDD<AvroPayload> getData(@NonNull final FileWorkUnitCalculator.FileWorkUnitCalculatorResult result) {
    Preconditions.checkState(result.hasWorkUnits(), "no work to do: " + this.conf.getDirectory());
    // todo: support more types
    Preconditions.checkState(this.conf.getType().equals("json"), "only json files supported");
    try {
        final FileSystem fs = this.conf.getFileSystem();
        final String filesToRead = result.getWorkUnits().stream()
            .map(LocatedFileStatus::getPath)
            .map(Path::toString)
            .collect(Collectors.joining(","));
        final RDD<String> fileRows = this.jsc.sc().textFile(filesToRead, 1);
        return this.converter.map(fileRows.toJavaRDD()).getData();
    } catch (IOException e) {
        throw new JobRuntimeException("Error getting files", e);
    }
}
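The snippet above works because textFile() accepts a comma-separated list of paths, so a single RDD can span many files. A minimal sketch of that behavior, with hypothetical file names:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class MultiPathTextFile {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("multiPath").setMaster("local[*]"));
        // A comma-separated path list reads all three files into one RDD.
        final RDD<String> rows = sc.textFile("a.json,b.json,c.json", 1);
        System.out.println("total rows: " + rows.count());
        sc.stop();
    }
}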
Code example source: origin: Stratio/deep-spark
private RDD<Cells> createRDDFromFilePath(String filePath, TextFileDataTable textFileDataTable) {
    RDD<String> result = this.sc().textFile(filePath.toString(), 1);
    JavaRDD<Cells> resultCells = result.toJavaRDD().map(new MapSchemaFromLines(textFileDataTable));
    return resultCells.rdd();
}
Code example source: origin: com.stratio.deep/deep-core
private RDD<Cells> createRDDFromFilePath(String filePath, TextFileDataTable textFileDataTable) {
    RDD<String> result = this.sc().textFile(filePath.toString(), 1);
    JavaRDD<Cells> resultCells = result.toJavaRDD().map(new MapSchemaFromLines(textFileDataTable));
    return resultCells.rdd();
}
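Both Stratio snippets read the file as a single partition and then map every line through a schema function. Below is a generic sketch of that line-mapping pattern; the CSV-splitting lambda is only a stand-in for the project's MapSchemaFromLines, and the file name is hypothetical:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;

public class LineMapping {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("lineMapping").setMaster("local[*]"));
        // Stand-in for MapSchemaFromLines: split each CSV line into fields.
        final JavaRDD<List<String>> cells = sc.textFile("table.csv", 1)
                .toJavaRDD()
                .map(line -> Arrays.asList(line.split(",")));
        System.out.println(cells.first());
        sc.stop();
    }
}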
Code example source: origin: apache/incubator-nemo
@Override
protected Iterator<String> initializeIterator() {
    // for setting up the same environment in the executors.
    final SparkContext sparkContext = SparkContext.getOrCreate(sparkConf);
    // Spark does lazy evaluation: it doesn't load the full data in rdd, but only the partition it is asked for.
    final RDD<String> rdd = sparkContext.textFile(inputPath, numPartitions);
    final Iterable<String> iterable = () -> JavaConverters.asJavaIteratorConverter(
        rdd.iterator(rdd.getPartitions()[partitionIndex], TaskContext$.MODULE$.empty())).asJava();
    return iterable.iterator();
}
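The snippet above pulls a single partition by calling rdd.iterator(...) with Spark's internal TaskContext$.MODULE$.empty() helper. For comparison, here is a sketch that fetches the rows of one partition through the public Java API; note that, unlike rdd.iterator(...), collectPartitions runs a job and collects the partition to the driver (path and partition index are placeholders):

import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;

public class SinglePartitionRead {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("singlePartition").setMaster("local[*]"));
        final JavaRDD<String> rdd = sc.textFile("input.txt", 4).toJavaRDD();
        // Runs a job restricted to partition 0 and returns its rows.
        final List<String>[] parts = rdd.collectPartitions(new int[]{0});
        parts[0].forEach(System.out::println);
        sc.stop();
    }
}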
Code example source: origin: edu.usc.ir/age-predictor-cli
    tokenizer, featureGenerators);
JavaRDD<String> data = spark.sparkContext().textFile(dataIn, 48).toJavaRDD()
    .cache();
Code example source: origin: Stratio/deep-spark
@Test
public void createHDFSRDDTest() throws Exception {
    deepSparkContext = createDeepSparkContext();
    DeepSparkContext deepSparkContextSpy = PowerMockito.spy(deepSparkContext);
    SQLContext sqlContext = mock(SQLContext.class);
    Whitebox.setInternalState(deepSparkContextSpy, "sc", sparkContext);
    Whitebox.setInternalState(deepSparkContextSpy, "sqlContext", sqlContext);
    RDD<String> rdd = mock(RDD.class);
    JavaRDD<String> javaRdd = mock(JavaRDD.class);
    when(deepSparkContextSpy.sc().textFile(anyString(), anyInt())).thenReturn(rdd);
    doReturn(javaRdd).when(deepSparkContextSpy).textFile(anyString());
    when(rdd.toJavaRDD()).thenReturn(javaRdd);
    when(rdd.toJavaRDD().map(any(Function.class))).thenReturn(singleRdd);
    ExtractorConfig<Cells> config = createHDFSDeepJobConfig();
    RDD rddReturn = deepSparkContextSpy.createHDFSRDD(config);
    verify(deepSparkContextSpy.sc(), times(1)).textFile(anyString(), anyInt());
    verify(javaRdd, times(1)).map(any(Function.class));
}
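The test above stubs textFile() so that no real file system is touched. Reduced to a minimal sketch with plain Mockito (matcher imports assume a recent Mockito version; the path is a placeholder):

import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class TextFileStubbing {
    @SuppressWarnings("unchecked")
    static void demo() {
        final SparkContext sc = mock(SparkContext.class);
        final RDD<String> fakeRdd = (RDD<String>) mock(RDD.class);
        // Stub textFile so the code under test never reads a real file.
        when(sc.textFile(anyString(), anyInt())).thenReturn(fakeRdd);

        sc.textFile("hdfs://any/path", 1); // stands in for the code under test
        verify(sc).textFile(anyString(), anyInt());
    }
}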
Code example source: origin: Stratio/deep-spark
@Test
public void createS3RDDTest() throws Exception {
    deepSparkContext = createDeepSparkContext();
    Configuration hadoopConf = mock(Configuration.class);
    when(sparkContext.hadoopConfiguration()).thenReturn(hadoopConf);
    DeepSparkContext deepSparkContextSpy = PowerMockito.spy(deepSparkContext);
    SQLContext sqlContext = mock(SQLContext.class);
    Whitebox.setInternalState(deepSparkContextSpy, "sc", sparkContext);
    Whitebox.setInternalState(deepSparkContextSpy, "sqlContext", sqlContext);
    RDD<String> rdd = mock(RDD.class);
    JavaRDD<String> javaRDD = mock(JavaRDD.class);
    when(deepSparkContextSpy.sc().textFile(anyString(), anyInt())).thenReturn(rdd);
    doReturn(javaRDD).when(deepSparkContextSpy).textFile(anyString());
    when(rdd.toJavaRDD()).thenReturn(javaRDD);
    when(rdd.toJavaRDD().map(any(Function.class))).thenReturn(singleRdd);
    ExtractorConfig<Cells> config = createS3DeepJobConfig();
    deepSparkContextSpy.createS3RDD(config);
    verify(hadoopConf, times(1)).set("fs.s3n.awsAccessKeyId", config.getString(ExtractorConstants.S3_ACCESS_KEY_ID));
    verify(hadoopConf, times(1)).set("fs.s3n.awsSecretAccessKey", config.getString(ExtractorConstants.S3_SECRET_ACCESS_KEY));
    verify(deepSparkContextSpy.sc(), times(1)).textFile(anyString(), anyInt());
    verify(javaRDD, times(1)).map(any(Function.class));
}
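The test verifies that the S3 credentials end up in the Hadoop configuration before the read. A sketch of the corresponding call sequence outside of a test; bucket, path, and credentials are placeholders, and the legacy s3n scheme is kept only to match the keys used above:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class S3TextFile {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("s3Read").setMaster("local[*]"));
        // Credentials must be set before textFile() triggers any S3 access.
        sc.hadoopConfiguration().set("fs.s3n.awsAccessKeyId", "YOUR_ACCESS_KEY");
        sc.hadoopConfiguration().set("fs.s3n.awsSecretAccessKey", "YOUR_SECRET_KEY");
        final RDD<String> lines = sc.textFile("s3n://bucket/path/data.txt", 1);
        System.out.println("line count: " + lines.count());
        sc.stop();
    }
}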
Code example source: origin: apache/incubator-nemo
/**
 * Static method to create a JavaRDD object from a text file.
 *
 * @param sparkContext  the spark context containing configurations.
 * @param minPartitions the minimum number of partitions.
 * @param inputPath     the path of the input text file.
 * @return the new JavaRDD object.
 */
public static JavaRDD<String> of(final SparkContext sparkContext,
                                 final int minPartitions,
                                 final String inputPath) {
    final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>();
    final org.apache.spark.rdd.RDD<String> textRdd = sparkContext.textFile(inputPath, minPartitions);
    final int numPartitions = textRdd.getNumPartitions();
    final IRVertex textSourceVertex = new SparkTextFileBoundedSourceVertex(sparkContext, inputPath, numPartitions);
    textSourceVertex.setProperty(ParallelismProperty.of(numPartitions));
    builder.addVertex(textSourceVertex);
    return new JavaRDD<>(textRdd, sparkContext, builder.buildWithoutSourceSinkCheck(), textSourceVertex);
}
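As the method above illustrates, the partition count passed to textFile() is only a minimum; the actual count has to be read back with getNumPartitions(). A small sketch demonstrating the difference (the path is a placeholder; the actual number depends on the input splits):

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;

public class MinPartitionsDemo {
    public static void main(String[] args) {
        final SparkContext sc = SparkContext.getOrCreate(
                new SparkConf().setAppName("minPartitions").setMaster("local[*]"));
        // The second argument is a lower bound, not an exact partition count.
        final RDD<String> rdd = sc.textFile("input.txt", 3);
        System.out.println("requested >= 3, actual = " + rdd.getNumPartitions());
        sc.stop();
    }
}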
Code example source: origin: edu.usc.ir/age-predictor-cli
    null : new AgeClassifyModelWrapper(classifyModel);
JavaRDD<String> data = spark.sparkContext().textFile(dataIn, 8).toJavaRDD().cache();
Code example source: origin: edu.usc.ir/age-predictor-cli
int iterations = getIterations(params);
JavaRDD<String> data = spark.sparkContext().textFile(eventDir, 24).toJavaRDD()
    .cache();