本文整理了Java中org.apache.spark.api.java.JavaPairRDD.cache()
方法的一些代码示例,展示了JavaPairRDD.cache()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.cache()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:cache
暂无
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void binaryFilesCaching() throws Exception {
  // Reusing the wholeText files example: write one small binary part file,
  // read it back via binaryFiles(), cache it, and verify the bytes round-trip.
  byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);
  String tempDirName = tempDir.getAbsolutePath();
  File file1 = new File(tempDirName + "/part-00000");
  // try-with-resources guarantees the stream and channel are closed even if
  // write() throws (the original leaked both on an exception path).
  try (FileOutputStream fos1 = new FileOutputStream(file1);
       FileChannel channel1 = fos1.getChannel()) {
    channel1.write(ByteBuffer.wrap(content1));
  }
  JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
  readRDD.foreach(pair -> pair._2().toArray()); // force the file to read
  List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
  for (Tuple2<String, PortableDataStream> res : result) {
    assertArrayEquals(content1, res._2().toArray());
  }
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void binaryFilesCaching() throws Exception {
  // Reusing the wholeText files example: write one small binary part file,
  // read it back via binaryFiles(), cache it, and verify the bytes round-trip.
  byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);
  String tempDirName = tempDir.getAbsolutePath();
  File file1 = new File(tempDirName + "/part-00000");
  // try-with-resources guarantees the stream and channel are closed even if
  // write() throws (the original leaked both on an exception path).
  try (FileOutputStream fos1 = new FileOutputStream(file1);
       FileChannel channel1 = fos1.getChannel()) {
    channel1.write(ByteBuffer.wrap(content1));
  }
  JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
  readRDD.foreach(pair -> pair._2().toArray()); // force the file to read
  List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
  for (Tuple2<String, PortableDataStream> res : result) {
    assertArrayEquals(content1, res._2().toArray());
  }
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void binaryFilesCaching() throws Exception {
  // Reusing the wholeText files example: write one small binary part file,
  // read it back via binaryFiles(), cache it, and verify the bytes round-trip.
  byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);
  String tempDirName = tempDir.getAbsolutePath();
  File file1 = new File(tempDirName + "/part-00000");
  // try-with-resources guarantees the stream and channel are closed even if
  // write() throws (the original leaked both on an exception path).
  try (FileOutputStream fos1 = new FileOutputStream(file1);
       FileChannel channel1 = fos1.getChannel()) {
    channel1.write(ByteBuffer.wrap(content1));
  }
  JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
  readRDD.foreach(pair -> pair._2().toArray()); // force the file to read
  List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
  for (Tuple2<String, PortableDataStream> res : result) {
    assertArrayEquals(content1, res._2().toArray());
  }
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
  // Each transform (mapToDouble, mapToPair, map) should yield a cacheable RDD;
  // collect() materializes each one to exercise the cached lineage.
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

  JavaDoubleRDD asDoubles = ints.mapToDouble(Integer::doubleValue).cache();
  asDoubles.collect();

  JavaPairRDD<Integer, Integer> asPairs = ints.mapToPair(n -> new Tuple2<>(n, n)).cache();
  asPairs.collect();

  JavaRDD<String> asStrings = ints.map(Object::toString).cache();
  asStrings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
  // Each transform (mapToDouble, mapToPair, map) should yield a cacheable RDD;
  // collect() materializes each one to exercise the cached lineage.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method reference is the idiomatic widening conversion, replacing `x -> 1.0 * x`
  // (same result, and consistent with the sibling variants of this test).
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void map() {
  // Verify map/mapToDouble/mapToPair all produce RDDs that can be cached and
  // then collected without error.
  JavaRDD<Integer> source = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

  JavaDoubleRDD widened = source.mapToDouble(Integer::doubleValue).cache();
  widened.collect();

  JavaPairRDD<Integer, Integer> paired = source.mapToPair(v -> new Tuple2<>(v, v)).cache();
  paired.collect();

  JavaRDD<String> rendered = source.map(Object::toString).cache();
  rendered.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
  // Verify map/mapToDouble/mapToPair all produce RDDs that can be cached and
  // then collected without error.
  JavaRDD<Integer> source = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

  JavaDoubleRDD widened = source.mapToDouble(Integer::doubleValue).cache();
  widened.collect();

  JavaPairRDD<Integer, Integer> paired = source.mapToPair(v -> new Tuple2<>(v, v)).cache();
  paired.collect();

  JavaRDD<String> rendered = source.map(Object::toString).cache();
  rendered.collect();
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void map() {
  // Each transform (mapToDouble, mapToPair, map) should yield a cacheable RDD;
  // collect() materializes each one to exercise the cached lineage.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method reference is the idiomatic widening conversion, replacing `x -> 1.0 * x`
  // (same result, and consistent with the sibling variants of this test).
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void map() {
  // Each transform (mapToDouble, mapToPair, map) should yield a cacheable RDD;
  // collect() materializes each one to exercise the cached lineage.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  // Method reference is the idiomatic widening conversion, replacing `x -> 1.0 * x`
  // (same result, and consistent with the sibling variants of this test).
  JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
  doubles.collect();
  JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
  pairs.collect();
  JavaRDD<String> strings = rdd.map(Object::toString).cache();
  strings.collect();
}
代码示例来源:origin: mahmoudparsian/data-algorithms-book
/**
 * Builds the featurized dataset for K-Means and caches the result RDD, since
 * K-Means is an iterative machine learning algorithm and the result will be
 * traversed many times.
 *
 * @param wikiData path to the featurized input data
 * @param context a Java Spark context object
 * @return a cached {@code JavaPairRDD<String, Vector>}, where the key is
 *         {@code <project_code> + " " + <page_title>} and the value is a
 *         Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(
    String wikiData,
    JavaSparkContext context) {
  // Lambda replaces the verbose anonymous PairFunction; the surrounding
  // examples already use Java 8 lambdas with the Spark Java API.
  JavaPairRDD<String, Vector> data = context.textFile(wikiData).mapToPair(
      (PairFunction<String, String, Vector>) in -> {
        // in: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
        String[] parts = StringUtils.split(in, "#");
        return new Tuple2<>(parts[0], Util.buildVector(parts[1], ","));
      }).cache();
  return data;
}
代码示例来源:origin: org.rcsb/mmtf-spark
/**
 * Persist the wrapped {@link JavaPairRDD} so subsequent actions reuse it.
 * @return this {@link StructureDataRDD}, to allow call chaining
 */
public StructureDataRDD cache() {
  this.javaPairRdd.cache();
  return this;
}
代码示例来源:origin: org.rcsb/mmtf-spark
/**
 * Persist the underlying RDD. Useful when the caller will run several
 * analyses over the same data.
 * @return a {@link SegmentDataRDD} wrapping the cached RDD.
 */
public SegmentDataRDD cache() {
  return new SegmentDataRDD(this.segmentRDD.cache());
}
代码示例来源:origin: com.davidbracewell/mango
/** Cache the backing Spark RDD and expose it through a new pair stream. */
@Override
public MPairStream<T, U> cache() {
  return new SparkPairStream<>(this.rdd.cache());
}
代码示例来源:origin: ypriverol/spark-java8
/**
 * Builds the featurized dataset for K-Means and caches the result RDD, since
 * K-Means is an iterative machine learning algorithm and the result will be
 * traversed many times.
 *
 * @param wikiData path to the featurized input data
 * @param context a Java Spark context object
 * @return a cached {@code JavaPairRDD<String, Vector>}, where the key is
 *         {@code <project_code> + " " + <page_title>} and the value is a
 *         Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(
    String wikiData,
    JavaSparkContext context) {
  JavaPairRDD<String, Vector> featurized = context.textFile(wikiData).mapToPair(
      (PairFunction<String, String, Vector>) line -> {
        // line: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
        String[] pieces = StringUtils.split(line, "#");
        return new Tuple2<>(pieces[0], Util.buildVector(pieces[1], ","));
      });
  return featurized.cache();
}
代码示例来源:origin: edu.usc.ir/age-predictor-cli
}).cache();
return (t != null);
}).cache();
samples.unpersist();
JavaPairRDD<String, String> correct = validSamples.filter(new EvaluateSample(model)).cache();
return i1 + i2;
}).cache();
return i1 + i2;
}).cache();
代码示例来源:origin: unchartedsoftware/ensemble-clustering
instances.cache();
代码示例来源:origin: Graphify/graphify
String[] parts = SPACES.split(s);
return new Tuple2<>(parts[0], parts[1]);
}).distinct().groupByKey().cache();
代码示例来源:origin: scipr-lab/dizk
long numVariables = oneFullAssignment.cache().count();
config.endLog("[gaussianFitApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[gaussianFitApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");
代码示例来源:origin: scipr-lab/dizk
long numVariables2 = oneFullAssignment.cache().count();
config.endLog("[matmulApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[matmulApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");
代码示例来源:origin: unchartedsoftware/ensemble-clustering
rdd.getRDD().cache();
内容来源于网络,如有侵权,请联系作者删除!