Usage of the org.apache.spark.api.java.JavaPairRDD.cache() Method, with Code Examples


This article collects Java code examples for the org.apache.spark.api.java.JavaPairRDD.cache() method, showing how it is used in practice. The examples were extracted from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, and should serve as useful references. Details of JavaPairRDD.cache() follow:
Package: org.apache.spark.api.java
Class: JavaPairRDD
Method: cache

About JavaPairRDD.cache

Persists the RDD with the default storage level (MEMORY_ONLY); it is equivalent to persist(StorageLevel.MEMORY_ONLY). The call is lazy: it only marks the RDD for caching and returns the same JavaPairRDD (so it can be chained inline), and nothing is materialized until the first action runs.
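
A minimal sketch of the call pattern; the JavaSparkContext sc and the sample data are illustrative, not taken from any of the projects below:

// requires: java.util.Arrays, java.util.List, scala.Tuple2,
// org.apache.spark.api.java.JavaPairRDD, org.apache.spark.api.java.JavaSparkContext
JavaPairRDD<String, Integer> pairs = sc
  .parallelizePairs(Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2)))
  .cache();              // lazy: only marks the RDD for MEMORY_ONLY persistence
long n = pairs.count();  // the first action computes and caches the partitions
List<Tuple2<String, Integer>> all = pairs.collect();  // served from the cache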

Code Examples

Example source: org.apache.spark/spark-core_2.11

@Test
public void binaryFilesCaching() throws Exception {
 // Reusing the wholeText files example
 byte[] content1 = "spark is easy to use.\n".getBytes(StandardCharsets.UTF_8);
 String tempDirName = tempDir.getAbsolutePath();
 File file1 = new File(tempDirName + "/part-00000");
 FileOutputStream fos1 = new FileOutputStream(file1);
 FileChannel channel1 = fos1.getChannel();
 ByteBuffer bbuf = ByteBuffer.wrap(content1);
 channel1.write(bbuf);
 channel1.close();
 JavaPairRDD<String, PortableDataStream> readRDD = sc.binaryFiles(tempDirName).cache();
 readRDD.foreach(pair -> pair._2().toArray()); // force the file to be read
 List<Tuple2<String, PortableDataStream>> result = readRDD.collect();
 for (Tuple2<String, PortableDataStream> res : result) {
  assertArrayEquals(content1, res._2().toArray());
 }
}
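
PortableDataStream reads its file lazily, so the foreach action is what forces the bytes to be read while the RDD is cached; the subsequent collect is then served from the cache, and the assertion checks that the cached contents match what was written.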

Example sources: org.apache.spark/spark-core_2.10 and org.apache.spark/spark-core

Both artifacts ship the same binaryFilesCaching test verbatim, so it is not repeated here.

Example source: org.apache.spark/spark-core_2.11

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}
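
Note that cache() preserves the specialized RDD type (JavaDoubleRDD, JavaPairRDD, and JavaRDD each return their own type), which is what lets it be chained inline as above.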

Example sources: org.apache.spark/spark-core_2.10, org.apache.spark/spark-core, and org.apache.spark/spark-core_2.11

These artifacts contain near-identical copies of the map test above; the only variation is that some versions write the double conversion as x -> 1.0 * x instead of Integer::doubleValue.

Example source: mahmoudparsian/data-algorithms-book

/**
 * Cache the result RDD: K-Means is an iterative machine learning
 * algorithm, so the result will be read many times.
 *
 * @param wikiData path to the featurized input data
 * @param context the Java Spark context
 * @return JavaPairRDD<String, Vector> where the key is <project_code> + " " + <page_title>
 * and the value is a Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(
    String wikiData, 
    JavaSparkContext context) {
  //
  JavaPairRDD<String, Vector> data = context.textFile(wikiData).mapToPair(
      new PairFunction<String, String, Vector>() {
    @Override
    public Tuple2<String, Vector> call(String in) throws Exception {
      // in: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
      String[] parts = StringUtils.split(in, "#");
      return new Tuple2<String, Vector>(parts[0], Util.buildVector(parts[1], ","));
    }
  }).cache();
  return data;
}
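
For context, a hedged sketch of how such a cached feature RDD might feed MLlib's K-Means; the path, cluster count, iteration count, and the use of KMeans.train are illustrative assumptions, not taken from the project:

// requires: org.apache.spark.mllib.clustering.KMeans, org.apache.spark.mllib.clustering.KMeansModel
JavaPairRDD<String, Vector> data = getFeatureizedData("/path/to/wikistats", context);
// K-Means makes many passes over the same vectors, so the cache() inside
// getFeatureizedData() avoids re-reading and re-parsing the text file on each pass
KMeansModel model = KMeans.train(data.values().rdd(), 10, 20);  // k = 10, maxIterations = 20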

Example source: org.rcsb/mmtf-spark

/**
 * Cache the underlying RDD.
 * @return the cached {@link StructureDataRDD}
 */
public StructureDataRDD cache() {
  javaPairRdd.cache();
  return this;
}
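
Returning this gives the wrapper a fluent API: callers can insert .cache() into a method chain without breaking it.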

Example source: org.rcsb/mmtf-spark

/**
 * Cache the data. Good if the user wants to produce multiple analyses off the same
 * data.
 * @return the cached data object.
 */
public SegmentDataRDD cache() {
  return new SegmentDataRDD(segmentRDD.cache());
}
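
Unlike the previous wrapper, this one wraps the cached RDD in a new SegmentDataRDD instead of returning this; both styles ultimately delegate to the same JavaPairRDD.cache() call.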

Example source: com.davidbracewell/mango

@Override
public MPairStream<T, U> cache() {
 return new SparkPairStream<>(rdd.cache());
}
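
Here cache() on the underlying Spark rdd is adapted to the library's own MPairStream interface; the semantics remain Spark's lazy persistence.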

Example source: ypriverol/spark-java8

/**
 * Cache the result RDD: K-Means is an iterative machine learning
 * algorithm, so the result will be read many times.
 *
 * @param wikiData path to the featurized input data
 * @param context the Java Spark context
 * @return JavaPairRDD<String, Vector> where the key is <project_code> + " " + <page_title>
 * and the value is a Vector of features
 */
static JavaPairRDD<String, Vector> getFeatureizedData(
    String wikiData,
    JavaSparkContext context) {
  //
  return context.textFile(wikiData).mapToPair(
      (PairFunction<String, String, Vector>) in -> {
        // in: <key><#><feature_1><,><feature_2><,>...<,><feature_24>
        String[] parts = StringUtils.split(in, "#");
        return new Tuple2<>(parts[0], Util.buildVector(parts[1], ","));
      }).cache();
}
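
This is the same featurization logic as the data-algorithms-book example above, rewritten with a Java 8 lambda in place of the anonymous PairFunction.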

Example source: edu.usc.ir/age-predictor-cli

// This excerpt was truncated by the snippet extractor; the surrounding filter
// and reduce lambdas (each ending in .cache()) were cut off. The recoverable
// complete statements are:
samples.unpersist();
JavaPairRDD<String, String> correct = validSamples.filter(new EvaluateSample(model)).cache();

Example source: unchartedsoftware/ensemble-clustering

instances.cache();

Example source: Graphify/graphify

// Fragment: the opening of the mapToPair call was truncated by the snippet
// extractor. Reconstructed context below; the "lines" and "pairs" names are
// assumptions, and only the lambda body and the trailing chain are original.
JavaPairRDD<String, Iterable<String>> pairs = lines.mapToPair(s -> {
  String[] parts = SPACES.split(s);
  return new Tuple2<>(parts[0], parts[1]);
}).distinct().groupByKey().cache();

Example source: scipr-lab/dizk

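// cache() is lazy; chaining count() right after it forces the RDD to be
// computed and materialized immediately: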
long numVariables = oneFullAssignment.cache().count();
config.endLog("[gaussianFitApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[gaussianFitApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");

Example source: scipr-lab/dizk

long numVariables2 = oneFullAssignment.cache().count();
config.endLog("[matmulApp] oneFullAssignment");
constraints.A().cache().count();
constraints.B().cache().count();
long totalNumConstraints = constraints.C().cache().count();
config.endLog("[matmulApp] constraints generation");
System.out.println("[numconstraints: " + totalNumConstraints + "]");

Example source: unchartedsoftware/ensemble-clustering

rdd.getRDD().cache();
