org.apache.spark.api.java.JavaRDD.persist()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(10.9k)|赞(0)|评价(0)|浏览(155)

本文整理了Java中org.apache.spark.api.java.JavaRDD.persist()方法的一些代码示例,展示了JavaRDD.persist()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.persist()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:persist

JavaRDD.persist介绍

暂无

代码示例

代码示例来源:origin: org.apache.spark/spark-core_2.11

@SuppressWarnings("unchecked")
@Test
public void persist() {
 // Persist a JavaDoubleRDD to disk, then verify an action still sees every element.
 JavaDoubleRDD doubles = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
 doubles = doubles.persist(StorageLevel.DISK_ONLY());
 assertEquals(20, doubles.sum(), 0.1);

 // Persist a JavaPairRDD and check the first pair's value is readable afterwards.
 List<Tuple2<Integer, String>> tuples = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa"));
 JavaPairRDD<Integer, String> persistedPairs = sc.parallelizePairs(tuples);
 persistedPairs = persistedPairs.persist(StorageLevel.DISK_ONLY());
 assertEquals("a", persistedPairs.first()._2());

 // Persist a plain JavaRDD of ints and read back the first element.
 JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 ints = ints.persist(StorageLevel.DISK_ONLY());
 assertEquals(1, ints.first().intValue());
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@SuppressWarnings("unchecked")
@Test
public void persist() {
 // A persisted JavaDoubleRDD should still yield the full sum on an action.
 JavaDoubleRDD cachedDoubles = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
 cachedDoubles = cachedDoubles.persist(StorageLevel.DISK_ONLY());
 assertEquals(20, cachedDoubles.sum(), 0.1);

 // A persisted JavaPairRDD should still return its first value.
 List<Tuple2<Integer, String>> pairData = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa"));
 JavaPairRDD<Integer, String> cachedPairs = sc.parallelizePairs(pairData);
 cachedPairs = cachedPairs.persist(StorageLevel.DISK_ONLY());
 assertEquals("a", cachedPairs.first()._2());

 // A persisted JavaRDD<Integer> should still return its first element.
 JavaRDD<Integer> cachedInts = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 cachedInts = cachedInts.persist(StorageLevel.DISK_ONLY());
 assertEquals(1, cachedInts.first().intValue());
}

代码示例来源:origin: org.apache.spark/spark-core

@SuppressWarnings("unchecked")
@Test
public void persist() {
 // Exercise persist() on the three JavaRDD flavors: double, pair, and generic.
 JavaDoubleRDD doubleValues = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
 doubleValues = doubleValues.persist(StorageLevel.DISK_ONLY());
 assertEquals(20, doubleValues.sum(), 0.1);

 List<Tuple2<Integer, String>> keyedStrings = Arrays.asList(
  new Tuple2<>(1, "a"),
  new Tuple2<>(2, "aa"),
  new Tuple2<>(3, "aaa"));
 JavaPairRDD<Integer, String> keyedRDD = sc.parallelizePairs(keyedStrings);
 keyedRDD = keyedRDD.persist(StorageLevel.DISK_ONLY());
 assertEquals("a", keyedRDD.first()._2());

 JavaRDD<Integer> intRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 intRDD = intRDD.persist(StorageLevel.DISK_ONLY());
 assertEquals(1, intRDD.first().intValue());
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Persists the raw spatial RDD at the requested storage level and then runs
 * the no-argument analysis pass over it.
 *
 * @param newLevel storage level to persist {@code rawSpatialRDD} at
 * @return true, if successful (this overload always returns true)
 */
public boolean analyze(StorageLevel newLevel)
{
  rawSpatialRDD = rawSpatialRDD.persist(newLevel);
  analyze();
  return true;
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Re-caches the raw spatial RDD at the given storage level, then delegates to
 * the parameterless {@code analyze()} to compute the statistics.
 *
 * @param newLevel the storage level for the raw RDD
 * @return true, if successful (always true for this overload)
 */
public boolean analyze(StorageLevel newLevel)
{
  rawSpatialRDD = rawSpatialRDD.persist(newLevel);
  analyze();
  return true;
}

代码示例来源:origin: uber/marmaray

// Filters out records that already exist in the target table and caches the
// remaining (new) records on disk so downstream actions do not recompute them.
private JavaRDD<HoodieRecord<HoodieRecordPayload>> dedupRecords(@NonNull final HoodieWriteClientWrapper writeClient,
  @NonNull final JavaRDD<HoodieRecord<HoodieRecordPayload>> hoodieRecords) {
  final JavaRDD<HoodieRecord<HoodieRecordPayload>> unseenRecords = writeClient.filterExists(hoodieRecords);
  return unseenRecords.persist(StorageLevel.DISK_ONLY());
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Benchmarks the KNN query without a spatial index, caching the raw RDD first.
 *
 * @throws Exception the exception
 */
public static void testSpatialKnnQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false);
    assert neighbors.size() >= 0;
    iteration++;
  }
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Runs the non-indexed KNN query benchmark loop over a memory-cached PointRDD.
 *
 * @throws Exception the exception
 */
public static void testSpatialKnnQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    List<Point> nearest = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false);
    assert nearest.size() >= 0;
    round++;
  }
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Benchmarks the KNN query with a pre-built spatial index over a cached PointRDD.
 *
 * @throws Exception the exception
 */
public static void testSpatialKnnQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, true);
    assert neighbors.size() >= 0;
    iteration++;
  }
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Runs the indexed KNN query benchmark: builds the index, caches it, then loops.
 *
 * @throws Exception the exception
 */
public static void testSpatialKnnQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    List<Point> nearest = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, true);
    assert nearest.size() >= 0;
    round++;
  }
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Benchmarks the range query without an index over a memory-cached PointRDD.
 *
 * @throws Exception the exception
 */
public static void testSpatialRangeQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
    assert matched >= 0;
    iteration++;
  }
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Benchmarks the non-indexed range query after a CRS transformation
 * (epsg:4326 to epsg:3005) of the input points.
 *
 * @throws Exception the exception
 */
public static void testCRSTransformationSpatialRangeQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005");
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
    assert matched >= 0;
    iteration++;
  }
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Runs the range query benchmark on a CRS-transformed (epsg:4326 to epsg:3005)
 * PointRDD, with the raw RDD cached in memory and no index.
 *
 * @throws Exception the exception
 */
public static void testCRSTransformationSpatialRangeQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005");
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    long hits = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
    assert hits >= 0;
    round++;
  }
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Runs the non-indexed range query benchmark loop over a memory-cached PointRDD.
 *
 * @throws Exception the exception
 */
public static void testSpatialRangeQuery()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.rawSpatialRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    long hits = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
    assert hits >= 0;
    round++;
  }
}

代码示例来源:origin: uber/hudi

// Writes the new record locations back into the index, caches the resulting
// statuses at the configured level, and triggers the auto-commit path.
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
  HoodieTable<T> table, String commitTime) {
 JavaRDD<WriteStatus> indexedStatuses = index.updateLocation(writeStatusRDD, jsc, table);
 // Persist so the commit action below does not recompute the write statuses.
 indexedStatuses = indexedStatuses.persist(config.getWriteStatusStorageLevel());
 HoodieTableMetaClient metaClient =
   new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
 commitOnAutoCommit(commitTime, indexedStatuses, metaClient.getCommitActionType());
 return indexedStatuses;
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Benchmarks the range query using a pre-built, memory-cached spatial index.
 *
 * @throws Exception the exception
 */
public static void testSpatialRangeQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
    assert matched >= 0;
    iteration++;
  }
}

代码示例来源:origin: DataSystemsLab/GeoSpark

/**
 * Benchmarks the indexed range query on a CRS-transformed
 * (epsg:4326 to epsg:3005) PointRDD with the index cached in memory.
 *
 * @throws Exception the exception
 */
public static void testCRSTransformationSpatialRangeQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005");
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int iteration = 0;
  while (iteration < eachQueryLoopTimes) {
    long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
    assert matched >= 0;
    iteration++;
  }
}

代码示例来源:origin: com.uber.hoodie/hoodie-client

// Updates record locations in the index, caches the write statuses, and then
// runs the auto-commit step for this commit time.
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
  HoodieTable<T> table, String commitTime) {
 JavaRDD<WriteStatus> updatedStatuses = index.updateLocation(writeStatusRDD, jsc, table);
 // Cache before committing so the statuses are computed only once.
 updatedStatuses = updatedStatuses.persist(config.getWriteStatusStorageLevel());
 HoodieTableMetaClient client =
   new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
 commitOnAutoCommit(commitTime, updatedStatuses, client.getCommitActionType());
 return updatedStatuses;
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Runs the indexed range query benchmark on a PointRDD whose coordinates were
 * transformed from epsg:4326 to epsg:3005, with the index cached in memory.
 *
 * @throws Exception the exception
 */
public static void testCRSTransformationSpatialRangeQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY(), "epsg:4326", "epsg:3005");
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    long hits = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
    assert hits >= 0;
    round++;
  }
}

代码示例来源:origin: org.datasyslab/geospark

/**
 * Runs the indexed range query benchmark loop: build index, cache it, query.
 *
 * @throws Exception the exception
 */
public static void testSpatialRangeQueryUsingIndex()
    throws Exception
{
  objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, StorageLevel.MEMORY_ONLY());
  objectRDD.buildIndex(PointRDDIndexType, false);
  objectRDD.indexedRawRDD.persist(StorageLevel.MEMORY_ONLY());
  int round = 0;
  while (round < eachQueryLoopTimes) {
    long hits = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
    assert hits >= 0;
    round++;
  }
}

相关文章

微信公众号

最新文章

更多