本文整理了 Java 中 org.apache.spark.api.java.JavaRDD.persist() 方法的一些代码示例,展示了 JavaRDD.persist() 的具体用法。这些代码示例主要来源于 GitHub、Stack Overflow、Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.persist() 方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:persist
方法说明:暂无
代码示例来源:origin: org.apache.spark/spark-core_2.11
/**
 * Checks that double, pair and plain RDDs still yield the expected values after being
 * persisted with {@code StorageLevel.DISK_ONLY()}.
 */
@SuppressWarnings("unchecked")
@Test
public void persist() {
    // Doubles: the six values must still sum to 20 (within 0.1) once persisted.
    JavaDoubleRDD persistedDoubles = sc
        .parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0))
        .persist(StorageLevel.DISK_ONLY());
    assertEquals(20, persistedDoubles.sum(), 0.1);

    // Pairs: the first element's value must still be "a".
    List<Tuple2<Integer, String>> entries = Arrays.asList(
        new Tuple2<>(1, "a"),
        new Tuple2<>(2, "aa"),
        new Tuple2<>(3, "aaa")
    );
    JavaPairRDD<Integer, String> persistedPairs =
        sc.parallelizePairs(entries).persist(StorageLevel.DISK_ONLY());
    assertEquals("a", persistedPairs.first()._2());

    // Plain RDD: the first element must still be 1.
    JavaRDD<Integer> persistedInts =
        sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).persist(StorageLevel.DISK_ONLY());
    assertEquals(1, persistedInts.first().intValue());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
/**
 * Checks that double, pair and plain RDDs still yield the expected values after being
 * persisted with {@code StorageLevel.DISK_ONLY()}.
 */
@SuppressWarnings("unchecked")
@Test
public void persist() {
    // Doubles: the six values must still sum to 20 (within 0.1) once persisted.
    JavaDoubleRDD persistedDoubles = sc
        .parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0))
        .persist(StorageLevel.DISK_ONLY());
    assertEquals(20, persistedDoubles.sum(), 0.1);

    // Pairs: the first element's value must still be "a".
    List<Tuple2<Integer, String>> entries = Arrays.asList(
        new Tuple2<>(1, "a"),
        new Tuple2<>(2, "aa"),
        new Tuple2<>(3, "aaa")
    );
    JavaPairRDD<Integer, String> persistedPairs =
        sc.parallelizePairs(entries).persist(StorageLevel.DISK_ONLY());
    assertEquals("a", persistedPairs.first()._2());

    // Plain RDD: the first element must still be 1.
    JavaRDD<Integer> persistedInts =
        sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).persist(StorageLevel.DISK_ONLY());
    assertEquals(1, persistedInts.first().intValue());
}
代码示例来源:origin: org.apache.spark/spark-core
/**
 * Checks that double, pair and plain RDDs still yield the expected values after being
 * persisted with {@code StorageLevel.DISK_ONLY()}.
 */
@SuppressWarnings("unchecked")
@Test
public void persist() {
    // Doubles: the six values must still sum to 20 (within 0.1) once persisted.
    JavaDoubleRDD persistedDoubles = sc
        .parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0))
        .persist(StorageLevel.DISK_ONLY());
    assertEquals(20, persistedDoubles.sum(), 0.1);

    // Pairs: the first element's value must still be "a".
    List<Tuple2<Integer, String>> entries = Arrays.asList(
        new Tuple2<>(1, "a"),
        new Tuple2<>(2, "aa"),
        new Tuple2<>(3, "aaa")
    );
    JavaPairRDD<Integer, String> persistedPairs =
        sc.parallelizePairs(entries).persist(StorageLevel.DISK_ONLY());
    assertEquals("a", persistedPairs.first()._2());

    // Plain RDD: the first element must still be 1.
    JavaRDD<Integer> persistedInts =
        sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).persist(StorageLevel.DISK_ONLY());
    assertEquals(1, persistedInts.first().intValue());
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Persists the raw spatial RDD at the requested storage level, then runs the
 * no-argument {@code analyze()}.
 *
 * @param newLevel the storage level handed to {@code rawSpatialRDD.persist}
 * @return always {@code true}
 */
public boolean analyze(StorageLevel newLevel)
{
    // Keep the handle returned by persist so later use goes through the persisted RDD.
    rawSpatialRDD = rawSpatialRDD.persist(newLevel);
    analyze();
    return true;
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Persists the raw spatial RDD at the requested storage level, then runs the
 * no-argument {@code analyze()}.
 *
 * @param newLevel the storage level handed to {@code rawSpatialRDD.persist}
 * @return always {@code true}
 */
public boolean analyze(StorageLevel newLevel)
{
    // Keep the handle returned by persist so later use goes through the persisted RDD.
    rawSpatialRDD = rawSpatialRDD.persist(newLevel);
    analyze();
    return true;
}
代码示例来源:origin: uber/marmaray
/**
 * Passes the records through {@code writeClient.filterExists} and persists the result
 * with {@code StorageLevel.DISK_ONLY()}.
 *
 * @param writeClient client whose {@code filterExists} is applied to the records
 * @param hoodieRecords candidate records
 * @return the filtered records, persisted at DISK_ONLY
 */
private JavaRDD<HoodieRecord<HoodieRecordPayload>> dedupRecords(@NonNull final HoodieWriteClientWrapper writeClient,
    @NonNull final JavaRDD<HoodieRecord<HoodieRecordPayload>> hoodieRecords) {
    final JavaRDD<HoodieRecord<HoodieRecordPayload>> filtered = writeClient.filterExists(hoodieRecords);
    return filtered.persist(StorageLevel.DISK_ONLY());
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code KNNQuery.SpatialKnnQuery} with its last argument set to {@code false}
 * (presumably "do not use the index" - confirm against KNNQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, persists its raw spatial RDD in memory
 * and repeats the query {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testSpatialKnnQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false);
        // A list size is never negative, so this only confirms the query returned without throwing.
        assert neighbors.size() > -1;
        round++;
    }
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code KNNQuery.SpatialKnnQuery} with its last argument set to {@code false}
 * (presumably "do not use the index" - confirm against KNNQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, persists its raw spatial RDD in memory
 * and repeats the query {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testSpatialKnnQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, false);
        // A list size is never negative, so this only confirms the query returned without throwing.
        assert neighbors.size() > -1;
        round++;
    }
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code KNNQuery.SpatialKnnQuery} with its last argument set to {@code true}
 * (presumably "use the index" - confirm against KNNQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, builds an index of type
 * {@code PointRDDIndexType}, persists the indexed raw RDD in memory and repeats the query
 * {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testSpatialKnnQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, true);
        // A list size is never negative, so this only confirms the query returned without throwing.
        assert neighbors.size() > -1;
        round++;
    }
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code KNNQuery.SpatialKnnQuery} with its last argument set to {@code true}
 * (presumably "use the index" - confirm against KNNQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, builds an index of type
 * {@code PointRDDIndexType}, persists the indexed raw RDD in memory and repeats the query
 * {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testSpatialKnnQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        List<Point> neighbors = KNNQuery.SpatialKnnQuery(objectRDD, kNNQueryPoint, 1000, true);
        // A list size is never negative, so this only confirms the query returned without throwing.
        assert neighbors.size() > -1;
        round++;
    }
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} against {@code rangeQueryWindow} with both
 * boolean arguments set to {@code false}.
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, persists its raw spatial RDD in memory
 * and counts the query result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testSpatialRangeQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} on a {@code PointRDD} built with the
 * additional arguments {@code "epsg:4326"} and {@code "epsg:3005"} (presumably source and
 * target CRS codes - confirm against the PointRDD constructor).
 *
 * <p>Persists the raw spatial RDD in memory and counts the query result
 * {@code eachQueryLoopTimes} times, with both boolean query arguments set to {@code false}.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testCRSTransformationSpatialRangeQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly, "epsg:4326", "epsg:3005");
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} on a {@code PointRDD} built with the
 * additional arguments {@code "epsg:4326"} and {@code "epsg:3005"} (presumably source and
 * target CRS codes - confirm against the PointRDD constructor).
 *
 * <p>Persists the raw spatial RDD in memory and counts the query result
 * {@code eachQueryLoopTimes} times, with both boolean query arguments set to {@code false}.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testCRSTransformationSpatialRangeQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly, "epsg:4326", "epsg:3005");
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} against {@code rangeQueryWindow} with both
 * boolean arguments set to {@code false}.
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, persists its raw spatial RDD in memory
 * and counts the query result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD or running a query fails
 */
public static void testSpatialRangeQuery()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.rawSpatialRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, false).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: uber/hudi
/**
 * Pushes the write statuses through {@code index.updateLocation}, persists the result at the
 * configured write-status storage level and hands it to {@code commitOnAutoCommit}.
 *
 * @param writeStatusRDD write statuses passed to the index update
 * @param table table passed through to {@code index.updateLocation}
 * @param commitTime commit time forwarded to {@code commitOnAutoCommit}
 * @return the persisted statuses produced by the index update
 */
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
    HoodieTable<T> table, String commitTime) {
  // Update the index back and persist the resulting statuses.
  // NOTE(review): persist alone presumably does not run the job; a later action does - confirm.
  JavaRDD<WriteStatus> persistedStatuses = index.updateLocation(writeStatusRDD, jsc, table)
      .persist(config.getWriteStatusStorageLevel());

  // The commit action type is read from a freshly constructed meta client for the base path.
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  commitOnAutoCommit(commitTime, persistedStatuses, metaClient.getCommitActionType());
  return persistedStatuses;
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} against {@code rangeQueryWindow} with its
 * last argument set to {@code true} (presumably "use the index" - confirm against RangeQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, builds an index of type
 * {@code PointRDDIndexType}, persists the indexed raw RDD in memory and counts the query
 * result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testSpatialRangeQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: DataSystemsLab/GeoSpark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} with its last argument set to {@code true}
 * on a {@code PointRDD} built with the additional arguments {@code "epsg:4326"} and
 * {@code "epsg:3005"} (presumably source and target CRS codes - confirm against the
 * PointRDD constructor).
 *
 * <p>Builds an index of type {@code PointRDDIndexType}, persists the indexed raw RDD in
 * memory and counts the query result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testCRSTransformationSpatialRangeQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly, "epsg:4326", "epsg:3005");
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: com.uber.hoodie/hoodie-client
/**
 * Pushes the write statuses through {@code index.updateLocation}, persists the result at the
 * configured write-status storage level and hands it to {@code commitOnAutoCommit}.
 *
 * @param writeStatusRDD write statuses passed to the index update
 * @param table table passed through to {@code index.updateLocation}
 * @param commitTime commit time forwarded to {@code commitOnAutoCommit}
 * @return the persisted statuses produced by the index update
 */
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
    HoodieTable<T> table, String commitTime) {
  // Update the index back and persist the resulting statuses.
  // NOTE(review): persist alone presumably does not run the job; a later action does - confirm.
  JavaRDD<WriteStatus> persistedStatuses = index.updateLocation(writeStatusRDD, jsc, table)
      .persist(config.getWriteStatusStorageLevel());

  // The commit action type is read from a freshly constructed meta client for the base path.
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  commitOnAutoCommit(commitTime, persistedStatuses, metaClient.getCommitActionType());
  return persistedStatuses;
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} with its last argument set to {@code true}
 * on a {@code PointRDD} built with the additional arguments {@code "epsg:4326"} and
 * {@code "epsg:3005"} (presumably source and target CRS codes - confirm against the
 * PointRDD constructor).
 *
 * <p>Builds an index of type {@code PointRDDIndexType}, persists the indexed raw RDD in
 * memory and counts the query result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testCRSTransformationSpatialRangeQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly, "epsg:4326", "epsg:3005");
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
代码示例来源:origin: org.datasyslab/geospark
/**
 * Exercises {@code RangeQuery.SpatialRangeQuery} against {@code rangeQueryWindow} with its
 * last argument set to {@code true} (presumably "use the index" - confirm against RangeQuery).
 *
 * <p>Rebuilds {@code objectRDD} as a {@code PointRDD}, builds an index of type
 * {@code PointRDDIndexType}, persists the indexed raw RDD in memory and counts the query
 * result {@code eachQueryLoopTimes} times.
 *
 * @throws Exception if building the RDD, building the index or running a query fails
 */
public static void testSpatialRangeQueryUsingIndex()
        throws Exception
{
    StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
    objectRDD = new PointRDD(sc, PointRDDInputLocation, PointRDDOffset, PointRDDSplitter, true, memoryOnly);
    objectRDD.buildIndex(PointRDDIndexType, false);
    objectRDD.indexedRawRDD.persist(memoryOnly);

    int round = 0;
    while (round < eachQueryLoopTimes) {
        long matched = RangeQuery.SpatialRangeQuery(objectRDD, rangeQueryWindow, false, true).count();
        // Trivially true for any non-negative count; it only confirms the query ran to completion.
        assert matched > -1;
        round++;
    }
}
内容来源于网络,如有侵权,请联系作者删除!