This article collects code examples of the org.apache.spark.api.java.JavaRDD.count() method in Java and shows how JavaRDD.count() is used in practice. The examples are taken from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the JavaRDD.count() method:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: count()
Description: none provided
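Before the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the class name, application name, and local[2] master are illustrative) of calling count() on a small RDD:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class CountExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("CountExample").setMaster("local[2]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
      // count() is an action: it triggers a job over the RDD and returns the number of elements
      long n = rdd.count();
      System.out.println("count = " + n); // prints: count = 5
    }
  }
}

Note that count() returns a long, so casts such as (int) trainData.count() in the examples below narrow the value and assume the data set is small.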
Code example source: OryxProject/oryx

static JavaRDD<Vector> fetchSampleData(JavaRDD<Vector> evalData) {
  long count = evalData.count();
  if (count > MAX_SAMPLE_SIZE) {
    return evalData.sample(false, (double) MAX_SAMPLE_SIZE / count);
  }
  return evalData;
}
Code example source: OryxProject/oryx

@Override
public double evaluate(JavaSparkContext sparkContext,
                       PMML model,
                       Path modelParentPath,
                       JavaRDD<String> testData,
                       JavaRDD<String> trainData) {
  long testDataCount = testData.count();
  testCounts.add((int) testDataCount);
  log.info("Returning eval {}", testDataCount);
  return testDataCount;
}
Code example source: OryxProject/oryx

static double accuracy(DecisionForest forest, JavaRDD<Example> examples) {
  long total = examples.count();
  if (total == 0) {
    return 0.0;
  }
  long correct = examples.filter(example -> {
    CategoricalPrediction prediction = (CategoricalPrediction) forest.predict(example);
    CategoricalFeature target = (CategoricalFeature) example.getTarget();
    return prediction.getMostProbableCategoryEncoding() == target.getEncoding();
  }).count();
  return (double) correct / total;
}
Code example source: OryxProject/oryx

@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  // If lists are unequal at this point, there must have been an empty test set
  // which yielded no call to evaluate(). Fill in the blank
  while (trainCounts.size() > testCounts.size()) {
    testCounts.add(0);
  }
  trainCounts.add((int) trainData.count());
  return PMMLUtilsTest.buildDummyModel();
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)

@Test
public void sample() {
  List<Integer> ints = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  JavaRDD<Integer> rdd = sc.parallelize(ints);
  // the seeds here are "magic" to make this work out nicely
  JavaRDD<Integer> sample20 = rdd.sample(true, 0.2, 8);
  assertEquals(2, sample20.count());
  JavaRDD<Integer> sample20WithoutReplacement = rdd.sample(false, 0.2, 2);
  assertEquals(2, sample20WithoutReplacement.count());
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)

@Test
public void emptyRDD() {
  JavaRDD<String> rdd = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, rdd.count());
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)

@Test
public void testJavaJdbcRDD() throws Exception {
  JavaRDD<Integer> rdd = JdbcRDD.create(
    sc,
    () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"),
    "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
    1, 100, 1,
    r -> r.getInt(1)
  ).cache();
  Assert.assertEquals(100, rdd.count());
  Assert.assertEquals(Integer.valueOf(10100), rdd.reduce((i1, i2) -> i1 + i2));
}
Code example source: OryxProject/oryx

@Test
public void testFetchSampleEvalData() {
  JavaRDD<Vector> evalData = SilhouetteCoefficient.fetchSampleData(getRddOfVectors());
  assertEquals(6, evalData.count());
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)

@Test
public void checkpointAndComputation() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  assertFalse(rdd.isCheckpointed());
  rdd.checkpoint();
  rdd.count(); // Forces the DAG to cause a checkpoint
  assertTrue(rdd.isCheckpointed());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), rdd.collect());
}
Code example source: org.apache.spark/spark-core (the same test appears verbatim in spark-core_2.10 and spark-core_2.11)

@Test
public void checkpointAndRestore() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  assertFalse(rdd.isCheckpointed());
  rdd.checkpoint();
  rdd.count(); // Forces the DAG to cause a checkpoint
  assertTrue(rdd.isCheckpointed());
  assertTrue(rdd.getCheckpointFile().isPresent());
  JavaRDD<Integer> recovered = sc.checkpointFile(rdd.getCheckpointFile().get());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), recovered.collect());
}