org.apache.spark.api.java.JavaRDD.count()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(7.7k)|赞(0)|评价(0)|浏览(176)

本文整理了Java中org.apache.spark.api.java.JavaRDD.count()方法的一些代码示例,展示了JavaRDD.count()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.count()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:count

JavaRDD.count介绍

count():返回 RDD 中元素的总数。注意该方法是一个 action(行动操作),调用时会触发整个 RDD 血缘(lineage)的实际计算。

代码示例

代码示例来源:origin: OryxProject/oryx

/**
 * Returns {@code evalData} as-is when it holds at most {@code MAX_SAMPLE_SIZE}
 * elements; otherwise returns a without-replacement sample whose expected size
 * is about {@code MAX_SAMPLE_SIZE}. Note that count() triggers a Spark job.
 */
static JavaRDD<Vector> fetchSampleData(JavaRDD<Vector> evalData) {
  long total = evalData.count();
  return total > MAX_SAMPLE_SIZE
      ? evalData.sample(false, (double) MAX_SAMPLE_SIZE / total)
      : evalData;
}

代码示例来源:origin: OryxProject/oryx

/**
 * Test double for evaluation: records the size of the test split in
 * {@code testCounts} and reports that same size as the "metric" value.
 */
@Override
public double evaluate(JavaSparkContext sparkContext,
                       PMML model,
                       Path modelParentPath,
                       JavaRDD<String> testData,
                       JavaRDD<String> trainData) {
  long numTest = testData.count();
  testCounts.add((int) numTest);
  log.info("Returning eval {}", numTest);
  return numTest;
}

代码示例来源:origin: OryxProject/oryx

/**
 * Computes the fraction of {@code examples} whose most probable predicted
 * category encoding equals the example's actual target encoding.
 * Returns 0.0 for an empty RDD to avoid a divide-by-zero.
 */
static double accuracy(DecisionForest forest, JavaRDD<Example> examples) {
  long numExamples = examples.count();
  if (numExamples == 0) {
    return 0.0;
  }
  JavaRDD<Example> correctlyClassified = examples.filter(example -> {
    CategoricalPrediction predicted = (CategoricalPrediction) forest.predict(example);
    CategoricalFeature actual = (CategoricalFeature) example.getTarget();
    return predicted.getMostProbableCategoryEncoding() == actual.getEncoding();
  });
  return (double) correctlyClassified.count() / numExamples;
}

代码示例来源:origin: OryxProject/oryx

/**
 * Test double for model building: records the train-split size in
 * {@code trainCounts} and returns a dummy PMML model.
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  // An empty test split never triggers evaluate(), which leaves testCounts
  // shorter than trainCounts; pad with zeros until the lists line up again.
  int missing = trainCounts.size() - testCounts.size();
  for (int i = 0; i < missing; i++) {
    testCounts.add(0);
  }
  trainCounts.add((int) trainData.count());
  return PMMLUtilsTest.buildDummyModel();
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void sample() {
  List<Integer> ints = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  JavaRDD<Integer> rdd = sc.parallelize(ints);
  // the seeds here are "magic": chosen so each 20% sample hits exactly 2 elements
  JavaRDD<Integer> withReplacement = rdd.sample(true, 0.2, 8);
  assertEquals(2, withReplacement.count());
  JavaRDD<Integer> withoutReplacement = rdd.sample(false, 0.2, 2);
  assertEquals(2, withoutReplacement.count());
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void sample() {
  List<Integer> ints = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  JavaRDD<Integer> rdd = sc.parallelize(ints);
  // the seeds here are "magic": chosen so each 20% sample hits exactly 2 elements
  JavaRDD<Integer> withReplacement = rdd.sample(true, 0.2, 8);
  assertEquals(2, withReplacement.count());
  JavaRDD<Integer> withoutReplacement = rdd.sample(false, 0.2, 2);
  assertEquals(2, withoutReplacement.count());
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void sample() {
  List<Integer> ints = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  JavaRDD<Integer> rdd = sc.parallelize(ints);
  // the seeds here are "magic": chosen so each 20% sample hits exactly 2 elements
  JavaRDD<Integer> withReplacement = rdd.sample(true, 0.2, 8);
  assertEquals(2, withReplacement.count());
  JavaRDD<Integer> withoutReplacement = rdd.sample(false, 0.2, 2);
  assertEquals(2, withoutReplacement.count());
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void emptyRDD() {
  // An RDD created via emptyRDD() must report a count of zero.
  JavaRDD<String> empty = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, empty.count());
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void emptyRDD() {
  // An RDD created via emptyRDD() must report a count of zero.
  JavaRDD<String> empty = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, empty.count());
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void emptyRDD() {
  // An RDD created via emptyRDD() must report a count of zero.
  JavaRDD<String> empty = sc.emptyRDD();
  assertEquals("Empty RDD shouldn't have any values", 0, empty.count());
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void testJavaJdbcRDD() throws Exception {
  // Read rows with 1 <= ID <= 100 from Derby table FOO, mapping each row
  // to its first (and only) selected column as an int.
  JavaRDD<Integer> rdd = JdbcRDD.create(
    sc,
    () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"),
    "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
    1, 100, 1,
    r -> r.getInt(1)
  ).cache();

  Assert.assertEquals(100, rdd.count());
  Assert.assertEquals(Integer.valueOf(10100), rdd.reduce(Integer::sum));
}
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void testJavaJdbcRDD() throws Exception {
  // Read rows with 1 <= ID <= 100 from Derby table FOO, mapping each row
  // to its first (and only) selected column as an int.
  JavaRDD<Integer> rdd = JdbcRDD.create(
    sc,
    () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"),
    "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
    1, 100, 1,
    r -> r.getInt(1)
  ).cache();

  Assert.assertEquals(100, rdd.count());
  Assert.assertEquals(Integer.valueOf(10100), rdd.reduce(Integer::sum));
}
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void testJavaJdbcRDD() throws Exception {
  // Read rows with 1 <= ID <= 100 from Derby table FOO, mapping each row
  // to its first (and only) selected column as an int.
  JavaRDD<Integer> rdd = JdbcRDD.create(
    sc,
    () -> DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb"),
    "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
    1, 100, 1,
    r -> r.getInt(1)
  ).cache();

  Assert.assertEquals(100, rdd.count());
  Assert.assertEquals(Integer.valueOf(10100), rdd.reduce(Integer::sum));
}
}

代码示例来源:origin: OryxProject/oryx

@Test
public void testFetchSampleEvalData() {
  // fetchSampleData should hand back all 6 vectors for this input.
  JavaRDD<Vector> sampled = SilhouetteCoefficient.fetchSampleData(getRddOfVectors());
  assertEquals(6, sampled.count());
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void checkpointAndComputation() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: an action such as count() runs the DAG and writes it
  data.count();
  assertTrue(data.isCheckpointed());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), data.collect());
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void checkpointAndComputation() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: an action such as count() runs the DAG and writes it
  data.count();
  assertTrue(data.isCheckpointed());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), data.collect());
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void checkpointAndComputation() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: an action such as count() runs the DAG and writes it
  data.count();
  assertTrue(data.isCheckpointed());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), data.collect());
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void checkpointAndRestore() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: count() forces the DAG to run and write the files
  data.count();
  assertTrue(data.isCheckpointed());
  assertTrue(data.getCheckpointFile().isPresent());
  // Reload the checkpointed data from disk and verify round-trip fidelity
  JavaRDD<Integer> restored = sc.checkpointFile(data.getCheckpointFile().get());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), restored.collect());
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void checkpointAndRestore() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: count() forces the DAG to run and write the files
  data.count();
  assertTrue(data.isCheckpointed());
  assertTrue(data.getCheckpointFile().isPresent());
  // Reload the checkpointed data from disk and verify round-trip fidelity
  JavaRDD<Integer> restored = sc.checkpointFile(data.getCheckpointFile().get());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), restored.collect());
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void checkpointAndRestore() {
  sc.setCheckpointDir(tempDir.getAbsolutePath());
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  assertFalse(data.isCheckpointed());
  data.checkpoint();
  // Checkpointing is lazy: count() forces the DAG to run and write the files
  data.count();
  assertTrue(data.isCheckpointed());
  assertTrue(data.getCheckpointFile().isPresent());
  // Reload the checkpointed data from disk and verify round-trip fidelity
  JavaRDD<Integer> restored = sc.checkpointFile(data.getCheckpointFile().get());
  assertEquals(Arrays.asList(1, 2, 3, 4, 5), restored.collect());
}

相关文章

微信公众号

最新文章

更多