Usage and Code Examples of the org.apache.spark.api.java.JavaRDD.countByValue() Method


This article collects code examples of the org.apache.spark.api.java.JavaRDD.countByValue() method in Java and shows how JavaRDD.countByValue() is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as a useful reference. Details of the JavaRDD.countByValue() method:

Package: org.apache.spark.api.java
Class: JavaRDD
Method: countByValue

About JavaRDD.countByValue

Returns the count of each unique value in the RDD as a local map of (value, count) pairs. Because the final combine step happens on the driver, the result map must fit in driver memory.
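To make the behavior concrete, here is a minimal, self-contained sketch (the class name, local master URL, and sample data are illustrative assumptions, not taken from the examples below):

import java.util.Arrays;
import java.util.Map;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class CountByValueSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "countByValue-sketch");
    JavaRDD<String> rdd = sc.parallelize(Arrays.asList("a", "b", "a", "c", "a"));
    // The result is an ordinary java.util.Map materialized on the driver:
    // {a=3, b=1, c=1}
    Map<String, Long> counts = rdd.countByValue();
    System.out.println(counts);
    sc.stop();
  }
}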

Code examples

Code example from: OryxProject/oryx

/**
 * @param trainPointData data to cluster
 * @param model trained KMeansModel
 * @return map from cluster ID to the count of points assigned to that cluster
 */
private static Map<Integer,Long> fetchClusterCountsFromModel(JavaRDD<? extends Vector> trainPointData,
                                                             KMeansModel model) {
  return trainPointData.map(model::predict).countByValue();
}
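For context, here is a hedged, self-contained sketch of the same map-then-count pattern (the class name, sample vectors, k = 2, and iteration count are assumptions for illustration, not Oryx code):

import java.util.Arrays;
import java.util.Map;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.clustering.KMeans;
import org.apache.spark.mllib.clustering.KMeansModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public class ClusterCountsSketch {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "cluster-counts");
    JavaRDD<Vector> points = sc.parallelize(Arrays.asList(
        Vectors.dense(0.0, 0.0), Vectors.dense(0.1, 0.1), Vectors.dense(9.0, 9.0)));
    // Train a 2-cluster model (assumed parameters), then count points per cluster id
    KMeansModel model = KMeans.train(points.rdd(), 2, 10);
    Map<Integer, Long> clusterCounts = points.map(model::predict).countByValue();
    clusterCounts.forEach((id, n) -> System.out.println("cluster " + id + ": " + n));
    sc.stop();
  }
}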

Code example from: org.apache.spark/spark-core (the identical test also ships in the spark-core_2.10 and spark-core_2.11 artifacts; it is shown once here)

@Test
public void approximateResults() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
 // Exact counts, collected to the driver as a Map
 Map<Integer, Long> countsByValue = rdd.countByValue();
 assertEquals(2, countsByValue.get(1).longValue());
 assertEquals(1, countsByValue.get(13).longValue());
 // Approximate counts: returns a PartialResult within the given timeout (1 ms);
 // getFinalValue() then blocks until the exact answer is available
 PartialResult<Map<Integer, BoundedDouble>> approx = rdd.countByValueApprox(1);
 Map<Integer, BoundedDouble> finalValue = approx.getFinalValue();
 assertEquals(2.0, finalValue.get(1).mean(), 0.01);
 assertEquals(1.0, finalValue.get(13).mean(), 0.01);
}
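countByValueApprox also has a two-argument overload that takes a confidence level; here is a hedged sketch of reading the resulting confidence bounds (the timeout and confidence values are assumptions, and rdd is the RDD from the test above):

// Wait up to 200 ms for an estimate at 95% confidence
PartialResult<Map<Integer, BoundedDouble>> approx = rdd.countByValueApprox(200, 0.95);
// getFinalValue() blocks until the exact result is available
BoundedDouble estimate = approx.getFinalValue().get(1);
System.out.println(estimate.mean() + " in [" + estimate.low() + ", " + estimate.high() + "]");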

Code example from: databricks/learning-spark

public class BasicFlatMap {
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      throw new Exception("Usage: BasicFlatMap sparkMaster inputFile");
    }

    JavaSparkContext sc = new JavaSparkContext(
        args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    JavaRDD<String> rdd = sc.textFile(args[1]);
    // Split each line into words (this Spark 1.x FlatMapFunction returns an Iterable)
    JavaRDD<String> words = rdd.flatMap(
        new FlatMapFunction<String, String>() {
          public Iterable<String> call(String x) {
            return Arrays.asList(x.split(" "));
          }
        });
    // Count each word's occurrences; the whole map is collected to the driver
    Map<String, Long> result = words.countByValue();
    for (Entry<String, Long> entry : result.entrySet()) {
      System.out.println(entry.getKey() + ":" + entry.getValue());
    }
  }
}
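Because countByValue() materializes the whole result map on the driver, it can become a bottleneck for large vocabularies. A distributed alternative keeps the counts as an RDD instead; this is a sketch assuming Java 8 lambdas, the scala.Tuple2 and org.apache.spark.api.java.JavaPairRDD imports, and a placeholder output path:

JavaPairRDD<String, Long> counts = words
    .mapToPair(w -> new Tuple2<>(w, 1L))
    .reduceByKey(Long::sum);
counts.saveAsTextFile("word-counts");  // placeholder output path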

Code example from: com.davidbracewell/mango

@Override
public Map<T, Long> countByValue() {
 return rdd.countByValue();
}

Code example from: phuonglh/vn.vitk

public NGramBuilder(String regexpFileName, String inputFileName, String unigramFileName, String bigramFileName) {
  JavaRDD<String> lines = jsc.textFile(inputFileName).filter(new InvalidLineFilter());
  System.out.println("#(lines) = " + lines.count());

  // create unigrams, keep those with frequency >= 2, and save them
  converter = new Converter(regexpFileName);
  Map<String, Long> unigrams = lines.flatMap(new UnigramFunction()).countByValue();
  List<Tuple2<String, Long>> tuples = new ArrayList<Tuple2<String, Long>>(unigrams.size());
  for (String word : unigrams.keySet()) {
    Long f = unigrams.get(word);
    if (f >= 2)
      tuples.add(new Tuple2<String, Long>(word, f));
  }

  JavaPairRDD<String, Long> jprdd = jsc.parallelizePairs(tuples);
  jprdd.saveAsTextFile(unigramFileName, GzipCodec.class);

  // create bigrams, keep those with frequency >= 2, and save them
  Map<Tuple2<String, String>, Long> bigrams = lines.flatMap(new BigramFunction()).countByValue();
  tuples = new ArrayList<Tuple2<String, Long>>(bigrams.size());
  for (Tuple2<String, String> pair : bigrams.keySet()) {
    Long f = bigrams.get(pair);
    if (f >= 2)
      tuples.add(new Tuple2<String, Long>(pair._1() + ',' + pair._2(), f));
  }
  jprdd = jsc.parallelizePairs(tuples);
  jprdd.saveAsTextFile(bigramFileName, GzipCodec.class);
}
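UnigramFunction and BigramFunction are project-specific classes; the bigram-counting idea can be inlined with Java 8 lambdas as in this hedged sketch (the sample lines are invented, and returning an Iterator assumes the Spark 2.x flatMap signature; Spark 1.x expected an Iterable):

JavaRDD<String> lines = jsc.parallelize(Arrays.asList("a b a", "b a b"));
Map<Tuple2<String, String>, Long> bigrams = lines.flatMap(line -> {
  String[] toks = line.split("\\s+");
  List<Tuple2<String, String>> pairs = new ArrayList<>();
  // emit each adjacent pair of tokens as one bigram
  for (int i = 0; i + 1 < toks.length; i++) {
    pairs.add(new Tuple2<>(toks[i], toks[i + 1]));
  }
  return pairs.iterator();
}).countByValue();
// e.g. {(a,b)=2, (b,a)=2}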
