本文整理了 Java 中 org.apache.spark.api.java.JavaRDD.partitions() 方法的一些代码示例,展示了 JavaRDD.partitions() 的具体用法。这些代码示例主要来源于 Github/Stackoverflow/Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.partitions() 方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:partitions
暂无
代码示例来源:origin: OryxProject/oryx
/**
 * Builds a mapping from an ID to the collection of IDs currently associated
 * with it, replaying add/delete events in timestamp order.
 *
 * <p>Each input record is a {@code String[]} laid out as
 * {@code [user, item, strength, timestamp]}; an empty strength field
 * ({@code datum[2]}) marks the record as a deletion — TODO confirm field
 * layout against the upstream data format.</p>
 *
 * @param allData    raw event records
 * @param knownItems if {@code true}, key by user and collect known items;
 *                   if {@code false}, key by item and collect known users
 * @return pair RDD of ID to the set of associated IDs after applying deletes
 */
private static JavaPairRDD<String,Collection<String>> knownsRDD(JavaRDD<String[]> allData,
boolean knownItems) {
  // Sort by timestamp so later deletes override earlier adds; keep the
  // existing partition count rather than letting sortBy repartition.
  JavaRDD<String[]> sorted = allData.sortBy(datum -> Long.valueOf(datum[3]), true, allData.partitions().size());
  JavaPairRDD<String,Tuple2<String,Boolean>> tuples = sorted.mapToPair(datum -> {
    String user = datum[0];
    String item = datum[1];
    // Primitive boolean: no need for the boxed type in a local; it is
    // autoboxed once when stored into the Tuple2.
    boolean delete = datum[2].isEmpty();
    return knownItems ?
        new Tuple2<>(user, new Tuple2<>(item, delete)) :
        new Tuple2<>(item, new Tuple2<>(user, delete));
  });
  // TODO likely need to figure out a way to avoid groupByKey but collectByKey
  // won't work here -- doesn't guarantee enough about ordering
  return tuples.groupByKey().mapValues(idDeletes -> {
    Collection<String> ids = new HashSet<>();
    // Replay events in order: an add inserts the ID, a delete removes it.
    for (Tuple2<String,Boolean> idDelete : idDeletes) {
      if (idDelete._2()) {
        ids.remove(idDelete._1());
      } else {
        ids.add(idDelete._1());
      }
    }
    return ids;
  });
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void iterator() {
  // Distribute five ints across two partitions, then read the first element
  // of partition 0 through the low-level RDD.iterator API with an empty
  // TaskContext.
  JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
  TaskContext emptyContext = TaskContext$.MODULE$.empty();
  Integer head = numbers.iterator(numbers.partitions().get(0), emptyContext).next();
  assertEquals(1, head.intValue());
}
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void iterator() {
  // Build a two-partition RDD of five ints and verify that iterating the
  // first partition directly yields the first parallelized value.
  JavaRDD<Integer> ints = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
  TaskContext ctx = TaskContext$.MODULE$.empty();
  Integer first = ints.iterator(ints.partitions().get(0), ctx).next();
  assertEquals(1, first.intValue());
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void iterator() {
  // Spread [1..5] over two partitions; pulling the first value out of
  // partition 0 via RDD.iterator should give 1.
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
  TaskContext taskContext = TaskContext$.MODULE$.empty();
  Integer firstValue = data.iterator(data.partitions().get(0), taskContext).next();
  assertEquals(1, firstValue.intValue());
}
代码示例来源:origin: com.davidbracewell/mango
@Override
public <R extends Comparable<R>> MStream<T> sorted(boolean ascending, @NonNull SerializableFunction<? super T, ? extends R> keyFunction) {
  // Preserve the current partition count so sortBy does not repartition.
  final int partitionCount = rdd.partitions().size();
  return new SparkStream<>(rdd.sortBy(element -> {
    // Re-apply the broadcast configuration on the executor before the key
    // function runs, so worker-side state matches the driver.
    Configurator.INSTANCE.configure(configBroadcast.value());
    return keyFunction.apply(element);
  }, ascending, partitionCount));
}
代码示例来源:origin: com.davidbracewell/mango
@Override
public <U> SparkPairStream<T, U> zip(@NonNull MStream<U> other) {
  // Fast path: another Spark-backed stream can be zipped RDD-to-RDD.
  if (other instanceof SparkStream) {
    SparkStream<U> sparkOther = Cast.as(other);
    return new SparkPairStream<>(rdd.zip(sparkOther.rdd));
  }
  // Fallback: materialize the foreign stream on the driver and re-distribute
  // it with the same partition count so the element-wise zip lines up.
  JavaSparkContext sparkContext = new JavaSparkContext(rdd.context());
  int partitionCount = rdd.partitions().size();
  return new SparkPairStream<>(rdd.zip(sparkContext.parallelize(other.collect(), partitionCount)));
}
代码示例来源:origin: org.qcri.rheem/rheem-spark
nb_partitions = inputRdd.partitions().size();
partitionSize = (int) Math.ceil((double) datasetSize / nb_partitions);
first = false;
代码示例来源:origin: org.qcri.rheem/rheem-spark
if (tupleID == 0) {
if (nb_partitions == 0) { //it's the first time we sample or we read all partitions already, start again
nb_partitions = inputRdd.partitions().size();
partitions = new ArrayList<>();
for (int i = 0; i < nb_partitions; i++)
内容来源于网络,如有侵权,请联系作者删除!