This article collects code examples of the Java method org.apache.spark.api.java.JavaSparkContext.union() and shows how JavaSparkContext.union() is used in practice. The examples were extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the JavaSparkContext.union() method:
Package path: org.apache.spark.api.java.JavaSparkContext
Class name: JavaSparkContext
Method name: union
Description: none provided; union() builds a single RDD out of the given RDDs.
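Before the collected examples, a minimal, self-contained usage sketch may help (the class and app names here are illustrative, not taken from the projects below):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class UnionExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("UnionExample").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> a = sc.parallelize(Arrays.asList(1, 2));
      JavaRDD<Integer> b = sc.parallelize(Arrays.asList(3, 4));
      // union() concatenates the inputs; it does not deduplicate elements
      JavaRDD<Integer> both = sc.union(a, b);
      System.out.println(both.collect()); // [1, 2, 3, 4]
    }
  }
}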
Code example source: org.apache.spark/spark-core_2.10 (the identical test also appears in spark-core_2.11 and spark-core)
@SuppressWarnings("unchecked")
@Test
public void sparkContextUnion() {
  // Union of non-specialized JavaRDDs
  List<String> strings = Arrays.asList("Hello", "World");
  JavaRDD<String> s1 = sc.parallelize(strings);
  JavaRDD<String> s2 = sc.parallelize(strings);
  // Varargs
  JavaRDD<String> sUnion = sc.union(s1, s2);
  assertEquals(4, sUnion.count());
  // List
  List<JavaRDD<String>> list = new ArrayList<>();
  list.add(s2);
  sUnion = sc.union(s1, list);
  assertEquals(4, sUnion.count());
  // Union of JavaDoubleRDDs
  List<Double> doubles = Arrays.asList(1.0, 2.0);
  JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles);
  JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles);
  JavaDoubleRDD dUnion = sc.union(d1, d2);
  assertEquals(4, dUnion.count());
  // Union of JavaPairRDDs
  List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
  pairs.add(new Tuple2<>(1, 2));
  pairs.add(new Tuple2<>(3, 4));
  JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs);
  JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs);
  JavaPairRDD<Integer, Integer> pUnion = sc.union(p1, p2);
  assertEquals(4, pUnion.count());
}
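Note that each assertion expects 4 elements, not 2: union() simply concatenates its inputs and never removes duplicates, unlike SQL's UNION. Call distinct() after the union if set semantics are needed.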
Code example source: apache/kylin (the same fragment also appears in org.apache.kylin/kylin-engine-spark)
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
    .reduceByKey(reduceFunction, SparkUtil.estimateTotalPartitionNum(cubeStatsReader, envConfig))
    .mapToPair(convertTextFunction).saveAsNewAPIHadoopDataset(job.getConfiguration());
FileOutputFormat.setOutputPath(job, new Path(cuboidOutputPath));
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
    .reduceByKey(reduceFunction,
        SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig))
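The Kylin fragment above is truncated, but the pattern it relies on is a common one: union several pair RDDs with identical key/value types, then reduceByKey to merge records that share a key. A minimal sketch of that pattern follows; the String/Long types and the summing reducer are illustrative assumptions, not Kylin's actual types or functions.

import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class MergeSegmentsSketch {
  // Union all segments into one RDD, then merge values that share a key.
  static JavaPairRDD<String, Long> mergeSegments(JavaSparkContext sc,
                                                 List<JavaPairRDD<String, Long>> segments) {
    @SuppressWarnings("unchecked")
    JavaPairRDD<String, Long>[] arr = segments.toArray(new JavaPairRDD[segments.size()]);
    return sc.union(arr)                    // concatenate all segments
             .reduceByKey((a, b) -> a + b); // illustrative reducer: sum per key
  }
}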
Code example source: mahmoudparsian/data-algorithms-book
static JavaRDD<String> readInputFiles(JavaSparkContext ctx,
                                      String filename)
    throws Exception {
  List<String> biosetFiles = toList(filename);
  int counter = 0;
  JavaRDD[] rdds = new JavaRDD[biosetFiles.size()];
  for (String biosetFileName : biosetFiles) {
    System.out.println("readInputFiles(): biosetFileName=" + biosetFileName);
    JavaRDD<String> record = ctx.textFile(biosetFileName);
    rdds[counter] = record;
    counter++;
  }
  JavaRDD<String> allBiosets = ctx.union(rdds);
  return allBiosets;
  //return allBiosets.coalesce(9, false);
} // readInputFiles
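A side note on this example: Spark's textFile() accepts a comma-separated list of paths, so when all file names are known up front, the read loop plus union() can usually be collapsed into a single call. A sketch under that assumption, reusing the biosetFiles list:

// Alternative to the loop + union: pass all paths to one textFile() call
String allPaths = String.join(",", biosetFiles);
JavaRDD<String> allBiosets = ctx.textFile(allPaths);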
Code example source: mahmoudparsian/data-algorithms-book
static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProducts(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
  // next find unique keys, with their associated copa scores
  JavaPairRDD<String, Iterable<Long>> groupedByGeneRDD = unionRDD.groupByKey();
  // next calculate ranked products and the number of elements
  // input: copa scores for all studies; output: (rankedProduct, N)
  JavaPairRDD<String, Tuple2<Double, Integer>> rankedProducts =
      groupedByGeneRDD.mapValues((Iterable<Long> values) -> {
        int N = 0;
        long products = 1;
        for (Long v : values) {
          products *= v;
          N++;
        }
        double rankedProduct = Math.pow((double) products, 1.0 / ((double) N));
        return new Tuple2<Double, Integer>(rankedProduct, N);
      });
  return rankedProducts;
}
Code example source: mahmoudparsian/data-algorithms-book
private static JavaRDD<String> readInputFiles(JavaSparkContext ctx,
                                              String filename)
    throws Exception {
  List<String> biosetFiles = toList(filename);
  int counter = 0;
  JavaRDD[] rdds = new JavaRDD[biosetFiles.size()];
  for (String biosetFileName : biosetFiles) {
    System.out.println("debug1 biosetFileName=" + biosetFileName);
    JavaRDD<String> record = ctx.textFile(biosetFileName);
    rdds[counter] = record;
    counter++;
  }
  JavaRDD<String> allBiosets = ctx.union(rdds);
  return allBiosets.coalesce(9, false);
}
Code example source: mahmoudparsian/data-algorithms-book
static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProducts(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
  // next find unique keys, with their associated copa scores
  JavaPairRDD<String, Iterable<Long>> groupedByGeneRDD = unionRDD.groupByKey();
  // next calculate ranked products and the number of elements
  JavaPairRDD<String, Tuple2<Double, Integer>> rankedProducts = groupedByGeneRDD.mapValues(
      new Function<
          Iterable<Long>,         // input: copa scores for all studies
          Tuple2<Double, Integer> // output: (rankedProduct, N)
          >() {
        @Override
        public Tuple2<Double, Integer> call(Iterable<Long> values) {
          int N = 0;
          long products = 1;
          for (Long v : values) {
            products *= v;
            N++;
          }
          double rankedProduct = Math.pow((double) products, 1.0 / ((double) N));
          return new Tuple2<Double, Integer>(rankedProduct, N);
        }
      });
  return rankedProducts;
}
Code example source: mahmoudparsian/data-algorithms-book
static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProductsUsingCombineByKey(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
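The snippet above is cut off right after the union, and the book's actual combineByKey logic is not reproduced here. Purely as an illustration of how the same (rankedProduct, N) result as the groupByKey versions could be obtained with combineByKey, a hedged sketch (assuming the same imports as the examples above):

// Illustrative sketch only -- not the book's implementation.
// The combiner carries (running product of ranks, count) per key.
static JavaPairRDD<String, Tuple2<Double, Integer>> rankedProductsViaCombineByKey(
    JavaPairRDD<String, Long> unionRDD) {
  return unionRDD
      .combineByKey(
          v -> new Tuple2<Long, Integer>(v, 1),                        // first value for a key
          (c, v) -> new Tuple2<Long, Integer>(c._1() * v, c._2() + 1), // fold in another value
          (c1, c2) -> new Tuple2<Long, Integer>(                       // merge partial results
              c1._1() * c2._1(), c1._2() + c2._2()))
      .mapValues(c -> new Tuple2<Double, Integer>(
          Math.pow((double) c._1(), 1.0 / c._2()), c._2()));
}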
Code example source: org.apache.crunch/crunch-spark (the same method also appears in apache/crunch)
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaRDD[] rdds = new JavaRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
    if (parents.get(i) instanceof PTableBase) {
      JavaPairRDD prdd = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
      rdds[i] = prdd.mapPartitions(new FlatMapPairDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    } else {
      rdds[i] = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    }
  }
  return runtime.getSparkContext().union(rdds);
}
Code example source: org.apache.crunch/crunch-spark (the same method also appears in apache/crunch)
private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaPairRDD[] rdds = new JavaPairRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
    if (parents.get(i) instanceof PTableBase) {
      rdds[i] = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    } else {
      JavaRDD rdd = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
      rdds[i] = rdd.mapPartitionsToPair(new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
    }
  }
  return runtime.getSparkContext().union(rdds);
}
代码示例来源:origin: org.apache.kylin/kylin-engine-spark
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
.reduceByKey(reduceFunction, SparkUtil.estimateTotalPartitionNum(cubeStatsReader, envConfig))
.mapToPair(convertTextFunction).saveAsNewAPIHadoopDataset(job.getConfiguration());
FileOutputFormat.setOutputPath(job, new Path(cuboidOutputPath));
sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
.reduceByKey(reduceFunction,
SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig))
Code example source: locationtech/geowave
combinedResults = javaSC.union(combinedResults, tierMatches);
combinedResults = combinedResults.reduceByKey((id1, id2) -> id1);
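Here union() feeds a per-key deduplication: reduceByKey with a reducer that simply keeps one of its two arguments collapses duplicate keys to a single entry. The same idiom in isolation (toy Long/String types, not GeoWave's actual key and value classes):

// Deduplicate by key after a union: a reducer that returns either
// argument leaves exactly one value per key.
JavaPairRDD<Long, String> a = sc.parallelizePairs(
    Arrays.asList(new Tuple2<Long, String>(1L, "x"), new Tuple2<Long, String>(2L, "y")));
JavaPairRDD<Long, String> b = sc.parallelizePairs(
    Arrays.asList(new Tuple2<Long, String>(2L, "y"), new Tuple2<Long, String>(3L, "z")));
JavaPairRDD<Long, String> deduped = sc.union(a, b).reduceByKey((v1, v2) -> v1);
// deduped now contains keys 1, 2, 3 exactly once each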