Usage of org.apache.spark.api.java.JavaSparkContext.union() with Code Examples


This article collects code examples of the org.apache.spark.api.java.JavaSparkContext.union() method in Java and shows how JavaSparkContext.union() is used in practice. The examples were taken from selected projects on platforms such as GitHub, Stack Overflow, and Maven, so they should serve as useful references. Details of JavaSparkContext.union() are as follows:
Package path: org.apache.spark.api.java.JavaSparkContext
Class name: JavaSparkContext
Method name: union

About JavaSparkContext.union

The original source provides no description. In short, union() returns a single RDD containing all the elements of the given RDDs (JavaRDD, JavaDoubleRDD, or JavaPairRDD); duplicate elements are kept, so call distinct() afterwards if deduplication is needed.
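Before the sourced examples below, here is a minimal, hypothetical sketch of calling the method (the class name UnionExample and the sample data are made up; it assumes a local Spark 2.x/3.x setup where the varargs overload of union() is available):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class UnionExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("UnionExample").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<String> first = sc.parallelize(Arrays.asList("a", "b"));
      JavaRDD<String> second = sc.parallelize(Arrays.asList("b", "c"));

      // union() keeps duplicates: the result has 4 elements, not 3.
      JavaRDD<String> all = sc.union(first, second);
      System.out.println(all.collect()); // e.g. [a, b, b, c]
    }
  }
}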

Code Examples

Code example source: org.apache.spark/spark-core_2.10

@SuppressWarnings("unchecked")
@Test
public void sparkContextUnion() {
 // Union of non-specialized JavaRDDs
 List<String> strings = Arrays.asList("Hello", "World");
 JavaRDD<String> s1 = sc.parallelize(strings);
 JavaRDD<String> s2 = sc.parallelize(strings);
 // Varargs
 JavaRDD<String> sUnion = sc.union(s1, s2);
 assertEquals(4, sUnion.count());
 // List
 List<JavaRDD<String>> list = new ArrayList<>();
 list.add(s2);
 sUnion = sc.union(s1, list);
 assertEquals(4, sUnion.count());
 // Union of JavaDoubleRDDs
 List<Double> doubles = Arrays.asList(1.0, 2.0);
 JavaDoubleRDD d1 = sc.parallelizeDoubles(doubles);
 JavaDoubleRDD d2 = sc.parallelizeDoubles(doubles);
 JavaDoubleRDD dUnion = sc.union(d1, d2);
 assertEquals(4, dUnion.count());
 // Union of JavaPairRDDs
 List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
 pairs.add(new Tuple2<>(1, 2));
 pairs.add(new Tuple2<>(3, 4));
 JavaPairRDD<Integer, Integer> p1 = sc.parallelizePairs(pairs);
 JavaPairRDD<Integer, Integer> p2 = sc.parallelizePairs(pairs);
 JavaPairRDD<Integer, Integer> pUnion = sc.union(p1, p2);
 assertEquals(4, pUnion.count());
}

Code example sources: org.apache.spark/spark-core_2.11 and org.apache.spark/spark-core — both contain a test identical to the spark-core_2.10 example above, so it is not repeated here.

Code example source: apache/kylin

sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
    .reduceByKey(reduceFunction, SparkUtil.estimateTotalPartitionNum(cubeStatsReader, envConfig))
    .mapToPair(convertTextFunction).saveAsNewAPIHadoopDataset(job.getConfiguration());
  FileOutputFormat.setOutputPath(job, new Path(cuboidOutputPath));
  sc.union(mergingSegs.toArray(new JavaPairRDD[mergingSegs.size()]))
      .reduceByKey(reduceFunction,
          SparkUtil.estimateLayerPartitionNum(level, cubeStatsReader, envConfig))
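The Kylin fragment above is truncated; it unions an array of per-segment JavaPairRDDs and then merges values by key. A rough, hypothetical sketch of that pattern (the names mergeSegments, segments, and the summing reduce function are illustrative, not Kylin's actual code; it assumes the usual Spark imports):

// Union several pair RDDs ("segments") and merge values that share a key.
@SuppressWarnings("unchecked")
static JavaPairRDD<String, Long> mergeSegments(JavaSparkContext sc,
                                               List<JavaPairRDD<String, Long>> segments,
                                               int numPartitions) {
  JavaPairRDD<String, Long> merged =
      sc.union(segments.toArray(new JavaPairRDD[segments.size()]));
  // reduceByKey combines the values of identical keys across all segments.
  return merged.reduceByKey((a, b) -> a + b, numPartitions);
}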

Code example source: mahmoudparsian/data-algorithms-book

static JavaRDD<String> readInputFiles(JavaSparkContext ctx,
                                      String filename) throws Exception {
  List<String> biosetFiles = toList(filename);
  int counter = 0;
  JavaRDD[] rdds = new JavaRDD[biosetFiles.size()];
  for (String biosetFileName : biosetFiles) {
    System.out.println("readInputFiles(): biosetFileName=" + biosetFileName);
    JavaRDD<String> record = ctx.textFile(biosetFileName);
    rdds[counter] = record;
    counter++;
  }
  JavaRDD<String> allBiosets = ctx.union(rdds);
  return allBiosets;
  //return allBiosets.coalesce(9, false);
} // readInputFiles

Code example source: mahmoudparsian/data-algorithms-book

static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProducts(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
  
  // next find unique keys, with their associated copa scores
  JavaPairRDD<String, Iterable<Long>> groupedByGeneRDD = unionRDD.groupByKey();
  
  // next calculate ranked products and the number of elements
  JavaPairRDD<String, Tuple2<Double, Integer>> rankedProducts = 
      groupedByGeneRDD.mapValues((Iterable<Long> values) -> {
    int N = 0;
    long products = 1;
    for (Long v : values) {
      products *= v;
      N++;
    }
    
    double rankedProduct = Math.pow( (double) products, 1.0/((double) N));
    return new Tuple2<Double, Integer>(rankedProduct, N);
  } // input: copa scores for all studies
  // output: (rankedProduct, N)
  );                
  return rankedProducts;
}
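A hypothetical usage of the method above (the study data is made up; it assumes the same class and imports as the example, plus java.util.Arrays):

// Hypothetical invocation of computeRankedProducts(); the input data is illustrative.
@SuppressWarnings("unchecked")
static void rankedProductsExample(JavaSparkContext context) {
  JavaPairRDD<String, Long> study1 = context.parallelizePairs(
      Arrays.asList(new Tuple2<String, Long>("geneA", 1L), new Tuple2<String, Long>("geneB", 3L)));
  JavaPairRDD<String, Long> study2 = context.parallelizePairs(
      Arrays.asList(new Tuple2<String, Long>("geneA", 2L), new Tuple2<String, Long>("geneB", 4L)));

  JavaPairRDD<String, Long>[] ranks = new JavaPairRDD[] { study1, study2 };
  JavaPairRDD<String, Tuple2<Double, Integer>> rankedProducts =
      computeRankedProducts(context, ranks);

  // geneA -> (sqrt(1*2) ~ 1.41, 2), geneB -> (sqrt(3*4) ~ 3.46, 2)
  for (Tuple2<String, Tuple2<Double, Integer>> t : rankedProducts.collect()) {
    System.out.println(t._1() + " -> " + t._2());
  }
}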

Code example source: mahmoudparsian/data-algorithms-book

private static JavaRDD<String> readInputFiles(JavaSparkContext ctx,
                                              String filename) throws Exception {
  List<String> biosetFiles = toList(filename);
  int counter = 0;
  JavaRDD[] rdds = new JavaRDD[biosetFiles.size()];
  for (String biosetFileName : biosetFiles) {
    System.out.println("debug1 biosetFileName=" + biosetFileName);
    JavaRDD<String> record = ctx.textFile(biosetFileName);
    rdds[counter] = record;
    counter++;
  }
  JavaRDD<String> allBiosets = ctx.union(rdds);
  return allBiosets.coalesce(9, false);
}

Code example source: mahmoudparsian/data-algorithms-book

static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProducts(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
  
  // next find unique keys, with their associated copa scores
  JavaPairRDD<String, Iterable<Long>> groupedByGeneRDD = unionRDD.groupByKey();
  
  // next calculate ranked products and the number of elements
  JavaPairRDD<String, Tuple2<Double, Integer>> rankedProducts = groupedByGeneRDD.mapValues(
      new Function<
             Iterable<Long>,            // input: copa scores for all studies
             Tuple2<Double, Integer>    // output: (rankedProduct, N)
            >() {
      @Override
      public Tuple2<Double, Integer> call(Iterable<Long> values) {
        int N = 0;
        long products = 1;
        for (Long v : values) {
          products *= v;
          N++;
        }
        
        double rankedProduct = Math.pow( (double) products, 1.0/((double) N));
        return new Tuple2<Double, Integer>(rankedProduct, N);
      }
  });                
  return rankedProducts;
}

Code example source: mahmoudparsian/data-algorithms-book

static JavaPairRDD<String, Tuple2<Double, Integer>> computeRankedProductsUsingCombineByKey(
    JavaSparkContext context,
    JavaPairRDD<String, Long>[] ranks) {
  JavaPairRDD<String, Long> unionRDD = context.union(ranks);
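The fragment above stops right after the union step. A rough, hypothetical sketch of how the ranked products could then be computed with combineByKey (the (product, count) accumulator and the method name are assumptions, not the book's actual implementation; it assumes the same imports as the examples above):

// Hypothetical continuation: accumulate (product of ranks, rank count) per key
// in a single pass, then convert it to the geometric mean (the "ranked product").
static JavaPairRDD<String, Tuple2<Double, Integer>> rankedProductsViaCombineByKey(
    JavaPairRDD<String, Long> unionRDD) {
  JavaPairRDD<String, Tuple2<Long, Integer>> combined = unionRDD.combineByKey(
      v -> new Tuple2<Long, Integer>(v, 1),                                   // create combiner
      (acc, v) -> new Tuple2<Long, Integer>(acc._1() * v, acc._2() + 1),      // merge a value
      (a, b) -> new Tuple2<Long, Integer>(a._1() * b._1(), a._2() + b._2())); // merge combiners
  return combined.mapValues(acc ->
      new Tuple2<Double, Integer>(Math.pow(acc._1().doubleValue(), 1.0 / acc._2()), acc._2()));
}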

Code example source: mahmoudparsian/data-algorithms-book — an identical copy of the fragment above appears elsewhere in the project, so it is not repeated here.

Code example source: org.apache.crunch/crunch-spark

private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaRDD[] rdds = new JavaRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
   if (parents.get(i) instanceof PTableBase) {
    JavaPairRDD prdd = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    rdds[i] = prdd.mapPartitions(new FlatMapPairDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
   } else {
    rdds[i] = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
   }
  }
  return runtime.getSparkContext().union(rdds);
 }
}

Code example source: org.apache.crunch/crunch-spark

private JavaRDDLike<?, ?> getJavaRDDLikeInternal(SparkRuntime runtime) {
  List<PCollectionImpl<?>> parents = getParents();
  JavaPairRDD[] rdds = new JavaPairRDD[parents.size()];
  for (int i = 0; i < rdds.length; i++) {
   if (parents.get(i) instanceof PTableBase) {
    rdds[i] = (JavaPairRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
   } else {
    JavaRDD rdd = (JavaRDD) ((SparkCollection) parents.get(i)).getJavaRDDLike(runtime);
    rdds[i] = rdd.mapPartitionsToPair(new PairFlatMapDoFn(IdentityFn.getInstance(), runtime.getRuntimeContext()));
   }
  }
  return runtime.getSparkContext().union(rdds);
 }
}

Code example source: apache/crunch — the two getJavaRDDLikeInternal() methods are identical to the org.apache.crunch/crunch-spark examples above, so they are not repeated here.

Code example source: org.apache.kylin/kylin-engine-spark — identical to the apache/kylin fragment above, so it is not repeated here.

Code example source: locationtech/geowave

combinedResults = javaSC.union(combinedResults, tierMatches);
combinedResults = combinedResults.reduceByKey((id1, id2) -> id1);
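Here union() merges two sets of keyed results and reduceByKey keeps a single value per key, effectively deduplicating matches. A minimal, hypothetical sketch of the same pattern (the data and variable names are illustrative, not GeoWave's code; it assumes an existing JavaSparkContext sc and the usual imports):

// Union two keyed result sets, then keep one value per key to drop duplicates.
JavaPairRDD<String, Integer> previousMatches = sc.parallelizePairs(
    Arrays.asList(new Tuple2<String, Integer>("f1", 1), new Tuple2<String, Integer>("f2", 2)));
JavaPairRDD<String, Integer> tierMatches = sc.parallelizePairs(
    Arrays.asList(new Tuple2<String, Integer>("f2", 2), new Tuple2<String, Integer>("f3", 3)));

JavaPairRDD<String, Integer> combinedResults = sc.union(previousMatches, tierMatches);
// Keep an arbitrary (here: the first) value for each key.
combinedResults = combinedResults.reduceByKey((id1, id2) -> id1);
// combinedResults now holds exactly one entry each for f1, f2, and f3.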
