Usage of org.apache.spark.api.java.JavaPairRDD.getNumPartitions(), with code examples


This article collects Java code examples of the org.apache.spark.api.java.JavaPairRDD.getNumPartitions() method and shows how it is used in practice. The examples are extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as a practical reference. Details of the method:
Package: org.apache.spark.api.java
Class: JavaPairRDD
Method: getNumPartitions

About JavaPairRDD.getNumPartitions

Returns the number of partitions of this RDD. The call is cheap: it reads partition metadata only and does not trigger a Spark job.
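As a quick orientation before the project excerpts, here is a minimal self-contained sketch of the call; the local master, application name, class name, and sample data are illustrative assumptions, not taken from any quoted project:

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class GetNumPartitionsDemo {
 public static void main(String[] args) {
  // Local context for illustration only
  JavaSparkContext sc = new JavaSparkContext("local[2]", "getNumPartitions-demo");

  // Build a pair RDD with an explicit partition count of 4
  JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(
   Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2), new Tuple2<>("c", 3)), 4);

  // Reads partition metadata only; no job is launched
  System.out.println(pairs.getNumPartitions()); // prints 4

  sc.close();
 }
}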

Code examples

Example source: apache/hive

@Override
public JavaPairRDD<KO, VO> transform(
 JavaPairRDD<KI, VI> input) {
 if (caching) {
  if (cachedRDD == null) {
   cachedRDD = doTransform(input);
   cachedRDD.persist(StorageLevel.MEMORY_AND_DISK());
  }
  return cachedRDD.setName(this.name + " (" + cachedRDD.getNumPartitions() + ", cached)");
 } else {
  JavaPairRDD<KO, VO> rdd = doTransform(input);
  return rdd.setName(this.name + " (" + rdd.getNumPartitions() + ")");
 }
}
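A design note on the excerpt above: embedding getNumPartitions() in the name passed to setName() makes the partition count visible wherever the RDD's name is displayed, such as the Spark web UI, and persisting with MEMORY_AND_DISK before reuse ensures the transformed RDD is computed only once.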

Example source: apache/hive

@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork)
  throws Exception {
 JobConf jobConf = cloneJobConf(mapWork);
 Class ifClass = getInputFormat(jobConf, mapWork);
 sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));
 JavaPairRDD<WritableComparable, Writable> hadoopRDD;
 if (mapWork.getNumMapTasks() != null) {
  jobConf.setNumMapTasks(mapWork.getNumMapTasks());
  hadoopRDD = sc.hadoopRDD(jobConf, ifClass,
    WritableComparable.class, Writable.class, mapWork.getNumMapTasks());
 } else {
  hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
 }
 boolean toCache = false/*cloneToWork.containsKey(mapWork)*/;
 String tables = mapWork.getAllRootOperators().stream()
     .filter(op -> op instanceof TableScanOperator)
     .map(ts -> ((TableScanDesc) ts.getConf()).getAlias())
     .collect(Collectors.joining(", "));
 String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions() +
     (toCache ? ", cached)" : ")");
 // Caching is disabled for MapInput due to HIVE-8920
 MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
 return result;
}
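One caveat on this excerpt: for Hadoop-backed RDDs, getNumPartitions() reflects the number of input splits actually produced. The numMapTasks value passed to hadoopRDD() corresponds to Spark's minPartitions parameter, which is only a suggested minimum, not an exact count.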

Example source: org.apache.spark/spark-core_2.11 (the same test appears verbatim in org.apache.spark/spark-core_2.10 and org.apache.spark/spark-core)

@Test
public void getNumPartitions(){
 JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 3);
 JavaDoubleRDD rdd2 = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0), 2);
 JavaPairRDD<String, Integer> rdd3 = sc.parallelizePairs(
  Arrays.asList(
   new Tuple2<>("a", 1),
   new Tuple2<>("aa", 2),
   new Tuple2<>("aaa", 3)
  ),
  2);
 assertEquals(3, rdd1.getNumPartitions());
 assertEquals(2, rdd2.getNumPartitions());
 assertEquals(2, rdd3.getNumPartitions());
}


Example source: apache/hive

// Excerpt: building union RDDs and naming them with their partition counts.
// The setCallSite(...) calls were truncated in the excerpt and are reconstructed from context.
sc.sc().setCallSite(CallSite.apply("UnionRDD (" + rdd.name() + ", " +
  prevRDD.name() + ")", ""));
rdd = rdd.union(prevRDD);
rdd.setName("UnionRDD (" + rdd.getNumPartitions() + ")");
// ... later, the per-branch RDDs are folded into a final union:
sc.sc().setCallSite(CallSite.apply("UnionRDD (" + finalRDD.name() + ", " + rdd.name() + ")",
  ""));
finalRDD = finalRDD.union(rdd);
finalRDD.setName("UnionRDD (" + finalRDD.getNumPartitions() + ")");
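The partition count is re-read after each union() because, unless both inputs share the same partitioner, union simply concatenates the inputs' partitions, so the result's partition count is the sum of the two inputs' counts.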

Example source: cloudera-labs/envelope

@Override
public void configure(Config config, JavaPairRDD<Row, Row> rdd) {
 this.numPartitions = rdd.getNumPartitions();
}

Example source: cloudera-labs/envelope

// Excerpt from a switch over the configured partitioner type;
// the "hash" case label is reconstructed from context.
case "hash":
 partitioner = new HashPartitioner(rdd.getNumPartitions());
 break;
case "range":
 Ordering<Row> rowOrdering = Ordering$.MODULE$.<Row>comparatorToOrdering(new RowComparator());
 ClassTag<Row> rowClassTag = ClassTag$.MODULE$.<Row>apply(Row.class);
 partitioner = new RangePartitioner<Row, Row>(rdd.getNumPartitions(), rdd.rdd(), true, rowOrdering, rowClassTag);
 break;
default:
 // remainder of the switch elided in the source excerpt

Example source: locationtech/geowave

// Size a partitioner from the larger of the two inputs, with 50% headroom
final int leftPartCount = leftIndex.getNumPartitions();
final int rightPartCount = rightIndex.getNumPartitions();
final int highestPartCount = (leftPartCount > rightPartCount) ? leftPartCount : rightPartCount;
final int largePartitionerCount = (int) (1.5 * highestPartCount);
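Applied to a join, the same sizing arithmetic might look like the following sketch. The joinWithHeadroom helper and the choice of HashPartitioner are illustrative assumptions, not GeoWave's actual code:

import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

import scala.Tuple2;

public final class JoinSizing {
 // Hypothetical helper: size the join by the larger input, plus 50% headroom
 public static <K, V, W> JavaPairRDD<K, Tuple2<V, W>> joinWithHeadroom(
   JavaPairRDD<K, V> left, JavaPairRDD<K, W> right) {
  int target = (int) (1.5 * Math.max(left.getNumPartitions(), right.getNumPartitions()));
  return left.join(right, new HashPartitioner(target));
 }
}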

Example source: seznam/euphoria

// group using the input's current parallelism
groupingPartitioner = new HashPartitioner(tuples.getNumPartitions());
