org.apache.spark.api.java.JavaPairRDD.repartitionAndSortWithinPartitions()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(7.7k)|赞(0)|评价(0)|浏览(196)

本文整理了Java中org.apache.spark.api.java.JavaPairRDD.repartitionAndSortWithinPartitions()方法的一些代码示例,展示了JavaPairRDD.repartitionAndSortWithinPartitions()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.repartitionAndSortWithinPartitions()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:repartitionAndSortWithinPartitions

JavaPairRDD.repartitionAndSortWithinPartitions介绍

暂无

代码示例

代码示例来源:origin: apache/drill

/**
 * Shuffles the input by key hash into {@code numPartitions} partitions and
 * sorts the records within each partition by {@link HiveKey}.
 *
 * @param input         the (HiveKey, BytesWritable) pairs to redistribute
 * @param numPartitions requested partition count; values {@code <= 0} are
 *                      clamped to 1
 * @return the repartitioned RDD, sorted within each partition
 */
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
  JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
 // Clamp non-positive counts: the original guard only caught negatives,
 // but new HashPartitioner(0) fails at runtime (mod-by-zero when Spark
 // computes a record's partition), so 0 must be clamped as well.
 if (numPartitions <= 0) {
  numPartitions = 1;
 }
 return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions));
}

代码示例来源:origin: org.apache.spark/spark-core_2.10

rdd.repartitionAndSortWithinPartitions(partitioner);
assertTrue(repartitioned.partitioner().isPresent());
assertEquals(repartitioned.partitioner().get(), partitioner);

代码示例来源:origin: apache/kylin

hfilerdd.repartitionAndSortWithinPartitions(new HFilePartitioner(keys),
    RowKeyWritable.RowKeyComparator.INSTANCE)
    .mapToPair(new PairFunction<Tuple2<RowKeyWritable, KeyValue>, ImmutableBytesWritable, KeyValue>() {

代码示例来源:origin: org.apache.spark/spark-core

rdd.repartitionAndSortWithinPartitions(partitioner);
assertTrue(repartitioned.partitioner().isPresent());
assertEquals(repartitioned.partitioner().get(), partitioner);

代码示例来源:origin: org.apache.spark/spark-core_2.11

rdd.repartitionAndSortWithinPartitions(partitioner);
assertTrue(repartitioned.partitioner().isPresent());
assertEquals(repartitioned.partitioner().get(), partitioner);

代码示例来源:origin: apache/drill

/**
 * Shuffles the input pairs, returning them sorted by {@link HiveKey}.
 * With {@code totalOrder} set this performs a global sort; otherwise it
 * hash-partitions and sorts only within each partition.
 *
 * @param input         the (HiveKey, BytesWritable) pairs to redistribute
 * @param numPartitions desired partition count; {@code <= 0} lets Spark
 *                      pick a default for the total-order sort
 * @return the shuffled, sorted RDD
 */
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
  JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
 if (!totalOrder) {
  // Partial order: hash by key, sort each partition independently.
  return input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions));
 }
 if (numPartitions <= 0) {
  // No explicit parallelism requested; let sortByKey choose.
  return input.sortByKey(true);
 }
 if (numPartitions > 1 && input.getStorageLevel() == StorageLevel.NONE()) {
  // Persist to disk before the multi-partition sort — presumably so the
  // input is not recomputed when sortByKey evaluates it more than once.
  // NOTE(review): rationale inferred; confirm against the caller.
  input.persist(StorageLevel.DISK_ONLY());
  sparkPlan.addCachedRDDId(input.id());
 }
 return input.sortByKey(true, numPartitions);
}

代码示例来源:origin: apache/hive

/**
 * Shuffles the input pairs sorted by {@link HiveKey}: a global sort when
 * {@code totalOrder} is set, otherwise a hash repartition with a
 * per-partition sort. If a custom shuffle serializer is configured, it is
 * installed on the resulting {@code ShuffledRDD}.
 *
 * @param input         the (HiveKey, BytesWritable) pairs to redistribute
 * @param numPartitions desired partition count; {@code <= 0} lets Spark
 *                      pick a default for the total-order sort
 * @return the shuffled, sorted RDD
 */
@Override
public JavaPairRDD<HiveKey, BytesWritable> shuffle(
  JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
 JavaPairRDD<HiveKey, BytesWritable> rdd;
 if (totalOrder) {
  if (numPartitions > 0) {
   // Compare storage levels with equals() rather than ==: StorageLevel
   // defines value equality, and relying on the cached singleton
   // identity of StorageLevel.NONE() is fragile.
   if (numPartitions > 1 && StorageLevel.NONE().equals(input.getStorageLevel())) {
    // Persist before the multi-partition global sort and register the
    // cached RDD id so the plan can release it later.
    input.persist(StorageLevel.DISK_ONLY());
    sparkPlan.addCachedRDDId(input.id());
   }
   rdd = input.sortByKey(true, numPartitions);
  } else {
   rdd = input.sortByKey(true);
  }
 } else {
  Partitioner partitioner = new HashPartitioner(numPartitions);
  rdd = input.repartitionAndSortWithinPartitions(partitioner);
 }
 if (shuffleSerializer != null) {
  if (rdd.rdd() instanceof ShuffledRDD) {
   // Both sortByKey and repartitionAndSortWithinPartitions produce a
   // ShuffledRDD; attach the custom serializer when one is configured.
   ((ShuffledRDD) rdd.rdd()).setSerializer(shuffleSerializer);
  }
 }
 return rdd;
}

代码示例来源:origin: mahmoudparsian/data-algorithms-book

JavaPairRDD<Integer, Integer> repartitioned = rdd.repartitionAndSortWithinPartitions(partitioner);
List<List<Tuple2<Integer, Integer>>> partitions = repartitioned.glom().collect();

代码示例来源:origin: mahmoudparsian/data-algorithms-book

= valueToKey.repartitionAndSortWithinPartitions(new CustomPartitioner(partitions), TupleComparatorDescending.INSTANCE);

代码示例来源:origin: uk.gov.gchq.gaffer/spark-accumulo-library

/**
 * Repartitions the operation's key/value pairs with the supplied Accumulo
 * range partitioner (sorting within each partition) and writes the result
 * out via {@link AccumuloFileOutputFormat}.
 *
 * @param operation   supplies the input pairs and the output path
 * @param partitioner aligns partitions with Accumulo key ranges
 * @throws OperationException if preparing the key/values fails
 */
@Override
protected void prepareKeyValues(final ImportKeyValueJavaPairRDDToAccumulo operation, final AccumuloKeyRangePartitioner partitioner) throws OperationException {
    operation.getInput()
            .repartitionAndSortWithinPartitions(partitioner)
            .saveAsNewAPIHadoopFile(operation.getOutputPath(), Key.class, Value.class,
                    AccumuloFileOutputFormat.class, getConfiguration(operation));
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Groups tuples sharing the same {@link IndexedKey} into the same
 * partition, sorts within each partition, and packages each key's values
 * as (key, (val1, val2, val3, ...)).
 *
 * @param rdd   input pairs keyed by IndexedKey
 * @param pkgOp the packaging operator applied per partition
 * @return the packaged result as a plain Tuple RDD
 */
public static RDD<Tuple> handleSecondarySort(
        RDD<Tuple2<IndexedKey, Tuple>> rdd, POPackage pkgOp) {
    // View the Scala RDD as a JavaPairRDD keyed by IndexedKey.
    final JavaPairRDD<IndexedKey, Tuple> pairRDD = JavaPairRDD.fromRDD(rdd,
            SparkUtil.getManifest(IndexedKey.class),
            SparkUtil.getManifest(Tuple.class));
    // Keep the existing parallelism for the repartition.
    final int numPartitions = pairRDD.partitions().size();
    // Co-locate equal indexed keys in one partition and sort within it so
    // each key's values arrive as a contiguous run.
    final JavaPairRDD<IndexedKey, Tuple> sorted =
            pairRDD.repartitionAndSortWithinPartitions(
                    new IndexedKeyPartitioner(numPartitions));
    // Accumulate each run of equal keys into a single packaged tuple.
    return sorted.mapPartitions(new AccumulateByKey(pkgOp), true).rdd();
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Shuffles the input sorted by {@link HiveKey} — globally when
 * {@code totalOrder} is set, otherwise hash-partitioned with a
 * per-partition sort — then groups each key's values via
 * {@code ShuffleFunction}.
 *
 * @param input         the (HiveKey, BytesWritable) pairs to redistribute
 * @param numPartitions desired partition count; {@code <= 0} lets Spark
 *                      pick a default for the total-order sort
 * @return pairs of HiveKey to the iterable of its values
 */
@Override
public JavaPairRDD<HiveKey, Iterable<BytesWritable>> shuffle(
  JavaPairRDD<HiveKey, BytesWritable> input, int numPartitions) {
 final JavaPairRDD<HiveKey, BytesWritable> shuffled;
 if (!totalOrder) {
  // Partial order: hash by key, sort within each partition only.
  shuffled = input.repartitionAndSortWithinPartitions(new HashPartitioner(numPartitions));
 } else if (numPartitions > 0) {
  shuffled = input.sortByKey(true, numPartitions);
 } else {
  shuffled = input.sortByKey(true);
 }
 return shuffled.mapPartitionsToPair(new ShuffleFunction());
}

代码示例来源:origin: org.apache.kylin/kylin-storage-hbase

hfilerdd.repartitionAndSortWithinPartitions(new HFilePartitioner(keys),
    RowKeyWritable.RowKeyComparator.INSTANCE)
    .mapToPair(new PairFunction<Tuple2<RowKeyWritable, KeyValue>, ImmutableBytesWritable, KeyValue>() {

代码示例来源:origin: org.apache.pig/pig

/**
 * Sorts tuples by their secondary key before the global rearrange.
 * The whole tuple becomes the shuffle key (with a null value) so the
 * secondary-key comparator can order records within each partition.
 *
 * @param rdd         the input tuples
 * @param op          carries the secondary sort order configuration
 * @param parallelism number of hash partitions for the shuffle
 * @return (IndexedKey, Tuple) pairs in secondary-key order per partition
 */
private JavaRDD<Tuple2<IndexedKey,Tuple>> handleSecondarySort(
        RDD<Tuple> rdd, POGlobalRearrangeSpark op, int parallelism) {
    // Pair each tuple with a null value — ToKeyNullValueFunction
    // presumably emits (tuple, null); NOTE(review): inferred from name.
    final RDD<Tuple2<Tuple, Object>> keyed = rdd.map(new ToKeyNullValueFunction(),
            SparkUtil.<Tuple, Object>getTuple2Manifest());
    final JavaPairRDD<Tuple, Object> asPairs = new JavaPairRDD<Tuple, Object>(keyed,
            SparkUtil.getManifest(Tuple.class),
            SparkUtil.getManifest(Object.class));
    // Hash-partition, then sort within each partition using the
    // configured secondary-key comparator.
    final JavaPairRDD<Tuple, Object> ordered = asPairs.repartitionAndSortWithinPartitions(
            new HashPartitioner(parallelism),
            new PigSecondaryKeyComparatorSpark(op.getSecondarySortOrder()));
    // Discard the null values and re-key as (IndexedKey, Tuple).
    return ordered.keys().map(new ToKeyValueFunction(op));
}

代码示例来源:origin: org.apache.pig/pig

/**
 * Sorts tuples by their secondary key ahead of the reduce-by step.
 * Each tuple itself is used as the shuffle key (with a null value) so the
 * secondary-key comparator can order records within each partition.
 *
 * @param rdd         the input tuples
 * @param op          carries the secondary sort order configuration
 * @param parallelism number of hash partitions for the shuffle
 * @return (IndexedKey, Tuple) pairs in secondary-key order per partition
 */
private JavaRDD<Tuple2<IndexedKey, Tuple>> handleSecondarySort(
        RDD<Tuple> rdd, POReduceBySpark op, int parallelism) {
    // Turn every tuple into a (tuple, null) pair — ToKeyNullValueFunction
    // presumably supplies the null value; NOTE(review): inferred from name.
    final RDD<Tuple2<Tuple, Object>> tupleKeyed = rdd.map(new ToKeyNullValueFunction(),
            SparkUtil.<Tuple, Object>getTuple2Manifest());
    final JavaPairRDD<Tuple, Object> pairs = new JavaPairRDD<Tuple, Object>(tupleKeyed,
            SparkUtil.getManifest(Tuple.class),
            SparkUtil.getManifest(Object.class));
    // Shuffle by hash, sorting each partition with the secondary-key
    // comparator derived from the operator's sort order.
    final JavaPairRDD<Tuple, Object> bySecondaryKey =
            pairs.repartitionAndSortWithinPartitions(
                    new HashPartitioner(parallelism),
                    new PigSecondaryKeyComparatorSpark(op.getSecondarySortOrder()));
    // Keep only the keys (the tuples) and map to (IndexedKey, Tuple).
    return bySecondaryKey.keys().map(new ToKeyValueFunction(op));
}

代码示例来源:origin: seznam/euphoria

.union(rightPair)
.setName(operator.getName() + "::union-inputs")
.repartitionAndSortWithinPartitions(partitioner, comparator)
.setName(operator.getName() + "::sort-by-key-and-side")
.mapPartitions(

代码示例来源:origin: seznam/euphoria

.repartitionAndSortWithinPartitions(groupingPartitioner, comparator)
.setName(operator.getName() + "::sort");

代码示例来源:origin: seznam/euphoria

.mapToPair(t -> new Tuple2<>(new KeyedWindowValue<>(t._1, t._2), Empty.get()))
.setName(operator.getName() + "::create-composite-key")
.repartitionAndSortWithinPartitions(
  partitioner, new SecondarySortComparator<>(operator.getValueComparator()))
.setName(operator.getName() + "::secondary-sort")
.repartitionAndSortWithinPartitions(partitioner)
.setName(operator.getName() + "::sort")
.mapPartitionsToPair(ReduceByKeyIterator::new)

相关文章

微信公众号

最新文章

更多

JavaPairRDD类方法