Usage of org.apache.spark.api.java.JavaRDD.repartition() with code examples

This article collects a number of code examples for the Java method org.apache.spark.api.java.JavaRDD.repartition(), showing how JavaRDD.repartition() is used in practice. The examples were extracted from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should be useful as references. Details of the JavaRDD.repartition() method:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: repartition

About JavaRDD.repartition

repartition(int numPartitions) returns a new RDD that has exactly numPartitions partitions. It can either increase or decrease the level of parallelism of an RDD, and internally it redistributes the data with a full shuffle. When only decreasing the number of partitions, coalesce(numPartitions) can avoid the shuffle.
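
As a quick orientation before the collected examples, here is a minimal, self-contained sketch; the class name, sample values, and partition counts are invented for the illustration:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class RepartitionExample {
 public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("RepartitionExample").setMaster("local[2]");
  JavaSparkContext sc = new JavaSparkContext(conf);

  // Start with 2 partitions.
  JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 2);

  // repartition() shuffles the data and returns an RDD with exactly 4 partitions.
  JavaRDD<Integer> wider = numbers.repartition(4);
  System.out.println(wider.getNumPartitions()); // 4

  // When only reducing the partition count, coalesce() can skip the shuffle.
  JavaRDD<Integer> narrower = wider.coalesce(2);
  System.out.println(narrower.getNumPartitions()); // 2

  sc.stop();
 }
}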

Code examples

Code example source: SeldonIO/seldon-server

JavaRDD<String> dataSet = jsc.textFile(JobUtils.getSourceDirFromDate(cmdLineArgs.input_path_pattern, cmdLineArgs.input_date_string)).repartition(4);
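
In this job, repartitioning to 4 right after textFile() decouples the downstream parallelism from the number of input splits in the source directory.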

Code example source: org.apache.spark/spark-core (the identical test also appears in spark-core_2.11 and spark-core_2.10)

@Test
public void repartition() {
 // Growing the number of partitions (2 -> 4)
 JavaRDD<Integer> in1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 2);
 JavaRDD<Integer> repartitioned1 = in1.repartition(4);
 List<List<Integer>> result1 = repartitioned1.glom().collect();
 assertEquals(4, result1.size());
 for (List<Integer> l : result1) {
  assertFalse(l.isEmpty());
 }
 // Shrinking the number of partitions (4 -> 2)
 JavaRDD<Integer> in2 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), 4);
 JavaRDD<Integer> repartitioned2 = in2.repartition(2);
 List<List<Integer>> result2 = repartitioned2.glom().collect();
 assertEquals(2, result2.size());
 for (List<Integer> l: result2) {
  assertFalse(l.isEmpty());
 }
}
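
glom() turns each partition into a List of its elements, so the size of the collected result equals the partition count; the assertions verify both the new number of partitions and that repartition() left no partition empty.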

Code example source: com.davidbracewell/mango

@Override
public SparkStream<T> repartition(int numPartitions) {
 // Delegate to the wrapped JavaRDD and re-wrap the result in the stream abstraction.
 return new SparkStream<>(rdd.repartition(numPartitions));
}

Code example source: deepspark/deepspark

public void prepareLocal(JavaRDD<Record> data, final String output, int workerSize) {
  JavaRDD<Record> reparted_data = data.repartition(workerSize);
  // ...

Code example source: deepspark/deepspark

public void prepareLocalByte(JavaRDD<ByteRecord> data, final String output, int workerSize) {
  JavaRDD<ByteRecord> reparted_data = data.repartition(workerSize);
  // ...
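
In both helpers, repartitioning to workerSize aligns the number of partitions with the number of workers, so each worker ends up processing one slice of the records.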

Code example source: phuonglh/vn.vitk

break;
case TEXT:
  toTaggedSentence(output).repartition(1).saveAsTextFile(outputFileName);
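
repartition(1) funnels all records into a single partition, so saveAsTextFile() emits a single part file. Because this forces the whole dataset through one task, coalesce(1) is a common alternative when only shrinking the partition count, since it avoids the full shuffle.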

Code example source: edu.usc.ir/age-predictor-cli

}).repartition(8);

Code example source: uber/marmaray

// desiredDigit is the number of decimal digits in partNum (for partNum >= 1);
// digitNum is padded up to at least DEFAULT_DIGIT_NUM.
final int desiredDigit = (int) Math.floor(Math.log10(partNum) + 1);
this.digitNum = desiredDigit > DEFAULT_DIGIT_NUM ? desiredDigit : DEFAULT_DIGIT_NUM;
final JavaRDD<String> dataRepartitioned = convertedData.repartition(partNum);
final JavaRDD<String> dataToWrite;
if (this.conf.isColumnHeader()) {
 // ...

Code example source: cloudera-labs/envelope

@Override
public Dataset<Row> read() throws Exception {
 JavaRDD<Long> baseRDD = Contexts.getSparkSession().range(tasks).javaRDD().repartition(tasks);

 JavaRDD<Row> fixRDD = baseRDD.flatMap(new GenerateFIXMessages(ordersPerTask));

 StructType schema = DataTypes.createStructType(Lists.newArrayList(
   DataTypes.createStructField("fix", DataTypes.StringType, false)));

 Dataset<Row> fixDF = Contexts.getSparkSession().createDataFrame(fixRDD, schema);

 return fixDF;
}
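
Repartitioning the range to tasks partitions spreads the seed values across exactly tasks tasks, so each one generates its own batch of ordersPerTask FIX messages in parallel before the rows are wrapped in a single-column DataFrame.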

Code example source: org.qcri.rheem/rheem-iejoin

// Excerpt truncated at extraction: each map(...) lambda builds a Tuple5 from
// in.getHeadTupleValue(), in.getTailTupleData().getValue(), refMinMax._1()
// and refMinMax._2(), and each resulting RDD is collapsed to one partition.
JavaRDD<Tuple5<Long, Type0, Type0, Type1, Type1>> rdd2TinyObjects = listObjectDataRDD2
    .map(in -> {
        // ... (truncated Tuple5 construction)
    }).repartition(1);
