org.apache.spark.api.java.JavaPairRDD.mapPartitionsToPair()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(8.8k)|赞(0)|评价(0)|浏览(102)

本文整理了Java中org.apache.spark.api.java.JavaPairRDD.mapPartitionsToPair()方法的一些代码示例,展示了JavaPairRDD.mapPartitionsToPair()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaPairRDD.mapPartitionsToPair()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaPairRDD
类名称:JavaPairRDD
方法名:mapPartitionsToPair

JavaPairRDD.mapPartitionsToPair介绍

暂无

代码示例

代码示例来源:origin: apache/hive

@Override
public JavaPairRDD<HiveKey, BytesWritable> doTransform(
  JavaPairRDD<HiveKey, V> input) {
 // Apply the reduce-side function once per partition rather than per record.
 final JavaPairRDD<HiveKey, BytesWritable> reduced = input.mapPartitionsToPair(reduceFunc);
 return reduced;
}

代码示例来源:origin: apache/hive

@Override
public JavaPairRDD<HiveKey, BytesWritable> doTransform(
  JavaPairRDD<BytesWritable, BytesWritable> input) {
 // Run the map-side function over each partition in a single pass.
 final JavaPairRDD<HiveKey, BytesWritable> mapped = input.mapPartitionsToPair(mapFunc);
 return mapped;
}

代码示例来源:origin: apache/drill

@Override
public JavaPairRDD<HiveKey, BytesWritable> doTransform(
  JavaPairRDD<BytesWritable, BytesWritable> input) {
 // Transform raw (key, value) bytes into HiveKey-keyed output, one call per partition.
 final JavaPairRDD<HiveKey, BytesWritable> result = input.mapPartitionsToPair(mapFunc);
 return result;
}

代码示例来源:origin: apache/drill

@Override
public JavaPairRDD<HiveKey, BytesWritable> doTransform(
  JavaPairRDD<HiveKey, V> input) {
 // Delegate the whole partition iterator to the reduce function.
 final JavaPairRDD<HiveKey, BytesWritable> result = input.mapPartitionsToPair(reduceFunc);
 return result;
}

代码示例来源:origin: apache/kylin

.mapPartitionsToPair(new MultiOutputFunction(cubeName, metaUrl, sConf, samplingPercent));

代码示例来源:origin: apache/tinkerpop

public static JavaPairRDD<Object, VertexWritable> applyGraphFilter(final JavaPairRDD<Object, VertexWritable> graphRDD, final GraphFilter graphFilter) {
    // Clone the filter once per partition so every task has its own instance,
    // then keep only the vertices that survive filtering. The trailing 'true'
    // tells Spark the keys are untouched, so partitioning is preserved.
    return graphRDD.mapPartitionsToPair(iterator -> {
        final GraphFilter localFilter = graphFilter.clone();
        return IteratorUtils.filter(iterator, vertexTuple -> vertexTuple._2().get().applyGraphFilter(localFilter).isPresent());
    }, true);
}

代码示例来源:origin: apache/tinkerpop

public static <K, V> JavaPairRDD<K, V> executeMap(
        final JavaPairRDD<Object, VertexWritable> graphRDD, final MapReduce<K, V, ?, ?, ?> mapReduce,
        final Configuration graphComputerConfiguration) {
    // Execute the map stage of the MapReduce job across every graph partition.
    JavaPairRDD<K, V> mapped = graphRDD.mapPartitionsToPair(iterator -> {
        // Serializer configuration must be (re)applied on each executor before use.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, ?, ?, ?> workerMapReduce =
                MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new MapIterator<>(workerMapReduce, iterator);
    });
    // If a key ordering was requested, sort the map output into a single partition.
    if (mapReduce.getMapKeySort().isPresent()) {
        mapped = mapped.sortByKey(mapReduce.getMapKeySort().get(), true, 1);
    }
    return mapped;
}

代码示例来源:origin: apache/tinkerpop

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeCombine(final JavaPairRDD<K, V> mapRDD,
                                final Configuration graphComputerConfiguration) {
    // Run the combine stage partition-by-partition over the map output.
    return mapRDD.mapPartitionsToPair(iterator -> {
        // Ensure the Kryo shim is configured on the executor before deserializing.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> workerMapReduce =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new CombineIterator<>(workerMapReduce, iterator);
    });
}

代码示例来源:origin: apache/tinkerpop

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
      final JavaPairRDD<K, V> mapOrCombineRDD, final MapReduce<K, V, OK, OV, ?> mapReduce,
      final Configuration graphComputerConfiguration) {
    // Group values by key, then run the reduce stage once per partition.
    JavaPairRDD<OK, OV> reduced = mapOrCombineRDD.groupByKey().mapPartitionsToPair(iterator -> {
        // Serializer configuration must be applied on each executor before use.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> workerMapReduce =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new ReduceIterator<>(workerMapReduce, iterator);
    });
    // Optionally sort the reduce output keys into a single partition.
    if (mapReduce.getReduceKeySort().isPresent()) {
        reduced = reduced.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
    }
    return reduced;
  }
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

@Override
public JavaPairRDD<HiveKey, BytesWritable> transform(
  JavaPairRDD<HiveKey, Iterable<BytesWritable>> input) {
 return input.mapPartitionsToPair(reduceFunc);
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

@Override
public JavaPairRDD<HiveKey, BytesWritable> transform(
  JavaPairRDD<BytesWritable, BytesWritable> input) {
 return input.mapPartitionsToPair(mapFunc);
}

代码示例来源:origin: apache/tinkerpop

.mapPartitionsToPair(partitionIterator -> {
  KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);

代码示例来源:origin: ai.grakn/grakn-kb

public static JavaPairRDD<Object, VertexWritable> applyGraphFilter(final JavaPairRDD<Object, VertexWritable> graphRDD, final GraphFilter graphFilter) {
    // Each partition clones the filter (GraphFilter is not shared across tasks)
    // and drops vertices the filter rejects; 'true' preserves partitioning
    // since keys never change.
    return graphRDD.mapPartitionsToPair(iterator -> {
        final GraphFilter taskFilter = graphFilter.clone();
        return IteratorUtils.filter(iterator, entry -> entry._2().get().applyGraphFilter(taskFilter).isPresent());
    }, true);
}

代码示例来源:origin: org.apache.tinkerpop/spark-gremlin

public static JavaPairRDD<Object, VertexWritable> applyGraphFilter(final JavaPairRDD<Object, VertexWritable> graphRDD, final GraphFilter graphFilter) {
    // Filter vertices partition-by-partition with a per-task clone of the
    // filter; partitioning is preserved because the keys are not modified.
    return graphRDD.mapPartitionsToPair(iterator -> {
        final GraphFilter perPartitionFilter = graphFilter.clone();
        return IteratorUtils.filter(iterator, pair -> pair._2().get().applyGraphFilter(perPartitionFilter).isPresent());
    }, true);
}

代码示例来源:origin: ai.grakn/grakn-kb

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeCombine(final JavaPairRDD<K, V> mapRDD,
                                final Configuration graphComputerConfiguration) {
    // Combine stage: pre-aggregate the map output within each partition.
    return mapRDD.mapPartitionsToPair(iterator -> {
        // Apply serializer configuration on the executor before constructing the job.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> job =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new CombineIterator<>(job, iterator);
    });
}

代码示例来源:origin: org.apache.tinkerpop/spark-gremlin

public static <K, V> JavaPairRDD<K, V> executeMap(
        final JavaPairRDD<Object, VertexWritable> graphRDD, final MapReduce<K, V, ?, ?, ?> mapReduce,
        final Configuration graphComputerConfiguration) {
    // Map stage: emit (K, V) pairs from each graph partition.
    JavaPairRDD<K, V> result = graphRDD.mapPartitionsToPair(iterator -> {
        // The Kryo shim must be configured on every executor before deserialization.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, ?, ?, ?> job =
                MapReduce.<MapReduce<K, V, ?, ?, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new MapIterator<>(job, iterator);
    });
    // Sort the map output into one partition when a key ordering is requested.
    if (mapReduce.getMapKeySort().isPresent()) {
        result = result.sortByKey(mapReduce.getMapKeySort().get(), true, 1);
    }
    return result;
}

代码示例来源:origin: org.apache.tinkerpop/spark-gremlin

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeCombine(final JavaPairRDD<K, V> mapRDD,
                                final Configuration graphComputerConfiguration) {
    // Pre-aggregate (combine) map output inside each partition before the shuffle.
    return mapRDD.mapPartitionsToPair(iterator -> {
        // Re-apply serializer configuration on the worker side.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> combineJob =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new CombineIterator<>(combineJob, iterator);
    });
}

代码示例来源:origin: org.apache.tinkerpop/spark-gremlin

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
      final JavaPairRDD<K, V> mapOrCombineRDD, final MapReduce<K, V, OK, OV, ?> mapReduce,
      final Configuration graphComputerConfiguration) {
    // Reduce stage: shuffle values under their key, then reduce each partition.
    JavaPairRDD<OK, OV> result = mapOrCombineRDD.groupByKey().mapPartitionsToPair(iterator -> {
        // Worker-side serializer configuration must happen before deserialization.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> reduceJob =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new ReduceIterator<>(reduceJob, iterator);
    });
    // Sort reduce output into a single partition if a key ordering was requested.
    if (mapReduce.getReduceKeySort().isPresent()) {
        result = result.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
    }
    return result;
  }
}

代码示例来源:origin: ai.grakn/grakn-kb

public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(
      final JavaPairRDD<K, V> mapOrCombineRDD, final MapReduce<K, V, OK, OV, ?> mapReduce,
      final Configuration graphComputerConfiguration) {
    // Group values by key, then apply the reduce stage once per partition.
    JavaPairRDD<OK, OV> reduced = mapOrCombineRDD.groupByKey().mapPartitionsToPair(iterator -> {
        // Configure the Kryo shim on the executor before touching serialized data.
        KryoShimServiceLoader.applyConfiguration(graphComputerConfiguration);
        final MapReduce<K, V, OK, OV, ?> reduceJob =
                MapReduce.<MapReduce<K, V, OK, OV, ?>>createMapReduce(HadoopGraph.open(graphComputerConfiguration), graphComputerConfiguration);
        return new ReduceIterator<>(reduceJob, iterator);
    });
    // Apply the optional key ordering, collapsing output into one partition.
    if (mapReduce.getReduceKeySort().isPresent()) {
        reduced = reduced.sortByKey(mapReduce.getReduceKeySort().get(), true, 1);
    }
    return reduced;
  }
}

代码示例来源:origin: cloudera-labs/envelope

private JavaRDD<Row> planMutationsByKey(Dataset<Row> arriving, List<String> keyFieldNames,
                    Config plannerConfig, Config outputConfig) {
 // Key each arriving row, group rows sharing a key, pair each group with the
 // existing rows for that key (one lookup per partition), then plan the
 // mutations for every key.
 JavaPairRDD<Row, Row> keyed =
   arriving.javaRDD().keyBy(new ExtractKeyFunction(keyFieldNames, accumulators));
 JavaPairRDD<Row, Iterable<Row>> grouped =
   keyed.groupByKey(getPartitioner(keyed));
 JavaPairRDD<Row, Tuple2<Iterable<Row>, Iterable<Row>>> joined =
   grouped.mapPartitionsToPair(new JoinExistingForKeysFunction(outputConfig, keyFieldNames, accumulators));
 return joined.flatMap(new PlanForKeyFunction(plannerConfig, accumulators));
}

相关文章

微信公众号

最新文章

更多

JavaPairRDD类方法