Usage of the org.apache.spark.api.java.JavaRDD.mapToPair() Method, with Code Examples


This article collects code examples of the Java method org.apache.spark.api.java.JavaRDD.mapToPair() and shows how it is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of JavaRDD.mapToPair() are as follows:
Package: org.apache.spark.api.java
Class: JavaRDD
Method: mapToPair

About JavaRDD.mapToPair

mapToPair applies a PairFunction&lt;T, K, V&gt; to each element of a JavaRDD&lt;T&gt; and returns a JavaPairRDD&lt;K, V&gt;, which makes key-based operations such as reduceByKey, join, and collectAsMap available on the result.
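
As a minimal sketch of the call (the sample records and the tab delimiter below are assumptions chosen for illustration):

import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

JavaSparkContext sc = new JavaSparkContext("local[*]", "mapToPairExample");
JavaRDD<String> lines = sc.parallelize(Arrays.asList("a\t1", "b\t2", "a\t3"));
// Key each tab-separated record by its first field.
JavaPairRDD<String, String> pairs = lines.mapToPair(line -> {
 String[] parts = line.split("\t", 2);
 return new Tuple2<>(parts[0], parts[1]);
});
System.out.println(pairs.collect());  // [(a,1), (b,2), (a,3)]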

Code Examples

Code example source: origin: OryxProject/oryx

private static JavaPairRDD<String,float[]> readFeaturesRDD(JavaSparkContext sparkContext, Path path) {
 log.info("Loading features RDD from {}", path);
 JavaRDD<String> featureLines = sparkContext.textFile(path.toString());
 return featureLines.mapToPair(line -> {
  List<?> update = TextUtils.readJSON(line, List.class);
  String key = update.get(0).toString();
  float[] vector = TextUtils.convertViaJSON(update.get(1), float[].class);
  return new Tuple2<>(key, vector);
 });
}
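
Given the parsing logic above, each input line is expected to be a two-element JSON array whose first element is the key and whose second is the feature vector, e.g. (a hypothetical record):

["user1",[0.5,-1.25,3.0]]

TextUtils.readJSON and TextUtils.convertViaJSON are JSON helpers from the Oryx codebase, not Spark APIs.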

Code example source: origin: databricks/learning-spark

public void run(String master, String csv1, String csv2) throws Exception {
 JavaSparkContext sc = new JavaSparkContext(
  master, "basicjoincsv", System.getenv("SPARK_HOME"), System.getenv("JARS"));
 JavaRDD<String> csvFile1 = sc.textFile(csv1);
 JavaRDD<String> csvFile2 = sc.textFile(csv2);
 JavaPairRDD<Integer, String[]> keyedRDD1 = csvFile1.mapToPair(new ParseLine());
 JavaPairRDD<Integer, String[]> keyedRDD2 = csvFile2.mapToPair(new ParseLine());
 JavaPairRDD<Integer, Tuple2<String[], String[]>> result = keyedRDD1.join(keyedRDD2);
 List<Tuple2<Integer, Tuple2<String[], String[]>>> resultCollection = result.collect();
}

Code example source: origin: databricks/learning-spark

public static final JavaPairRDD<String, Long> ipAddressCount(
  JavaRDD<ApacheAccessLog> accessLogRDD) {
 return accessLogRDD
  .mapToPair(new IpTuple())
  .reduceByKey(new LongSumReducer());
}
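
The helper classes used in these counting examples are not included in the snippets. A plausible sketch of what they look like (assuming ApacheAccessLog exposes a getIpAddress() accessor; both classes here are hypothetical reconstructions):

import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

// Hypothetical: maps each access-log record to an (ipAddress, 1L) pair.
class IpTuple implements PairFunction<ApacheAccessLog, String, Long> {
 @Override
 public Tuple2<String, Long> call(ApacheAccessLog log) {
  return new Tuple2<>(log.getIpAddress(), 1L);
 }
}

// Hypothetical: sums the per-key counts during reduceByKey.
class LongSumReducer implements Function2<Long, Long, Long> {
 @Override
 public Long call(Long a, Long b) {
  return a + b;
 }
}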

Code example source: origin: databricks/learning-spark

public static final JavaPairRDD<Integer, Long> responseCodeCount(
  JavaRDD<ApacheAccessLog> accessLogRDD) {
 return accessLogRDD
  .mapToPair(new ResponseCodeTuple())
  .reduceByKey(new LongSumReducer());
}

Code example source: origin: databricks/learning-spark

public static final JavaPairRDD<String, Long> endpointCount(
  JavaRDD<ApacheAccessLog> accessLogRDD) {
 return accessLogRDD
  .mapToPair(new EndPointTuple())
  .reduceByKey(new LongSumReducer());
}

Code example source: origin: org.apache.spark/spark-core (the identical test also appears in the spark-core_2.10 and spark-core_2.11 artifacts)

@SuppressWarnings("unchecked")
@Test
public void mapOnPairRDD() {
 JavaRDD<Integer> rdd1 = sc.parallelize(Arrays.asList(1,2,3,4));
 JavaPairRDD<Integer, Integer> rdd2 = rdd1.mapToPair(i -> new Tuple2<>(i, i % 2));
 JavaPairRDD<Integer, Integer> rdd3 = rdd2.mapToPair(in -> new Tuple2<>(in._2(), in._1()));
 assertEquals(Arrays.asList(
  new Tuple2<>(1, 1),
  new Tuple2<>(0, 2),
  new Tuple2<>(1, 3),
  new Tuple2<>(0, 4)), rdd3.collect());
}

Code example source: origin: org.apache.spark/spark-core_2.11 (the identical regression test also appears in the spark-core and spark-core_2.10 artifacts)

@Test
public void collectAsMapWithIntArrayValues() {
 // Regression test for SPARK-1040
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1));
 JavaPairRDD<Integer, int[]> pairRDD = rdd.mapToPair(x -> new Tuple2<>(x, new int[]{x}));
 pairRDD.collect();  // Works fine
 pairRDD.collectAsMap();  // Used to crash with ClassCastException
}

Code example source: origin: org.apache.spark/spark-core_2.11 (the same test also appears in the spark-core and spark-core_2.10 artifacts; some copies write the double conversion as x -> 1.0 * x instead of Integer::doubleValue)

@Test
public void map() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 JavaDoubleRDD doubles = rdd.mapToDouble(Integer::doubleValue).cache();
 doubles.collect();
 JavaPairRDD<Integer, Integer> pairs = rdd.mapToPair(x -> new Tuple2<>(x, x)).cache();
 pairs.collect();
 JavaRDD<String> strings = rdd.map(Object::toString).cache();
 strings.collect();
}
