Usage of the org.apache.spark.api.java.JavaSparkContext.parallelize() method, with code examples

x33g5p2x · reposted on 2022-01-21 · category: Other

This article collects a number of Java code examples for the org.apache.spark.api.java.JavaSparkContext.parallelize() method and shows how it is used in practice. The examples come from selected projects on GitHub, Stack Overflow, Maven and similar platforms, so they should serve as a useful reference. The details of JavaSparkContext.parallelize() are as follows:
Package: org.apache.spark.api.java
Class: JavaSparkContext
Method: parallelize

About JavaSparkContext.parallelize

parallelize(List<T> list) distributes a local Java collection to form a JavaRDD<T>; the overload parallelize(List<T> list, int numSlices) additionally sets how many partitions (slices) the data is split into.
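
As a quick orientation, here is a minimal sketch of the call pattern (the class name, application name and local master below are illustrative choices, not taken from the projects quoted later):

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class ParallelizeSketch {
 public static void main(String[] args) {
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("parallelize-sketch");
  JavaSparkContext sc = new JavaSparkContext(conf);

  // Without a second argument Spark picks a default number of partitions;
  // with one, the collection is split into exactly that many slices.
  JavaRDD<Integer> defaultSlices = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
  JavaRDD<Integer> fourSlices = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 4);

  System.out.println(defaultSlices.getNumPartitions() + " vs " + fourSlices.getNumPartitions());
  sc.stop();
 }
}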

Code examples

Code example source: databricks/learning-spark

import java.util.Arrays;

import org.apache.commons.lang.StringUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class BasicMap {
 public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
   master = args[0];
  } else {
   master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
   master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Distribute a local list as an RDD, square each element, and print the collected result.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> result = rdd.map(
   new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  System.out.println(StringUtils.join(result.collect(), ","));
 }
}
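
On newer Spark versions with Java 8 or later, the anonymous Function class above is usually written as a lambda. A brief sketch reusing the sc variable from the example (illustrative, not part of the original project):

JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
// org.apache.spark.api.java.function.Function has a single abstract method, so a lambda fits.
JavaRDD<Integer> squared = rdd.map(x -> x * x);
System.out.println(StringUtils.join(squared.collect(), ","));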

Code example source: org.apache.spark/spark-core_2.11

@Test
public void mapPartitions() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
   int sum = 0;
   while (iter.hasNext()) {
    sum += iter.next();
   }
   return Collections.singletonList(sum).iterator();
  });
 assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: org.apache.spark/spark-core

@Test
public void countApproxDistinct() {
 List<Integer> arrayData = new ArrayList<>();
 int size = 100;
 for (int i = 0; i < 100000; i++) {
  arrayData.add(i % size);
 }
 JavaRDD<Integer> simpleRdd = sc.parallelize(arrayData, 10);
 assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.05) - size) / (size * 1.0)) <= 0.1);
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void mapPartitionsWithIndex() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
 JavaRDD<Integer> partitionSums = rdd.mapPartitionsWithIndex((index, iter) -> {
   int sum = 0;
   while (iter.hasNext()) {
    sum += iter.next();
   }
   return Collections.singletonList(sum).iterator();
  }, false);
 assertEquals("[3, 7]", partitionSums.collect().toString());
}

Code example source: databricks/learning-spark

import java.util.Arrays;

import org.apache.commons.lang.StringUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

// Enclosing class added for completeness; the name is inferred from the "basicmapfilter" app name.
public class BasicMapFilter {
 public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
   master = args[0];
  } else {
   master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
   master, "basicmapfilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Distribute a local list, square each element, then keep every value except 1.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> squared = rdd.map(
   new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  JavaRDD<Integer> result = squared.filter(
   new Function<Integer, Boolean>() { public Boolean call(Integer x) { return x != 1; } });
  System.out.println(StringUtils.join(result.collect(), ","));
 }
}

Code example source: org.apache.spark/spark-mllib_2.11

@Test
public void runRidgeRegressionUsingStaticMethods() {
 int numExamples = 50;
 int numFeatures = 20;
 List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0);

 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples));
 List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples);

 RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0);
 double unRegularizedErr = predictionError(validationData, model);

 model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.1);
 double regularizedErr = predictionError(validationData, model);

 Assert.assertTrue(regularizedErr < unRegularizedErr);
}

Code example source: org.apache.spark/spark-core_2.11

@Test
public void toLocalIterator() {
 List<Integer> correct = Arrays.asList(1, 2, 3, 4);
 JavaRDD<Integer> rdd = sc.parallelize(correct);
 List<Integer> result = Lists.newArrayList(rdd.toLocalIterator());
 assertEquals(correct, result);
}

Code example source: org.apache.spark/spark-mllib_2.10

@Test
public void runLassoUsingStaticMethods() {
 int nPoints = 10000;
 double A = 0.0;
 double[] weights = {-1.5, 1.0e-2};
 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
  weights, nPoints, 42, 0.1), 2).cache();
 List<LabeledPoint> validationData =
  LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
 LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);
 int numAccurate = validatePrediction(validationData, model);
 Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void treeReduce() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(-5, -4, -3, -2, -1, 1, 2, 3, 4), 10);
 Function2<Integer, Integer, Integer> add = (a, b) -> a + b;
 for (int depth = 1; depth <= 10; depth++) {
  int sum = rdd.treeReduce(add, depth);
  assertEquals(-5, sum);
 }
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void top() {
 JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
 List<Integer> top2 = rdd.top(2);
 assertEquals(Arrays.asList(4, 3), top2);
}

Code example source: org.apache.spark/spark-core_2.10

@Test
public void cartesian() {
 JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
 JavaRDD<String> stringRDD = sc.parallelize(Arrays.asList("Hello", "World"));
 JavaPairRDD<String, Double> cartesian = stringRDD.cartesian(doubleRDD);
 assertEquals(new Tuple2<>("Hello", 1.0), cartesian.first());
}
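
Related to the cartesian example above: besides parallelize, JavaSparkContext also provides typed variants. A short sketch, where sc is an existing JavaSparkContext, Tuple2 comes from the scala package, and the list contents are illustrative:

// parallelizeDoubles produces a JavaDoubleRDD, parallelizePairs a JavaPairRDD.
JavaDoubleRDD doubles = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0));
JavaPairRDD<String, Integer> pairs = sc.parallelizePairs(
 Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2)));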

Code example source: org.apache.spark/spark-core

@Test
public void foreachWithAnonymousClass() {
 foreachCalls = 0;
 JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello", "World"));
 rdd.foreach(s -> foreachCalls++);
 Assert.assertEquals(2, foreachCalls);
}
