This article collects code examples of the org.apache.spark.api.java.JavaSparkContext.parallelize() method in Java and shows how JavaSparkContext.parallelize() is used in practice. The examples were extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, so they should make a useful reference. Details of JavaSparkContext.parallelize():
Package: org.apache.spark.api.java
Class: JavaSparkContext
Method: parallelize
Description: distributes a local Java collection (a java.util.List<T>) to form a JavaRDD<T>; an overload also takes a numSlices argument that sets the number of partitions of the resulting RDD.
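Before the collected examples, here is a minimal, self-contained sketch of both overloads. The local[2] master, the app name, and the class name ParallelizeExample are arbitrary choices for this illustration, not taken from any of the projects below:
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class ParallelizeExample {
  public static void main(String[] args) {
    // Run locally with two threads; master and app name are placeholders.
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("ParallelizeExample");
    JavaSparkContext sc = new JavaSparkContext(conf);

    List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);

    // One-argument overload: the partition count defaults to spark.default.parallelism.
    JavaRDD<Integer> rdd = sc.parallelize(data);

    // Two-argument overload: numSlices fixes the partition count explicitly.
    JavaRDD<Integer> rddWithThreeParts = sc.parallelize(data, 3);

    System.out.println(rdd.count());                          // 5
    System.out.println(rddWithThreeParts.getNumPartitions()); // 3

    sc.stop();
  }
}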
Code example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Turn a local list into an RDD, then square each element.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> result = rdd.map(
      new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  System.out.println(StringUtils.join(result.collect(), ","));
}
Code example source: org.apache.spark/spark-core_2.11 (the same test also appears in the spark-core and spark-core_2.10 artifacts)
@Test
public void mapPartitions() {
  // Two partitions, [1, 2] and [3, 4]; each is summed independently.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
  JavaRDD<Integer> partitionSums = rdd.mapPartitions(iter -> {
    int sum = 0;
    while (iter.hasNext()) {
      sum += iter.next();
    }
    return Collections.singletonList(sum).iterator();
  });
  assertEquals("[3, 7]", partitionSums.collect().toString());
}
Code example source: org.apache.spark/spark-core (the same test also appears in the spark-core_2.10 and spark-core_2.11 artifacts)
@Test
public void countApproxDistinct() {
  // 100,000 elements but only 100 distinct values (i % 100).
  List<Integer> arrayData = new ArrayList<>();
  int size = 100;
  for (int i = 0; i < 100000; i++) {
    arrayData.add(i % size);
  }
  JavaRDD<Integer> simpleRdd = sc.parallelize(arrayData, 10);
  // With relative accuracy 0.05, the estimate should land within 10% of the true count.
  assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.05) - size) / (size * 1.0)) <= 0.1);
}
Code example source: org.apache.spark/spark-core_2.11 (the same test also appears in the spark-core artifact)
@Test
public void mapPartitionsWithIndex() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
  JavaRDD<Integer> partitionSums = rdd.mapPartitionsWithIndex((index, iter) -> {
    int sum = 0;
    while (iter.hasNext()) {
      sum += iter.next();
    }
    return Collections.singletonList(sum).iterator();
  }, false);
  assertEquals("[3, 7]", partitionSums.collect().toString());
}
Code example source: databricks/learning-spark
public static void main(String[] args) throws Exception {
  String master;
  if (args.length > 0) {
    master = args[0];
  } else {
    master = "local";
  }
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicmapfilter", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  // Square each element, then keep everything except 1.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  JavaRDD<Integer> squared = rdd.map(
      new Function<Integer, Integer>() { public Integer call(Integer x) { return x * x; } });
  JavaRDD<Integer> result = squared.filter(
      new Function<Integer, Boolean>() { public Boolean call(Integer x) { return x != 1; } });
  System.out.println(StringUtils.join(result.collect(), ","));
}
Code example source: org.apache.spark/spark-mllib_2.11 (the same test also appears in the spark-mllib artifact)
@Test
public void runRidgeRegressionUsingStaticMethods() {
  int numExamples = 50;
  int numFeatures = 20;
  List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0);
  // First half of the generated data is used for training, second half for validation.
  JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples));
  List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples);
  RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0);
  double unRegularizedErr = predictionError(validationData, model);
  model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.1);
  double regularizedErr = predictionError(validationData, model);
  Assert.assertTrue(regularizedErr < unRegularizedErr);
}
Code example source: org.apache.spark/spark-core_2.11
@Test
public void toLocalIterator() {
  List<Integer> correct = Arrays.asList(1, 2, 3, 4);
  JavaRDD<Integer> rdd = sc.parallelize(correct);
  List<Integer> result = Lists.newArrayList(rdd.toLocalIterator());
  assertEquals(correct, result);
}
Code example source: org.apache.spark/spark-mllib_2.10
@Test
public void runLassoUsingStaticMethods() {
  int nPoints = 10000;
  double A = 0.0;
  double[] weights = {-1.5, 1.0e-2};
  JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
      weights, nPoints, 42, 0.1), 2).cache();
  List<LabeledPoint> validationData =
      LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
  LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0);
  int numAccurate = validatePrediction(validationData, model);
  Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
}
Code example source: org.apache.spark/spark-core_2.10
@Test
public void treeReduce() {
  // Nine elements spread over ten partitions, so at least one partition is empty.
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(-5, -4, -3, -2, -1, 1, 2, 3, 4), 10);
  Function2<Integer, Integer, Integer> add = (a, b) -> a + b;
  for (int depth = 1; depth <= 10; depth++) {
    int sum = rdd.treeReduce(add, depth);
    assertEquals(-5, sum);
  }
}
Code example source: org.apache.spark/spark-core_2.10
@Test
public void top() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  List<Integer> top2 = rdd.top(2);
  assertEquals(Arrays.asList(4, 3), top2);
}
Code example source: org.apache.spark/spark-core_2.10
@Test
public void cartesian() {
  JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0));
  JavaRDD<String> stringRDD = sc.parallelize(Arrays.asList("Hello", "World"));
  JavaPairRDD<String, Double> cartesian = stringRDD.cartesian(doubleRDD);
  assertEquals(new Tuple2<>("Hello", 1.0), cartesian.first());
}
Code example source: org.apache.spark/spark-core
@Test
public void foreachWithAnonymousClass() {
  // foreachCalls is a counter field of the enclosing test class.
  foreachCalls = 0;
  JavaRDD<String> rdd = sc.parallelize(Arrays.asList("Hello", "World"));
  rdd.foreach(s -> foreachCalls++);
  Assert.assertEquals(2, foreachCalls);
}