本文整理了Java中org.apache.spark.api.java.JavaRDD.randomSplit()
方法的一些代码示例,展示了JavaRDD.randomSplit()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.randomSplit()
方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:randomSplit
暂无
代码示例来源:origin: org.apache.spark/spark-core_2.10
@Test
public void randomSplit() {
    // Build a deterministic input of 1000 integers.
    List<Integer> ints = new ArrayList<>(1000);
    for (int i = 0; i < 1000; i++) {
        ints.add(i);
    }
    JavaRDD<Integer> rdd = sc.parallelize(ints);
    // Weights 0.4/0.6/1.0 are normalized by Spark to fractions 0.2/0.3/0.5; seed 31 keeps the test stable.
    JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
    // the splits aren't perfect -- not enough data for them to be -- just check they're about right
    assertEquals(3, splits.length);
    long s0 = splits[0].count();
    long s1 = splits[1].count();
    long s2 = splits[2].count();
    assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
    // Fixed copy-paste bug: the upper bound previously checked s0 instead of s1.
    assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
    assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}
代码示例来源:origin: org.apache.spark/spark-core_2.11
@Test
public void randomSplit() {
    // Build a deterministic input of 1000 integers.
    List<Integer> ints = new ArrayList<>(1000);
    for (int i = 0; i < 1000; i++) {
        ints.add(i);
    }
    JavaRDD<Integer> rdd = sc.parallelize(ints);
    // Weights 0.4/0.6/1.0 are normalized by Spark to fractions 0.2/0.3/0.5; seed 31 keeps the test stable.
    JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
    // the splits aren't perfect -- not enough data for them to be -- just check they're about right
    assertEquals(3, splits.length);
    long s0 = splits[0].count();
    long s1 = splits[1].count();
    long s2 = splits[2].count();
    assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
    // Fixed copy-paste bug: the upper bound previously checked s0 instead of s1.
    assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
    assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}
代码示例来源:origin: org.apache.spark/spark-core
@Test
public void randomSplit() {
    // Build a deterministic input of 1000 integers.
    List<Integer> ints = new ArrayList<>(1000);
    for (int i = 0; i < 1000; i++) {
        ints.add(i);
    }
    JavaRDD<Integer> rdd = sc.parallelize(ints);
    // Weights 0.4/0.6/1.0 are normalized by Spark to fractions 0.2/0.3/0.5; seed 31 keeps the test stable.
    JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
    // the splits aren't perfect -- not enough data for them to be -- just check they're about right
    assertEquals(3, splits.length);
    long s0 = splits[0].count();
    long s1 = splits[1].count();
    long s2 = splits[2].count();
    assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
    // Fixed copy-paste bug: the upper bound previously checked s0 instead of s1.
    assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
    assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}
代码示例来源:origin: org.datavec/datavec-spark_2.11
/**
 * Splits an RDD into two parts according to the given {@link SplitStrategy}.
 * Only {@link RandomSplit} is supported: the first element of the returned
 * list holds roughly {@code fractionTrain} of the data, the second the rest.
 *
 * @param splitStrategy strategy describing how to split; must be a RandomSplit
 * @param data          the RDD to split
 * @param seed          RNG seed passed to {@code JavaRDD.randomSplit} for reproducibility
 * @return a two-element list: [train split, remaining split]
 * @throws RuntimeException if the strategy type is not implemented
 */
public static <T> List<JavaRDD<T>> splitData(SplitStrategy splitStrategy, JavaRDD<T> data, long seed) {
    // Guard clause: anything other than RandomSplit is unsupported.
    if (!(splitStrategy instanceof RandomSplit)) {
        throw new RuntimeException("Not yet implemented");
    }
    double trainFraction = ((RandomSplit) splitStrategy).getFractionTrain();
    // Two complementary weights: train fraction and its remainder.
    JavaRDD<T>[] parts = data.randomSplit(new double[] {trainFraction, 1.0 - trainFraction}, seed);
    List<JavaRDD<T>> result = new ArrayList<>(2);
    Collections.addAll(result, parts);
    return result;
}
代码示例来源:origin: org.datavec/datavec-spark
/**
 * Splits an RDD into two parts according to the given {@link SplitStrategy}.
 * Only {@link RandomSplit} is supported: the first element of the returned
 * list holds roughly {@code fractionTrain} of the data, the second the rest.
 *
 * @param splitStrategy strategy describing how to split; must be a RandomSplit
 * @param data          the RDD to split
 * @param seed          RNG seed passed to {@code JavaRDD.randomSplit} for reproducibility
 * @return a two-element list: [train split, remaining split]
 * @throws RuntimeException if the strategy type is not implemented
 */
public static <T> List<JavaRDD<T>> splitData(SplitStrategy splitStrategy, JavaRDD<T> data, long seed) {
    // Guard clause: anything other than RandomSplit is unsupported.
    if (!(splitStrategy instanceof RandomSplit)) {
        throw new RuntimeException("Not yet implemented");
    }
    double trainFraction = ((RandomSplit) splitStrategy).getFractionTrain();
    // Two complementary weights: train fraction and its remainder.
    JavaRDD<T>[] parts = data.randomSplit(new double[] {trainFraction, 1.0 - trainFraction}, seed);
    List<JavaRDD<T>> result = new ArrayList<>(2);
    Collections.addAll(result, parts);
    return result;
}
代码示例来源:origin: org.datavec/datavec-spark
/**
 * Collects an RDD of writable records to the driver and writes it to the local
 * filesystem as {@code numFiles} CSV files, shuffling each file's lines with a
 * seeded RNG so the output order is randomized but reproducible.
 *
 * @param outputDir    local directory the CSV files are written into
 * @param baseFileName prefix for each output file ("&lt;base&gt;&lt;i&gt;.csv")
 * @param numFiles     number of output files to split the data across
 * @param delimiter    field delimiter used when formatting each record
 * @param quote        quote string used when formatting each record
 * @param data         records to export
 * @param rngSeed      seed for the shuffle RNG (reproducible output ordering)
 * @throws Exception if writing any output file fails
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
                String quote, JavaRDD<List<Writable>> data, int rngSeed) throws Exception {
    // Format every record as a single delimited text line.
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal weights -> approximately even split across numFiles parts.
    double[] fractions = new double[numFiles];
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    Random rng = new Random(rngSeed);
    for (int idx = 0; idx < parts.length; idx++) {
        List<String> collected = parts[idx].collect();
        // collect() may return an immutable list; copy before shuffling in place.
        if (!(collected instanceof ArrayList)) {
            collected = new ArrayList<>(collected);
        }
        Collections.shuffle(collected, rng);
        String path = FilenameUtils.concat(outputDir, baseFileName + idx + ".csv");
        FileUtils.writeLines(new File(path), collected);
    }
}
代码示例来源:origin: org.datavec/datavec-spark_2.11
/**
 * Collects an RDD of writable records to the driver and writes it to the local
 * filesystem as {@code numFiles} CSV files, shuffling each file's lines with a
 * seeded RNG so the output order is randomized but reproducible.
 *
 * @param outputDir    local directory the CSV files are written into
 * @param baseFileName prefix for each output file ("&lt;base&gt;&lt;i&gt;.csv")
 * @param numFiles     number of output files to split the data across
 * @param delimiter    field delimiter used when formatting each record
 * @param quote        quote string used when formatting each record
 * @param data         records to export
 * @param rngSeed      seed for the shuffle RNG (reproducible output ordering)
 * @throws Exception if writing any output file fails
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
                String quote, JavaRDD<List<Writable>> data, int rngSeed) throws Exception {
    // Format every record as a single delimited text line.
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal weights -> approximately even split across numFiles parts.
    double[] fractions = new double[numFiles];
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    Random rng = new Random(rngSeed);
    for (int idx = 0; idx < parts.length; idx++) {
        List<String> collected = parts[idx].collect();
        // collect() may return an immutable list; copy before shuffling in place.
        if (!(collected instanceof ArrayList)) {
            collected = new ArrayList<>(collected);
        }
        Collections.shuffle(collected, rng);
        String path = FilenameUtils.concat(outputDir, baseFileName + idx + ".csv");
        FileUtils.writeLines(new File(path), collected);
    }
}
代码示例来源:origin: org.datavec/datavec-spark
/**
 * Collects an RDD of writable records to the driver and writes it to the local
 * filesystem as {@code numFiles} CSV files. Lines are written in the order
 * returned by {@code collect()}; no shuffling is applied.
 *
 * @param outputDir    local directory the CSV files are written into
 * @param baseFileName prefix for each output file ("&lt;base&gt;&lt;i&gt;.csv")
 * @param numFiles     number of output files to split the data across
 * @param delimiter    field delimiter used when formatting each record
 * @param quote        quote string used when formatting each record
 * @param data         records to export
 * @throws Exception if writing any output file fails
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
                String quote, JavaRDD<List<Writable>> data) throws Exception {
    // Format every record as a single delimited text line.
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal weights -> approximately even split across numFiles parts.
    double[] fractions = new double[numFiles];
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    for (int idx = 0; idx < parts.length; idx++) {
        String path = FilenameUtils.concat(outputDir, baseFileName + idx + ".csv");
        // Written locally on the driver rather than via subset.saveAsTextFile(path).
        FileUtils.writeLines(new File(path), parts[idx].collect());
    }
}
代码示例来源:origin: org.datavec/datavec-spark_2.11
/**
 * Collects an RDD of writable records to the driver and writes it to the local
 * filesystem as {@code numFiles} CSV files. Lines are written in the order
 * returned by {@code collect()}; no shuffling is applied.
 *
 * @param outputDir    local directory the CSV files are written into
 * @param baseFileName prefix for each output file ("&lt;base&gt;&lt;i&gt;.csv")
 * @param numFiles     number of output files to split the data across
 * @param delimiter    field delimiter used when formatting each record
 * @param quote        quote string used when formatting each record
 * @param data         records to export
 * @throws Exception if writing any output file fails
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
                String quote, JavaRDD<List<Writable>> data) throws Exception {
    // Format every record as a single delimited text line.
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal weights -> approximately even split across numFiles parts.
    double[] fractions = new double[numFiles];
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    for (int idx = 0; idx < parts.length; idx++) {
        String path = FilenameUtils.concat(outputDir, baseFileName + idx + ".csv");
        // Written locally on the driver rather than via subset.saveAsTextFile(path).
        FileUtils.writeLines(new File(path), parts[idx].collect());
    }
}
代码示例来源:origin: bhdrkn/Java-Examples
// Fragment (enclosing method not visible): split labeled data 60/40 with fixed seed 11L for reproducibility.
JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.6, 0.4}, 11L);
// Cache the training split since it is presumably iterated repeatedly during training — TODO confirm from caller.
JavaRDD<LabeledPoint> training = splits[0].cache();
JavaRDD<LabeledPoint> test = splits[1];
内容来源于网络,如有侵权,请联系作者删除!