org.apache.spark.api.java.JavaRDD.randomSplit()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(6.8k)|赞(0)|评价(0)|浏览(104)

本文整理了Java中org.apache.spark.api.java.JavaRDD.randomSplit()方法的一些代码示例,展示了JavaRDD.randomSplit()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。JavaRDD.randomSplit()方法的具体详情如下:
包路径:org.apache.spark.api.java.JavaRDD
类名称:JavaRDD
方法名:randomSplit

JavaRDD.randomSplit介绍

暂无

代码示例

代码示例来源:origin: org.apache.spark/spark-core_2.10

@Test
public void randomSplit() {
 List<Integer> ints = new ArrayList<>(1000);
 for (int i = 0; i < 1000; i++) {
  ints.add(i);
 }
 JavaRDD<Integer> rdd = sc.parallelize(ints);
 // Weights sum to 2.0; Spark normalizes them, so the effective fractions
 // are 0.2, 0.3 and 0.5 of the 1000 elements (seed 31 for determinism).
 JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
 // the splits aren't perfect -- not enough data for them to be -- just check they're about right
 assertEquals(3, splits.length);
 long s0 = splits[0].count();
 long s1 = splits[1].count();
 long s2 = splits[2].count();
 assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
 // Fixed copy-paste bug: the upper bound previously tested s0 instead of s1,
 // so the s1 < 350 condition was never actually checked.
 assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
 assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}

代码示例来源:origin: org.apache.spark/spark-core_2.11

@Test
public void randomSplit() {
 List<Integer> ints = new ArrayList<>(1000);
 for (int i = 0; i < 1000; i++) {
  ints.add(i);
 }
 JavaRDD<Integer> rdd = sc.parallelize(ints);
 // Weights sum to 2.0; Spark normalizes them, so the effective fractions
 // are 0.2, 0.3 and 0.5 of the 1000 elements (seed 31 for determinism).
 JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
 // the splits aren't perfect -- not enough data for them to be -- just check they're about right
 assertEquals(3, splits.length);
 long s0 = splits[0].count();
 long s1 = splits[1].count();
 long s2 = splits[2].count();
 assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
 // Fixed copy-paste bug: the upper bound previously tested s0 instead of s1,
 // so the s1 < 350 condition was never actually checked.
 assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
 assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}

代码示例来源:origin: org.apache.spark/spark-core

@Test
public void randomSplit() {
 List<Integer> ints = new ArrayList<>(1000);
 for (int i = 0; i < 1000; i++) {
  ints.add(i);
 }
 JavaRDD<Integer> rdd = sc.parallelize(ints);
 // Weights sum to 2.0; Spark normalizes them, so the effective fractions
 // are 0.2, 0.3 and 0.5 of the 1000 elements (seed 31 for determinism).
 JavaRDD<Integer>[] splits = rdd.randomSplit(new double[] { 0.4, 0.6, 1.0 }, 31);
 // the splits aren't perfect -- not enough data for them to be -- just check they're about right
 assertEquals(3, splits.length);
 long s0 = splits[0].count();
 long s1 = splits[1].count();
 long s2 = splits[2].count();
 assertTrue(s0 + " not within expected range", s0 > 150 && s0 < 250);
 // Fixed copy-paste bug: the upper bound previously tested s0 instead of s1,
 // so the s1 < 350 condition was never actually checked.
 assertTrue(s1 + " not within expected range", s1 > 250 && s1 < 350);
 assertTrue(s2 + " not within expected range", s2 > 430 && s2 < 570);
}

代码示例来源:origin: org.datavec/datavec-spark_2.11

/**
 * Splits an RDD into parts according to the given strategy.
 * Currently only {@link RandomSplit} is supported, producing a two-element
 * list: [train split, remainder], where the train fraction comes from the
 * strategy and the split is seeded for reproducibility.
 */
public static <T> List<JavaRDD<T>> splitData(SplitStrategy splitStrategy, JavaRDD<T> data, long seed) {
    // Guard clause: reject unsupported strategies up front.
    if (!(splitStrategy instanceof RandomSplit)) {
        throw new RuntimeException("Not yet implemented");
    }
    double trainFraction = ((RandomSplit) splitStrategy).getFractionTrain();
    double[] weights = new double[] {trainFraction, 1.0 - trainFraction};
    JavaRDD<T>[] parts = data.randomSplit(weights, seed);
    List<JavaRDD<T>> result = new ArrayList<>(2);
    Collections.addAll(result, parts);
    return result;
}

代码示例来源:origin: org.datavec/datavec-spark

/**
 * Splits an RDD into parts according to the given strategy.
 * Only {@link RandomSplit} is handled: the data is divided into a seeded
 * two-way random split [train, rest] using the strategy's train fraction.
 */
public static <T> List<JavaRDD<T>> splitData(SplitStrategy splitStrategy, JavaRDD<T> data, long seed) {
    // Fail fast for strategies that are not yet implemented.
    if (!(splitStrategy instanceof RandomSplit)) {
        throw new RuntimeException("Not yet implemented");
    }
    double fraction = ((RandomSplit) splitStrategy).getFractionTrain();
    JavaRDD<T>[] parts = data.randomSplit(new double[] {fraction, 1.0 - fraction}, seed);
    List<JavaRDD<T>> out = new ArrayList<>(2);
    Collections.addAll(out, parts);
    return out;
}

代码示例来源:origin: org.datavec/datavec-spark

/**
 * Exports the RDD as {@code numFiles} local CSV files, shuffling the lines
 * within each file using a seeded RNG for reproducibility.
 * Each record is serialized with the given delimiter/quote, the data is
 * randomly split into equal fractions, and each split is collected to the
 * driver and written to {@code outputDir/baseFileName<i>.csv}.
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
        String quote, JavaRDD<List<Writable>> data, int rngSeed) throws Exception {
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal weight for every output file.
    double[] fractions = new double[numFiles];
    double each = 1.0 / numFiles;
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = each;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    Random rng = new Random(rngSeed);
    int fileIndex = 0;
    for (JavaRDD<String> part : parts) {
        String path = FilenameUtils.concat(outputDir, baseFileName + fileIndex + ".csv");
        fileIndex++;
        // Copy into a mutable list so Collections.shuffle can reorder it.
        List<String> collected = new ArrayList<>(part.collect());
        Collections.shuffle(collected, rng);
        FileUtils.writeLines(new File(path), collected);
    }
}

代码示例来源:origin: org.datavec/datavec-spark_2.11

/**
 * Writes the RDD out as {@code numFiles} CSV files on the local filesystem.
 * Records are serialized via {@link WritablesToStringFunction}, randomly
 * partitioned into equal fractions, and each partition's lines are shuffled
 * (seeded RNG) before being written to {@code outputDir/baseFileName<i>.csv}.
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
        String quote, JavaRDD<List<Writable>> data, int rngSeed) throws Exception {
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Split weights: one equal share per target file.
    double[] weights = new double[numFiles];
    for (int i = 0; i < weights.length; i++) {
        weights[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] subsets = lines.randomSplit(weights);
    Random random = new Random(rngSeed);
    int idx = 0;
    for (JavaRDD<String> subset : subsets) {
        File target = new File(FilenameUtils.concat(outputDir, baseFileName + idx + ".csv"));
        idx++;
        // Materialize into a mutable list before shuffling.
        List<String> content = new ArrayList<>(subset.collect());
        Collections.shuffle(content, random);
        FileUtils.writeLines(target, content);
    }
}

代码示例来源:origin: org.datavec/datavec-spark

/**
 * Exports the RDD as {@code numFiles} local CSV files without shuffling.
 * The data is serialized to delimited strings, randomly split into equal
 * fractions, and each split is collected and written to
 * {@code outputDir/baseFileName<i>.csv}.
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
        String quote, JavaRDD<List<Writable>> data) throws Exception {
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // One equal fraction per output file.
    double[] fractions = new double[numFiles];
    for (int i = 0; i < numFiles; i++) {
        fractions[i] = 1.0 / numFiles;
    }
    JavaRDD<String>[] parts = lines.randomSplit(fractions);
    int fileIndex = 0;
    for (JavaRDD<String> part : parts) {
        String path = FilenameUtils.concat(outputDir, baseFileName + fileIndex + ".csv");
        fileIndex++;
        // Collect to the driver and write locally (not via saveAsTextFile).
        FileUtils.writeLines(new File(path), part.collect());
    }
}

代码示例来源:origin: org.datavec/datavec-spark_2.11

/**
 * Writes the RDD to the local filesystem as {@code numFiles} CSV files.
 * Records are converted to delimited strings, randomly partitioned into
 * equal shares, and each share is collected and written to
 * {@code outputDir/baseFileName<i>.csv}. No shuffling is applied.
 */
public static void exportCSVLocal(String outputDir, String baseFileName, int numFiles, String delimiter,
        String quote, JavaRDD<List<Writable>> data) throws Exception {
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    // Equal split weight for each target file.
    double[] weights = new double[numFiles];
    for (int i = 0; i < weights.length; i++) {
        weights[i] = 1.0 / numFiles;
    }
    int idx = 0;
    for (JavaRDD<String> subset : lines.randomSplit(weights)) {
        File target = new File(FilenameUtils.concat(outputDir, baseFileName + idx + ".csv"));
        idx++;
        // Collect each subset to the driver and write it out directly.
        FileUtils.writeLines(target, subset.collect());
    }
}

代码示例来源:origin: bhdrkn/Java-Examples

// Fragment (enclosing method not shown): split a labeled dataset 60/40 with
// a fixed seed (11L) so the train/test partition is reproducible.
JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.6, 0.4}, 11L);
// Cache the training split since it is presumably iterated repeatedly
// during model fitting — TODO confirm against the surrounding code.
JavaRDD<LabeledPoint> training = splits[0].cache();
JavaRDD<LabeledPoint> test = splits[1];

相关文章

微信公众号

最新文章

更多