org.apache.spark.mllib.clustering.KMeans类的使用及代码示例

x33g5p2x  于2022-01-24 转载在 其他  
字(7.8k)|赞(0)|评价(0)|浏览(163)

本文整理了Java中org.apache.spark.mllib.clustering.KMeans类的一些代码示例,展示了KMeans类的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。KMeans类的具体详情如下:
包路径:org.apache.spark.mllib.clustering.KMeans
类名称:KMeans

KMeans介绍

暂无

代码示例

代码示例来源:origin: OryxProject/oryx

/**
 * Trains a k-means model on the parsed training data and returns it as PMML.
 *
 * <p>The first hyper parameter is read as the number of clusters and must be greater than 1,
 * otherwise the precondition check rejects it.
 *
 * @param sparkContext    active Spark Context
 * @param trainData       training data on which to build a model
 * @param hyperParameters ordered list of hyper parameter values; element 0 is the cluster count
 * @param candidatePath   directory where additional model files can be written
 * @return a {@link PMML} representation of a model trained on the given data
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
 final int k = (Integer) hyperParameters.get(0);
 Preconditions.checkArgument(k > 1);
 log.info("Building KMeans Model with {} clusters", k);

 // Parse the raw text lines and turn them into feature vectors.
 final JavaRDD<Vector> vectors = parsedToVectorRDD(trainData.map(MLFunctions.PARSE_FN));

 final KMeansModel trained = KMeans.train(
   vectors.rdd(),
   k,
   maxIterations,
   numberOfRuns,
   initializationStrategy);

 // The PMML output also carries the per-cluster counts computed from the training vectors.
 return kMeansModelToPMML(trained, fetchClusterCountsFromModel(vectors, trained));
}

代码示例来源:origin: OryxProject/oryx

public KMeansUpdate(Config config) {
 super(config);
 initializationStrategy = config.getString("oryx.kmeans.initialization-strategy");
 evaluationStrategy = Enum.valueOf(KMeansEvalStrategy.class, config.getString("oryx.kmeans.evaluation-strategy"));
 numberOfRuns = config.getInt("oryx.kmeans.runs");
 maxIterations = config.getInt("oryx.kmeans.iterations");
 hyperParamValues = new ArrayList<>();
 hyperParamValues.add(HyperParams.fromConfig(config, "oryx.kmeans.hyperparams.k"));
 inputSchema = new InputSchema(config);
 Preconditions.checkArgument(maxIterations > 0);
 Preconditions.checkArgument(numberOfRuns > 0);
 Preconditions.checkArgument(
   initializationStrategy.equals(KMeans.K_MEANS_PARALLEL()) ||
     initializationStrategy.equals(KMeans.RANDOM()));
 // Should be an unsupervised problem. This impl only supports numeric features.
 Preconditions.checkArgument(!inputSchema.hasTarget());
 for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
  Preconditions.checkArgument(!inputSchema.isCategorical(i));
 }
}

代码示例来源:origin: org.apache.spark/spark-mllib_2.11

/**
 * Builds a single-cluster model through the {@code KMeans} builder-style setters and checks
 * that the only center equals the component-wise mean of the three input points.
 */
@Test
public void runKMeansUsingConstructor() {
 // Mean of the three points below is (1, 3, 4).
 Vector p1 = Vectors.dense(1.0, 2.0, 6.0);
 Vector p2 = Vectors.dense(1.0, 3.0, 0.0);
 Vector p3 = Vectors.dense(1.0, 4.0, 6.0);
 List<Vector> points = Arrays.asList(p1, p2, p3);
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // First run: initialization mode left unset.
 KMeans unsetInit = new KMeans().setK(1).setMaxIterations(5);
 KMeansModel model = unsetInit.run(data.rdd());
 assertEquals(1, model.clusterCenters().length);
 assertEquals(expectedCenter, model.clusterCenters()[0]);

 // Second run: random initialization with a single iteration.
 KMeans randomInit = new KMeans()
  .setK(1)
  .setMaxIterations(1)
  .setInitializationMode(KMeans.RANDOM());
 model = randomInit.run(data.rdd());
 assertEquals(expectedCenter, model.clusterCenters()[0]);
}

代码示例来源:origin: org.apache.spark/spark-mllib_2.11

@Test
 public void testPredictJavaRDD() {
  // Train a one-cluster model on three points, then predict over the same JavaRDD.
  JavaRDD<Vector> data = jsc.parallelize(
   Arrays.asList(
    Vectors.dense(1.0, 2.0, 6.0),
    Vectors.dense(1.0, 3.0, 0.0),
    Vectors.dense(1.0, 4.0, 6.0)),
   2);

  KMeans singleCluster = new KMeans().setK(1).setMaxIterations(5);
  KMeansModel model = singleCluster.run(data.rdd());

  JavaRDD<Integer> predictions = model.predict(data);
  // No value is asserted; the test only requires that fetching the first prediction succeeds.
  predictions.first();
 }
}

代码示例来源:origin: org.apache.spark/spark-mllib

/**
 * Trains single-cluster models through the static {@code KMeans.train} entry point, once per
 * initialization mode, and checks the resulting center against the mean of the input points.
 */
@Test
public void runKMeansUsingStaticMethods() {
 // Mean of the three points below is (1, 3, 4).
 List<Vector> points = Arrays.asList(
  Vectors.dense(1.0, 2.0, 6.0),
  Vectors.dense(1.0, 3.0, 0.0),
  Vectors.dense(1.0, 4.0, 6.0));
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);
 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // k-means|| initialization: the cluster count and the center are both checked.
 KMeansModel parallelInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
 assertEquals(1, parallelInitModel.clusterCenters().length);
 assertEquals(expectedCenter, parallelInitModel.clusterCenters()[0]);

 // Random initialization: only the center is checked.
 KMeansModel randomInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
 assertEquals(expectedCenter, randomInitModel.clusterCenters()[0]);
}

代码示例来源:origin: locationtech/geowave

// Configure a KMeans instance and cluster the centroid vectors.
final KMeans kmeans = new KMeans();
// Use Spark's own constant instead of a hand-typed literal: the previous value "kmeans||"
// does not match the name Spark defines for this mode ("k-means||"), so it was not a
// recognised initialization mode.
kmeans.setInitializationMode(KMeans.K_MEANS_PARALLEL());
kmeans.setK(numClusters);
kmeans.setMaxIterations(numIterations);
kmeans.setEpsilon(epsilon);
outputModel = kmeans.run(centroidVectors.rdd());

代码示例来源:origin: org.apache.spark/spark-mllib_2.10

/**
 * Exercises the setter-based {@code KMeans} API with one cluster and verifies that the single
 * center is the mean of the three sample points.
 */
@Test
public void runKMeansUsingConstructor() {
 // Mean of the three points below is (1, 3, 4).
 Vector p1 = Vectors.dense(1.0, 2.0, 6.0);
 Vector p2 = Vectors.dense(1.0, 3.0, 0.0);
 Vector p3 = Vectors.dense(1.0, 4.0, 6.0);
 List<Vector> points = Arrays.asList(p1, p2, p3);
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // First run: initialization mode left unset.
 KMeans unsetInit = new KMeans().setK(1).setMaxIterations(5);
 KMeansModel model = unsetInit.run(data.rdd());
 assertEquals(1, model.clusterCenters().length);
 assertEquals(expectedCenter, model.clusterCenters()[0]);

 // Second run: random initialization with a single iteration.
 KMeans randomInit = new KMeans()
  .setK(1)
  .setMaxIterations(1)
  .setInitializationMode(KMeans.RANDOM());
 model = randomInit.run(data.rdd());
 assertEquals(expectedCenter, model.clusterCenters()[0]);
}

代码示例来源:origin: org.apache.spark/spark-mllib_2.10

@Test
 public void testPredictJavaRDD() {
  // Train a one-cluster model on three points, then predict over the same JavaRDD.
  JavaRDD<Vector> data = jsc.parallelize(
   Arrays.asList(
    Vectors.dense(1.0, 2.0, 6.0),
    Vectors.dense(1.0, 3.0, 0.0),
    Vectors.dense(1.0, 4.0, 6.0)),
   2);

  KMeans singleCluster = new KMeans().setK(1).setMaxIterations(5);
  KMeansModel model = singleCluster.run(data.rdd());

  JavaRDD<Integer> predictions = model.predict(data);
  // No value is asserted; the test only requires that fetching the first prediction succeeds.
  predictions.first();
 }
}

代码示例来源:origin: org.apache.spark/spark-mllib_2.10

/**
 * Uses the static {@code KMeans.train} method with each initialization mode and compares the
 * single resulting center with the mean of the input points.
 */
@Test
public void runKMeansUsingStaticMethods() {
 // Mean of the three points below is (1, 3, 4).
 List<Vector> points = Arrays.asList(
  Vectors.dense(1.0, 2.0, 6.0),
  Vectors.dense(1.0, 3.0, 0.0),
  Vectors.dense(1.0, 4.0, 6.0));
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);
 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // k-means|| initialization: the cluster count and the center are both checked.
 KMeansModel parallelInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
 assertEquals(1, parallelInitModel.clusterCenters().length);
 assertEquals(expectedCenter, parallelInitModel.clusterCenters()[0]);

 // Random initialization: only the center is checked.
 KMeansModel randomInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
 assertEquals(expectedCenter, randomInitModel.clusterCenters()[0]);
}

代码示例来源:origin: org.apache.spark/spark-mllib

/**
 * Runs k-means with one cluster via the {@code KMeans} setters, first without choosing an
 * initialization mode and then with random initialization, expecting the mean of the inputs
 * as the center both times.
 */
@Test
public void runKMeansUsingConstructor() {
 // Mean of the three points below is (1, 3, 4).
 Vector p1 = Vectors.dense(1.0, 2.0, 6.0);
 Vector p2 = Vectors.dense(1.0, 3.0, 0.0);
 Vector p3 = Vectors.dense(1.0, 4.0, 6.0);
 List<Vector> points = Arrays.asList(p1, p2, p3);
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);

 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // First run: initialization mode left unset.
 KMeans unsetInit = new KMeans().setK(1).setMaxIterations(5);
 KMeansModel model = unsetInit.run(data.rdd());
 assertEquals(1, model.clusterCenters().length);
 assertEquals(expectedCenter, model.clusterCenters()[0]);

 // Second run: random initialization with a single iteration.
 KMeans randomInit = new KMeans()
  .setK(1)
  .setMaxIterations(1)
  .setInitializationMode(KMeans.RANDOM());
 model = randomInit.run(data.rdd());
 assertEquals(expectedCenter, model.clusterCenters()[0]);
}

代码示例来源:origin: org.apache.spark/spark-mllib

@Test
 public void testPredictJavaRDD() {
  // Train a one-cluster model on three points, then predict over the same JavaRDD.
  JavaRDD<Vector> data = jsc.parallelize(
   Arrays.asList(
    Vectors.dense(1.0, 2.0, 6.0),
    Vectors.dense(1.0, 3.0, 0.0),
    Vectors.dense(1.0, 4.0, 6.0)),
   2);

  KMeans singleCluster = new KMeans().setK(1).setMaxIterations(5);
  KMeansModel model = singleCluster.run(data.rdd());

  JavaRDD<Integer> predictions = model.predict(data);
  // No value is asserted; the test only requires that fetching the first prediction succeeds.
  predictions.first();
 }
}

代码示例来源:origin: org.apache.spark/spark-mllib_2.11

/**
 * Calls the static {@code KMeans.train} method once with k-means|| initialization and once
 * with random initialization, expecting the mean of the inputs as the single center.
 */
@Test
public void runKMeansUsingStaticMethods() {
 // Mean of the three points below is (1, 3, 4).
 List<Vector> points = Arrays.asList(
  Vectors.dense(1.0, 2.0, 6.0),
  Vectors.dense(1.0, 3.0, 0.0),
  Vectors.dense(1.0, 4.0, 6.0));
 Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0);
 JavaRDD<Vector> data = jsc.parallelize(points, 2);

 // k-means|| initialization: the cluster count and the center are both checked.
 KMeansModel parallelInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL());
 assertEquals(1, parallelInitModel.clusterCenters().length);
 assertEquals(expectedCenter, parallelInitModel.clusterCenters()[0]);

 // Random initialization: only the center is checked.
 KMeansModel randomInitModel =
  KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM());
 assertEquals(expectedCenter, randomInitModel.clusterCenters()[0]);
}

代码示例来源:origin: ypriverol/spark-java8

// Train through the three-argument overload: input RDD, cluster count, iteration limit.
KMeansModel clusters =
    org.apache.spark.mllib.clustering.KMeans.train(
        parsedData.rdd(),
        numClusters,
        numIterations);

代码示例来源:origin: apache/lens

KMeansModel model = KMeans.train(trainableRDD.rdd(), k, maxIterations, runs, initializationMode);
 return new KMeansClusteringModel(modelId, model);
} catch (Exception e) {

相关文章