Usage and code examples of the org.apache.spark.mllib.linalg.Vector.toArray() method

This article collects a number of Java code examples of the org.apache.spark.mllib.linalg.Vector.toArray() method and shows how Vector.toArray() is used in practice. The snippets are extracted from selected open-source projects found on platforms such as GitHub, Stack Overflow, and Maven, and should be useful as references. Details of Vector.toArray():
Package path: org.apache.spark.mllib.linalg.Vector
Class name: Vector
Method name: toArray

Vector.toArray description

Converts the vector to a double array. A DenseVector returns its stored values as a double[]; a SparseVector materializes a dense double[] of length size(), with positions that carry no explicit value filled with 0.0.
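
Since the official description is terse, the following is a minimal, self-contained sketch of the behavior described above; the class name VectorToArrayExample is ours and purely illustrative:

import java.util.Arrays;

import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public class VectorToArrayExample {
  public static void main(String[] args) {
    // Dense vector: toArray() returns the stored values as a double[].
    Vector dense = Vectors.dense(1.0, 0.0, 3.0);
    System.out.println(Arrays.toString(dense.toArray()));   // [1.0, 0.0, 3.0]

    // Sparse vector: toArray() materializes a full-length double[],
    // filling positions without an explicit value with 0.0.
    Vector sparse = Vectors.sparse(3, new int[]{0, 2}, new double[]{1.0, 3.0});
    System.out.println(Arrays.toString(sparse.toArray()));  // [1.0, 0.0, 3.0]
  }
}

Note that calling toArray() on a very high-dimensional SparseVector allocates an array of the full vector size, which is worth keeping in mind in the cluster-assignment and tree-traversal examples below.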

Code examples

Code example source: OryxProject/oryx

private JavaPairRDD<Integer, Iterable<double[]>> fetchClusteredPoints(JavaRDD<? extends Vector> evalData) {
 return evalData.mapToPair(vector -> {
  double closestDist = Double.POSITIVE_INFINITY;
  int minClusterID = Integer.MIN_VALUE;
  double[] vec = vector.toArray();
  DistanceFn<double[]> distanceFn = getDistanceFn();
  Map<Integer,ClusterInfo> clusters = getClustersByID();
  for (ClusterInfo cluster : clusters.values()) {
   double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
   if (distance < closestDist) {
    closestDist = distance;
    minClusterID = cluster.getID();
   }
  }
  Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
  return new Tuple2<>(minClusterID, vec);
 }).groupByKey();
}

Code example source: OryxProject/oryx

/**
 * @param evalData points to cluster for evaluation
 * @return cluster IDs as keys, and metrics for each cluster like the count, sum of distances to centroid,
 *  and sum of squared distances
 */
JavaPairRDD<Integer,ClusterMetric> fetchClusterMetrics(JavaRDD<Vector> evalData) {
 return evalData.mapToPair(vector -> {
  double closestDist = Double.POSITIVE_INFINITY;
  int minClusterID = Integer.MIN_VALUE;
  double[] vec = vector.toArray();
  for (ClusterInfo cluster : clusters.values()) {
   double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
   if (distance < closestDist) {
    closestDist = distance;
    minClusterID = cluster.getID();
   }
  }
  Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
  return new Tuple2<>(minClusterID, new ClusterMetric(1L, closestDist, closestDist * closestDist));
 }).reduceByKey(ClusterMetric::add);
}

Code example source: OryxProject/oryx

// Excerpt from a longer method; the enclosing stream pipeline and the loop body are truncated in this extraction.
mapToObj(i -> new IntLongHashMap()).collect(Collectors.toList());
data.forEachRemaining(datum -> {
 double[] featureVector = datum.features().toArray();
 for (int i = 0; i < trees.length; i++) {
  DecisionTreeModel tree = trees[i];
  // ...
 }
});

Code example source: OryxProject/oryx

/**
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                           RandomForestModel model) {
 return trainPointData.mapPartitions(data -> {
   IntLongHashMap featureIndexCount = new IntLongHashMap();
   data.forEachRemaining(datum -> {
    double[] featureVector = datum.features().toArray();
    for (DecisionTreeModel tree : model.trees()) {
     org.apache.spark.mllib.tree.model.Node node = tree.topNode();
     // This logic cloned from Node.predict:
     while (!node.isLeaf()) {
      Split split = node.split().get();
      int featureIndex = split.feature();
      // Count feature
      featureIndexCount.addToValue(featureIndex, 1);
      node = nextNode(featureVector, node, split, featureIndex);
     }
    }
   });
   return Collections.singleton(featureIndexCount).iterator();
 }).reduce(RDFUpdate::merge);
}

Code example source: OryxProject/oryx

private ClusteringModel pmmlClusteringModel(KMeansModel model,
                      Map<Integer,Long> clusterSizesMap) {
 Vector[] clusterCenters = model.clusterCenters();
 List<ClusteringField> clusteringFields = new ArrayList<>();
 for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
  if (inputSchema.isActive(i)) {
   FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(i));
   ClusteringField clusteringField =
     new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
   clusteringFields.add(clusteringField);
  }
 }
 List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
 for (int i = 0; i < clusterCenters.length; i++) {
  clusters.add(new Cluster().setId(Integer.toString(i))
           .setSize(clusterSizesMap.get(i).intValue())
           .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
 }
 return new ClusteringModel(
   MiningFunction.CLUSTERING,
   ClusteringModel.ModelClass.CENTER_BASED,
   clusters.size(),
   AppPMMLUtils.buildMiningSchema(inputSchema),
   new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE).setMeasure(new SquaredEuclidean()),
   clusteringFields,
   clusters);
}

Code example source: mahmoudparsian/data-algorithms-book

static void debug(String record, Vector v) {
  THE_LOGGER.info("DEBUG started:");
  double[] d = v.toArray();
  StringBuilder builder = new StringBuilder();
  builder.append("DEBUG[record=");
  builder.append(record);
  builder.append("]:");
  for (int i=0; i < d.length; i++){
    builder.append("\t");
    builder.append(d[i]);
  }
  THE_LOGGER.info(builder.toString());
}

Code example source: flipkart-incubator/spark-transformers

@Override
public StandardScalerModelInfo getModelInfo(final StandardScalerModel from, final DataFrame df) {
  final StandardScalerModelInfo modelInfo = new StandardScalerModelInfo();
  modelInfo.setMean(from.mean().toArray());
  modelInfo.setStd(from.std().toArray());
  modelInfo.setWithMean(from.getWithMean());
  modelInfo.setWithStd(from.getWithStd());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}

Code example source: flipkart-incubator/spark-transformers

@Override
public MinMaxScalerModelInfo getModelInfo(final MinMaxScalerModel from, final DataFrame df) {
  final MinMaxScalerModelInfo modelInfo = new MinMaxScalerModelInfo();
  modelInfo.setOriginalMax(from.originalMax().toArray());
  modelInfo.setOriginalMin(from.originalMin().toArray());
  modelInfo.setMax(from.getMax());
  modelInfo.setMin(from.getMin());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}

Code example source: org.apache.spark/spark-mllib_2.11 (the same test appears verbatim in spark-mllib and spark-mllib_2.10)

@Test
public void sparseArrayConstruction() {
  @SuppressWarnings("unchecked")
  Vector v = Vectors.sparse(3, Arrays.asList(
    new Tuple2<>(0, 2.0),
    new Tuple2<>(2, 3.0)));
  assertArrayEquals(new double[]{2.0, 0.0, 3.0}, v.toArray(), 0.0);
}

Code example source: flipkart-incubator/spark-transformers

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}

Code example source: flipkart-incubator/spark-transformers

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}

Code example source: org.apache.spark/spark-mllib_2.10 (the same test appears verbatim in spark-mllib_2.11 and spark-mllib)

@Test
public void denseArrayConstruction() {
 Vector v = Vectors.dense(1.0, 2.0, 3.0);
 assertArrayEquals(new double[]{1.0, 2.0, 3.0}, v.toArray(), 0.0);
}

Code example source: flipkart-incubator/spark-transformers

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.coefficients().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold(sparkLRModel.getThreshold());
  logisticRegressionModelInfo.setProbabilityKey(sparkLRModel.getProbabilityCol());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(sparkLRModel.getFeaturesCol());
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(sparkLRModel.getPredictionCol());
  outputKeys.add(sparkLRModel.getProbabilityCol());
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}

Code example source: org.apache.spark/spark-mllib_2.10 (the same test appears verbatim in spark-mllib)

@Test
public void twoDimensionalData() {
  JavaRDD<Vector> points = jsc.parallelize(Arrays.asList(
    Vectors.dense(4, -1),
    Vectors.dense(4, 1),
    Vectors.sparse(2, new int[]{0}, new double[]{1.0})
  ), 2);

  BisectingKMeans bkm = new BisectingKMeans()
    .setK(4)
    .setMaxIterations(2)
    .setSeed(1L);
  BisectingKMeansModel model = bkm.run(points);
  Assert.assertEquals(3, model.k());
  Assert.assertArrayEquals(new double[]{3.0, 0.0}, model.root().center().toArray(), 1e-12);
  for (ClusteringTreeNode child : model.root().children()) {
    double[] center = child.center().toArray();
    if (center[0] > 2) {
      Assert.assertEquals(2, child.size());
      Assert.assertArrayEquals(new double[]{4.0, 0.0}, center, 1e-12);
    } else {
      Assert.assertEquals(1, child.size());
      Assert.assertArrayEquals(new double[]{1.0, 0.0}, center, 1e-12);
    }
  }
}
