This article collects Java code examples for the org.apache.spark.mllib.linalg.Vector.toArray() method and shows how Vector.toArray() is used in practice. The examples are taken from selected open-source projects found on platforms such as GitHub, Stack Overflow, and Maven, so they make useful references. Details of the Vector.toArray() method:
Package: org.apache.spark.mllib.linalg
Class: Vector
Method: toArray
Description: converts the vector to a double[]; for a sparse vector the result is densified, with unset positions filled with 0.0 (see the sparseArrayConstruction test below).
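Before the project examples, here is a minimal self-contained sketch of the behavior they all rely on (the VectorToArrayDemo class is ours for illustration; everything else is the spark-mllib API):

import java.util.Arrays;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public class VectorToArrayDemo {
  public static void main(String[] args) {
    // Dense vector: toArray() returns the stored values in order.
    Vector dense = Vectors.dense(1.0, 2.0, 3.0);
    System.out.println(Arrays.toString(dense.toArray()));  // [1.0, 2.0, 3.0]

    // Sparse vector: toArray() densifies, filling unset indices with 0.0.
    Vector sparse = Vectors.sparse(3, new int[]{0, 2}, new double[]{2.0, 3.0});
    System.out.println(Arrays.toString(sparse.toArray()));  // [2.0, 0.0, 3.0]
  }
}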
Code example source: OryxProject/oryx

private JavaPairRDD<Integer, Iterable<double[]>> fetchClusteredPoints(JavaRDD<? extends Vector> evalData) {
  return evalData.mapToPair(vector -> {
    double closestDist = Double.POSITIVE_INFINITY;
    int minClusterID = Integer.MIN_VALUE;
    // Densify the point once, then find the nearest cluster center.
    double[] vec = vector.toArray();
    DistanceFn<double[]> distanceFn = getDistanceFn();
    Map<Integer,ClusterInfo> clusters = getClustersByID();
    for (ClusterInfo cluster : clusters.values()) {
      double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
      if (distance < closestDist) {
        closestDist = distance;
        minClusterID = cluster.getID();
      }
    }
    Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
    return new Tuple2<>(minClusterID, vec);
  }).groupByKey();
}
Code example source: OryxProject/oryx

/**
 * @param evalData points to cluster for evaluation
 * @return cluster IDs as keys, and metrics for each cluster like the count, sum of distances to centroid,
 *  and sum of squared distances
 */
JavaPairRDD<Integer,ClusterMetric> fetchClusterMetrics(JavaRDD<Vector> evalData) {
  return evalData.mapToPair(vector -> {
    double closestDist = Double.POSITIVE_INFINITY;
    int minClusterID = Integer.MIN_VALUE;
    double[] vec = vector.toArray();
    for (ClusterInfo cluster : clusters.values()) {
      double distance = distanceFn.applyAsDouble(cluster.getCenter(), vec);
      if (distance < closestDist) {
        closestDist = distance;
        minClusterID = cluster.getID();
      }
    }
    Preconditions.checkState(!Double.isInfinite(closestDist) && !Double.isNaN(closestDist));
    // Each point contributes count 1, its distance, and its squared distance; reduceByKey sums these per cluster.
    return new Tuple2<>(minClusterID, new ClusterMetric(1L, closestDist, closestDist * closestDist));
  }).reduceByKey(ClusterMetric::add);
}
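Both Oryx snippets above pass the densified point to a DistanceFn<double[]>. As an illustration of the kind of function plugged in there, a squared-Euclidean distance might look like the following (the lambda is our sketch; we are assuming DistanceFn is a ToDoubleBiFunction-style functional interface, which the applyAsDouble call above suggests):

DistanceFn<double[]> squaredEuclidean = (a, b) -> {
  double total = 0.0;
  for (int i = 0; i < a.length; i++) {
    double diff = a[i] - b[i];
    total += diff * diff;
  }
  return total;
};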
Code example source: OryxProject/oryx (truncated excerpt; a complete version of the same pattern appears in the next example)

mapToObj(i -> new IntLongHashMap()).collect(Collectors.toList());
data.forEachRemaining(datum -> {
  double[] featureVector = datum.features().toArray();
  for (int i = 0; i < trees.length; i++) {
    DecisionTreeModel tree = trees[i];
Code example source: OryxProject/oryx

/**
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                                                     RandomForestModel model) {
  return trainPointData.mapPartitions(data -> {
    IntLongHashMap featureIndexCount = new IntLongHashMap();
    data.forEachRemaining(datum -> {
      double[] featureVector = datum.features().toArray();
      for (DecisionTreeModel tree : model.trees()) {
        org.apache.spark.mllib.tree.model.Node node = tree.topNode();
        // This logic cloned from Node.predict:
        while (!node.isLeaf()) {
          Split split = node.split().get();
          int featureIndex = split.feature();
          // Count feature
          featureIndexCount.addToValue(featureIndex, 1);
          node = nextNode(featureVector, node, split, featureIndex);
        }
      }
    });
    return Collections.singleton(featureIndexCount).iterator();
  }).reduce(RDFUpdate::merge);
}
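The nextNode(...) helper is not part of this excerpt. Since the comment says the traversal is cloned from Node.predict, a plausible reconstruction based on spark-mllib's Node.predict (the body below is our sketch, not Oryx's actual code; it assumes org.apache.spark.mllib.tree.model.Split and org.apache.spark.mllib.tree.configuration.FeatureType):

private static org.apache.spark.mllib.tree.model.Node nextNode(double[] featureVector,
    org.apache.spark.mllib.tree.model.Node node, Split split, int featureIndex) {
  double featureValue = featureVector[featureIndex];
  if (split.featureType().equals(FeatureType.Continuous())) {
    // Continuous feature: go left when the value is at or below the split threshold.
    return featureValue <= split.threshold() ? node.leftNode().get() : node.rightNode().get();
  } else {
    // Categorical feature: go left when the category is in the split's left-branch set.
    return split.categories().contains(featureValue) ? node.leftNode().get() : node.rightNode().get();
  }
}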
Code example source: OryxProject/oryx

private ClusteringModel pmmlClusteringModel(KMeansModel model,
                                            Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();
  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
    if (inputSchema.isActive(i)) {
      FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(i));
      ClusteringField clusteringField =
          new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
      clusteringFields.add(clusteringField);
    }
  }
  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  for (int i = 0; i < clusterCenters.length; i++) {
    // toArray() turns each center Vector into the double[] the PMML array builder needs.
    clusters.add(new Cluster().setId(Integer.toString(i))
        .setSize(clusterSizesMap.get(i).intValue())
        .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
  }
  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE).setMeasure(new SquaredEuclidean()),
      clusteringFields,
      clusters);
}
Code example source: mahmoudparsian/data-algorithms-book

static void debug(String record, Vector v) {
  THE_LOGGER.info("DEBUG started:");
  // Densify so every component can be logged, including zeros.
  double[] d = v.toArray();
  StringBuilder builder = new StringBuilder();
  builder.append("DEBUG[record=");
  builder.append(record);
  builder.append("]:");
  for (int i = 0; i < d.length; i++) {
    builder.append("\t");
    builder.append(d[i]);
  }
  THE_LOGGER.info(builder.toString());
}
Code example source: flipkart-incubator/spark-transformers

@Override
public StandardScalerModelInfo getModelInfo(final StandardScalerModel from, final DataFrame df) {
  final StandardScalerModelInfo modelInfo = new StandardScalerModelInfo();
  // Export the fitted mean and std vectors as plain double arrays.
  modelInfo.setMean(from.mean().toArray());
  modelInfo.setStd(from.std().toArray());
  modelInfo.setWithMean(from.getWithMean());
  modelInfo.setWithStd(from.getWithStd());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}
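For context on why mean() and std() are exported as arrays: a scorer outside Spark can then reproduce the transformation directly. A minimal sketch, assuming both withMean and withStd are enabled (the helper is ours, not part of spark-transformers):

static double[] standardize(double[] x, double[] mean, double[] std) {
  double[] out = new double[x.length];
  for (int i = 0; i < x.length; i++) {
    // Standard score: subtract the fitted mean, divide by the fitted standard deviation.
    out[i] = (x[i] - mean[i]) / std[i];
  }
  return out;
}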
Code example source: flipkart-incubator/spark-transformers

@Override
public MinMaxScalerModelInfo getModelInfo(final MinMaxScalerModel from, final DataFrame df) {
  final MinMaxScalerModelInfo modelInfo = new MinMaxScalerModelInfo();
  // Export the fitted per-feature extrema as plain double arrays.
  modelInfo.setOriginalMax(from.originalMax().toArray());
  modelInfo.setOriginalMin(from.originalMin().toArray());
  modelInfo.setMax(from.getMax());
  modelInfo.setMin(from.getMin());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(from.getInputCol());
  modelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(from.getOutputCol());
  modelInfo.setOutputKeys(outputKeys);
  return modelInfo;
}
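The four exported values are exactly what Spark's MinMaxScaler rescaling formula needs: rescaled = (x - originalMin) / (originalMax - originalMin) * (max - min) + min. A minimal sketch of replaying that outside Spark (the helper is ours):

static double[] rescale(double[] x, double[] origMin, double[] origMax, double min, double max) {
  double[] out = new double[x.length];
  for (int i = 0; i < x.length; i++) {
    double range = origMax[i] - origMin[i];
    // Spark maps a constant feature (range == 0) to the midpoint of [min, max].
    double unit = range == 0.0 ? 0.5 : (x[i] - origMin[i]) / range;
    out[i] = unit * (max - min) + min;
  }
  return out;
}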
Code example source: org.apache.spark/spark-mllib_2.11 (the same test appears verbatim in the spark-mllib and spark-mllib_2.10 artifacts)

@Test
public void sparseArrayConstruction() {
  @SuppressWarnings("unchecked")
  Vector v = Vectors.sparse(3, Arrays.asList(
      new Tuple2<>(0, 2.0),
      new Tuple2<>(2, 3.0)));
  // toArray() densifies: the unset index 1 comes back as 0.0.
  assertArrayEquals(new double[]{2.0, 0.0, 3.0}, v.toArray(), 0.0);
}
Code example source: flipkart-incubator/spark-transformers

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  // Export the weight vector as a plain double array.
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
Code example source: flipkart-incubator/spark-transformers (variant without the DataFrame parameter)

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.weights().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold((double) sparkLRModel.getThreshold().get());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add("features");
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add("prediction");
  outputKeys.add("probability");
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
Code example source: org.apache.spark/spark-mllib_2.10 (the same test appears verbatim in the spark-mllib_2.11 and spark-mllib artifacts)

@Test
public void denseArrayConstruction() {
  Vector v = Vectors.dense(1.0, 2.0, 3.0);
  assertArrayEquals(new double[]{1.0, 2.0, 3.0}, v.toArray(), 0.0);
}
Code example source: flipkart-incubator/spark-transformers (newer spark.ml API: coefficients() instead of weights())

@Override
public LogisticRegressionModelInfo getModelInfo(final LogisticRegressionModel sparkLRModel, DataFrame df) {
  final LogisticRegressionModelInfo logisticRegressionModelInfo = new LogisticRegressionModelInfo();
  logisticRegressionModelInfo.setWeights(sparkLRModel.coefficients().toArray());
  logisticRegressionModelInfo.setIntercept(sparkLRModel.intercept());
  logisticRegressionModelInfo.setNumClasses(sparkLRModel.numClasses());
  logisticRegressionModelInfo.setNumFeatures(sparkLRModel.numFeatures());
  logisticRegressionModelInfo.setThreshold(sparkLRModel.getThreshold());
  logisticRegressionModelInfo.setProbabilityKey(sparkLRModel.getProbabilityCol());
  Set<String> inputKeys = new LinkedHashSet<String>();
  inputKeys.add(sparkLRModel.getFeaturesCol());
  logisticRegressionModelInfo.setInputKeys(inputKeys);
  Set<String> outputKeys = new LinkedHashSet<String>();
  outputKeys.add(sparkLRModel.getPredictionCol());
  outputKeys.add(sparkLRModel.getProbabilityCol());
  logisticRegressionModelInfo.setOutputKeys(outputKeys);
  return logisticRegressionModelInfo;
}
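To show what these exported values are for: with the coefficients and intercept pulled out via toArray(), binary logistic regression scoring can be replayed without Spark. A minimal sketch (the helper is ours, not part of spark-transformers):

static double predictProbability(double[] coefficients, double intercept, double[] features) {
  double margin = intercept;
  for (int i = 0; i < coefficients.length; i++) {
    margin += coefficients[i] * features[i];
  }
  // Sigmoid of the linear margin gives P(label = 1); compare against the exported threshold.
  return 1.0 / (1.0 + Math.exp(-margin));
}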
Code example source: org.apache.spark/spark-mllib_2.10 (the same test appears verbatim in the spark-mllib artifact)

@Test
public void twoDimensionalData() {
  JavaRDD<Vector> points = jsc.parallelize(Arrays.asList(
      Vectors.dense(4, -1),
      Vectors.dense(4, 1),
      Vectors.sparse(2, new int[]{0}, new double[]{1.0})
  ), 2);
  BisectingKMeans bkm = new BisectingKMeans()
      .setK(4)
      .setMaxIterations(2)
      .setSeed(1L);
  BisectingKMeansModel model = bkm.run(points);
  Assert.assertEquals(3, model.k());
  // Cluster centers come back as Vectors; toArray() lets the test compare them as double[].
  Assert.assertArrayEquals(new double[]{3.0, 0.0}, model.root().center().toArray(), 1e-12);
  for (ClusteringTreeNode child : model.root().children()) {
    double[] center = child.center().toArray();
    if (center[0] > 2) {
      Assert.assertEquals(2, child.size());
      Assert.assertArrayEquals(new double[]{4.0, 0.0}, center, 1e-12);
    } else {
      Assert.assertEquals(1, child.size());
      Assert.assertArrayEquals(new double[]{1.0, 0.0}, center, 1e-12);
    }
  }
}