cc.mallet.types.InstanceList.getDataAlphabet()方法的使用及代码示例

x33g5p2x  于2022-01-21 转载在 其他  
字(6.9k)|赞(0)|评价(0)|浏览(109)

本文整理了Java中cc.mallet.types.InstanceList.getDataAlphabet()方法的一些代码示例,展示了InstanceList.getDataAlphabet()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。InstanceList.getDataAlphabet()方法的具体详情如下:
包路径:cc.mallet.types.InstanceList
类名称:InstanceList
方法名:getDataAlphabet

InstanceList.getDataAlphabet介绍

[英]Returns the Alphabet mapping features of the data to integers.
[中]返回将数据特征映射为整数的Alphabet。

代码示例

代码示例来源:origin: cc.mallet/mallet

/**
 * Builds an InfoGain ranking over the data alphabet of <code>ilist</code>,
 * using the values returned by <code>calcInfoGains (ilist)</code>.
 *
 * After the superclass constructor returns, the static fields
 * <code>staticBaseEntropy</code> and <code>staticBaseLabelDistribution</code>
 * are copied into this instance's fields.
 * NOTE(review): presumably <code>calcInfoGains</code> fills those static fields
 * as a side effect, which would make concurrent construction unsafe -- confirm.
 *
 * @param ilist the instances whose data alphabet and info gains are used
 */
public InfoGain (InstanceList ilist)
{
  super (ilist.getDataAlphabet(), calcInfoGains (ilist));
  // Copy the values left in the static fields into per-instance state.
  baseEntropy = staticBaseEntropy;
  baseLabelDistribution = staticBaseLabelDistribution;
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Stores the given per-label feature selections.
 *
 * A <code>null</code> array is accepted and stored without any check.
 * Otherwise every entry must report an alphabet that is the very same
 * object (reference comparison) as this object's data alphabet.
 *
 * @param selectedFeatures the selections to store, or <code>null</code>
 * @throws IllegalArgumentException if any entry's alphabet is not this
 *         object's data alphabet; nothing is stored in that case
 */
public void setPerLabelFeatureSelection (FeatureSelection[] selectedFeatures)
{
  if (selectedFeatures != null) {
    for (FeatureSelection selection : selectedFeatures) {
      // Identity comparison on purpose: the alphabets must be the same object.
      if (selection.getAlphabet() != getDataAlphabet()) {
        throw new IllegalArgumentException ("Vocabularies do not match");
      }
    }
  }
  perLabelFeatureSelection = selectedFeatures;
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

public FeatureCountTool (InstanceList instances) {
  this.instances = instances;
  numFeatures = instances.getDataAlphabet().size();
  featureCounts = new double[numFeatures];
  documentFrequencies = new int[numFeatures];
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Builds an InfoGain ranking over the data alphabet of <code>ilist</code>,
 * using the values returned by <code>calcInfoGains (ilist)</code>.
 *
 * Once the superclass constructor has run, <code>staticBaseEntropy</code> and
 * <code>staticBaseLabelDistribution</code> are copied into instance fields.
 * NOTE(review): this looks like a static-field hand-off from
 * <code>calcInfoGains</code>; if so it is not safe under concurrent
 * construction -- confirm against the rest of the class.
 *
 * @param ilist the instances whose data alphabet and info gains are used
 */
public InfoGain (InstanceList ilist)
{
  super (ilist.getDataAlphabet(), calcInfoGains (ilist));
  // Snapshot the static values into this instance.
  baseEntropy = staticBaseEntropy;
  baseLabelDistribution = staticBaseLabelDistribution;
}

代码示例来源:origin: com.github.steveash.mallet/mallet

/**
 * Writes one line per feature of the data alphabet to the named file, in the
 * form <code>feature&lt;TAB&gt;coefficient</code> with eight decimal places.
 *
 * The writer is closed in a <code>finally</code> block so the file handle is
 * released even if the loop fails part-way (for example when
 * <code>coefficients</code> has fewer entries than the alphabet).
 *
 * @param filename path of the file to create or overwrite
 * @throws IOException if the file cannot be opened for writing
 */
public void printParameters(String filename) throws IOException {
  Alphabet alphabet = data.getDataAlphabet();
  PrintWriter out = new PrintWriter(filename);
  try {
    for (int feature = 0; feature < alphabet.size(); feature++) {
      out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
    }
  } finally {
    // Always release the file handle, including on an exception in the loop.
    out.close();
  }
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Dumps the coefficients to a text file: one line per data-alphabet entry,
 * containing the feature object, a tab, and the coefficient formatted with
 * eight decimal places.
 *
 * The writer is closed in a <code>finally</code> block, so an exception
 * thrown while formatting (such as an out-of-range index into
 * <code>coefficients</code>) no longer leaks the open file.
 *
 * @param filename path of the file to create or overwrite
 * @throws IOException if the file cannot be opened for writing
 */
public void printParameters(String filename) throws IOException {
  Alphabet alphabet = data.getDataAlphabet();
  PrintWriter out = new PrintWriter(filename);
  try {
    for (int feature = 0; feature < alphabet.size(); feature++) {
      out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
    }
  } finally {
    // Close on every path so the file handle is never left open.
    out.close();
  }
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Writes every feature of the data alphabet together with its coefficient to
 * the named file, one tab-separated pair per line, coefficient printed with
 * eight decimal places.
 *
 * Closing happens in a <code>finally</code> block: previously an exception
 * inside the loop skipped <code>close()</code> and left the file open.
 *
 * @param filename path of the file to create or overwrite
 * @throws IOException if the file cannot be opened for writing
 */
public void printParameters(String filename) throws IOException {
  Alphabet alphabet = data.getDataAlphabet();
  PrintWriter out = new PrintWriter(filename);
  try {
    for (int feature = 0; feature < alphabet.size(); feature++) {
      out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
    }
  } finally {
    // Guarantees the writer is flushed and closed even when the loop throws.
    out.close();
  }
}

代码示例来源:origin: com.github.steveash.jg2p/jg2p-core

/**
 * Computes a ranked feature vector of gradient gains for the given training
 * data on the given trained CRF. The instance list must carry its target
 * labels as LabelSequence.
 *
 * @param ilist the training instances
 * @param crf   the trained CRF
 * @return a ranking over the data alphabet of {@code ilist}, one value per feature
 */
public static RankedFeatureVector gradientGainFrom(InstanceList ilist, CRF crf) {
  // One slot per entry of the data alphabet; fillResults receives this array as its gains argument.
  final double[] gainPerFeature = new double[ilist.getDataAlphabet().size()];
  fillResults(ilist, crf, gainPerFeature, null, null);
  return new RankedFeatureVector(ilist.getDataAlphabet(), gainPerFeature);
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Stores the given feature selection.
 *
 * A <code>null</code> selection, or a selection whose alphabet is
 * <code>null</code>, is stored without any check. Otherwise the selection's
 * alphabet must be the very same object as this object's data alphabet.
 *
 * @param selectedFeatures the selection to store, or <code>null</code>
 * @throws IllegalArgumentException if the selection has a non-null alphabet
 *         that is not this object's data alphabet
 */
public void setFeatureSelection (FeatureSelection selectedFeatures)
{
  // xxx A null vocabulary is tolerated here; the original note points to CRF3.java.
  if (selectedFeatures != null && selectedFeatures.getAlphabet() != null) {
    if (selectedFeatures.getAlphabet() != getDataAlphabet()) {
      throw new IllegalArgumentException ("Vocabularies do not match");
    }
  }
  featureSelection = selectedFeatures;
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Resolves <code>feature</code> to its index in the data alphabet of
 * <code>ilist</code> and delegates to the index-based overload.
 *
 * @param feature the feature object to look up
 * @return whatever the index-based overload returns for that feature
 * @throws IllegalArgumentException if the lookup yields -1
 */
public ArrayList getInstancesWithFeature (Object feature)
{
  // The 'false' flag is presumably "do not add if absent" -- confirm against Alphabet.lookupIndex.
  final int featureIndex = ilist.getDataAlphabet().lookupIndex (feature, false);
  if (featureIndex != -1) {
    return getInstancesWithFeature (featureIndex);
  }
  throw new IllegalArgumentException ("Feature "+feature+" not contained in InvertedIndex");
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Replaces the current feature selection.
 *
 * The argument is validated only when both it and its alphabet are non-null;
 * in that case the alphabet must be identical (same reference) to this
 * object's data alphabet.
 *
 * @param selectedFeatures the new selection; may be <code>null</code>
 * @throws IllegalArgumentException if a non-null alphabet differs from this
 *         object's data alphabet
 */
public void setFeatureSelection (FeatureSelection selectedFeatures)
{
  // xxx Null vocabularies are let through unchecked; the original note refers to CRF3.java.
  final boolean vocabularyMismatch =
      selectedFeatures != null
      && selectedFeatures.getAlphabet() != null
      && selectedFeatures.getAlphabet() != getDataAlphabet();
  if (vocabularyMismatch) {
    throw new IllegalArgumentException ("Vocabularies do not match");
  }
  featureSelection = selectedFeatures;
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Creates a co-occurrence counter for the given instances.
 *
 * Allocates a document-frequency slot and an empty
 * <code>TIntIntHashMap</code> for every entry of the data alphabet.
 *
 * @param instances the instance list to be counted
 */
public FeatureCooccurrenceCounter (InstanceList instances) {
  this.instances = instances;
  this.numFeatures = instances.getDataAlphabet().size();

  this.documentFrequencies = new int[numFeatures];
  this.featureFeatureCounts = new TIntIntHashMap[numFeatures];
  // Every feature starts with its own empty map.
  for (int slot = 0; slot < numFeatures; slot++) {
    this.featureFeatureCounts[slot] = new TIntIntHashMap();
  }
}

代码示例来源:origin: cc.mallet/mallet

public FeatureCooccurrenceCounter (InstanceList instances) {
  this.instances = instances;
  numFeatures = instances.getDataAlphabet().size();
  featureFeatureCounts = new TIntIntHashMap[numFeatures];
  for (int feature = 0; feature < numFeatures; feature++) {
    featureFeatureCounts[feature] = new TIntIntHashMap();
  }
  documentFrequencies = new int[numFeatures];
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Builds a KLGain ranking over the data alphabet of <code>ilist</code>.
 *
 * The per-feature values come from <code>calcKLGains</code>, which is given
 * the instance list and the label vectors that
 * <code>getLabelVectorsFromClassifications</code> derives from
 * <code>classifications</code>.
 *
 * @param ilist           the instances to rank features for
 * @param classifications classifications converted to label vectors;
 *                        presumably one per instance -- confirm
 */
public KLGain (InstanceList ilist, Classification[] classifications)
{
  super (ilist.getDataAlphabet(),
         calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Builds a GradientGain ranking over the data alphabet of <code>ilist</code>.
 *
 * The per-feature values come from <code>calcGradientGains</code>, which is
 * given the instance list and the label vectors that
 * <code>getLabelVectorsFromClassifications</code> derives from
 * <code>classifications</code>.
 *
 * @param ilist           the instances to rank features for
 * @param classifications classifications converted to label vectors;
 *                        presumably one per instance -- confirm
 */
public GradientGain (InstanceList ilist, Classification[] classifications)
{
  super (ilist.getDataAlphabet(),
         calcGradientGains (ilist, getLabelVectorsFromClassifications(classifications)));
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Creates a KLGain ranking whose alphabet is the data alphabet of
 * <code>ilist</code> and whose values are the result of
 * <code>calcKLGains</code> applied to the instance list and the label
 * vectors extracted from <code>classifications</code>.
 *
 * @param ilist           the instances to rank features for
 * @param classifications source of the label vectors passed to
 *                        <code>calcKLGains</code>
 */
public KLGain (InstanceList ilist, Classification[] classifications)
{
  super (ilist.getDataAlphabet(),
         calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Overwrites the <code>Instance</code> stored at <code>index</code> with
 * <code>instance</code>.
 *
 * When assertions are enabled, the new instance's data and target alphabets
 * must be <code>equals</code> to this list's corresponding alphabets.
 *
 * @param index    position of the element to replace
 * @param instance the replacement
 */
public void setInstance (int index, Instance instance)
{
  assert getDataAlphabet().equals(instance.getDataAlphabet());
  assert getTargetAlphabet().equals(instance.getTargetAlphabet());
  set(index, instance);
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Builds a new alphabet containing only the features whose total count lies
 * in <code>[minCount, maxCount]</code> and whose document frequency lies in
 * <code>[minDocs, maxDocs]</code> (all bounds inclusive).
 *
 * Features are visited in index order, so surviving entries are added to the
 * result in the same relative order as in the input alphabet.
 *
 * @param minDocs  smallest accepted document frequency
 * @param maxDocs  largest accepted document frequency
 * @param minCount smallest accepted feature count
 * @param maxCount largest accepted feature count
 * @return a fresh alphabet holding the surviving feature objects
 */
public Alphabet getPrunedAlphabet(int minDocs, int maxDocs, int minCount, int maxCount) {
  Alphabet source = instances.getDataAlphabet();
  Alphabet pruned = new Alphabet();

  for (int type = 0; type < numFeatures; type++) {
    // The count filter is evaluated first; document frequency is only consulted if it passes.
    boolean countInRange = featureCounts[type] >= minCount && featureCounts[type] <= maxCount;
    if (!countInRange) {
      continue;
    }
    boolean docsInRange = documentFrequencies[type] >= minDocs && documentFrequencies[type] <= maxDocs;
    if (docsInRange) {
      pruned.lookupIndex(source.lookupObject(type));
    }
  }

  return pruned;
}

代码示例来源:origin: de.julielab/jcore-mallet-2.0.9

/**
 * Builds one <code>InfoGain</code> per entry of the target alphabet of
 * <code>ilist</code>.
 *
 * Row <code>i</code> of the matrix returned by
 * <code>calcPerLabelInfoGains (ilist)</code> supplies the values for the
 * <code>InfoGain</code> stored at index <code>i</code>; all of them share the
 * data alphabet of <code>ilist</code>.
 *
 * @param ilist the instances to compute per-label info gains for
 */
public PerLabelInfoGain (InstanceList ilist)
{
  final double[][] gainsByLabel = calcPerLabelInfoGains (ilist);
  final Alphabet vocabulary = ilist.getDataAlphabet();
  final int labelCount = ilist.getTargetAlphabet().size();

  ig = new InfoGain[labelCount];
  for (int li = 0; li < labelCount; li++) {
    ig[li] = new InfoGain (vocabulary, gainsByLabel[li]);
  }
}

代码示例来源:origin: cc.mallet/mallet

/**
 * Creates a childless node for the given instances.
 *
 * An <code>InfoGain</code> is computed over <code>ilist</code>; the node keeps
 * the index returned by <code>getMaxValuedIndexIn (fs)</code>, the gain value
 * at that index, and the base label distribution and base entropy reported by
 * the <code>InfoGain</code>.
 *
 * @param ilist  the instances that reach this node
 * @param parent the parent node; presumably <code>null</code> for a root -- confirm
 * @param fs     the feature selection passed to <code>getMaxValuedIndexIn</code>
 */
public Node (InstanceList ilist, Node parent, FeatureSelection fs)
{
  InfoGain gain = new InfoGain (ilist);

  // Split feature and its score.
  this.featureIndex = gain.getMaxValuedIndexIn (fs);
  this.infoGain = gain.value(this.featureIndex);

  // Label statistics reported by the InfoGain.
  this.labeling = gain.getBaseLabelDistribution();
  this.labelEntropy = gain.getBaseEntropy();

  // Structural links; both child slots start out null.
  this.ilist = ilist;
  this.dictionary = ilist.getDataAlphabet();
  this.parent = parent;
  this.child0 = null;
  this.child1 = null;
}

相关文章

微信公众号

最新文章

更多