opennlp.tools.util.Span.getType()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(11.6k)|赞(0)|评价(0)|浏览(123)

本文整理了Java中opennlp.tools.util.Span.getType()方法的一些代码示例,展示了Span.getType()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Span.getType()方法的具体详情如下:
包路径:opennlp.tools.util.Span
类名称:Span
方法名:getType

Span.getType介绍

[英]Retrieves the type of the span.
[中]获取该 Span 的类型。

代码示例

代码示例来源:origin: apache/opennlp

/**
 * Creates a new span that copies the start, end and type of an existing span
 * and assigns it the given probability. Intended for the case where the
 * existing span carries no probability, or an incorrect one, and a corrected
 * span must be generated.
 *
 * @param span the span whose start, end and type are copied
 * @param prob the probability of the new span
 */
public Span(Span span, double prob) {
 this(span.start, span.end, span.getType(), prob);
}

代码示例来源:origin: apache/opennlp

/**
 * Initializes a new Span object from an existing Span which is shifted by an
 * offset. The offset is added to both the start and the end of the existing
 * span; its type and probability are carried over unchanged.
 *
 * @param span the span to copy and shift
 * @param offset the value added to both the start and the end of {@code span}
 */
public Span(Span span, int offset) {
 this(span.start + offset, span.end + offset, span.getType(), span.getProb());
}

代码示例来源:origin: apache/opennlp

/**
 * Builds a human readable representation of this span in the form
 * {@code [start..end)}, followed by a space and the type when a type is set.
 *
 * @return the textual form of this span
 */
@Override
public String toString() {
 String interval = "[" + getStart() + ".." + getEnd() + ")";
 if (getType() == null) {
  return interval;
 }
 return interval + " " + getType();
}

代码示例来源:origin: apache/opennlp

/**
 * Reads the next sample from the wrapped stream and, when one is returned,
 * updates the running statistics: the sentence count, the token count and the
 * number of names seen per span type.
 *
 * @return the sample returned by the wrapped stream, or {@code null} if it returned none
 * @throws IOException if reading from the wrapped stream fails
 */
@Override
public NameSample read() throws IOException {
 NameSample sample = samples.read();
 if (sample == null) {
  return null;
 }
 sentenceCount++;
 tokenCount += sample.getSentence().length;
 for (Span nameSpan : sample.getNames()) {
  // a type that has not been counted yet starts at one
  Integer seenSoFar = nameCounters.get(nameSpan.getType());
  nameCounters.put(nameSpan.getType(), seenSoFar == null ? 1 : seenSoFar + 1);
 }
 return sample;
}

代码示例来源:origin: apache/opennlp

/**
 * Computes the hash code of this span from its start, end and type, using the
 * same 31-multiplier fold that {@code Objects.hash} applies to those three values.
 *
 * @return the hash code of this span
 */
@Override
public int hashCode() {
 int hash = 1;
 hash = 31 * hash + getStart();
 hash = 31 * hash + getEnd();
 // Objects.hashCode maps a null type to 0
 hash = 31 * hash + Objects.hashCode(getType());
 return hash;
}

代码示例来源:origin: apache/opennlp

/**
 * Encodes name spans as one outcome per position. Every position is first set
 * to {@code BioCodec.OTHER}. For each span, the start index then receives
 * {@code <type>-} followed by {@code BioCodec.START}, and every following index
 * before the span end receives {@code <type>-} followed by
 * {@code BioCodec.CONTINUE}. Spans without a type use {@code "default"} as type.
 *
 * @param names the spans to encode
 * @param length the size of the returned outcome array
 * @return the outcome array
 */
public String[] encode(Span[] names, int length) {
 String[] outcomes = new String[length];
 for (int i = 0; i < length; i++) {
  outcomes[i] = BioCodec.OTHER;
 }
 for (Span name : names) {
  String prefix = (name.getType() == null ? "default" : name.getType()) + "-";
  int first = name.getStart();
  outcomes[first] = prefix + BioCodec.START;
  // every remaining position of the span continues the name
  for (int i = first + 1; i < name.getEnd(); i++) {
   outcomes[i] = prefix + BioCodec.CONTINUE;
  }
 }
 return outcomes;
}

代码示例来源:origin: apache/opennlp

/**
 * Counts one sample and updates the per-type counters from a
 * reference/prediction pair. Both arguments are converted to spans and
 * de-duplicated into sets. A reference span that is also predicted counts as a
 * true positive for its type, a reference span that is not predicted counts as
 * a false negative, and a predicted span that is not a reference span counts
 * as a false positive.
 *
 * @param reference the reference object
 * @param prediction the predicted object
 */
public void missclassified(T reference, T prediction) {
 samples++;
 Set<Span> expected = new HashSet<>(Arrays.asList(asSpanArray(reference)));
 Set<Span> predicted = new HashSet<>(Arrays.asList(asSpanArray(prediction)));
 for (Span ref : expected) {
  if (!predicted.contains(ref)) {
   addFalseNegative(ref.getType());
  } else {
   addTruePositive(ref.getType());
  }
 }
 for (Span pred : predicted) {
  if (expected.contains(pred)) {
   continue;
  }
  addFalsePositive(pred.getType());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Handles the end of an element. If the element is one of
 * {@code NAME_ELEMENT_NAMES}, the span popped from {@code incompleteNames} is
 * completed with the current size of {@code text} as its end and added to the
 * collected names. If the element is one of
 * {@code MucElementNames.CONTENT_ELEMENTS}, a sample is stored from the
 * collected tokens and names, and the collected state is reset.
 *
 * @param name the name of the element that ended
 */
@Override
 public void endElement(String name) {

  if (NAME_ELEMENT_NAMES.contains(name)) {
   Span opened = incompleteNames.pop();
   names.add(new Span(opened.getStart(), text.size(), opened.getType()));
  }

  if (!MucElementNames.CONTENT_ELEMENTS.contains(name)) {
   return;
  }

  String[] tokens = text.toArray(new String[text.size()]);
  Span[] collectedNames = names.toArray(new Span[names.size()]);
  storedSamples.add(new NameSample(tokens, collectedNames, isClearAdaptiveData));

  // the flag applies to the stored sample only
  isClearAdaptiveData = false;

  text.clear();
  names.clear();
  isInsideContentElement = false;
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Compares the specified span to the current span. Spans are ordered by
 * ascending start; for equal starts the span with the larger end comes first;
 * for equal start and end the types decide: typed spans are compared by the
 * natural order of their types, and a typed span comes before an untyped one.
 *
 * @param s the span to compare with
 * @return {@code -1}, {@code 0} or {@code 1} for the start, end and null-type
 *     decisions, or the result of comparing the two types when both are set
 */
public int compareTo(Span s) {
 if (getStart() != s.getStart()) {
  return getStart() < s.getStart() ? -1 : 1;
 }
 if (getEnd() != s.getEnd()) {
  // same start: the span reaching further sorts first
  return getEnd() > s.getEnd() ? -1 : 1;
 }
 // same start and end: fall back to the type
 if (getType() == null) {
  return s.getType() == null ? 0 : 1;
 }
 if (s.getType() == null) {
  return -1;
 }
 // use type lexicography order
 return getType().compareTo(s.getType());
}

代码示例来源:origin: apache/opennlp

/**
 * Counts one sample and registers a true positive for the type of every span
 * obtained from the reference. The prediction is not inspected.
 *
 * @param reference the reference object
 * @param prediction the predicted object (unused)
 */
public void correctlyClassified(T reference, T prediction) {
 samples++;
 // every reference span counts as a true positive
 for (Span span : asSpanArray(reference)) {
  addTruePositive(span.getType());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Checks if the specified object is equal to the current span. Two spans are
 * equal when their start, end and type are equal; the type may be
 * {@code null} on both sides.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is a span with the same start, end and type
 */
@Override
public boolean equals(Object o) {
 if (this == o) {
  return true;
 }
 if (!(o instanceof Span)) {
  return false;
 }
 Span other = (Span) o;
 if (getStart() != other.getStart() || getEnd() != other.getEnd()) {
  return false;
 }
 return Objects.equals(getType(), other.getType());
}

代码示例来源:origin: apache/opennlp

/**
 * Reads the next sample from the wrapped stream and returns a copy that keeps
 * only the names whose type is contained in {@code types}. The id, sentence
 * and clear-adaptive-data flag are taken from the original sample.
 *
 * @return the filtered sample, or {@code null} if the wrapped stream returned none
 * @throws IOException if reading from the wrapped stream fails
 */
public NameSample read() throws IOException {

  NameSample sample = samples.read();

  if (sample == null) {
   return null;
  }

  List<Span> kept = new ArrayList<>();
  for (Span candidate : sample.getNames()) {
   if (!types.contains(candidate.getType())) {
    continue;
   }
   kept.add(candidate);
  }

  return new NameSample(sample.getId(), sample.getSentence(),
    kept.toArray(new Span[kept.size()]), null, sample.isClearAdaptiveDataSet());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Return a copy of this span with leading and trailing white spaces removed.
 * If the covered text consists of white space only, an empty span located at
 * the start of this span is returned.
 *
 * NOTE(review): the returned spans are created from start, end and type only,
 * so the probability of this span is not passed along; confirm that is intended.
 *
 * @param text the text whose characters are read at the offsets of this span
 *
 * @return the trimmed span or the same object if already trimmed
 */
public Span trim(CharSequence text) {
 final int start = getStart();
 final int end = getEnd();

 int trimmedStart = start;
 while (trimmedStart < end && StringUtil.isWhitespace(text.charAt(trimmedStart))) {
  trimmedStart++;
 }

 int trimmedEnd = end;
 while (trimmedEnd > start && StringUtil.isWhitespace(text.charAt(trimmedEnd - 1))) {
  trimmedEnd--;
 }

 if (trimmedStart == start && trimmedEnd == end) {
  return this;
 }
 if (trimmedStart > trimmedEnd) {
  // only white space was covered: collapse to an empty span at the start
  return new Span(start, start, getType());
 }
 return new Span(trimmedStart, trimmedEnd, getType());
}

代码示例来源:origin: apache/opennlp

/**
 * Runs a name finder built from the given model over every sample of the
 * line-wise stream, feeds type, start and end of each found name into a
 * message digest, and asserts that the digest equals the expected hash.
 *
 * @param model the model the name finder is created from
 * @param expectedHash the expected digest, interpreted as a positive number
 * @throws Exception if the digest cannot be created or reading the samples fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
  throws Exception {
 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
 TokenNameFinder nameFinder = new NameFinderME(model);
 try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
  for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
   for (Span name : nameFinder.find(line.getText())) {
    String fingerprint = name.getType() + name.getStart() + name.getEnd();
    digest.update(fingerprint.getBytes(StandardCharsets.UTF_8));
   }
  }
 }
 BigInteger actualHash = new BigInteger(1, digest.digest());
 Assert.assertEquals(expectedHash, actualHash);
}

代码示例来源:origin: apache/opennlp

/**
 * Verifies that name types containing special characters are parsed: the
 * sample must contain three names whose types match the annotated ones.
 */
@Test
public void testTypeWithSpecialChars() throws Exception {
 String annotated =
   "<START:type-1> U . S . <END> "
     + "President <START:type_2> Barack Obama <END> is considering sending "
     + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .";
 String[] expectedTypes = {"type-1", "type_2", "type_3-/;.,&%$"};

 NameSample parsedSample = NameSample.parse(annotated, false);

 Assert.assertEquals(expectedTypes.length, parsedSample.getNames().length);
 for (int i = 0; i < expectedTypes.length; i++) {
  Assert.assertEquals(expectedTypes[i], parsedSample.getNames()[i].getType());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Verifies that a pattern spanning two tokens is found in a tokenized
 * sentence: exactly one span covering tokens 1 to 3 (end exclusive) with the
 * type the pattern was registered under.
 */
@Test
public void testFindTokenizdPattern() {
 Pattern testPattern = Pattern.compile("[0-9]+ year");
 String[] sentence = new String[]{"a", "80", "year", "b", "c"};
 Pattern[] patterns = new Pattern[]{testPattern};
 Map<String, Pattern[]> regexMap = new HashMap<>();
 String type = "match";
 regexMap.put(type, patterns);
 RegexNameFinder finder =
     new RegexNameFinder(regexMap);
 Span[] result = finder.find(sentence);
 // assertEquals reports expected and actual values on failure, unlike assertTrue(a == b)
 Assert.assertEquals(1, result.length);
 Assert.assertEquals(1, result[0].getStart());
 Assert.assertEquals(3, result[0].getEnd());
 Assert.assertEquals("match", result[0].getType());
}

代码示例来源:origin: apache/opennlp

/**
 * Verifies that filtering by a single type leaves exactly one name in the
 * sample read from the filter, and that this name has the requested type.
 */
@Test
public void testSingleFilter() throws IOException {
 filter = new NameSampleTypeFilter(new String[] {organization}, sampleStream(text));

 NameSample filtered = filter.read();

 Assert.assertEquals(1, filtered.getNames().length);
 Assert.assertEquals(organization, filtered.getNames()[0].getType());
}

代码示例来源:origin: apache/opennlp

/**
 * Evaluates the given reference {@link NameSample} object.
 *
 * The {@link TokenNameFinder} is run on the sentence of the reference
 * {@link NameSample}; the names it finds are scored against the reference
 * names and returned wrapped in a new sample. The adaptive data of the name
 * finder is cleared first when the reference sample requests it.
 *
 * @param reference the reference {@link NameSample}.
 *
 * @return the predicted {@link NameSample}.
 */
@Override
protected NameSample processSample(NameSample reference) {
 if (reference.isClearAdaptiveDataSet()) {
  nameFinder.clearAdaptiveData();
 }
 Span[] predictedNames = nameFinder.find(reference.getSentence());
 Span[] references = reference.getNames();
 // OPENNLP-396: files in the old format yield spans without a type; they are
 // replaced by "default"-typed spans so they match the name finder output.
 for (int i = 0; i < references.length; i++) {
  Span current = references[i];
  if (current.getType() != null) {
   continue;
  }
  references[i] = new Span(current.getStart(), current.getEnd(), "default");
 }
 fmeasure.updateScores(references, predictedNames);
 return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet());
}

代码示例来源:origin: apache/opennlp

/**
 * Train NamefinderME using OnlyWithEntitiesWithTypes.train. The goal is to check if the model
 * validator accepts it.
 * This is related to the issue OPENNLP-9
 */
@Test
public void testOnlyWithEntitiesWithTypes() throws Exception {
 TrainingParameters params = new TrainingParameters();
 params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
 params.put(TrainingParameters.CUTOFF_PARAM, 1);
 // train the name finder; the training stream is closed as soon as the model is built
 TokenNameFinderModel nameFinderModel;
 try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
   new PlainTextByLineStream(new MockInputStreamFactory(
    new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8"))) {
  nameFinderModel = NameFinderME.train("eng", null, sampleStream,
    params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
 }
 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
 // now test if it can detect the sample sentences
 String[] sentence = "NATO United States Barack Obama".split("\\s+");
 Span[] names1 = nameFinder.find(sentence);
 Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO
 Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States
 Assert.assertEquals("person", names1[2].getType());
 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
}

代码示例来源:origin: apache/opennlp

/**
 * Train NamefinderME using OnlyWithNamesWithTypes.train.
 * The goal is to check if the model validator accepts it.
 * This is related to the issue OPENNLP-9
 */
@Test
public void testOnlyWithNamesWithTypes() throws Exception {
 TrainingParameters params = new TrainingParameters();
 params.put(TrainingParameters.ITERATIONS_PARAM, 70);
 params.put(TrainingParameters.CUTOFF_PARAM, 1);
 // train the name finder; the training stream is closed as soon as the model is built
 TokenNameFinderModel nameFinderModel;
 try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
   new PlainTextByLineStream(new MockInputStreamFactory(
    new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8"))) {
  nameFinderModel = NameFinderME.train("eng", null, sampleStream,
    params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
 }
 NameFinderME nameFinder = new NameFinderME(nameFinderModel);
 // now test if it can detect the sample sentences
 String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " +
   "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+");
 Span[] names1 = nameFinder.find(sentence);
 Assert.assertEquals(new Span(0, 2, "person"), names1[0]);
 Assert.assertEquals(new Span(2, 4, "person"), names1[1]);
 Assert.assertEquals(new Span(4, 6, "person"), names1[2]);
 Assert.assertEquals("person", names1[2].getType());
 Assert.assertFalse(hasOtherAsOutcome(nameFinderModel));
}

相关文章