本文整理了Java中opennlp.tools.util.Span.getType()方法的一些代码示例,展示了Span.getType()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Span.getType()方法的具体详情如下:
包路径:opennlp.tools.util.Span
类名称:Span
方法名:getType
[英]Retrieves the type of the span.
[中]检索跨度的类型。
代码示例来源:origin: apache/opennlp
/**
 * Creates a new immutable span based on an existing span, where the existing span did not
 * include the probability.
 *
 * <p>The start, the end and the type are taken over from {@code span}; only the probability
 * is replaced by {@code prob}.
 *
 * @param span the span that has no prob or the prob is incorrect and a new Span must be generated
 * @param prob the probability of the span
 */
public Span(Span span, double prob) {
this(span.start, span.end, span.getType(), prob);
}
代码示例来源:origin: apache/opennlp
/**
 * Initializes a new Span object with an existing Span which is shifted by an
 * offset.
 *
 * <p>The type and the probability are taken over unchanged from {@code span}.
 *
 * @param span the span to copy; its start and end are both moved by {@code offset}
 * @param offset the value added to the start and to the end of {@code span}
 */
public Span(Span span, int offset) {
this(span.start + offset, span.end + offset, span.getType(), span.getProb());
}
代码示例来源:origin: apache/opennlp
/**
 * Renders this span as {@code [start..end)}; when a type is set, a blank and the
 * type are appended.
 *
 * @return the human readable representation of this span
 */
@Override
public String toString() {
  final StringBuilder rendered = new StringBuilder(15)
      .append("[")
      .append(getStart())
      .append("..")
      .append(getEnd())
      .append(")");

  // untyped spans end right after the closing parenthesis
  if (getType() == null) {
    return rendered.toString();
  }
  return rendered.append(" ").append(getType()).toString();
}
代码示例来源:origin: apache/opennlp
/**
 * Reads the next sample from the wrapped stream and updates the counters on the way:
 * the sentence count, the token count and the per-type name counts.
 *
 * @return the sample exactly as read from the wrapped stream, or {@code null} if the
 *     wrapped stream returned {@code null}
 * @throws IOException if reading from the wrapped stream fails
 */
@Override
public NameSample read() throws IOException {
  final NameSample next = samples.read();
  if (next == null) {
    // nothing was read, so no counter is touched
    return null;
  }

  sentenceCount++;
  tokenCount += next.getSentence().length;

  for (Span nameSpan : next.getNames()) {
    final String type = nameSpan.getType();
    final Integer seenSoFar = nameCounters.get(type);
    // a type that was never counted before starts at one
    nameCounters.put(type, seenSoFar == null ? 1 : seenSoFar + 1);
  }
  return next;
}
代码示例来源:origin: apache/opennlp
/**
 * Computes the hash code of this span from its start, its end and its type.
 *
 * @return the hash code of the current span
 */
@Override
public int hashCode() {
  // the same three values, in the same order, that the hash is defined over
  final Object[] hashedFields = {getStart(), getEnd(), getType()};
  return Objects.hash(hashedFields);
}
代码示例来源:origin: apache/opennlp
/**
 * Encodes the given name spans as one outcome per position.
 *
 * <p>Every position starts out as {@code BioCodec.OTHER}. The start position of each
 * span is then set to {@code <type>-<BioCodec.START>} and every following position up to
 * (excluding) the span end to {@code <type>-<BioCodec.CONTINUE>}. A span without a type
 * is encoded with the type {@code default}.
 *
 * @param names the name spans to encode
 * @param length the number of outcomes to produce
 * @return the outcomes, one per position
 */
public String[] encode(Span[] names, int length) {
  final String[] outcomes = new String[length];
  int fillIndex = 0;
  while (fillIndex < outcomes.length) {
    outcomes[fillIndex++] = BioCodec.OTHER;
  }

  for (Span name : names) {
    // spans without a type are labelled "default"
    final String label = name.getType() == null ? "default" : name.getType();
    final int begin = name.getStart();

    outcomes[begin] = label + "-" + BioCodec.START;

    // everything after the first position, up to the exclusive end, continues the name
    for (int position = begin + 1; position < name.getEnd(); position++) {
      outcomes[position] = label + "-" + BioCodec.CONTINUE;
    }
  }
  return outcomes;
}
代码示例来源:origin: apache/opennlp
/**
 * Records a sample whose prediction differs from the reference.
 *
 * <p>The spans of both arguments are compared as sets: a reference span that is also
 * predicted counts as true positive for its type, a reference span that is not predicted
 * as false negative, and a predicted span missing from the reference as false positive.
 *
 * @param reference the reference sample
 * @param prediction the predicted sample
 */
public void missclassified(T reference, T prediction) {
  samples++;

  final Span[] expectedSpans = asSpanArray(reference);
  final Span[] predictedSpans = asSpanArray(prediction);

  final Set<Span> expected = new HashSet<>(Arrays.asList(expectedSpans));
  final Set<Span> predicted = new HashSet<>(Arrays.asList(predictedSpans));

  for (Span span : expected) {
    if (!predicted.contains(span)) {
      addFalseNegative(span.getType());
      continue;
    }
    addTruePositive(span.getType());
  }

  for (Span span : predicted) {
    if (expected.contains(span)) {
      // already counted as true positive above
      continue;
    }
    addFalsePositive(span.getType());
  }
}
代码示例来源:origin: apache/opennlp
/**
 * Handles the end of an element.
 *
 * <p>For a name element the most recently opened incomplete name is completed: it keeps
 * its start and type and ends at the current text size. For a content element the
 * collected text and names are stored as a new sample and the collecting state is reset.
 *
 * @param name the name of the element that ended
 */
@Override
public void endElement(String name) {
  if (NAME_ELEMENT_NAMES.contains(name)) {
    final Span opened = incompleteNames.pop();
    names.add(new Span(opened.getStart(), text.size(), opened.getType()));
  }

  if (!MucElementNames.CONTENT_ELEMENTS.contains(name)) {
    return;
  }

  final String[] tokens = text.toArray(new String[text.size()]);
  final Span[] foundNames = names.toArray(new Span[names.size()]);
  storedSamples.add(new NameSample(tokens, foundNames, isClearAdaptiveData));

  // the flag only applies to the sample that was just stored
  isClearAdaptiveData = false;

  text.clear();
  names.clear();
  isInsideContentElement = false;
}
}
代码示例来源:origin: apache/opennlp
/**
 * Compares the specified span to the current span.
 *
 * <p>Spans are ordered by ascending start. For equal starts the span with the larger end
 * comes first. For equal start and end the types decide: two typed spans are ordered by
 * the lexicographic order of their types, a typed span comes before an untyped one, and
 * two untyped spans are equal.
 *
 * @param s the span to compare with
 * @return a negative value, zero or a positive value if this span is ordered before,
 *     equal to or after {@code s}
 */
public int compareTo(Span s) {
  if (getStart() != s.getStart()) {
    return getStart() < s.getStart() ? -1 : 1;
  }

  if (getEnd() != s.getEnd()) {
    // the longer span is ordered first
    return getEnd() > s.getEnd() ? -1 : 1;
  }

  // same boundaries, compare the type
  if (getType() == null) {
    return s.getType() == null ? 0 : 1;
  }
  if (s.getType() == null) {
    return -1;
  }
  // use type lexicography order
  return getType().compareTo(s.getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Records a sample whose prediction matches the reference: every span of the reference
 * is counted as a true positive for its type.
 *
 * @param reference the reference sample
 * @param prediction the predicted sample; not read by this method
 */
public void correctlyClassified(T reference, T prediction) {
  samples++;

  // add all true positives!
  final Span[] referenceSpans = asSpanArray(reference);
  for (int index = 0; index < referenceSpans.length; index++) {
    addTruePositive(referenceSpans[index].getType());
  }
}
代码示例来源:origin: apache/opennlp
/**
 * Checks if the specified object is equal to the current span, i.e. whether it is a
 * span with the same start, the same end and an equal type.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is an equal span, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof Span)) {
    return false;
  }

  final Span other = (Span) o;
  if (getStart() != other.getStart() || getEnd() != other.getEnd()) {
    return false;
  }
  return Objects.equals(getType(), other.getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Reads the next sample from the wrapped stream and returns a copy of it that only
 * contains the names whose type is contained in {@code types}.
 *
 * @return the filtered sample, or {@code null} if the wrapped stream returned {@code null}
 * @throws IOException if reading from the wrapped stream fails
 */
public NameSample read() throws IOException {
  final NameSample next = samples.read();
  if (next == null) {
    return null;
  }

  final List<Span> kept = new ArrayList<>();
  for (Span candidate : next.getNames()) {
    if (!types.contains(candidate.getType())) {
      continue;
    }
    kept.add(candidate);
  }

  return new NameSample(next.getId(), next.getSentence(),
      kept.toArray(new Span[kept.size()]), null, next.isClearAdaptiveDataSet());
}
}
代码示例来源:origin: apache/opennlp
/**
 * Return a copy of this span with leading and trailing white spaces removed.
 *
 * <p>If the covered text consists of white space only, an empty span located at the
 * start of this span is returned.
 *
 * @param text the text this span refers to; it is read at the offsets of this span
 *
 * @return the trimmed span or the same object if already trimmed
 */
public Span trim(CharSequence text) {
  // move the start forward past leading white space
  int trimmedStart = getStart();
  while (trimmedStart < getEnd() && StringUtil.isWhitespace(text.charAt(trimmedStart))) {
    trimmedStart++;
  }

  // move the end backward past trailing white space
  int trimmedEnd = getEnd();
  while (trimmedEnd > getStart() && StringUtil.isWhitespace(text.charAt(trimmedEnd - 1))) {
    trimmedEnd--;
  }

  final boolean unchanged = trimmedStart == getStart() && trimmedEnd == getEnd();
  if (unchanged) {
    return this;
  }
  if (trimmedStart > trimmedEnd) {
    // both scans crossed each other: nothing but white space was covered
    return new Span(getStart(), getStart(), getType());
  }
  return new Span(trimmedStart, trimmedEnd, getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Runs a name finder created from the given model over all samples of the line wise
 * stream and asserts that the digest over all found names equals the expected hash.
 *
 * <p>Each found name contributes its type, start and end, concatenated and encoded
 * as UTF-8, to the digest.
 *
 * @param model the model the name finder is created from
 * @param expectedHash the expected digest, as a non-negative number
 * @throws Exception if creating the digest or reading the samples fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
    throws Exception {
  final MessageDigest hasher = MessageDigest.getInstance(HASH_ALGORITHM);
  final TokenNameFinder finder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
    for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
      for (Span found : finder.find(sample.getText())) {
        final String fingerprint = found.getType() + found.getStart() + found.getEnd();
        hasher.update(fingerprint.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  final BigInteger actualHash = new BigInteger(1, hasher.digest());
  Assert.assertEquals(expectedHash, actualHash);
}
代码示例来源:origin: apache/opennlp
/**
 * Verifies that name types containing special characters are accepted when a
 * sample is parsed, and that each type is reported unchanged.
 */
@Test
public void testTypeWithSpecialChars() throws Exception {
  final String annotated = "<START:type-1> U . S . <END> "
      + "President <START:type_2> Barack Obama <END> is considering sending "
      + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .";

  final Span[] parsedNames = NameSample.parse(annotated, false).getNames();

  Assert.assertEquals(3, parsedNames.length);
  Assert.assertEquals("type-1", parsedNames[0].getType());
  Assert.assertEquals("type_2", parsedNames[1].getType());
  Assert.assertEquals("type_3-/;.,&%$", parsedNames[2].getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Verifies that a pattern which stretches over two tokens is found in a tokenized
 * sentence and is reported with the type it was registered under.
 */
@Test
public void testFindTokenizdPattern() {
  Pattern testPattern = Pattern.compile("[0-9]+ year");
  String[] sentence = new String[]{"a", "80", "year", "b", "c"};

  Pattern[] patterns = new Pattern[]{testPattern};
  Map<String, Pattern[]> regexMap = new HashMap<>();
  String type = "match";
  regexMap.put(type, patterns);

  RegexNameFinder finder = new RegexNameFinder(regexMap);
  Span[] result = finder.find(sentence);

  // assertEquals reports the expected and the actual value on failure, and it fails
  // cleanly instead of throwing a NullPointerException when the type is null
  Assert.assertEquals(1, result.length);
  Assert.assertEquals(1, result[0].getStart());
  Assert.assertEquals(3, result[0].getEnd());
  Assert.assertEquals(type, result[0].getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Verifies that a filter configured with a single type returns a sample with
 * exactly one name, and that this name has the configured type.
 */
@Test
public void testSingleFilter() throws IOException {
  filter = new NameSampleTypeFilter(new String[] {organization}, sampleStream(text));

  final Span[] filteredNames = filter.read().getNames();

  Assert.assertEquals(1, filteredNames.length);
  Assert.assertEquals(organization, filteredNames[0].getType());
}
代码示例来源:origin: apache/opennlp
/**
 * Evaluates the given reference {@link NameSample} object.
 *
 * The names in the sentence of the reference are detected with the
 * {@link TokenNameFinder} and the scores are updated by comparing
 * the detected names with the names of the reference. Adaptive data
 * of the name finder is cleared first if the reference asks for it.
 *
 * @param reference the reference {@link NameSample}.
 *
 * @return the predicted {@link NameSample}.
 */
@Override
protected NameSample processSample(NameSample reference) {
  if (reference.isClearAdaptiveDataSet()) {
    nameFinder.clearAdaptiveData();
  }

  final Span[] predicted = nameFinder.find(reference.getSentence());
  final Span[] expected = reference.getNames();

  // OPENNLP-396 When evaluating with a file in the old format
  // the type of the span is null, but must be set to default to match
  // the output of the name finder.
  for (int index = 0; index < expected.length; index++) {
    final Span current = expected[index];
    if (current.getType() != null) {
      continue;
    }
    expected[index] = new Span(current.getStart(), current.getEnd(), "default");
  }

  fmeasure.updateScores(expected, predicted);

  return new NameSample(reference.getSentence(), predicted, reference.isClearAdaptiveDataSet());
}
代码示例来源:origin: apache/opennlp
/**
 * Train NamefinderME using OnlyWithEntitiesWithTypes.train. The goal is to check if the
 * model validator accepts it.
 * This is related to the issue OPENNLP-9
 */
@Test
public void testOnlyWithEntitiesWithTypes() throws Exception {
  // train the name finder
  final File trainingFile = new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train");
  final ObjectStream<NameSample> trainingSamples = new NameSampleDataStream(
      new PlainTextByLineStream(new MockInputStreamFactory(trainingFile), "UTF-8"));

  final TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);

  final TokenNameFinderModel trainedModel = NameFinderME.train("eng", null, trainingSamples,
      trainingParams, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

  final NameFinderME finder = new NameFinderME(trainedModel);

  // now test if it can detect the sample sentences
  final String[] tokens = "NATO United States Barack Obama".split("\\s+");
  final Span[] found = finder.find(tokens);

  Assert.assertEquals(new Span(0, 1, "organization"), found[0]); // NATO
  Assert.assertEquals(new Span(1, 3, "location"), found[1]); // United States
  Assert.assertEquals("person", found[2].getType());

  Assert.assertFalse(hasOtherAsOutcome(trainedModel));
}
代码示例来源:origin: apache/opennlp
/**
 * Train NamefinderME using OnlyWithNamesWithTypes.train.
 * The goal is to check if the model validator accepts it.
 * This is related to the issue OPENNLP-9
 */
@Test
public void testOnlyWithNamesWithTypes() throws Exception {
  // train the name finder
  final File trainingFile = new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train");
  final ObjectStream<NameSample> trainingSamples = new NameSampleDataStream(
      new PlainTextByLineStream(new MockInputStreamFactory(trainingFile), "UTF-8"));

  final TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);

  final TokenNameFinderModel trainedModel = NameFinderME.train("eng", null, trainingSamples,
      trainingParams, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

  final NameFinderME finder = new NameFinderME(trainedModel);

  // now test if it can detect the sample sentences
  final String[] tokens = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " +
      "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+");
  final Span[] found = finder.find(tokens);

  Assert.assertEquals(new Span(0, 2, "person"), found[0]);
  Assert.assertEquals(new Span(2, 4, "person"), found[1]);
  Assert.assertEquals(new Span(4, 6, "person"), found[2]);
  Assert.assertEquals("person", found[2].getType());

  Assert.assertFalse(hasOtherAsOutcome(trainedModel));
}
内容来源于网络,如有侵权,请联系作者删除!