本文整理了Java中opennlp.tools.util.Span类的一些代码示例,展示了Span类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Span类的具体详情如下:
包路径:opennlp.tools.util.Span
类名称:Span
[英]Class for storing start and end integer offsets.
[中]用于存储开始和结束整数偏移的类。
代码示例来源:origin: apache/opennlp
/**
 * Tests whether the given span lies entirely inside this span. Two spans
 * with the same start and end contain each other.
 *
 * @param s the span to test against this span
 *
 * @return {@code true} if this span starts at or before {@code s} and ends
 *     at or after {@code s}; {@code false} otherwise
 */
public boolean contains(Span s) {
  // Reject as soon as the candidate begins before this span does.
  if (s.getStart() < start) {
    return false;
  }
  return end >= s.getEnd();
}
代码示例来源:origin: apache/opennlp
Parse startToken = tokens[nameTokenSpan.getStart()];
Parse endToken = tokens[nameTokenSpan.getEnd() - 1];
Parse commonParent = startToken.getCommonParent(endToken);
Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
if (nameSpan.equals(commonParent.getSpan())) {
commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
} else {
boolean crossingKids = false;
for (Parse kid : kids) {
if (nameSpan.crosses(kid.getSpan())) {
crossingKids = true;
if (commonParent.getType().equals("NP")) {
Parse[] grandKids = kids[0].getChildren();
if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(),
tag, 1.0, commonParent.getHeadIndex()));
代码示例来源:origin: apache/opennlp
/**
 * Computes the hash code of this span from its start, end and type.
 *
 * @return the combined hash of the start offset, end offset and type
 */
@Override
public int hashCode() {
  Object[] identity = {getStart(), getEnd(), getType()};
  return Objects.hash(identity);
}
代码示例来源:origin: apache/opennlp
/**
 * Replaces every span in the array, in place, with a copy that keeps the
 * start, end and probability but carries {@code this.defaultType} as its type.
 *
 * @param names the spans to rewrite; the array itself is modified
 */
private void overrideType(Span[] names) {
  int slot = 0;
  for (Span original : names) {
    Span retyped = new Span(original.getStart(), original.getEnd(), this.defaultType,
        original.getProb());
    names[slot] = retyped;
    slot++;
  }
}
代码示例来源:origin: apache/opennlp
/**
 * Initializes a new Span object with an existing Span which is shifted by an
 * offset.
 *
 * @param span the existing span to copy from; its type and probability are
 *     passed on unchanged to the delegated constructor
 * @param offset the value added to both the start and the end of {@code span}
 */
public Span(Span span, int offset) {
this(span.start + offset, span.end + offset, span.getType(), span.getProb());
}
代码示例来源:origin: apache/opennlp
/**
 * Tests whether the given span sits at the beginning of this span: both
 * spans must share the same start offset and the given span must be
 * contained in this one.
 *
 * @param s the span to compare with this span
 *
 * @return {@code true} if {@code s} begins where this span begins and is
 *     contained in this span; {@code false} otherwise
 */
public boolean startsWith(Span s) {
  // Different start offsets can never be a prefix relationship.
  if (getStart() != s.getStart()) {
    return false;
  }
  return contains(s);
}
代码示例来源:origin: apache/opennlp
params.put(TrainingParameters.CUTOFF_PARAM, 1);
TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
"programs", "." };
Span[] names1 = nameFinder.find(sentence);
Assert.assertEquals(new Span(0, 4, "location"), names1[0]);
Assert.assertEquals(new Span(5, 7, "person"), names1[1]);
Assert.assertEquals(new Span(10, 12, "location"), names1[2]);
Assert.assertEquals(new Span(28, 30, "location"), names1[3]);
Assert.assertEquals("location", names1[0].getType());
Assert.assertEquals("person", names1[1].getType());
Assert.assertEquals("location", names1[2].getType());
Assert.assertEquals("location", names1[3].getType());
Assert.assertEquals(new Span(0, 2, "person"), names2[0]);
Assert.assertEquals(new Span(7, 15, "organization"), names2[1]);
Assert.assertEquals("person", names2[0].getType());
Assert.assertEquals("organization", names2[1].getType());
代码示例来源:origin: apache/opennlp
/**
 * Builds a new node that joins the part stored at {@code parseIndex} with
 * {@code node}, and stores the new node at that index in place of the old part.
 *
 * @param node the node to adjoin after the existing part
 * @param rules the head rules used to pick the head of the new node
 * @param parseIndex the index in {@code parts} of the part being extended
 * @return the newly created node, which now occupies {@code parseIndex}
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
  final Parse previous = parts.get(parseIndex);

  // The new node spans from the start of the existing part to the end of the added node.
  final Span joined = new Span(previous.getSpan().getStart(), node.getSpan().getEnd());
  final Parse adjoined = new Parse(this.text, joined, previous.getType(), 1,
      rules.getHead(new Parse[] {previous, node}, previous.getType()));

  adjoined.parts.add(previous);
  if (node.prevPunctSet != null) {
    adjoined.parts.addAll(node.prevPunctSet);
  }
  adjoined.parts.add(node);

  parts.set(parseIndex, adjoined);
  return adjoined;
}
代码示例来源:origin: apache/opennlp
/**
 * Recursively rebuilds a parse produced by the tagger over {@code mSentence},
 * translating its span offsets through {@code mIndexMap}.
 *
 * @param parseFromTagger the parse returned by the tagger
 * @return the final parse
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  // Look up both tagger offsets in the index map to get the span of the rebuilt node.
  final Span mapped = new Span(
      mIndexMap.get(parseFromTagger.getSpan().getStart()),
      mIndexMap.get(parseFromTagger.getSpan().getEnd()));

  final Parse result = new Parse(mSentence, mapped, parseFromTagger.getType(),
      parseFromTagger.getProb(), parseFromTagger.getHeadIndex());

  for (Parse taggerChild : parseFromTagger.getChildren()) {
    result.insert(transformParseFromTagger(taggerChild));
  }
  return result;
}
}
代码示例来源:origin: apache/opennlp
/**
 * Inserts an "NP" node after each "POS" tag node whose following tag node
 * hangs off the grandparent of the "POS" node. The inserted node starts at
 * that following tag node and extends over the subsequent tag nodes for as
 * long as each one shares its parent with the tag node before it.
 *
 * @param parse the parse whose tag nodes are examined and into which the
 *     new nodes are inserted
 */
public static void fixPossesives(Parse parse) {
  final Parse[] tagNodes = parse.getTagNodes();
  final int count = tagNodes.length;

  for (int pos = 0; pos < count; pos++) {
    if (!tagNodes[pos].getType().equals("POS")) {
      continue;
    }

    final int next = pos + 1;
    if (next >= count
        || tagNodes[next].getParent() != tagNodes[pos].getParent().getParent()) {
      continue;
    }

    final int start = tagNodes[next].getSpan().getStart();
    int end = tagNodes[next].getSpan().getEnd();

    // Extend the end over the run of following tag nodes that share a parent
    // with their predecessor.
    int scan = pos + 2;
    while (scan < count && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
      end = tagNodes[scan].getSpan().getEnd();
      scan++;
    }

    parse.insert(new Parse(parse.getText(), new Span(start, end), "NP", 1, tagNodes[next]));
  }
}
代码示例来源:origin: apache/opennlp
for (int i = span.getStart(); i < span.getEnd(); i++) {
coveredIndexes.put(i, span);
Span conflictingName = coveredIndexes.get(sentence.getStart());
conflictingName.getStart() < sentence.getStart()) {
Span lastSentence = sentences.remove(sentences.size() - 1);
sentences.add(new Span(lastSentence.getStart(), sentence.getEnd()));
String sentenceText = sentence.getCoveredText(
sample.getText()).toString();
tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i);
tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1);
if (sentence.contains(entitySpan)) {
entityIdSet.remove(ann.getId());
entitySpan = entitySpan.trim(sample.getText());
Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart());
Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd());
mappedFragments.add(new Span(nameBeginIndex, nameEndIndex, entity.getType()));
} else {
System.err.println("Dropped entity " + entity.getId() + " ("
+ entitySpan.getCoveredText(sample.getText()) + ") " + " in document "
+ sample.getId() + ", it is not matching tokenization!");
代码示例来源:origin: apache/opennlp
/**
 * Collects a typed {@code Span} for every non-POS-tag node below the given
 * parse. The given parse itself is not included.
 *
 * @param parse the parse from which to obtain the spans
 * @return an array with one span per collected node, in the order the nodes
 *     were popped from the work stack
 */
private static Span[] getConstituencySpans(final Parse parse) {
  final Stack<Parse> pending = new Stack<>();
  if (parse.getChildCount() > 0) {
    for (Parse topLevel : parse.getChildren()) {
      pending.push(topLevel);
    }
  }

  final List<Span> collected = new ArrayList<>();
  while (!pending.isEmpty()) {
    final Parse current = pending.pop();
    // POS-tag nodes are neither recorded nor descended into.
    if (current.isPosTag()) {
      continue;
    }
    final Span bounds = current.getSpan();
    collected.add(new Span(bounds.getStart(), bounds.getEnd(), current.getType()));
    for (Parse nested : current.getChildren()) {
      pending.push(nested);
    }
  }
  return collected.toArray(new Span[collected.size()]);
}
代码示例来源:origin: apache/opennlp
/**
 * Appends a daughter to this node's parts (preceded by the daughter's
 * {@code prevPunctSet}, when it has one), stretches this node's span to the
 * daughter's end, and recomputes the head and head index.
 *
 * @param daughter the node to append
 * @param rules the head rules used to recompute this node's head
 */
public void add(Parse daughter, HeadRules rules) {
  if (daughter.prevPunctSet != null) {
    parts.addAll(daughter.prevPunctSet);
  }
  parts.add(daughter);

  // Keep the current start; only the end moves out to the new daughter.
  final int spanStart = span.getStart();
  final int spanEnd = daughter.getSpan().getEnd();
  this.span = new Span(spanStart, spanEnd);

  this.head = rules.getHead(getChildren(), type);
  this.headIndex = this.head.headIndex;
}
代码示例来源:origin: apache/opennlp
String tokenString = tokens[i].getCoveredText(sentence).toString();
String escapedToken = escape(tokenString);
tokenList[i] = escapedToken;
int start = tokens[i].getStart();
mIndexMap.put(escapedStart, start);
int end = tokens[i].getEnd();
mIndexMap.put(escapedEnd, end);
mParseForTagger = new Parse(tokenizedSentence,
new Span(0, tokenizedSentence.length()), "INC", 1, null);
mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
start + token.length()),
opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
代码示例来源:origin: apache/opennlp
/**
 * Builds an {@code AnnotatorNoteAnnotation} from a tokenized line. The note
 * text is the stretch of {@code line} from the start of the token at
 * {@code START_VALUE_OFFSET} to the end of the last token.
 *
 * @param tokens the spans of the tokens within {@code line}
 * @param line the raw line the token spans refer to
 * @return the annotation built from the id, attach-to and note text
 * @throws IOException declared by the overridden method signature
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  final int noteBegin = tokens[START_VALUE_OFFSET].getStart();
  final int noteEnd = tokens[tokens.length - 1].getEnd();
  final Span noteSpan = new Span(noteBegin, noteEnd);

  final String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  final String attachedTo = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  final String note = noteSpan.getCoveredText(line).toString();

  return new AnnotatorNoteAnnotation(id, attachedTo, note);
}
}
代码示例来源:origin: apache/opennlp
String sentenceText = sentenceSpan.getCoveredText(text).toString();
String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart();
int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart();
ann.texts = textSegments.toArray(new String[textSegments.size()]);
ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
ann.type = name.getType();
代码示例来源:origin: apache/opennlp
/**
 * Removes the part at the given index and, if that part was the first or
 * the last one and parts remain, recomputes this node's span from the
 * remaining first and last parts.
 *
 * @param index the index of the part to remove
 */
public void remove(int index) {
  parts.remove(index);
  if (parts.isEmpty()) {
    return;
  }

  // After the removal, parts.size() equals the index of the former last element.
  final boolean removedEdge = index == 0 || index == parts.size();
  if (removedEdge) {
    final int newStart = parts.get(0).span.getStart();
    final int newEnd = parts.get(parts.size() - 1).span.getEnd();
    span = new Span(newStart, newEnd);
  }
}
代码示例来源:origin: apache/opennlp
/**
 * Runs a name finder built from the model over every sample of the
 * line-wise stream, feeds the type, start and end of each detected name
 * into a message digest, and asserts that the digest matches the expected hash.
 *
 * @param model the name finder model under evaluation
 * @param expectedHash the expected digest, as a non-negative {@code BigInteger}
 * @throws Exception if the digest cannot be created or reading the samples fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
    throws Exception {
  final MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  final TokenNameFinder nameFinder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
      for (Span name : nameFinder.find(line.getText())) {
        final String fingerprint = name.getType() + name.getStart() + name.getEnd();
        digest.update(fingerprint.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  final BigInteger actualHash = new BigInteger(1, digest.digest());
  Assert.assertEquals(expectedHash, actualHash);
}
代码示例来源:origin: apache/opennlp
int start = tokens[0].getStart();
int end = tokens[tokens.length - 1].getEnd();
for (Span candToken : candTokens) {
Span cSpan = candToken;
String ctok = sent.substring(cSpan.getStart(), cSpan.getEnd());
cSpan = new Span(cSpan.getStart() + start, cSpan.getEnd() + start);
if (cSpan.contains(tokens[ti])) {
if (!foundTrainingTokens) {
firstTrainingToken = ti;
else if (cSpan.getEnd() < tokens[ti].getEnd()) {
break;
else if (tokens[ti].getEnd() < cSpan.getStart()) {
" token=" + text.substring(tokens[ti].getStart(), tokens[ti].getEnd()));
int cStart = cSpan.getStart();
for (int i = tSpan.getStart() + 1; i < tSpan.getEnd(); i++) {
String[] context = cg.getContext(ctok, i - cStart);
events.add(new Event(TokenizerME.NO_SPLIT, context));
if (tSpan.getEnd() != cSpan.getEnd()) {
String[] context = cg.getContext(ctok, tSpan.getEnd() - cStart);
events.add(new Event(TokenizerME.SPLIT, context));
代码示例来源:origin: apache/opennlp
/**
 * Tests whether the given span overlaps this span.
 *
 * @param s the span to compare with this span
 *
 * @return {@code true} if one span contains the other, or if the start of
 *     either span falls inside the other; {@code false} otherwise
 */
public boolean intersects(Span s) {
  final int otherStart = s.getStart();

  if (this.contains(s)) {
    return true;
  }
  if (s.contains(this)) {
    return true;
  }
  // The other span starts inside this span.
  if (getStart() <= otherStart && otherStart < getEnd()) {
    return true;
  }
  // This span starts inside the other span.
  return otherStart <= getStart() && getStart() < s.getEnd();
}
内容来源于网络,如有侵权,请联系作者删除!