opennlp.tools.util.Span类的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(10.6k)|赞(0)|评价(0)|浏览(115)

本文整理了Java中opennlp.tools.util.Span类的一些代码示例,展示了Span类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Span类的具体详情如下:
包路径:opennlp.tools.util.Span
类名称:Span

Span介绍

[英]Class for storing start and end integer offsets.
[中]用于存储起始和结束整数偏移量的类。

代码示例

代码示例来源:origin: apache/opennlp

/**
 * Checks whether the given span lies entirely inside this span. Two spans
 * with identical bounds contain each other.
 *
 * @param s The span to test against this span.
 *
 * @return true if the specified span is contained by this span; false otherwise.
 */
public boolean contains(Span s) {
 // The other span may not begin before this one ...
 if (s.getStart() < start) {
  return false;
 }
 // ... and may not end after it.
 return s.getEnd() <= end;
}

代码示例来源:origin: apache/opennlp

Parse startToken = tokens[nameTokenSpan.getStart()];
Parse endToken = tokens[nameTokenSpan.getEnd() - 1];
Parse commonParent = startToken.getCommonParent(endToken);
 Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
 if (nameSpan.equals(commonParent.getSpan())) {
  commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
 } else {
  boolean crossingKids = false;
  for (Parse kid : kids) {
   if (nameSpan.crosses(kid.getSpan())) {
    crossingKids = true;
   if (commonParent.getType().equals("NP")) {
    Parse[] grandKids = kids[0].getChildren();
    if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
     commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(),
       tag, 1.0, commonParent.getHeadIndex()));

代码示例来源:origin: apache/opennlp

/**
 * Computes the hash code of this span from its start, its end and its type.
 */
@Override
public int hashCode() {
 final int startOffset = getStart();
 final int endOffset = getEnd();
 final Object spanType = getType();
 return Objects.hash(startOffset, endOffset, spanType);
}

代码示例来源:origin: apache/opennlp

/**
 * Replaces every element of {@code names}, in place, with a new span that
 * keeps the element's start, end and {@code getProb()} value but uses
 * {@code this.defaultType} as its type.
 *
 * @param names the spans to rewrite; the array itself is modified
 */
private void overrideType(Span[] names) {
 int slot = 0;
 for (Span original : names) {
  names[slot++] = new Span(original.getStart(), original.getEnd(),
      this.defaultType, original.getProb());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Initializes a new Span object with an existing Span which is shifted by an
 * offset.
 *
 * @param span the span to copy; its {@code getType()} and {@code getProb()}
 *     values are passed through unchanged
 * @param offset the amount added to both the start and the end of {@code span}
 */
public Span(Span span, int offset) {
 this(span.start + offset, span.end + offset, span.getType(), span.getProb());
}

代码示例来源:origin: apache/opennlp

/**
 * Checks whether the specified span forms the beginning of this span, i.e.
 * both spans share the same start and the specified span is contained in
 * this span.
 *
 * @param s The span to compare with this span.
 *
 * @return true if this span starts with the specified span and contains it;
 *     false otherwise
 */
public boolean startsWith(Span s) {
 if (getStart() != s.getStart()) {
  return false;
 }
 return contains(s);
}

代码示例来源:origin: apache/opennlp

params.put(TrainingParameters.CUTOFF_PARAM, 1);
TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream,
  params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
NameFinderME nameFinder = new NameFinderME(nameFinderModel);
  "programs", "." };
Span[] names1 = nameFinder.find(sentence);
Assert.assertEquals(new Span(0, 4, "location"), names1[0]);
Assert.assertEquals(new Span(5, 7, "person"), names1[1]);
Assert.assertEquals(new Span(10, 12, "location"), names1[2]);
Assert.assertEquals(new Span(28, 30, "location"), names1[3]);
Assert.assertEquals("location", names1[0].getType());
Assert.assertEquals("person", names1[1].getType());
Assert.assertEquals("location", names1[2].getType());
Assert.assertEquals("location", names1[3].getType());
Assert.assertEquals(new Span(0, 2, "person"), names2[0]);
Assert.assertEquals(new Span(7, 15, "organization"), names2[1]);
Assert.assertEquals("person", names2[0].getType());
Assert.assertEquals("organization", names2[1].getType());

代码示例来源:origin: apache/opennlp

/**
 * Replaces the part at {@code parseIndex} with a new node that covers the
 * range from that part's start to the end of {@code node}. The new node takes
 * the replaced part's type and receives, in order, the replaced part, any
 * entries of {@code node.prevPunctSet}, and {@code node} as its parts.
 *
 * @param node the parse to adjoin after the existing part
 * @param rules the head rules used to determine the head of the new node
 * @param parseIndex index in {@code parts} of the part being replaced
 * @return the newly created node, which now sits at {@code parseIndex}
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
 final Parse previous = parts.get(parseIndex);
 final Span joinedSpan = new Span(previous.getSpan().getStart(), node.getSpan().getEnd());
 final Parse[] headCandidates = {previous, node};
 final Parse adjoined = new Parse(this.text, joinedSpan, previous.getType(), 1,
   rules.getHead(headCandidates, previous.getType()));

 adjoined.parts.add(previous);
 if (node.prevPunctSet != null) {
  adjoined.parts.addAll(node.prevPunctSet);
 }
 adjoined.parts.add(node);

 parts.set(parseIndex, adjoined);
 return adjoined;
}

代码示例来源:origin: apache/opennlp

/**
  * Rebuilds a parse produced by the tagger on top of {@code mSentence}: the
  * start and end of each node's span are looked up in {@code mIndexMap},
  * while type, probability and head index are copied from the tagger node.
  * Children are converted recursively and inserted into the result.
  *
  * @param parseFromTagger the parse node returned by the tagger
  * @return the final parse
  */
 Parse transformParseFromTagger(Parse parseFromTagger) {
  final int taggerStart = parseFromTagger.getSpan().getStart();
  final int taggerEnd = parseFromTagger.getSpan().getEnd();
  final Span mappedSpan = new Span(mIndexMap.get(taggerStart), mIndexMap.get(taggerEnd));

  final Parse result = new Parse(mSentence, mappedSpan, parseFromTagger.getType(),
    parseFromTagger.getProb(), parseFromTagger.getHeadIndex());

  for (Parse taggerChild : parseFromTagger.getChildren()) {
   result.insert(transformParseFromTagger(taggerChild));
  }
  return result;
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Inserts an "NP" node after each "POS" tag node whose following tag node
 * hangs off the POS node's grandparent. The inserted node starts at the
 * following tag node and extends over the subsequent tag nodes for as long
 * as each one shares its parent with the tag node before it.
 *
 * @param parse the parse whose tag nodes are scanned and into which the new
 *     nodes are inserted
 */
public static void fixPossesives(Parse parse) {
 final Parse[] tagNodes = parse.getTagNodes();
 final int count = tagNodes.length;
 for (int pos = 0; pos < count; pos++) {
  if (!tagNodes[pos].getType().equals("POS")) {
   continue;
  }
  final int next = pos + 1;
  if (next >= count
    || tagNodes[next].getParent() != tagNodes[pos].getParent().getParent()) {
   continue;
  }
  final int npStart = tagNodes[next].getSpan().getStart();
  int npEnd = tagNodes[next].getSpan().getEnd();
  // Extend the end while consecutive tag nodes are siblings.
  int scan = next + 1;
  while (scan < count && tagNodes[scan].getParent() == tagNodes[scan - 1].getParent()) {
   npEnd = tagNodes[scan].getSpan().getEnd();
   scan++;
  }
  parse.insert(new Parse(parse.getText(), new Span(npStart, npEnd), "NP", 1, tagNodes[next]));
 }
}

代码示例来源:origin: apache/opennlp

for (int i = span.getStart(); i < span.getEnd(); i++) {
   coveredIndexes.put(i, span);
Span conflictingName = coveredIndexes.get(sentence.getStart());
  conflictingName.getStart() < sentence.getStart()) {
 Span lastSentence = sentences.remove(sentences.size() - 1);
 sentences.add(new Span(lastSentence.getStart(), sentence.getEnd()));
String sentenceText = sentence.getCoveredText(
  sample.getText()).toString();
 tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i);
 tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1);
   if (sentence.contains(entitySpan)) {
    entityIdSet.remove(ann.getId());
    entitySpan = entitySpan.trim(sample.getText());
    Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart());
    Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd());
     mappedFragments.add(new Span(nameBeginIndex, nameEndIndex, entity.getType()));
    } else {
     System.err.println("Dropped entity " + entity.getId() + " ("
       + entitySpan.getCoveredText(sample.getText()) + ") " + " in document "
       + sample.getId() + ", it is not matching tokenization!");

代码示例来源:origin: apache/opennlp

/**
 * Collects a typed {@code Span} for every non-POS-tag node below the given
 * parse. The parse itself is not included; traversal is stack-based, so
 * the children pushed last are visited first.
 *
 * @param parse the parse from which to obtain the spans
 * @return an array containing one span per visited non-POS-tag node
 */
private static Span[] getConstituencySpans(final Parse parse) {
 final Stack<Parse> pending = new Stack<>();
 if (parse.getChildCount() > 0) {
  for (Parse topLevel : parse.getChildren()) {
   pending.push(topLevel);
  }
 }

 final List<Span> collected = new ArrayList<>();
 while (!pending.isEmpty()) {
  final Parse current = pending.pop();
  if (current.isPosTag()) {
   // POS tag nodes contribute no span and are not descended into.
   continue;
  }
  final Span bounds = current.getSpan();
  collected.add(new Span(bounds.getStart(), bounds.getEnd(), current.getType()));
  for (Parse nested : current.getChildren()) {
   pending.push(nested);
  }
 }
 return collected.toArray(new Span[0]);
}

代码示例来源:origin: apache/opennlp

/**
 * Appends {@code daughter} (preceded by the entries of its
 * {@code prevPunctSet}, when that is non-null) to this node's parts, extends
 * this node's span to end where the daughter ends, and recomputes the head
 * and head index.
 *
 * @param daughter the parse to append
 * @param rules the head rules used to recompute the head
 */
public void add(Parse daughter, HeadRules rules) {
 if (daughter.prevPunctSet != null) {
  parts.addAll(daughter.prevPunctSet);
 }
 parts.add(daughter);

 final int spanStart = span.getStart();
 final int spanEnd = daughter.getSpan().getEnd();
 span = new Span(spanStart, spanEnd);

 head = rules.getHead(getChildren(), type);
 headIndex = head.headIndex;
}

代码示例来源:origin: apache/opennlp

String tokenString = tokens[i].getCoveredText(sentence).toString();
 String escapedToken = escape(tokenString);
 tokenList[i] = escapedToken;
 int start = tokens[i].getStart();
 mIndexMap.put(escapedStart, start);
 int end = tokens[i].getEnd();
 mIndexMap.put(escapedEnd, end);
mParseForTagger = new Parse(tokenizedSentence,
  new Span(0, tokenizedSentence.length()), "INC", 1, null);
 mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
   start + token.length()),
   opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));

代码示例来源:origin: apache/opennlp

@Override
 BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // The note text runs from the token at START_VALUE_OFFSET to the end of the last token.
  final int noteStart = tokens[START_VALUE_OFFSET].getStart();
  final int noteEnd = tokens[tokens.length - 1].getEnd();
  final Span noteSpan = new Span(noteStart, noteEnd);

  final String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  final String attachedTo = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  final String note = noteSpan.getCoveredText(line).toString();

  return new AnnotatorNoteAnnotation(id, attachedTo, note);
 }
}

代码示例来源:origin: apache/opennlp

String sentenceText = sentenceSpan.getCoveredText(text).toString();
String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);
  int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart();
  int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart();
  ann.texts = textSegments.toArray(new String[textSegments.size()]);
  ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
  ann.type = name.getType();

代码示例来源:origin: apache/opennlp

/**
 * Removes the part at {@code index}. When parts remain and the removed part
 * was the first or the last one, the span is recomputed from the start of
 * the first remaining part to the end of the last remaining part.
 *
 * @param index position of the part to remove
 */
public void remove(int index) {
 parts.remove(index);
 if (parts.isEmpty()) {
  return;
 }
 // After the removal, the current size equals the index of the original last element.
 final int remaining = parts.size();
 final boolean removedAtEdge = index == 0 || index == remaining;
 if (removedAtEdge) {
  span = new Span(parts.get(0).span.getStart(), parts.get(remaining - 1).span.getEnd());
 }
}

代码示例来源:origin: apache/opennlp

/**
 * Runs a name finder built from {@code model} over every sample of the
 * line-wise stream, feeds each found name's type, start and end into a
 * message digest, and asserts that the resulting digest equals
 * {@code expectedHash}.
 *
 * @param model the name finder model to evaluate
 * @param expectedHash the expected digest value, as a non-negative integer
 * @throws Exception if creating the digest or reading the stream fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
  throws Exception {
 final MessageDigest hasher = MessageDigest.getInstance(HASH_ALGORITHM);
 final TokenNameFinder finder = new NameFinderME(model);

 try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
  for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
   for (Span found : finder.find(sample.getText())) {
    final String fingerprint = found.getType() + found.getStart() + found.getEnd();
    hasher.update(fingerprint.getBytes(StandardCharsets.UTF_8));
   }
  }
 }

 final BigInteger actualHash = new BigInteger(1, hasher.digest());
 Assert.assertEquals(expectedHash, actualHash);
}

代码示例来源:origin: apache/opennlp

int start = tokens[0].getStart();
int end = tokens[tokens.length - 1].getEnd();
for (Span candToken : candTokens) {
 Span cSpan = candToken;
 String ctok = sent.substring(cSpan.getStart(), cSpan.getEnd());
 cSpan = new Span(cSpan.getStart() + start, cSpan.getEnd() + start);
   if (cSpan.contains(tokens[ti])) {
    if (!foundTrainingTokens) {
     firstTrainingToken = ti;
   else if (cSpan.getEnd() < tokens[ti].getEnd()) {
    break;
   else if (tokens[ti].getEnd() < cSpan.getStart()) {
      " token=" + text.substring(tokens[ti].getStart(), tokens[ti].getEnd()));
    int cStart = cSpan.getStart();
    for (int i = tSpan.getStart() + 1; i < tSpan.getEnd(); i++) {
     String[] context = cg.getContext(ctok, i - cStart);
     events.add(new Event(TokenizerME.NO_SPLIT, context));
    if (tSpan.getEnd() != cSpan.getEnd()) {
     String[] context = cg.getContext(ctok, tSpan.getEnd() - cStart);
     events.add(new Event(TokenizerME.SPLIT, context));

代码示例来源:origin: apache/opennlp

/**
 * Checks whether the specified span overlaps this span. Containment in
 * either direction counts as an intersection; otherwise the start of one
 * span must fall at or after the other's start and before the other's end.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the spans overlap; false otherwise.
 */
public boolean intersects(Span s) {
 final int otherStart = s.getStart();
 if (this.contains(s) || s.contains(this)) {
  return true;
 }
 // s begins inside this span
 if (getStart() <= otherStart && otherStart < getEnd()) {
  return true;
 }
 // this span begins inside s
 return otherStart <= getStart() && getStart() < s.getEnd();
}

相关文章