Usage of the edu.stanford.nlp.pipeline.Annotation.get() method, with code examples

This article collects a number of Java code examples for the edu.stanford.nlp.pipeline.Annotation.get() method and shows how Annotation.get() is used in practice. The examples are drawn from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should make useful references. Details of the Annotation.get() method are as follows:
Package path: edu.stanford.nlp.pipeline.Annotation
Class name: Annotation
Method name: get

About Annotation.get

Annotation is CoreNLP's CoreMap-based container for a document's annotations: a typed map from annotation key classes to values. get(Class) returns the value stored under the given key (for example, the list of sentences under CoreAnnotations.SentencesAnnotation.class), or null if no annotator has set that key.
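
Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern. It is illustrative, not code from the projects below: the sample text is made up, and only the tokenize and ssplit annotators are loaded so the example stays small.

import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class AnnotationGetDemo {
 public static void main(String[] args) {
  // build a pipeline with tokenization and sentence splitting only
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

  // annotate some text; each annotator stores its output under a typed key
  Annotation doc = new Annotation("Stanford is in California. CoreNLP is a library.");
  pipeline.annotate(doc);

  // get() looks the stored value up again by its key class
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
   for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    System.out.println(token.word());
   }
  }
 }
}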

Code examples

Code example source: stanfordnlp/CoreNLP

public Document(InputDoc input, List<List<Mention>> mentions) {
 this();
 this.annotation = input.annotation;
 this.predictedMentions = mentions;
 this.goldMentions = input.goldMentions;
 this.docInfo = input.docInfo;
 this.numSentences = input.annotation.get(SentencesAnnotation.class).size();
 this.conllDoc = input.conllDoc;   // null if it's not conll input
}

Code example source: stanfordnlp/CoreNLP

protected int getQuoteParagraph(CoreMap quote) {
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 return sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class)).get(CoreAnnotations.ParagraphIndexAnnotation.class);
}

Code example source: stanfordnlp/CoreNLP

public List<Integer> scanForAnimates(Pair<Integer, Integer> span) {
 List<Integer> animateIndices = new ArrayList<>();
 List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
 for (int i = span.first; i <= span.second && i < tokens.size(); i++) {
  CoreLabel token = tokens.get(i);
  if (animacySet.contains(token.word())) {
   animateIndices.add(i);
  }
 }
 return animateIndices;
}

Code example source: stanfordnlp/CoreNLP

public static Temporal parseOrNull(String str) {
 Annotation doc = new Annotation(str);
 pipeline.annotate(doc);
 if (doc.get(CoreAnnotations.SentencesAnnotation.class) == null) {
  return null;
 }
 if (doc.get(CoreAnnotations.SentencesAnnotation.class).isEmpty()) {
  return null;
 }
 List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
 if (timexAnnotations.size() > 1) {
  return null;
 } else if (timexAnnotations.isEmpty()) {
  return null;
 }
 CoreMap timex = timexAnnotations.get(0);
 if (timex.get(TimeExpression.Annotation.class) == null) {
  return null;
 } else {
  return timex.get(TimeExpression.Annotation.class).getTemporal();
 }
}

Code example source: stanfordnlp/CoreNLP

public MentionData getClosestMention(CoreMap quote) {
  MentionData closestBackward = findClosestMentionInSpanBackward(new Pair<>(0, quote.get(CoreAnnotations.TokenBeginAnnotation.class) - 1));
  MentionData closestForward = findClosestMentionInSpanForward(new Pair<>(quote.get(CoreAnnotations.TokenEndAnnotation.class), doc.get(CoreAnnotations.TokensAnnotation.class).size() - 1));
  int backDistance = quote.get(CoreAnnotations.TokenBeginAnnotation.class) - closestBackward.end;
  int forwardDistance = closestForward.begin - quote.get(CoreAnnotations.TokenEndAnnotation.class) + 1;
  if(backDistance < forwardDistance) {
    return closestBackward;
  } else {
    return closestForward;
  }
}

Code example source: stanfordnlp/CoreNLP

public String tokenRangeToString(int token_idx) {
 return doc.get(CoreAnnotations.TokensAnnotation.class).get(token_idx).word();
}

Code example source: stanfordnlp/CoreNLP

/**
 * Set a running, document-wide index on each token in the document.
 * @param doc the document whose tokens are indexed
 */
private static void setTokenIndices(Document doc) {
 int token_index = 0;
 for (CoreMap sent : doc.annotation.get(SentencesAnnotation.class)) {
  for (CoreLabel token : sent.get(TokensAnnotation.class)) {
   token.set(TokenBeginAnnotation.class, token_index++);
  }
 }
}

Code example source: stanfordnlp/CoreNLP

private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
 List<CoreMap> coreMaps = doc.get(CoreAnnotations.SentencesAnnotation.class);
 int numSentences = goldMentions.size();
 for (int i = 0; i < numSentences; i++) {
  CoreMap coreMap = coreMaps.get(i);
  List<CoreLabel> words = coreMap.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = coreMap.get(TreeCoreAnnotations.TreeAnnotation.class);
  List<Mention> goldMentionsSent = goldMentions.get(i);
  List<Pair<Integer,Integer>> goldMentionsSpans = extractSpans(goldMentionsSent);
  for (Pair<Integer,Integer> mentionSpan : goldMentionsSpans) {
   logger.finer("RECALL ERROR\n");
   logger.finer(coreMap + "\n");
   for (int x = mentionSpan.first; x < mentionSpan.second; x++) {
    logger.finer(words.get(x).value() + " ");
   }
   logger.finer("\n" + tree + "\n");
  }
 }
}

Code example source: stanfordnlp/CoreNLP

private static void mentionReordering(Document doc, HeadFinder headFinder) throws Exception {
 List<List<Mention>> mentions = doc.predictedMentions;
 List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);
 for (int i = 0; i < sentences.size(); i++) {
  List<Mention> mentionsInSent = mentions.get(i);
  mentions.set(i, mentionReorderingBySpan(mentionsInSent));
 }
}

Code example source: stanfordnlp/CoreNLP

private static int getQuoteChapter(Annotation doc, CoreMap quote) {
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 return sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class)).get(ChapterAnnotator.ChapterAnnotation.class);
}

Code example source: stanfordnlp/CoreNLP

public void oneNameSentence(Annotation doc) {
 List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
 for (CoreMap quote : quotes) {
  if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) {
   continue;
  }
  Pair<Integer, Integer> range = QuoteAttributionUtils.getRemainderInSentence(doc, quote);
  if (range == null) {
   continue;
  }

  Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices = scanForNames(range);
  ArrayList<String> names = namesAndNameIndices.first;
  ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second;

  ArrayList<Integer> pronounsIndices = scanForPronouns(range);
  if (names.size() == 1) {
   List<Person> p = characterMap.get(names.get(0));

   // guess if exactly one name
   if (p.size() == 1 && pronounsIndices.isEmpty()) {
    fillInMention(quote, tokenRangeToString(nameIndices.get(0)), nameIndices.get(0).first, nameIndices.get(0).second,
        sieveName, NAME);
   }
  }
 }
}

Code example source: stanfordnlp/CoreNLP

/** {@inheritDoc} */
@Override
public void annotate(Annotation annotation) {
 super.annotate(annotation);
 List<CoreLabel> words = annotation.get(CoreAnnotations.TokensAnnotation.class);
 if (words != null) {
  numWords += words.size();
 }
}

Code example source: stanfordnlp/CoreNLP

public boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
 List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
 CoreLabel startToken = tokens.get(tokenRange.first());
 CoreLabel endToken = tokens.get(tokenRange.second());
 int startTokenCharBegin = startToken.beginPosition();
 int endTokenCharEnd = endToken.endPosition();
 return (startTokenCharBegin <= charIndex && charIndex <= endTokenCharEnd);
}

Code example source: stanfordnlp/CoreNLP

public static void addEnhancedSentences(Annotation doc) {
 // For every sentence that begins a paragraph: append it to the previous sentence and see whether the
 // sentence splitter would make a single sentence out of the pair. If so, add the merged result as an
 // extra "enhanced" sentence, for use by sieves that work with augmented sentences.
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 WordToSentenceProcessor wsp =
     new WordToSentenceProcessor(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER); // a sentence splitter that never splits on newline
 for (int i = 1; i < sentences.size(); i++) {
  CoreMap sentence = sentences.get(i);
  CoreMap prevSentence = sentences.get(i - 1);
  List<CoreLabel> tokensConcat = new ArrayList<>();
  tokensConcat.addAll(prevSentence.get(CoreAnnotations.TokensAnnotation.class));
  tokensConcat.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
  List<List<CoreLabel>> sentenceTokens = wsp.process(tokensConcat);
  if (sentenceTokens.size() == 1) { // wsp would have put them into a single sentence -> add the enhanced sentence
   sentence.set(EnhancedSentenceAnnotation.class, constructSentence(sentenceTokens.get(0), prevSentence, sentence));
  }
 }
}

Code example source: stanfordnlp/CoreNLP

private static void findGoldMentionHeads(Document doc) {
 List<CoreMap> sentences = doc.annotation.get(SentencesAnnotation.class);
 for (int i = 0; i < sentences.size(); i++) {
  DependencyCorefMentionFinder.findHeadInDependency(sentences.get(i), doc.goldMentions.get(i));
 }
}

Code example source: stanfordnlp/CoreNLP

public static int getQuoteParagraphIndex(Annotation doc, CoreMap quote) {
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 return sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class)).get(CoreAnnotations.ParagraphIndexAnnotation.class);
}

Code example source: stanfordnlp/CoreNLP

/**
 * Convert a CoreNLP Annotation object to a Document.
 * @param ann The CoreNLP Annotation object.
 */
@SuppressWarnings("Convert2streamapi")
public Document(Properties props, Annotation ann) {
 this.defaultProps = props;
 StanfordCoreNLP.getDefaultAnnotatorPool(props, new AnnotatorImplementations());  // cache the annotator pool
 this.impl = new ProtobufAnnotationSerializer(false).toProtoBuilder(ann);
 List<CoreMap> sentences = ann.get(CoreAnnotations.SentencesAnnotation.class);
 this.sentences = new ArrayList<>(sentences.size());
 for (CoreMap sentence : sentences) {
  this.sentences.add(new Sentence(this, this.serializer.toProtoBuilder(sentence), sentence.get(CoreAnnotations.TextAnnotation.class), this.defaultProps));
 }
}

Code example source: stanfordnlp/CoreNLP

/** Print raw document for analysis */
public static String printRawDoc(Document document, boolean gold, boolean printClusterID) throws FileNotFoundException {
 StringBuilder sb = new StringBuilder();
 List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
 StringBuilder doc = new StringBuilder();
 for (int i = 0; i < sentences.size(); i++) {
  doc.append(sentenceStringWithMention(i, document, gold, printClusterID));
  doc.append("\n");
 }
 sb.append("PRINT RAW DOC START\n");
 sb.append(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)).append("\n");
 if (gold) {
  sb.append("New DOC: (GOLD MENTIONS) ==================================================\n");
 } else {
  sb.append("New DOC: (Predicted Mentions) ==================================================\n");
 }
 sb.append(doc.toString()).append("\n");
 sb.append("PRINT RAW DOC END").append("\n");
 return sb.toString();
}

Code example source: stanfordnlp/CoreNLP

private static CoreMap constructCoreMap(Annotation doc, Pair<Integer, Integer> run) {
 List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
 // check if the second part of the run is a *NL* token, adjust accordingly
 int endTokenIndex = run.second;
 while (endTokenIndex > 0 && tokens.get(endTokenIndex).get(CoreAnnotations.IsNewlineAnnotation.class)) {
  endTokenIndex--;
 }
 // get the sentence text from the first and last character offsets
 int begin = tokens.get(run.first).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
 int end = tokens.get(endTokenIndex).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 String sentenceText = doc.get(CoreAnnotations.TextAnnotation.class).substring(begin, end);
 List<CoreLabel> sentenceTokens = tokens.subList(run.first, endTokenIndex+1);
 // create a sentence annotation with text and token offsets
 CoreMap sentence = new Annotation(sentenceText);
 sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
 sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
 sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
 return sentence;
}
