edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation类的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(8.7k)|赞(0)|评价(0)|浏览(119)

本文整理了Java中edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation类的一些代码示例,展示了TextAnnotation类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。TextAnnotation类的具体详情如下:
包路径:edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation
类名称:TextAnnotation

TextAnnotation介绍

[英]This class contains all annotations for a single piece of text (which may contain more than one sentence).
[中]此类包含单个文本(可能包含多个句子)的所有标注。

代码示例

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Re-annotates the text of {@code ta} and copies every view listed in {@code viewsToAdd}
 * from the freshly produced annotation into {@code ta}.
 *
 * @param ta the annotation that receives the views
 * @param overwrite forwarded unchanged to {@code annotate}
 * @throws IllegalStateException if {@code annotate} fails or produces no annotation
 */
public void addView(TextAnnotation ta, boolean overwrite) {
  final TextAnnotation newTA;
  try {
    newTA = annotate(ta.getText(), overwrite);
  } catch (Exception e) {
    // The failure used to be printed and then masked by a NullPointerException in the
    // loop below; surface it here instead, keeping the original cause attached.
    throw new IllegalStateException("Failed to annotate text; cannot add views", e);
  }
  if (newTA == null) {
    throw new IllegalStateException("annotate returned null; cannot add views");
  }
  for (String vu : viewsToAdd) {
    ta.addView(vu, newTA.getView(vu));
  }
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Builds the {@code ViewNames.QUANTITIES} span view for {@code ta} and attaches it.
 * The spans come from {@code getSpans} as character offsets and are converted to token ids.
 *
 * @param ta the annotation to extend; asserted to already hold the SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
  assert (ta.hasView(ViewNames.SENTENCE));
  final SpanLabelView view =
      new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);
  for (QuantSpan quantity : getSpans(ta.getTokenizedText(), true, ta)) {
    // Translate the character offsets of the span into token indices.
    final int firstToken = ta.getTokenIdFromCharacterOffset(quantity.start);
    final int lastToken = ta.getTokenIdFromCharacterOffset(quantity.end);
    final String label = quantity.object.toString();
    view.addSpanLabel(firstToken, lastToken, label, 1d);
  }
  ta.addView(ViewNames.QUANTITIES, view);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Writes a plain-text dump of {@code ta} to {@code out}: its id, its raw text, and the
 * string form of every available view, with a filler line from {@code getLineFill()}
 * after the header and after each view.
 *
 * @param out destination stream
 * @param ta annotation to dump
 */
static public void printTextAnnotation(PrintStream out, TextAnnotation ta) {
  out.println("TextAnnotation with id: " + ta.getId());
  out.println("Raw Text: " + ta.getText());
  out.println(getLineFill());
  out.println("TextAnnotation Views:");
  for (String viewName : ta.getAvailableViews()) {
    out.println("View Name: " + viewName);
    final String rendered = ta.getView(viewName).toString();
    out.println(rendered);
    out.println(getLineFill());
  }
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Returns the sentence that holds the token with the given id.
 *
 * @param tokenId index of the token
 * @return the sentence whose id {@code getSentenceId(tokenId)} reports
 */
public Sentence getSentenceFromToken(int tokenId) {
  final int sentenceId = getSentenceId(tokenId);
  return getSentence(sentenceId);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * Returns the token {@code distance} positions after the comma.
 *
 * @param distance offset added to the comma position
 * @return the token at that position, or {@code "###"} when the position is at or past
 *     the end of the token array
 */
public String getWordToRight(int distance) {
  final int target = commaPosition + distance;
  // "###" is a dummy sentence-end symbol (e.g. the comma is the second to last word).
  final boolean pastEnd = target >= s.ta.getTokens().length;
  return pastEnd ? "###" : s.ta.getToken(target);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-edison

/**
 * Bounds-checked token lookup.
 *
 * @param ta annotation to read from
 * @param wordPosition token index, which may lie outside the annotation
 * @return the token at {@code wordPosition}, or {@code "*"} when the index is negative or
 *     not below {@code ta.size()}
 */
private static String getSafeToken(TextAnnotation ta, int wordPosition) {
  final boolean outOfRange = wordPosition < 0 || wordPosition >= ta.size();
  return outOfRange ? "*" : ta.getToken(wordPosition);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Derives a POS view from the parse trees stored under {@code parseViewName}: for each
 * sentence, the label of the parent of every yield node becomes the label of the next token.
 *
 * @param ta annotation that holds the parse view and receives the POS view
 * @throws IllegalStateException if a cleaned parse tree yields a different number of nodes
 *     than the sentence has tokens
 */
@Override
public void addView(TextAnnotation ta) {
  final TokenLabelView labels = new TokenLabelView(ViewNames.POS, "ParsePOS", ta, 1.0);
  // Running token index across all sentences of the text.
  int nextToken = 0;
  for (int sent = 0; sent < ta.getNumberOfSentences(); sent++) {
    final TreeView parseView = (TreeView) (ta.getView(parseViewName));
    // Clean the tree (null nodes, then function tags) before comparing sizes.
    final Tree<String> cleaned =
        ParseUtils.stripFunctionTags(ParseUtils.snipNullNodes(parseView.getTree(sent)));
    if (cleaned.getYield().size() != ta.getSentence(sent).size()) {
      throw new IllegalStateException("Parse tree size != ta.size()");
    }
    for (Tree<String> leaf : cleaned.getYield()) {
      labels.addTokenLabel(nextToken, leaf.getParent().getLabel(), 1.0);
      nextToken++;
    }
  }
  ta.addView(getViewName(), labels);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/saul-examples

/**
 * Creates a token-label view named {@code viewName} on {@code ta}, pairing the i-th
 * constituent of the TOKENS view with the i-th entry of {@code labels}.
 *
 * @param ta annotation to extend; its TOKENS view is read
 * @param labels one label per token constituent, in the same order
 */
private void addView(TextAnnotation ta, List<String> labels) {
  TokenLabelView labelView = new TokenLabelView(viewName, ta);
  // Typed list instead of a raw List: removes the per-element cast.
  List<Constituent> constituents = ta.getView(ViewNames.TOKENS).getConstituents();
  assert constituents.size() == labels.size()
      : "expected " + constituents.size() + " labels but got " + labels.size();
  for (int i = 0; i < constituents.size(); ++i) {
    labelView.addTokenLabel(constituents.get(i).getStartSpan(), labels.get(i), 1.0);
  }
  ta.addView(viewName, labelView);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-depparse

/**
 * Builds a single-sentence TextAnnotation from {@code tokens} (minus the first entry) and
 * adds the {@code pos}, {@code lemma} and {@code chunk} views, in that order.
 *
 * @param corpusId corpus identifier passed to the builder
 * @param sentId sentence identifier passed to the builder
 * @param tokens tokens whose first element is the root token
 * @return the annotated TextAnnotation
 * @throws AnnotatorException declared for the annotation calls
 */
TextAnnotation annotate(String corpusId, String sentId, String[] tokens)
    throws AnnotatorException {
  // Ignore the root token at index 0.
  final String[] withoutRoot = Arrays.copyOfRange(tokens, 1, tokens.length);
  final List<String[]> sentences = Collections.singletonList(withoutRoot);
  final TextAnnotation annotated =
      BasicTextAnnotationBuilder.createTextAnnotationFromTokens(corpusId, sentId, sentences);
  annotated.addView(pos);
  annotated.addView(lemma);
  annotated.addView(chunk);
  return annotated;
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Returns the POS label of the token {@code distance} positions before the comma, read
 * from the gold annotation when {@code GOLD} is set and from the predicted one otherwise.
 *
 * @param distance offset subtracted from the comma position
 * @return {@code "DT-the"} in the special determiner case below, otherwise the POS label
 */
public String getPOSToLeft(int distance) {
  final TokenLabelView posView = GOLD
      ? (TokenLabelView) s.goldTa.getView(ViewNames.POS)
      : (TokenLabelView) s.ta.getView(ViewNames.POS);
  final String pos = posView.getLabel(commaPosition - distance);
  // NOTE(review): the word is fetched with getWordToRight although the POS comes from the
  // left of the comma; this may be a copy-paste slip. Confirm before changing it, since
  // existing models may depend on the current feature values.
  final boolean isDeterminerThe =
      pos.equals("DT") && distance == 1 && getWordToRight(distance).equalsIgnoreCase("the");
  return isDeterminerThe ? "DT-the" : pos;
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-edison

/**
 * Adds a token-label view holding the stem of every token of {@code input}.
 *
 * @param input annotation whose tokens are stemmed and which receives the new view
 */
@Override
public void addView(TextAnnotation input) {
  final TokenLabelView stems =
      new TokenLabelView(getViewName(), "PorterStemmer", input, 1.0);
  // The stemmer is driven through setCurrent/stem/getCurrent, so the whole pass runs
  // while holding the lock on instance.
  synchronized (instance) {
    int tokenIdx = 0;
    while (tokenIdx < input.size()) {
      stemmer.setCurrent(input.getToken(tokenIdx));
      stemmer.stem();
      stems.addTokenLabel(tokenIdx, stemmer.getCurrent(), 1.0);
      tokenIdx++;
    }
  }
  input.addView(getViewName(), stems);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Renders {@code ta} as a string: a header with the raw text, followed by the output of
 * {@code printView} for every available view.
 *
 * @param ta annotation to render
 * @return the concatenated description
 * @throws IOException declared for the {@code printView} calls
 */
public static String printTextAnnotation(TextAnnotation ta) throws IOException {
  final StringBuilder out = new StringBuilder("TextAnnotation for text: ");
  out.append(ta.getText());
  for (String viewName : ta.getAvailableViews()) {
    final View view = ta.getView(viewName);
    out.append(printView(view));
  }
  return out.toString();
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-srl

/**
 * Makes sure {@code ta} carries every view this component needs, adding only those that
 * are missing: the entries of {@code requiredViews}, the Stanford clause view, and the
 * dependency view derived from the Stanford parse.
 *
 * @param ta annotation to complete
 * @throws AnnotatorException if adding a view fails
 */
private void addViews(TextAnnotation ta) throws AnnotatorException {
  for (String required : requiredViews) {
    if (ta.hasView(required)) {
      continue;
    }
    annotator.addView(ta, required);
  }

  final boolean hasClauses = ta.hasView(ViewNames.CLAUSES_STANFORD);
  if (!hasClauses) {
    ta.addView(ClauseViewGenerator.STANFORD);
  }

  final String dependencyViewName = ViewNames.DEPENDENCY + ":" + ViewNames.PARSE_STANFORD;
  if (!ta.hasView(dependencyViewName)) {
    ta.addView(new HeadFinderDependencyViewGenerator(ViewNames.PARSE_STANFORD));
  }
}
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-srl

/**
 * Hands the predicate-argument view of {@code ta} to {@code convertPredicateArgView},
 * passing {@code null} when the annotation has no such view.
 *
 * @param columns column data forwarded to {@code convertPredicateArgView}
 * @param ta annotation that may hold the view named {@code predicateArgumentViewName}
 */
private void addPredicateArgs(List<String[]> columns, TextAnnotation ta) {
  final PredicateArgumentView predArgs = ta.hasView(predicateArgumentViewName)
      ? (PredicateArgumentView) ta.getView(predicateArgumentViewName)
      : null;
  convertPredicateArgView(ta, predArgs, columns, true);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Reports whether the wrapped {@code textAnnotation} has a view with the given name.
 *
 * @param viewName name of the view to look for
 * @return the result of {@code textAnnotation.hasView(viewName)}
 */
@Override
public boolean hasView(String viewName) {
  final boolean present = textAnnotation.hasView(viewName);
  return present;
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Returns the {@code distance}-th shallow-parse chunk after the comma, with chunks sorted
 * by {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param distance 1-based rank of the chunk to return
 * @return the chunk, or {@code null} when {@code distance} is not between 1 and the number
 *     of chunks found
 */
public Constituent getChunkToRightOfComma(int distance) {
  // We don't have gold SHALLOW_PARSE, so the predicted annotation is always used.
  final SpanLabelView shallowParse = (SpanLabelView) s.ta.getView(ViewNames.SHALLOW_PARSE);
  final List<Constituent> rightChunks =
      shallowParse.getSpanLabels(commaPosition + 1, s.ta.getTokens().length);
  Collections.sort(rightChunks, TextAnnotationUtilities.constituentStartComparator);
  final boolean inRange = distance > 0 && distance <= rightChunks.size();
  return inRange ? rightChunks.get(distance - 1) : null;
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Copies the views of an annotated single-sentence {@link TextAnnotation} into the
 * TextAnnotation of a longer text that contains that sentence.
 *
 * @param sentenceTa annotated TextAnnotation of the sentence
 * @param textTa TextAnnotation of the longer text, lacking annotations for that sentence
 * @param sentenceId index of the sentence within the longer text
 */
static public void mapSentenceAnnotationsToText(TextAnnotation sentenceTa, TextAnnotation textTa, int sentenceId ) {
  // Sanity checks, active only with assertions enabled: the index is valid and both
  // annotations cover the same sentence text.
  assert (sentenceId < textTa.getNumberOfSentences());
  assert (sentenceTa.getText().equals(textTa.getSentence(sentenceId).getText()));

  final int sentenceStart = textTa.getSentence(sentenceId).getStartSpan();
  final int sentenceEnd = textTa.getSentence(sentenceId).getEndSpan();
  // The sentence start is passed twice: as the span start and as the fifth argument
  // (presumably the offset applied when copying; confirm against copyViewsFromTo).
  copyViewsFromTo(sentenceTa, textTa, sentenceStart, sentenceEnd, sentenceStart);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Builds a LEMMA view with one single-token constituent per token of {@code inputTa},
 * attaches it to {@code inputTa} and returns it.
 *
 * @param inputTa annotation to lemmatize; it receives the new view
 * @return the view that was added
 * @throws IOException declared for the {@code getLemma} calls
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
  final int tokenCount = inputTa.getTokens().length;
  final TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);
  for (int pos = 0; pos < tokenCount; pos++) {
    // Each lemma spans exactly the token it was computed for: [pos, pos + 1).
    view.addConstituent(
        new Constituent(getLemma(inputTa, pos), ViewNames.LEMMA, inputTa, pos, pos + 1));
  }
  inputTa.addView(ViewNames.LEMMA, view);
  return view;
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Builds an identifier for this comma from its position and the id of the gold annotation.
 *
 * @return the comma position, a single space, then {@code s.goldTa.getId()}
 */
public String getCommaID() {
  return String.valueOf(commaPosition) + " " + s.goldTa.getId();
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Demo entry point: tokenizes a fixed Japanese sample with the "ja" tokenizer and prints
 * the tokenized text of each resulting sentence to standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
  final TextAnnotationBuilder jaTokenizer = MultiLingualTokenizer.getTokenizer("ja");

  final String sample = "\"ペンシルベニアドイツ語\",\"text\":\"ペンシルベニアドイツ語(標準ドイ"
      + "ツ語:Pennsylvania-Dutch, Pennsilfaani-Deitsch、アレマン語:Pennsylvania-Ditsch、英語:Pennsylvania-German)"
      + "は、北アメリカのカナダおよびアメリカ中西部でおよそ15万から25万人の人びとに話されているドイツ語の系統である。高地ドイツ語の"
      + "うち上部ドイツ語の一派アレマン語の一方言である。ペンシルベニアアレマン語(Pennsilfaani-Alemanisch, Pennsylvania-Alemannic)"
      + "とも呼ばれる。";

  final TextAnnotation annotated = jaTokenizer.createTextAnnotation(sample);
  for (int sentenceId = 0; sentenceId < annotated.getNumberOfSentences(); sentenceId++) {
    System.out.println(annotated.getSentence(sentenceId).getTokenizedText());
  }
}
}

相关文章