edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.getTokens()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(11.9k)|赞(0)|评价(0)|浏览(83)

本文整理了Java中edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.getTokens()方法的一些代码示例,展示了TextAnnotation.getTokens()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。TextAnnotation.getTokens()方法的具体详情如下:
包路径:edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation
类名称:TextAnnotation
方法名:getTokens

TextAnnotation.getTokens介绍

暂无

代码示例

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Looks up the token that sits {@code distance} positions after {@code commaPosition}.
 *
 * @param distance token offset added to the comma position
 * @return the token at {@code commaPosition + distance}, or the placeholder {@code "###"} when
 *         that index is at or beyond the end of the token array
 */
public String getWordToRight(int distance) {
  final int targetIndex = commaPosition + distance;
  final boolean pastSentenceEnd = targetIndex >= s.ta.getTokens().length;
  // A dummy symbol stands in for the word when the offset runs off the end of the sentence
  // (e.g. the comma is the second to last word).
  return pastSentenceEnd ? "###" : s.ta.getToken(targetIndex);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * Looks up the token that sits {@code distance} positions after {@code commaPosition}.
 *
 * @param distance token offset added to the comma position
 * @return the token at {@code commaPosition + distance}, or the placeholder {@code "###"} when
 *         that index is at or beyond the end of the token array
 */
public String getWordToRight(int distance) {
  final int targetIndex = commaPosition + distance;
  final boolean pastSentenceEnd = targetIndex >= s.ta.getTokens().length;
  // A dummy symbol stands in for the word when the offset runs off the end of the sentence
  // (e.g. the comma is the second to last word).
  return pastSentenceEnd ? "###" : s.ta.getToken(targetIndex);
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-edison

/**
 * Reports whether any token inside the constituent's span is exactly {@code "not"} or
 * {@code "n't"}.
 *
 * @param input constituent whose tokens, from start span (inclusive) to end span (exclusive),
 *        are inspected
 * @return {@code true} as soon as one such token is found, {@code false} otherwise
 */
@Override
  public Boolean transform(Constituent input) {
    // Fetch the token array once instead of on every comparison.
    final String[] tokens = input.getTextAnnotation().getTokens();
    for (int idx = input.getStartSpan(); idx < input.getEndSpan(); idx++) {
      final String token = tokens[idx];
      if (token.equals("not") || token.equals("n't")) {
        return true;
      }
    }
    return false;
  }
};

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Reports whether any token inside the constituent's span is exactly {@code "not"} or
 * {@code "n't"}.
 *
 * @param input constituent whose tokens, from start span (inclusive) to end span (exclusive),
 *        are inspected
 * @return {@code true} as soon as one such token is found, {@code false} otherwise
 */
@Override
  public Boolean transform(Constituent input) {
    // Fetch the token array once instead of on every comparison.
    final String[] tokens = input.getTextAnnotation().getTokens();
    for (int idx = input.getStartSpan(); idx < input.getEndSpan(); idx++) {
      final String token = tokens[idx];
      if (token.equals("not") || token.equals("n't")) {
        return true;
      }
    }
    return false;
  }
};

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Returns the {@code distance}-th shallow-parse chunk found between the token after the comma
 * and the end of the sentence, counting from 1 in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param distance 1-based rank of the wanted chunk
 * @return the selected chunk, or {@code null} when {@code distance} is not in
 *         {@code [1, number of chunks]}
 */
public Constituent getChunkToRightOfComma(int distance) {
  // There is no gold SHALLOW_PARSE, so the view is always read from s.ta.
  final SpanLabelView shallowParse = (SpanLabelView) s.ta.getView(ViewNames.SHALLOW_PARSE);
  final int sentenceEnd = s.ta.getTokens().length;
  final List<Constituent> ordered = shallowParse.getSpanLabels(commaPosition + 1, sentenceEnd);
  Collections.sort(ordered, TextAnnotationUtilities.constituentStartComparator);

  final boolean inRange = distance > 0 && distance <= ordered.size();
  return inRange ? ordered.get(distance - 1) : null;
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * Returns the {@code distance}-th shallow-parse chunk found between the token after the comma
 * and the end of the sentence, counting from 1 in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}.
 *
 * @param distance 1-based rank of the wanted chunk
 * @return the selected chunk, or {@code null} when {@code distance} is not in
 *         {@code [1, number of chunks]}
 */
public Constituent getChunkToRightOfComma(int distance) {
  // There is no gold SHALLOW_PARSE, so the view is always read from s.ta.
  final SpanLabelView shallowParse = (SpanLabelView) s.ta.getView(ViewNames.SHALLOW_PARSE);
  final int sentenceEnd = s.ta.getTokens().length;
  final List<Constituent> ordered = shallowParse.getSpanLabels(commaPosition + 1, sentenceEnd);
  Collections.sort(ordered, TextAnnotationUtilities.constituentStartComparator);

  final boolean inRange = distance > 0 && distance <= ordered.size();
  return inRange ? ordered.get(distance - 1) : null;
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * If labels are not given construct commas and assign the labels according to the
 * bayraktar-syntax-pattern to comma label mappings
 */
public CommaSRLSentence(TextAnnotation ta, TextAnnotation goldTa) {
  this.ta = ta;
  this.goldTa = goldTa;
  commas = new ArrayList<>();
  for (int i = 0; i < ta.getTokens().length; i++) {
    if (ta.getToken(i).equals(",")) {
      Comma comma = new Comma(i, this);
      commas.add(comma);
    }
  }
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Renders the sentence with this comma's labels embedded right after the comma token.
 *
 * @return the tokens up to and including {@code commaPosition} joined by spaces, followed by
 *         {@code "[" + labels joined by "," + "] "}, followed by the remaining tokens joined by
 *         spaces
 */
public String getAnnotatedText() {
  final List<String> words = Arrays.asList(s.ta.getTokens());
  final int splitPoint = commaPosition + 1;

  final StringBuilder rendered = new StringBuilder();
  rendered.append(StringUtils.join(" ", words.subList(0, splitPoint)));
  rendered.append("[");
  rendered.append(StringUtils.join(",", labels));
  rendered.append("] ");
  rendered.append(StringUtils.join(" ", words.subList(splitPoint, words.size())));
  return rendered.toString();
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * If labels are not given construct commas and assign the labels according to the
 * bayraktar-syntax-pattern to comma label mappings
 */
public CommaSRLSentence(TextAnnotation ta, TextAnnotation goldTa) {
  this.ta = ta;
  this.goldTa = goldTa;
  commas = new ArrayList<>();
  for (int i = 0; i < ta.getTokens().length; i++) {
    if (ta.getToken(i).equals(",")) {
      Comma comma = new Comma(i, this);
      commas.add(comma);
    }
  }
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Renders the sentence with the value of {@code getBayraktarLabel()} embedded right after the
 * comma token.
 *
 * @return the tokens up to and including {@code commaPosition} joined by spaces, followed by
 *         the Bayraktar label wrapped as {@code "[label] "}, followed by the remaining tokens
 *         joined by spaces
 */
public String getBayraktarAnnotatedText() {
    final List<String> words = Arrays.asList(s.ta.getTokens());
    final int splitPoint = commaPosition + 1;

    final StringBuilder rendered = new StringBuilder();
    rendered.append(StringUtils.join(" ", words.subList(0, splitPoint)));
    rendered.append("[");
    rendered.append(getBayraktarLabel());
    rendered.append("] ");
    rendered.append(StringUtils.join(" ", words.subList(splitPoint, words.size())));
    return rendered.toString();
  }
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * Renders the sentence with the value of {@code getBayraktarLabel()} embedded right after the
 * comma token.
 *
 * @return the tokens up to and including {@code commaPosition} joined by spaces, followed by
 *         the Bayraktar label wrapped as {@code "[label] "}, followed by the remaining tokens
 *         joined by spaces
 */
public String getBayraktarAnnotatedText() {
    final List<String> words = Arrays.asList(s.ta.getTokens());
    final int splitPoint = commaPosition + 1;

    final StringBuilder rendered = new StringBuilder();
    rendered.append(StringUtils.join(" ", words.subList(0, splitPoint)));
    rendered.append("[");
    rendered.append(getBayraktarLabel());
    rendered.append("] ");
    rendered.append(StringUtils.join(" ", words.subList(splitPoint, words.size())));
    return rendered.toString();
  }
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-comma

/**
 * Renders the sentence with this comma's labels embedded right after the comma token.
 *
 * @return the tokens up to and including {@code commaPosition} joined by spaces, followed by
 *         {@code "[" + labels joined by "," + "] "}, followed by the remaining tokens joined by
 *         spaces
 */
public String getAnnotatedText() {
  final List<String> words = Arrays.asList(s.ta.getTokens());
  final int splitPoint = commaPosition + 1;

  final StringBuilder rendered = new StringBuilder();
  rendered.append(StringUtils.join(" ", words.subList(0, splitPoint)));
  rendered.append("[");
  rendered.append(StringUtils.join(",", labels));
  rendered.append("] ");
  rendered.append(StringUtils.join(" ", words.subList(splitPoint, words.size())));
  return rendered.toString();
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Renders the sentence as space-separated tokens, appending {@code [labels]} immediately after
 * every token whose index is the {@code commaPosition} of an entry in {@code commas}.
 *
 * <p>The scan relies on {@code commas} being ordered by ascending {@code commaPosition}: a
 * single cursor walks the list while the tokens are visited left to right. Labels of several
 * entries sharing one position are concatenated inside the same bracket pair.
 *
 * <p>Unlike the previous version, the token at index 0 is also checked for a comma. Skipping
 * it left the cursor stuck on that entry, which silently dropped the annotations of every
 * later comma as well. An empty token array now yields an empty string instead of throwing.
 *
 * @return String representation of the sentence with all the commas embedded into the string
 */
public String getAnnotatedText() {
  String[] tokens = ta.getTokens();
  StringBuilder annotatedText = new StringBuilder();
  int commaNum = 0;
  for (int tokenIdx = 0; tokenIdx < tokens.length; tokenIdx++) {
    if (tokenIdx > 0) {
      annotatedText.append(' ');
    }
    annotatedText.append(tokens[tokenIdx]);

    // Collect the labels of every comma registered at this token index.
    StringBuilder commaAnnotation = new StringBuilder();
    while (commaNum < commas.size() && commas.get(commaNum).commaPosition == tokenIdx) {
      commaAnnotation.append(StringUtils.join(",", commas.get(commaNum).getLabels()));
      commaNum++;
    }
    if (commaAnnotation.length() > 0) {
      annotatedText.append('[').append(commaAnnotation).append(']');
    }
  }
  return annotatedText.toString();
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Gets a lemma for the token at index {@code tokIndex} in TextAnnotation {@code ta}.
 *
 * <p>The token is lower-cased and trimmed, its first label in the POS view is looked up, and
 * both are handed to {@code getLemma(word, pos)}.
 *
 * @param ta TextAnnotation to query for lemma; MUST have POS view.
 * @param tokIndex token index for word to lemmatize; must lie in {@code [0, token count)}
 * @return a String representing a lemma with the POS found for the corresponding word
 * @throws IllegalArgumentException if {@code tokIndex} is negative or not smaller than the
 *         number of tokens in {@code ta}; the message is also logged at error level
 */
public String getLemma(TextAnnotation ta, int tokIndex) {
  // Read the token count once; it is needed for both the check and the message.
  int tokenCount = ta.getTokens().length;
  // Negative indices are rejected here too, so they fail with the same documented exception
  // instead of surfacing later from the token lookup.
  if (tokIndex < 0 || tokIndex >= tokenCount) {
    String msg =
        "ERROR: " + NAME + ".getLemma(): index '" + tokIndex
            + "' is out of range of textAnnotation, " + "which has '"
            + tokenCount + "' tokens.";
    logger.error(msg);
    throw new IllegalArgumentException(msg);
  }
  String word = ta.getToken(tokIndex).toLowerCase().trim();
  String pos = ta.getView(ViewNames.POS).getLabelsCoveringToken(tokIndex).get(0);
  return getLemma(word, pos);
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Ad-hoc driver: prints whether the sample text contains Han script, tokenizes the text with
 * a {@code ChineseTokenizer}, and prints every token on its own line.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {

    // Sample inputs. Each assignment replaces the previous one, so only the last value
    // ("ab-cde") is actually processed below.
    String line = "   面对新世纪,  世界各国人民的共同愿望是:继续发展人类以往创造的一切文明成果。克服20世纪困扰着人类的战争和贫困问题,推进和平与发展的崇高事业,创造一个美好的世界。";
    line = "巴拉克 歐巴馬";
    line = "ab-cde";
    // Further sample inputs, currently disabled:
    // line = "2006年大西洋颶風季時間軸中記錄有全年大西洋盆地所有熱帶和亞熱帶氣旋形成、增強、減弱、登陸、轉變成溫帶氣旋以及消散的具體信息。2006年大西洋颶風季於2006年6月1日正式開始,同年11月30日結束,傳統上這樣的日期界定了一年中絕大多數熱帶氣旋在大西洋形成的時間段,這一颶風季是繼2001年大西洋颶風季以來第一個沒有任何一場颶風在美國登陸的大西洋颶風季,也是繼1994年大西洋颶風季以來第一次在整個十月份都沒有熱帶氣旋形成。美國國家颶風中心每年都會對前一年颶風季的所有天氣系統進行重新分析,並根據結果更新其風暴資料庫,因此時間軸中還包括實際操作中沒有發布的信息。包括最大持續風速、位置、距離在內的所有數字都是經四捨五入換算成整數。";
    // line = "在古巴的美国代表机构是由哈瓦那的United States Interests Section(美国利益科)代理,在美国首都华盛顿有一个类似的Cuban Interests Section(古巴利益科),其则是瑞士大使馆的组成部分。";

    System.out.println(containsHanScript(line));

    // Base directory handed to the ChineseTokenizer constructor (machine-specific path).
    final String basedir = "/shared/experiments/ctsai12/workspace/stanford-segmenter-2015-04-20/data/";
    final ChineseTokenizer tokenizer = new ChineseTokenizer(basedir);
    final TextAnnotation annotation = tokenizer.getTextAnnotation1(line);

    final String[] tokens = annotation.getTokens();
    for (int idx = 0; idx < tokens.length; idx++) {
        System.out.println(tokens[idx]);
    }

    // Disabled character-offset debugging:
    // int tid = annotation.getTokenIdFromCharacterOffset(5);
    // System.out.println("token id "+tid);
    // System.out.println("token: "+annotation.getToken(tid));
    // IntPair offs = annotation.getTokenCharacterOffset(tid);
    // System.out.println("start: "+offs.getFirst());
    // System.out.println("edn: "+offs.getSecond());
}

代码示例来源:origin: edu.illinois.cs.cogcomp/wikipediaAPI-multilingual

/**
 * Ad-hoc driver: prints whether the sample text contains Han script, tokenizes the text with
 * a {@code ChineseTokenizer}, and prints every token on its own line.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {

    // Sample inputs. Each assignment replaces the previous one, so only the last value
    // ("ab-cde") is actually processed below.
    String line = "   面对新世纪,  世界各国人民的共同愿望是:继续发展人类以往创造的一切文明成果。克服20世纪困扰着人类的战争和贫困问题,推进和平与发展的崇高事业,创造一个美好的世界。";
    line = "巴拉克 歐巴馬";
    line = "ab-cde";
    // Further sample inputs, currently disabled:
    // line = "2006年大西洋颶風季時間軸中記錄有全年大西洋盆地所有熱帶和亞熱帶氣旋形成、增強、減弱、登陸、轉變成溫帶氣旋以及消散的具體信息。2006年大西洋颶風季於2006年6月1日正式開始,同年11月30日結束,傳統上這樣的日期界定了一年中絕大多數熱帶氣旋在大西洋形成的時間段,這一颶風季是繼2001年大西洋颶風季以來第一個沒有任何一場颶風在美國登陸的大西洋颶風季,也是繼1994年大西洋颶風季以來第一次在整個十月份都沒有熱帶氣旋形成。美國國家颶風中心每年都會對前一年颶風季的所有天氣系統進行重新分析,並根據結果更新其風暴資料庫,因此時間軸中還包括實際操作中沒有發布的信息。包括最大持續風速、位置、距離在內的所有數字都是經四捨五入換算成整數。";
    // line = "在古巴的美国代表机构是由哈瓦那的United States Interests Section(美国利益科)代理,在美国首都华盛顿有一个类似的Cuban Interests Section(古巴利益科),其则是瑞士大使馆的组成部分。";

    System.out.println(containsHanScript(line));

    // Base directory handed to the ChineseTokenizer constructor (machine-specific path).
    final String basedir = "/shared/experiments/ctsai12/workspace/stanford-segmenter-2015-04-20/data/";
    final ChineseTokenizer tokenizer = new ChineseTokenizer(basedir);
    final TextAnnotation annotation = tokenizer.getTextAnnotation1(line);

    final String[] tokens = annotation.getTokens();
    for (int idx = 0; idx < tokens.length; idx++) {
        System.out.println(tokens[idx]);
    }

    // Disabled character-offset debugging:
    // int tid = annotation.getTokenIdFromCharacterOffset(5);
    // System.out.println("token id "+tid);
    // System.out.println("token: "+annotation.getToken(tid));
    // IntPair offs = annotation.getTokenCharacterOffset(tid);
    // System.out.println("start: "+offs.getFirst());
    // System.out.println("edn: "+offs.getSecond());
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Adds a {@code QUESTION_TYPE} view to {@code ta} holding two constituents that each span the
 * whole token range: one carrying the coarse label and one carrying the fine label, each with
 * the score its classifier reports for that label.
 *
 * <p>The SHALLOW_PARSE, NER_CONLL and NER_ONTONOTES views are checked with an {@code assert},
 * so the check only runs when assertions are enabled.
 *
 * @param ta annotation to classify and to attach the new view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
  protected void addView(TextAnnotation ta) throws AnnotatorException {
    SpanLabelView questionTypeView =
        new SpanLabelView(ViewNames.QUESTION_TYPE, ViewNames.QUESTION_TYPE, ta, 1.0);

    assert ta.getAvailableViews().contains(ViewNames.SHALLOW_PARSE)
        && ta.getAvailableViews().contains(ViewNames.NER_CONLL)
        && ta.getAvailableViews().contains(ViewNames.NER_ONTONOTES)
        : "the annotator does not have the required views ";

    // Fine-grained prediction and its score.
    String fineLabel = fine.discreteValue(ta);
    Double fineLabelScore = fine.scores(ta).getScore(fineLabel).score;

    // Coarse-grained prediction and its score.
    String coarseLabel = coarse.discreteValue(ta);
    Double coarseLabelScore = coarse.scores(ta).getScore(coarseLabel).score;

    // Both constituents cover every token of the text.
    int tokenCount = ta.getTokens().length;
    Constituent fineConstituent =
        new Constituent(fineLabel, fineLabelScore, ViewNames.QUESTION_TYPE, ta, 0, tokenCount);
    Constituent coarseConstituent =
        new Constituent(coarseLabel, coarseLabelScore, ViewNames.QUESTION_TYPE, ta, 0, tokenCount);

    // The coarse constituent is added before the fine one.
    questionTypeView.addConstituent(coarseConstituent);
    questionTypeView.addConstituent(fineConstituent);
    ta.addView(ViewNames.QUESTION_TYPE, questionTypeView);
  }
}

代码示例来源:origin: CogComp/cogcomp-nlp

/**
 * Builds a {@code LEMMA} token-label view for {@code inputTa}, with one single-token
 * constituent per token labelled by {@code getLemma(inputTa, index)}, attaches the view to
 * {@code inputTa}, and returns it.
 *
 * @param inputTa annotation to lemmatize; the new view is added to it
 * @return the view that was added
 * @throws IOException declared by this method's signature
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
  final int tokenCount = inputTa.getTokens().length;
  final TokenLabelView lemmas = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

  int position = 0;
  while (position < tokenCount) {
    // Each constituent covers exactly one token: [position, position + 1).
    lemmas.addConstituent(
        new Constituent(
            getLemma(inputTa, position), ViewNames.LEMMA, inputTa, position, position + 1));
    position++;
  }

  inputTa.addView(ViewNames.LEMMA, lemmas);
  return lemmas;
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-question-typer

/**
 * Adds a {@code QUESTION_TYPE} view to {@code ta} holding two constituents that each span the
 * whole token range: one carrying the coarse label and one carrying the fine label, each with
 * the score its classifier reports for that label.
 *
 * <p>The SHALLOW_PARSE, NER_CONLL and NER_ONTONOTES views are checked with an {@code assert},
 * so the check only runs when assertions are enabled.
 *
 * @param ta annotation to classify and to attach the new view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
  protected void addView(TextAnnotation ta) throws AnnotatorException {
    SpanLabelView questionTypeView =
        new SpanLabelView(ViewNames.QUESTION_TYPE, ViewNames.QUESTION_TYPE, ta, 1.0);

    assert ta.getAvailableViews().contains(ViewNames.SHALLOW_PARSE)
        && ta.getAvailableViews().contains(ViewNames.NER_CONLL)
        && ta.getAvailableViews().contains(ViewNames.NER_ONTONOTES)
        : "the annotator does not have the required views ";

    // Fine-grained prediction and its score.
    String fineLabel = fine.discreteValue(ta);
    Double fineLabelScore = fine.scores(ta).getScore(fineLabel).score;

    // Coarse-grained prediction and its score.
    String coarseLabel = coarse.discreteValue(ta);
    Double coarseLabelScore = coarse.scores(ta).getScore(coarseLabel).score;

    // Both constituents cover every token of the text.
    int tokenCount = ta.getTokens().length;
    Constituent fineConstituent =
        new Constituent(fineLabel, fineLabelScore, ViewNames.QUESTION_TYPE, ta, 0, tokenCount);
    Constituent coarseConstituent =
        new Constituent(coarseLabel, coarseLabelScore, ViewNames.QUESTION_TYPE, ta, 0, tokenCount);

    // The coarse constituent is added before the fine one.
    questionTypeView.addConstituent(coarseConstituent);
    questionTypeView.addConstituent(fineConstituent);
    ta.addView(ViewNames.QUESTION_TYPE, questionTypeView);
  }
}

代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-lemmatizer

/**
 * Builds a {@code LEMMA} token-label view for {@code inputTa}, with one single-token
 * constituent per token labelled by {@code getLemma(inputTa, index)}, attaches the view to
 * {@code inputTa}, and returns it.
 *
 * @param inputTa annotation to lemmatize; the new view is added to it
 * @return the view that was added
 * @throws IOException declared by this method's signature
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
  final int tokenCount = inputTa.getTokens().length;
  final TokenLabelView lemmas = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);

  int position = 0;
  while (position < tokenCount) {
    // Each constituent covers exactly one token: [position, position + 1).
    lemmas.addConstituent(
        new Constituent(
            getLemma(inputTa, position), ViewNames.LEMMA, inputTa, position, position + 1));
    position++;
  }

  inputTa.addView(ViewNames.LEMMA, lemmas);
  return lemmas;
}

相关文章