Usage of the edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.<init>() method, with code examples

This article collects a number of Java code examples for the edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.<init>() method and shows how TextAnnotation.<init>() is used in practice. The examples are drawn from selected projects on GitHub, Stack Overflow, Maven, and similar sources, and are intended as practical references. Details of the TextAnnotation.<init>() method follow:
Package: edu.illinois.cs.cogcomp.core.datastructures.textannotation
Class: TextAnnotation
Method: <init> (constructor)

About TextAnnotation.<init>

The library does not provide a Javadoc description for this constructor, but every example below uses the same six-argument form: TextAnnotation(String corpusId, String textId, String text, IntPair[] characterOffsets, String[] tokens, int[] sentenceEndTokenIndexes). It builds an annotation over already-tokenized text, given the raw text, the character span of each token, the token surface strings, and the token index one past the last token of each sentence.
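
As a quick orientation, here is a minimal, self-contained sketch of calling the constructor directly. It is not taken from any of the projects below; the corpus id, document id, text, offsets, and sentence boundary are invented for illustration.

import edu.illinois.cs.cogcomp.core.datastructures.IntPair;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;

public class TextAnnotationInitSketch {
    public static void main(String[] args) {
        // Raw text containing a single two-token sentence (illustrative values).
        String text = "Hello world";
        // Character span of each token within the raw text.
        IntPair[] characterOffsets = {new IntPair(0, 5), new IntPair(6, 11)};
        // Surface form of each token.
        String[] tokens = {"Hello", "world"};
        // One sentence, ending after token index 2 (exclusive).
        int[] sentenceEndTokenIndexes = {2};

        TextAnnotation ta = new TextAnnotation("exampleCorpus", "doc-1", text,
                characterOffsets, tokens, sentenceEndTokenIndexes);
        System.out.println("Tokens: " + ta.getTokens().length);
    }
}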

Code examples

Code example origin: CogComp/cogcomp-nlp

/**
 * The default way to create a {@link TextAnnotation} from pre-tokenized text.
 * 
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId,
    List<String[]> tokenizedSentences) {
  Tokenization tokenization = tokenizeTextSpan(tokenizedSentences);
  StringBuilder text = new StringBuilder();
  for (String[] sentenceTokens : tokenizedSentences)
    text.append(StringUtils.join(sentenceTokens, ' '))
        .append(System.lineSeparator());
  return new TextAnnotation(corpusId, textId, text.toString(), tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}
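
A hypothetical call to this helper might look like the following. The snippet above does not show the enclosing class; in cogcomp-nlp this method is assumed here to live in BasicTextAnnotationBuilder, and the ids and sentences are invented for illustration.

import edu.illinois.cs.cogcomp.annotation.BasicTextAnnotationBuilder; // assumed enclosing class, not shown in the snippet
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import java.util.Arrays;
import java.util.List;

// Two pre-tokenized sentences (illustrative content).
List<String[]> tokenizedSentences = Arrays.asList(
        new String[] {"John", "lives", "in", "Chicago", "."},
        new String[] {"He", "works", "there", "."});
TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
        "exampleCorpus", "doc-1", tokenizedSentences);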

Code example origin: edu.illinois.cs.cogcomp/illinois-core-utilities

/**
 * The default way to create a {@link TextAnnotation} from pre-tokenized text.
 * 
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId,
    List<String[]> tokenizedSentences) {
  Tokenization tokenization = tokenizeTextSpan(tokenizedSentences);
  StringBuilder text = new StringBuilder();
  for (String[] sentenceTokens : tokenizedSentences)
    text.append(StringUtils.join(sentenceTokens, ' '))
        .append(System.lineSeparator());
  return new TextAnnotation(corpusId, textId, text.toString(), tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}

Code example origin: edu.illinois.cs.cogcomp/wikipediaAPI-multilingual

TextAnnotation ta = new TextAnnotation("", "", text, offs,
    surfs, ends);
return ta;

Code example origin: CogComp/cogcomp-nlp

TextAnnotation ta = new TextAnnotation("", "", text, offs,
    surfs, ends);
return ta;

Code example origin: CogComp/cogcomp-nlp

@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
    Tokenizer.Tokenization tokenization) throws IllegalArgumentException {
  return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}

Code example origin: CogComp/cogcomp-nlp

@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
    Tokenization tokenization) throws IllegalArgumentException {
  return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}

Code example origin: edu.illinois.cs.cogcomp/illinois-curator

@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
    Tokenizer.Tokenization tokenization) throws IllegalArgumentException {
  return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}

Code example origin: edu.illinois.cs.cogcomp/illinois-core-utilities

@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
    Tokenization tokenization) throws IllegalArgumentException {
  return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
      tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}

Code example origin: CogComp/cogcomp-nlp

public TextAnnotation getTextAnnotation(String text) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    List<CoreLabel> tokens = new ArrayList<>();
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    int[] sen_ends = new int[sentences.size()];
    int sen_idx = 0;
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            tokens.add(token);
        }
        sen_ends[sen_idx++] = tokens.size();
    }
    String[] surfaces = new String[tokens.size()];
    IntPair[] tokenCharOffsets = new IntPair[tokens.size()];
    for (int i = 0; i < tokens.size(); i++) {
        surfaces[i] = tokens.get(i).originalText();
        tokenCharOffsets[i] = new IntPair(tokens.get(i).beginPosition(), tokens.get(i).endPosition());
    }
    TextAnnotation ta = new TextAnnotation("", "", text, tokenCharOffsets,
        surfaces, sen_ends);
    return ta;
}
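
The pipeline field used above is not shown in the snippet. It is presumably a Stanford CoreNLP pipeline configured with at least tokenization and sentence splitting; a minimal setup under that assumption could be:

import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.util.Properties;

// Tokenization and sentence splitting are all the method above needs;
// the exact annotator list in the original project is an assumption.
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);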

Code example origin: edu.illinois.cs.cogcomp/wikipediaAPI-multilingual

public TextAnnotation getTextAnnotation(String text) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    List<CoreLabel> tokens = new ArrayList<>();
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    int[] sen_ends = new int[sentences.size()];
    int sen_idx = 0;
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            tokens.add(token);
        }
        sen_ends[sen_idx++] = tokens.size();
    }
    String[] surfaces = new String[tokens.size()];
    IntPair[] tokenCharOffsets = new IntPair[tokens.size()];
    for (int i = 0; i < tokens.size(); i++) {
        surfaces[i] = tokens.get(i).originalText();
        tokenCharOffsets[i] = new IntPair(tokens.get(i).beginPosition(), tokens.get(i).endPosition());
    }
    TextAnnotation ta = new TextAnnotation("", "", text, tokenCharOffsets,
        surfaces, sen_ends);
    return ta;
}

Code example origin: CogComp/cogcomp-nlp

assert tokensPairs.size() == tokenSize;
String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset);
TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text,
    tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});

Code example origin: edu.illinois.cs.cogcomp/illinois-core-utilities

assert tokensPairs.size() == tokenSize;
String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset);
TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text,
    tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});

Code example origin: CogComp/cogcomp-nlp

new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());

Code example origin: edu.illinois.cs.cogcomp/illinois-core-utilities

new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());

Code example origin: CogComp/cogcomp-nlp

public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception {
  String corpusId = taImpl.getCorpusId();
  String id = taImpl.getId();
  String text = taImpl.getText();
  String[] tokens = taImpl.getTokensList().toArray(new String[0]);
  Pair<Pair<String, Double>, int[]> sentences = readSentences(taImpl.getSentences());
  IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens);
  TextAnnotation ta =
      new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
  for (ViewProto view : taImpl.getViewsList()) {
    String viewName = view.getViewName();
    List<View> topKViews = new ArrayList<>();
    for (ViewDataProto viewData : view.getViewDataList()) {
      topKViews.add(readViewData(viewData, ta));
    }
    if (viewName.equals(ViewNames.SENTENCE))
      ta.removeView(viewName);
    ta.addTopKView(viewName, topKViews);
    if (viewName.equals(ViewNames.SENTENCE))
      ta.setSentences();
  }
  for (Map.Entry<String, String> entry: taImpl.getPropertiesMap().entrySet()) {
    ta.addAttribute(entry.getKey(), entry.getValue());
  }
  return ta;
}

Code example origin: edu.illinois.cs.cogcomp/illinois-core-utilities

public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception {
  String corpusId = taImpl.getCorpusId();
  String id = taImpl.getId();
  String text = taImpl.getText();
  String[] tokens = taImpl.getTokensList().toArray(new String[0]);
  Pair<Pair<String, Double>, int[]> sentences = readSentences(taImpl.getSentences());
  IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens);
  TextAnnotation ta =
      new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
  for (ViewProto view : taImpl.getViewsList()) {
    String viewName = view.getViewName();
    List<View> topKViews = new ArrayList<>();
    for (ViewDataProto viewData : view.getViewDataList()) {
      topKViews.add(readViewData(viewData, ta));
    }
    if (viewName.equals(ViewNames.SENTENCE))
      ta.removeView(viewName);
    ta.addTopKView(viewName, topKViews);
    if (viewName.equals(ViewNames.SENTENCE))
      ta.setSentences();
  }
  for (Map.Entry<String, String> entry: taImpl.getPropertiesMap().entrySet()) {
    ta.addAttribute(entry.getKey(), entry.getValue());
  }
  return ta;
}

Code example origin: edu.illinois.cs.cogcomp/illinois-tokenizer

new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);

Code example origin: CogComp/cogcomp-nlp

new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);

Code example origin: CogComp/cogcomp-nlp

throws IllegalArgumentException {
Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text);
TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
    tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
SpanLabelView view =

Code example origin: edu.illinois.cs.cogcomp/illinois-tokenizer

throws IllegalArgumentException {
Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text);
TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
    tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
SpanLabelView view =
