本文整理了Java中edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation.<init>()
方法的一些代码示例,展示了TextAnnotation.<init>()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。TextAnnotation.<init>()
方法的具体详情如下:
包路径:edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation
类名称:TextAnnotation
方法名:<init>
暂无方法描述。
代码示例来源:origin: CogComp/cogcomp-nlp
/**
 * Builds a {@link TextAnnotation} from text that has already been split into
 * sentences and tokens, producing the SENTENCE and TOKENS views.
 *
 * @param corpusId identifier of the corpus this document belongs to
 * @param textId identifier of the document within the corpus
 * @param tokenizedSentences one {@code String[]} of tokens per sentence
 * @return a {@link TextAnnotation} whose raw text is the tokens joined by
 *         single spaces, one sentence per line
 */
public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId,
        List<String[]> tokenizedSentences) {
    Tokenization tokenization = tokenizeTextSpan(tokenizedSentences);
    // Reconstruct the raw text: space-separated tokens, one sentence per line.
    StringBuilder rawText = new StringBuilder();
    for (String[] sentence : tokenizedSentences) {
        rawText.append(String.join(" ", sentence)).append(System.lineSeparator());
    }
    return new TextAnnotation(corpusId, textId, rawText.toString(),
            tokenization.getCharacterOffsets(), tokenization.getTokens(),
            tokenization.getSentenceEndTokenIndexes());
}
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-core-utilities
/**
 * The default way to create a {@link TextAnnotation} from pre-tokenized text.
 *
 * @param corpusId identifier of the corpus this document belongs to
 * @param textId identifier of the document within the corpus
 * @param tokenizedSentences A list of sentences, each one being a list of tokens
 * @return A {@link TextAnnotation} containing the SENTENCE and TOKENS views.
 */
public static TextAnnotation createTextAnnotationFromTokens(String corpusId, String textId,
List<String[]> tokenizedSentences) {
Tokenization tokenization = tokenizeTextSpan(tokenizedSentences);
// Rebuild the raw text: tokens joined by single spaces, one sentence per line.
StringBuilder text = new StringBuilder();
for (String[] sentenceTokens : tokenizedSentences)
text.append(StringUtils.join(sentenceTokens, ' '))
.append(System.lineSeparator());
return new TextAnnotation(corpusId, textId, text.toString(), tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}
代码示例来源:origin: edu.illinois.cs.cogcomp/wikipediaAPI-multilingual
// NOTE(review): incomplete snippet — the enclosing method and the declarations
// of text/offs/surfs/ends are not shown; presumably character offsets, surface
// strings and sentence-end token indexes computed earlier. TODO confirm.
TextAnnotation ta = new TextAnnotation("", "", text, offs,
surfs, ends);
return ta;
代码示例来源:origin: CogComp/cogcomp-nlp
// NOTE(review): incomplete snippet — the enclosing method and the declarations
// of text/offs/surfs/ends are not shown; presumably character offsets, surface
// strings and sentence-end token indexes computed earlier. TODO confirm.
TextAnnotation ta = new TextAnnotation("", "", text, offs,
surfs, ends);
return ta;
代码示例来源:origin: CogComp/cogcomp-nlp
/**
 * Creates a {@link TextAnnotation} for {@code text} using the token
 * boundaries supplied by an externally produced
 * {@link Tokenizer.Tokenization}.
 *
 * @param corpusId identifier of the corpus
 * @param textId identifier of the document
 * @param text the raw document text
 * @param tokenization pre-computed tokens, character offsets and sentence ends
 * @throws IllegalArgumentException presumably propagated from the
 *         {@link TextAnnotation} constructor — TODO confirm conditions
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
        Tokenizer.Tokenization tokenization) throws IllegalArgumentException {
    String[] tokens = tokenization.getTokens();
    int[] sentenceEnds = tokenization.getSentenceEndTokenIndexes();
    return new TextAnnotation(corpusId, textId, text,
            tokenization.getCharacterOffsets(), tokens, sentenceEnds);
}
代码示例来源:origin: CogComp/cogcomp-nlp
/**
 * Creates a {@link TextAnnotation} for {@code text} using the token
 * boundaries supplied by an already-computed {@code tokenization}.
 *
 * @param corpusId identifier of the corpus
 * @param textId identifier of the document
 * @param text the raw document text
 * @param tokenization pre-computed tokens, character offsets and sentence ends
 * @throws IllegalArgumentException presumably propagated from the
 *         {@link TextAnnotation} constructor — TODO confirm conditions
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
Tokenization tokenization) throws IllegalArgumentException {
return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-curator
/**
 * Creates a {@link TextAnnotation} for {@code text} using the token
 * boundaries supplied by an already-computed {@code tokenization}.
 *
 * @param corpusId identifier of the corpus
 * @param textId identifier of the document
 * @param text the raw document text
 * @param tokenization pre-computed tokens, character offsets and sentence ends
 * @throws IllegalArgumentException presumably propagated from the
 *         {@link TextAnnotation} constructor — TODO confirm conditions
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
Tokenizer.Tokenization tokenization) throws IllegalArgumentException {
return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-core-utilities
/**
 * Creates a {@link TextAnnotation} for {@code text} using the token
 * boundaries supplied by an already-computed {@code tokenization}.
 *
 * @param corpusId identifier of the corpus
 * @param textId identifier of the document
 * @param text the raw document text
 * @param tokenization pre-computed tokens, character offsets and sentence ends
 * @throws IllegalArgumentException presumably propagated from the
 *         {@link TextAnnotation} constructor — TODO confirm conditions
 */
@Override
public TextAnnotation createTextAnnotation(String corpusId, String textId, String text,
Tokenization tokenization) throws IllegalArgumentException {
return new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
}
代码示例来源:origin: CogComp/cogcomp-nlp
/**
 * Runs the configured CoreNLP {@code pipeline} over {@code text} and converts
 * the result into a {@link TextAnnotation} with empty corpus/document ids.
 *
 * @param text the raw text to annotate
 * @return a TextAnnotation whose token offsets and sentence boundaries come
 *         from the CoreNLP tokenization
 */
public TextAnnotation getTextAnnotation(String text) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    // Flatten the per-sentence token lists, recording the running token
    // count at the end of each sentence.
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    List<CoreLabel> allTokens = new ArrayList<>();
    int[] sentenceEnds = new int[sentences.size()];
    int sentenceIndex = 0;
    for (CoreMap sentence : sentences) {
        allTokens.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
        sentenceEnds[sentenceIndex] = allTokens.size();
        sentenceIndex++;
    }

    // Collect each token's surface string and [begin, end) character offsets.
    int tokenCount = allTokens.size();
    String[] surfaceForms = new String[tokenCount];
    IntPair[] charOffsets = new IntPair[tokenCount];
    for (int i = 0; i < tokenCount; i++) {
        CoreLabel token = allTokens.get(i);
        surfaceForms[i] = token.originalText();
        charOffsets[i] = new IntPair(token.beginPosition(), token.endPosition());
    }

    return new TextAnnotation("", "", text, charOffsets, surfaceForms, sentenceEnds);
}
代码示例来源:origin: edu.illinois.cs.cogcomp/wikipediaAPI-multilingual
/**
 * Tokenizes {@code text} with the configured CoreNLP {@code pipeline} and
 * wraps the result in a {@link TextAnnotation} with empty corpus/document ids.
 *
 * @param text the raw text to annotate
 * @return a TextAnnotation whose token offsets and sentence boundaries come
 *         from the CoreNLP tokenization
 */
public TextAnnotation getTextAnnotation(String text){
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreLabel> tokens = new ArrayList<>();
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
// sen_ends[i] holds the cumulative token count at the end of sentence i.
int[] sen_ends = new int[sentences.size()];
int sen_idx = 0;
for (CoreMap sentence : sentences) {
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
tokens.add(token);
}
sen_ends[sen_idx++] = tokens.size();
}
// Collect each token's surface string and [begin, end) character offsets.
String[] surfaces = new String[tokens.size()];
IntPair[] tokenCharOffsets = new IntPair[tokens.size()];
for(int i = 0; i < tokens.size(); i++){
surfaces[i] = tokens.get(i).originalText();
tokenCharOffsets[i] = new IntPair(tokens.get(i).beginPosition(), tokens.get(i).endPosition());
}
TextAnnotation ta = new TextAnnotation("", "", text, tokenCharOffsets,
surfaces, sen_ends);
return ta;
}
代码示例来源:origin: CogComp/cogcomp-nlp
// NOTE(review): incomplete snippet — tokensPairs, tokens, firstCharOffset and
// tokenSize come from code not shown here. TODO confirm their provenance.
assert tokensPairs.size() == tokenSize;
// Slice the original text down to the span covered by the collected tokens.
String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset);
// Build a TextAnnotation over that span, treating it as a single sentence
// (the sentence-end array contains only tokenSize).
TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text,
tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-core-utilities
// NOTE(review): incomplete snippet — tokensPairs, tokens, firstCharOffset and
// tokenSize come from code not shown here. TODO confirm their provenance.
assert tokensPairs.size() == tokenSize;
// Slice the original text down to the span covered by the collected tokens.
String text = ta.getText().substring(tokensPairs.get(0).getFirst() + firstCharOffset, tokensPairs.get(tokensPairs.size()-1).getSecond() + firstCharOffset);
// Build a TextAnnotation over that span, treating it as a single sentence
// (the sentence-end array contains only tokenSize).
TextAnnotation newTA = new TextAnnotation(ta.corpusId, ta.id, text,
tokensPairs.toArray(new IntPair[tokenSize]), tokens.toArray(new String[tokenSize]), new int[]{tokenSize});
代码示例来源:origin: CogComp/cogcomp-nlp
new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-core-utilities
new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
代码示例来源:origin: CogComp/cogcomp-nlp
/**
 * Deserializes a {@link TextAnnotation} from its protocol-buffer form,
 * restoring the token/sentence structure, all stored views, and the
 * document-level attributes.
 *
 * @param taImpl the serialized TextAnnotation message
 * @return the reconstructed TextAnnotation
 * @throws Exception if any view cannot be deserialized
 */
public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception {
    String[] tokens = taImpl.getTokensList().toArray(new String[0]);
    Pair<Pair<String, Double>, int[]> sentences = readSentences(taImpl.getSentences());
    String rawText = taImpl.getText();
    // Token character offsets are not stored in the proto; recompute them by
    // aligning the token strings against the raw text.
    IntPair[] offsets = TokenUtils.getTokenOffsets(rawText, tokens);
    TextAnnotation ta = new TextAnnotation(taImpl.getCorpusId(), taImpl.getId(), rawText,
            offsets, tokens, sentences.getSecond());
    for (ViewProto view : taImpl.getViewsList()) {
        List<View> topKViews = new ArrayList<>();
        for (ViewDataProto viewData : view.getViewDataList()) {
            topKViews.add(readViewData(viewData, ta));
        }
        String viewName = view.getViewName();
        boolean isSentenceView = viewName.equals(ViewNames.SENTENCE);
        // The constructor already created a SENTENCE view; drop it first so
        // the serialized one can take its place, then refresh sentence state.
        if (isSentenceView)
            ta.removeView(viewName);
        ta.addTopKView(viewName, topKViews);
        if (isSentenceView)
            ta.setSentences();
    }
    for (Map.Entry<String, String> entry : taImpl.getPropertiesMap().entrySet()) {
        ta.addAttribute(entry.getKey(), entry.getValue());
    }
    return ta;
}
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-core-utilities
/**
 * Reconstructs a {@link TextAnnotation} from its protobuf representation,
 * including all serialized views and document-level attributes.
 *
 * @param taImpl the serialized TextAnnotation message
 * @return the deserialized TextAnnotation
 * @throws Exception if any view cannot be deserialized
 */
public static TextAnnotation readTextAnnotation(TextAnnotationProto taImpl) throws Exception {
String corpusId = taImpl.getCorpusId();
String id = taImpl.getId();
String text = taImpl.getText();
String[] tokens = taImpl.getTokensList().toArray(new String[0]);
Pair<Pair<String, Double>, int[]> sentences = readSentences(taImpl.getSentences());
// Character offsets are not serialized; recompute them from the raw text.
IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens);
TextAnnotation ta =
new TextAnnotation(corpusId, id, text, offsets, tokens, sentences.getSecond());
for (ViewProto view : taImpl.getViewsList()) {
String viewName = view.getViewName();
List<View> topKViews = new ArrayList<>();
for (ViewDataProto viewData : view.getViewDataList()) {
topKViews.add(readViewData(viewData, ta));
}
// Replace the constructor-generated SENTENCE view with the serialized one.
if (viewName.equals(ViewNames.SENTENCE))
ta.removeView(viewName);
ta.addTopKView(viewName, topKViews);
if (viewName.equals(ViewNames.SENTENCE))
ta.setSentences();
}
for (Map.Entry<String, String> entry: taImpl.getPropertiesMap().entrySet()) {
ta.addAttribute(entry.getKey(), entry.getValue());
}
return ta;
}
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-tokenizer
new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);
代码示例来源:origin: CogComp/cogcomp-nlp
new TextAnnotation(corpusId, textId, text, offsets, tokens, sentenceEndPositions);
代码示例来源:origin: CogComp/cogcomp-nlp
// NOTE(review): incomplete snippet — the enclosing method signature and the
// trailing SpanLabelView initializer are not shown. TODO confirm context.
throws IllegalArgumentException {
// Tokenize the raw text, then build a TextAnnotation from the resulting
// tokens, character offsets and sentence-end token indexes.
Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text);
TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
SpanLabelView view =
代码示例来源:origin: edu.illinois.cs.cogcomp/illinois-tokenizer
// NOTE(review): incomplete snippet — the enclosing method signature and the
// trailing SpanLabelView initializer are not shown. TODO confirm context.
throws IllegalArgumentException {
// Tokenize the raw text, then build a TextAnnotation from the resulting
// tokens, character offsets and sentence-end token indexes.
Tokenizer.Tokenization tokenization = tokenizer.tokenizeTextSpan(text);
TextAnnotation ta = new TextAnnotation(corpusId, textId, text, tokenization.getCharacterOffsets(),
tokenization.getTokens(), tokenization.getSentenceEndTokenIndexes());
SpanLabelView view =
内容来源于网络,如有侵权,请联系作者删除!