本文整理了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token
类的一些代码示例,展示了Token
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Token
类的具体详情如下:
包路径:de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token
类名称:Token
[英]Token is one of the two types commonly produced by a segmenter (the other being Sentence). A Token usually represents a word, although it may be used to represent multiple tightly connected words (e.g. "New York") or parts of a word (e.g. the possessive "'s"). One may choose to split compound words into multiple tokens, e.g. ("CamelCase" -> "Camel", "Case"; "Zauberstab" -> "Zauber", "stab"). Most processing components operate on Tokens, usually within the limits of the surrounding Sentence. E.g. a part-of-speech tagger analyses each Token in a Sentence and assigns a part-of-speech to each Token.
[中]标记是分词器通常产生的两种类型之一(另一种是句子)。一个标记通常代表一个单词,尽管它也可以用来表示多个紧密连接的单词(例如“New York”)或一个单词的一部分(例如所有格“'s”)。可以选择将复合词拆分为多个标记,例如(“CamelCase”->“Camel”,“Case”;“Zauberstab”->“Zauber”,“stab”)。大多数处理组件对标记进行操作,通常在周围句子的限制范围内。例如,词性标注器会分析句子中的每个标记,并为每个标记分配一个词性。
代码示例来源:origin: de.unidue.ltl.flextag/flextag-core
/**
 * Attaches the predicted tagging outcomes to their tokens: for each token a POS
 * annotation is created from the outcome at the same index and set on the token.
 *
 * @param aJCas the CAS holding both the tokens and the classification outcomes
 */
private void annotateTaggingResultsLinkToTokens(JCas aJCas)
{
    List<Token> tokens = getTokens(aJCas);
    List<TextClassificationOutcome> outcomes = getPredictions(aJCas);
    // NOTE(review): assumes tokens and outcomes are index-aligned — confirm upstream.
    int idx = 0;
    for (Token tok : tokens) {
        String predictedTag = outcomes.get(idx).getOutcome();
        POS posAnno = createPartOfSpeechAnnotationFromOutcome(aJCas, tok.getBegin(),
                tok.getEnd(), predictedTag);
        tok.setPos(posAnno);
        idx++;
    }
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
/**
 * Renders a token for debug output as {@code coveredText[begin:end]}.
 */
private static String outToken(Token t) {
    return t.getCoveredText() + "[" + t.getBegin() + ":" + t.getEnd() + "]";
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.stanfordnlp-gpl
/**
 * Converts a DKPro Core token into a Stanford {@code CoreLabel}, copying the
 * covered text, token text, offsets, lemma (falling back to the token text when
 * no lemma annotation exists) and — if present — the POS tag.
 *
 * @param aToken the source token
 * @return a freshly created CoreLabel mirroring the token
 */
public static CoreLabel tokenToWord(Token aToken)
{
    CoreLabel label = new CoreLabel();
    label.setOriginalText(aToken.getCoveredText());
    label.setWord(aToken.getText());
    label.setBeginPosition(aToken.getBegin());
    label.setEndPosition(aToken.getEnd());

    // Prefer the explicit lemma annotation; otherwise fall back to the token text.
    if (aToken.getLemma() == null) {
        label.setLemma(aToken.getText());
    }
    else {
        label.setLemma(aToken.getLemma().getValue());
    }

    // POS is optional — copy it only when present.
    if (aToken.getPos() != null) {
        label.setTag(aToken.getPos().getPosValue());
    }
    return label;
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl
/**
* @return the token text taking into account a {@link TokenForm} annotation that might exist.
*/
public String getText()
{
String form = getFormValue();
return form != null ? form : getCoveredText();
}
代码示例来源:origin: dkpro/dkpro-tc
/**
 * Creates a {@link Token} annotation over the given character span and adds it
 * to the CAS indexes.
 *
 * @param aJCas the CAS to annotate
 * @param begin begin offset of the token
 * @param end   end offset of the token
 */
protected void setToken(JCas aJCas, int begin, int end)
{
    new Token(aJCas, begin, end).addToIndexes();
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl
/**
* Given a list of tokens (e.g. those from a sentence) return the one at the specified position.
*/
/**
 * Given a list of tokens (e.g. those from a sentence), returns the first one
 * whose offsets match exactly.
 *
 * @throws IllegalStateException if no token with the given offsets exists
 */
private Token getToken(List<Token> aTokens, int aBegin, int aEnd)
{
    return aTokens.stream()
            .filter(t -> t.getBegin() == aBegin && t.getEnd() == aEnd)
            .findFirst()
            .orElseThrow(() -> new IllegalStateException("Token not found"));
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.penntree-asl
/**
 * Creates a Token annotation for the given token string at the given text
 * position, and — when POS reading is enabled — also a POS annotation over the
 * same span, linked to the token.
 *
 * @param aJCas          the CAS to annotate
 * @param aToken         the token surface string (its length defines the span)
 * @param aTag           the raw POS tag for the token
 * @param aCurrPosInText begin offset of the token in the document text
 */
private void annotateTokenWithTag(JCas aJCas, String aToken, String aTag, int aCurrPosInText)
{
    if (!readToken) {
        return;
    }
    Token token = new Token(aJCas, aCurrPosInText, aToken.length() + aCurrPosInText);
    token.addToIndexes();

    if (!readPOS) {
        return;
    }
    // Map the raw tag to a UIMA type, create the POS annotation over the token
    // span, fill in its values and attach it to the token.
    Type posTag = posMappingProvider.getTagType(aTag);
    POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
    pos.setPosValue(aTag);
    POSUtils.assignCoarseValue(pos);
    pos.addToIndexes();
    token.setPos(pos);
}
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl
// -- Fragment: interior of a loop over tokens; the enclosing method is not shown. --
// Collect the token text and POS tag into parallel arrays (index i).
toks[i] = t.getText();
tags[i] = t.getPosValue();
i++;
// Create a Lemma annotation over the token span, fill it from the lemmas array
// (index n, presumably parallel to the token order — confirm in full source),
// register it in the CAS indexes and link it to the token.
Lemma lemmaAnno = new Lemma(aJCas, t.getBegin(), t.getEnd());
lemmaAnno.setValue(lemmas[n]);
lemmaAnno.addToIndexes();
t.setLemma(lemmaAnno);
n++;
代码示例来源:origin: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tcf
/**
 * Converts the TCF lemma layer into UIMA {@link Lemma} annotations and links
 * each lemma to its first corresponding token.
 *
 * @param aJCas       the target CAS
 * @param aCorpusData the TCF source document
 * @param aTokens     map from TCF token ID to the UIMA token created earlier
 */
private void convertLemma(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    if (aCorpusData.getLemmasLayer() == null) {
        return;
    }
    for (int i = 0; i < aCorpusData.getLemmasLayer().size(); i++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] lemmaTokens = aCorpusData.getLemmasLayer()
                .getTokens(aCorpusData.getLemmasLayer().getLemma(i));
        String value = aCorpusData.getLemmasLayer().getLemma(i).getString();

        // Look up the UIMA tokens for the first and last TCF token only once.
        Token firstToken = aTokens.get(lemmaTokens[0].getID());
        Token lastToken = aTokens.get(lemmaTokens[lemmaTokens.length - 1].getID());

        Lemma outLemma = new Lemma(aJCas);
        outLemma.setBegin(firstToken.getBegin());
        // BUG FIX: the end offset was previously taken from the FIRST token,
        // truncating multi-token lemmas; use the last token so the annotation
        // spans all tokens belonging to the lemma.
        outLemma.setEnd(lastToken.getEnd());
        outLemma.setValue(value);
        outLemma.addToIndexes();

        // Link the lemma to its first token (matches the original linking).
        firstToken.setLemma(outLemma);
    }
}
代码示例来源:origin: UKPLab/argument-reasoning-comprehension-task
/**
 * Copies all Paragraph and Token annotations (offsets only, no features) from
 * one CAS to another. Both CASes must hold the identical document text.
 *
 * @throws IllegalArgumentException if the document texts differ
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
    if (!source.getDocumentText().equals(target.getDocumentText())) {
        throw new IllegalArgumentException("Source and target have different content");
    }
    for (Paragraph sourceParagraph : JCasUtil.select(source, Paragraph.class)) {
        Paragraph copy = new Paragraph(target, sourceParagraph.getBegin(),
                sourceParagraph.getEnd());
        copy.addToIndexes();
    }
    for (Token sourceToken : JCasUtil.select(source, Token.class)) {
        Token copy = new Token(target, sourceToken.getBegin(), sourceToken.getEnd());
        copy.addToIndexes();
    }
}
代码示例来源:origin: dkpro/dkpro-core
/**
 * Validates that every Token carries a POS annotation and that every POS carries
 * a value. One ERROR message is appended per violation; all "no POS" messages
 * come before all "no POS value" messages, as in the original two-pass layout.
 *
 * @return true if aMessages contains any ERROR-level message afterwards
 */
@Override
public boolean check(JCas aJCas, List<Message> aMessages)
{
    // Pass 1: tokens lacking a POS annotation entirely.
    for (Token t : select(aJCas, Token.class)) {
        if (t.getPos() != null) {
            continue;
        }
        aMessages.add(new Message(this, ERROR, String.format("Token has no POS: %s [%d..%d]",
                t.getType().getName(), t.getBegin(), t.getEnd())));
    }
    // Pass 2: tokens whose POS annotation has no value set.
    for (Token t : select(aJCas, Token.class)) {
        if (t.getPos() == null || t.getPos().getPosValue() != null) {
            continue;
        }
        aMessages.add(new Message(this, ERROR, String.format(
                "Token has no POS value: %s [%d..%d]", t.getType().getName(), t.getBegin(),
                t.getEnd())));
    }
    return aMessages.stream().anyMatch(m -> m.level == ERROR);
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.treetagger-asl
/**
 * Callback invoked per tagged token: attaches the POS and lemma produced by the
 * tagger to the given Token annotation and records them in the side arrays
 * pos[]/lemma[] at the current count index.
 * NOTE(review): posAnno/lemmaAnno are NOT addToIndexes()'d here — presumably the
 * side arrays are indexed later; confirm in the enclosing component.
 */
@Override
public void token(Token aToken, String aPos, String aLemma)
{
// Lock on the CAS — presumably the callback can run concurrently; confirm.
synchronized (cas) {
// Add the Part of Speech
if (writePos && aPos != null) {
Type posTag = posMappingProvider.getTagType(aPos);
POS posAnno = (POS) cas.createAnnotation(posTag, aToken.getBegin(),
aToken.getEnd());
// intern() deduplicates repeated tag strings across many tokens.
posAnno.setPosValue(aPos.intern());
POSUtils.assignCoarseValue(posAnno);
aToken.setPos(posAnno);
pos[count.get()] = posAnno;
}
// Add the lemma
if (writeLemma && aLemma != null) {
Lemma lemmaAnno = new Lemma(aJCas, aToken.getBegin(), aToken.getEnd());
lemmaAnno.setValue(aLemma.intern());
aToken.setLemma(lemmaAnno);
lemma[count.get()] = lemmaAnno;
}
// Advance the shared token counter exactly once per callback.
count.getAndIncrement();
}
}
});
代码示例来源:origin: webanno/webanno
// -- Fragment: test fixture attaching one of each feature annotation to token t1
//    (the enclosing test method is not shown). --
// Lemma over the token span.
Lemma l1 = new Lemma(jcas, t1.getBegin(), t1.getEnd());
l1.setValue("lemma1");
l1.addToIndexes();
t1.setLemma(l1);
// Morphological features over the token span.
MorphologicalFeatures m1 = new MorphologicalFeatures(jcas, t1.getBegin(), t1.getEnd());
m1.setValue("morph");
m1.setTense("tense1");
m1.addToIndexes();
t1.setMorph(m1);
// POS over the token span.
POS p1 = new POS(jcas, t1.getBegin(), t1.getEnd());
p1.setPosValue("pos1");
p1.addToIndexes();
t1.setPos(p1);
// Stem over the token span.
Stem s1 = new Stem(jcas, t1.getBegin(), t1.getEnd());
s1.setValue("stem1");
s1.addToIndexes();
t1.setStem(s1);
代码示例来源:origin: dkpro/dkpro-similarity
/**
 * Collects the lower-cased lemma value and POS value of every token in the CAS
 * and delegates to {@code getSubstitutions(List, List)}.
 *
 * Tokens lacking a lemma or POS annotation are skipped (with a warning on
 * stderr) so the two lists always stay index-aligned.
 *
 * @param jcas the CAS to read tokens from
 * @return the substitutions computed from the collected lemmas and POS tags
 */
public List<String> getSubstitutions(JCas jcas)
{
    List<String> tokens = new ArrayList<String>();
    List<String> postags = new ArrayList<String>();
    for (Token t : JCasUtil.select(jcas, Token.class))
    {
        // BUG FIX: the original caught NullPointerException AFTER tokens.add(...)
        // had already run, so a token with a lemma but no POS left the two
        // parallel lists out of sync. Check explicitly and add both or neither.
        if (t.getLemma() == null || t.getLemma().getValue() == null || t.getPos() == null) {
            System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
            continue;
        }
        tokens.add(t.getLemma().getValue().toLowerCase());
        postags.add(t.getPos().getPosValue());
    }
    return getSubstitutions(tokens, postags);
}
代码示例来源:origin: de.tudarmstadt.ukp.dkpro.keyphrases/de.tudarmstadt.ukp.dkpro.keyphrases.decompounding-asl
/**
 * For every Compound, creates one new Token per split (splits without morpheme
 * links at the configured split level), copying the POS annotation of the token
 * covering the compound onto each split token.
 */
@Override
public void process(final JCas aJCas)
    throws AnalysisEngineProcessException
{
    for (Compound compound : JCasUtil.select(aJCas, Compound.class)) {
        // NOTE(review): assumes at least one Token covers the compound span;
        // get(0) throws IndexOutOfBoundsException otherwise — confirm upstream.
        final Token compoundToken = JCasUtil
                .selectCovered(aJCas, Token.class, compound.getBegin(), compound.getEnd())
                .get(0);
        for (Split compoundPart : compound.getSplitsWithoutMorpheme(compoundSplitLevel)) {
            Token splitToken = new Token(aJCas);
            splitToken.setBegin(compoundPart.getBegin());
            splitToken.setEnd(compoundPart.getEnd());
            // The split shares the POS annotation of the covering compound token.
            splitToken.setPos(compoundToken.getPos());
            splitToken.addToIndexes();
        }
    }
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
/**
 * Indexes the dependency tree of the given CAS as a map.
 * Key:   "&lt;begin&gt; ### &lt;word&gt; ### &lt;pos&gt;" of the dependent token;
 * Value: "&lt;depRel&gt; ## &lt;begin&gt; ### &lt;word&gt; ### &lt;pos&gt;" of the governor.
 * Literal '#' characters inside words are escaped as "\#".
 * Example: key "2 ### dog ### N" -&gt; value "SUBJ ## 3 ### chases ### V".
 */
protected Map<String, String> indexDepTree(JCas text) {
    Map<String, String> depTree = new HashMap<String, String>();
    for (Dependency dep : JCasUtil.select(text, Dependency.class)) {
        Token dependent = dep.getDependent();
        Token governor = dep.getGovernor();
        String key = dependent.getBegin() + " ### "
                + dependent.getCoveredText().replace("#", "\\#") + " ### "
                + dependent.getPos().getPosValue();
        String value = dep.getDependencyType() + " ## " + governor.getBegin() + " ### "
                + governor.getCoveredText().replace("#", "\\#") + " ### "
                + governor.getPos().getPosValue();
        depTree.put(key, value);
    }
    return depTree;
}
代码示例来源:origin: dkpro/dkpro-tc
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
if (embedding == null) {
return;
}
Collection<Token> select = JCasUtil.select(aJCas, Token.class);
for (Token t : select) {
if (vocab.contains(t.getCoveredText())) {
continue;
}
POS pos = t.getPos();
if (pos != null) {
pos.removeFromIndexes();
t.setPos(null);
}
t.removeFromIndexes();
droppedVocabulary++;
}
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
/**
 * Indexes the dependency tree of the given CAS as a map, like indexDepTree but
 * using lemma values instead of the covered text.
 * Key:   "&lt;begin&gt; ### &lt;lemma&gt; ### &lt;pos&gt;" of the dependent token;
 * Value: "&lt;depRel&gt; ## &lt;begin&gt; ### &lt;lemma&gt; ### &lt;pos&gt;" of the governor.
 * Literal '#' characters inside lemmas are escaped as "\#".
 */
protected Map<String, String> indexLemmaDepTree(JCas text) {
    Map<String, String> depTree = new HashMap<String, String>();
    for (Dependency dep : JCasUtil.select(text, Dependency.class)) {
        Token dependent = dep.getDependent();
        Token governor = dep.getGovernor();
        String key = dependent.getBegin() + " ### "
                + dependent.getLemma().getValue().replace("#", "\\#") + " ### "
                + dependent.getPos().getPosValue();
        String value = dep.getDependencyType() + " ## " + governor.getBegin() + " ### "
                + governor.getLemma().getValue().replace("#", "\\#")
                + " ### " + governor.getPos().getPosValue();
        depTree.put(key, value);
    }
    return depTree;
}
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
/**
 * Returns the token's surface form as its base form — this implementation does
 * no lemmatization or normalization.
 */
@Override
public String getTokenBaseForm(Token token) {
    String surfaceForm = token.getCoveredText();
    return surfaceForm;
}
代码示例来源:origin: hltfbk/Excitement-Open-Platform
// -- Fragment: creates a Token and an attached Lemma (enclosing method not shown). --
// Token over [begin, end], registered in the CAS indexes.
Token tokenAnnot = new Token(jcas1);
tokenAnnot.setBegin(begin);
tokenAnnot.setEnd(end);
tokenAnnot.addToIndexes();
// Lemma linked to the token.
// NOTE(review): lemmaAnnot.setEnd(...) and lemmaAnnot.addToIndexes() are not
// called in this excerpt — possibly truncated by the snippet; verify in the
// full source, as the lemma's end offset is otherwise left unset.
Lemma lemmaAnnot = new Lemma(jcas1);
lemmaAnnot.setBegin(begin);
tokenAnnot.setLemma(lemmaAnnot);
内容来源于网络,如有侵权,请联系作者删除!