Usage of the de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token class, with code examples


This article collects Java code examples for de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token and shows how the Token class is used in practice. The examples were extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the Token class:
Package path: de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token
Class name: Token

Introduction to Token

Token is one of the two types commonly produced by a segmenter (the other being Sentence). A Token usually represents a word, although it may be used to represent multiple tightly connected words (e.g. "New York") or parts of a word (e.g. the possessive "'s"). One may choose to split compound words into multiple tokens, e.g. ("CamelCase" -> "Camel", "Case"; "Zauberstab" -> "Zauber", "stab"). Most processing components operate on Tokens, usually within the limits of the surrounding Sentence. E.g. a part-of-speech tagger analyses each Token in a Sentence and assigns a part-of-speech to each Token.
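As a quick illustration of the description above, here is a minimal sketch (not taken from any of the projects cited below; the class name TokenWalkthrough is hypothetical, and it only assumes a JCas that already contains Sentence and Token annotations plus uimaFIT's JCasUtil) of how a component typically walks over the Tokens within the limits of each surrounding Sentence:

import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;

public class TokenWalkthrough
{
    // Iterate over the Tokens covered by each Sentence and print the covered
    // text together with the POS value, if a POS annotation has been attached.
    public static void printTokensPerSentence(JCas aJCas)
    {
        for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) {
            for (Token token : JCasUtil.selectCovered(aJCas, Token.class, sentence)) {
                String posValue = token.getPos() != null ? token.getPos().getPosValue() : "n/a";
                System.out.printf("%s [%d..%d] %s%n", token.getCoveredText(),
                        token.getBegin(), token.getEnd(), posValue);
            }
        }
    }
}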

Code examples

Code example source: de.unidue.ltl.flextag/flextag-core

private void annotateTaggingResultsLinkToTokens(JCas aJCas)
{
  List<Token> tokens = getTokens(aJCas);
  List<TextClassificationOutcome> outcomes = getPredictions(aJCas);
  for (int i = 0; i < tokens.size(); i++) {
    Token token = tokens.get(i);
    String outcome = outcomes.get(i).getOutcome();
    POS p = createPartOfSpeechAnnotationFromOutcome(aJCas, token.getBegin(),
        token.getEnd(), outcome);
    token.setPos(p);
  }
}

Code example source: hltfbk/Excitement-Open-Platform

private static String outToken(Token t) {
  return String.format("%s[%s:%s]", t.getCoveredText(), t.getBegin(), t.getEnd());
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.stanfordnlp-gpl

public static CoreLabel tokenToWord(Token aToken)
{
  CoreLabel t = new CoreLabel();
  
  t.setOriginalText(aToken.getCoveredText());
  t.setWord(aToken.getText());
  t.setBeginPosition(aToken.getBegin());
  t.setEndPosition(aToken.getEnd());
  
  if (aToken.getLemma() != null) {
    t.setLemma(aToken.getLemma().getValue());
  }
  else {
    t.setLemma(aToken.getText());
  }
  
  if (aToken.getPos() != null) {
    t.setTag(aToken.getPos().getPosValue());
  }
  
  return t;
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl

/**
 * @return the token text taking into account a {@link TokenForm} annotation that might exist.
 */
public String getText()
{
  String form = getFormValue();
  return form != null ? form : getCoveredText();
}
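For context, getText() only differs from getCoveredText() when a TokenForm annotation is attached to the Token. The following minimal sketch (not taken from the projects cited here; it assumes an existing Token named token in a JCas named aJCas, and that the Token type exposes a form feature of type TokenForm via setForm, as implied by getFormValue() above) shows the difference:

// Attach a TokenForm carrying a normalized surface form (assumed example values).
TokenForm form = new TokenForm(aJCas, token.getBegin(), token.getEnd());
form.setValue("going to");   // e.g. a normalized form of the covered text "gonna"
form.addToIndexes();
token.setForm(form);         // assumes the Token type's form feature setter

token.getCoveredText();      // still returns the original document text, e.g. "gonna"
token.getText();             // now returns "going to", because a TokenForm exists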

Code example source: dkpro/dkpro-tc

protected void setToken(JCas aJCas, int begin, int end)
{
  Token token = new Token(aJCas, begin, end);
  token.addToIndexes();
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl

/**
 * Given a list of tokens (e.g. those from a sentence) return the one at the specified position.
 */
private Token getToken(List<Token> aTokens, int aBegin, int aEnd)
{
  for (Token t : aTokens) {
    if (aBegin == t.getBegin() && aEnd == t.getEnd()) {
      return t;
    }
  }
  throw new IllegalStateException("Token not found");
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.penntree-asl

private void annotateTokenWithTag(JCas aJCas, String aToken, String aTag, int aCurrPosInText)
  {
    if (readToken) {
      // Token
      Token token = new Token(aJCas, aCurrPosInText, aToken.length() + aCurrPosInText);
      token.addToIndexes();

      if (readPOS) {
        // Tag
        Type posTag = posMappingProvider.getTagType(aTag);
        POS pos = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(),
            token.getEnd());
        pos.setPosValue(aTag);
        POSUtils.assignCoarseValue(pos);
        pos.addToIndexes();

        // Set the POS for the Token
        token.setPos(pos);
      }
    }
  }
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.opennlp-asl

// Excerpt from a lemmatizer's processing loop: first collect each token's text
// and POS tag into parallel arrays, then attach a new Lemma annotation to each token.
toks[i] = t.getText();
tags[i] = t.getPosValue();
i++;
Lemma lemmaAnno = new Lemma(aJCas, t.getBegin(), t.getEnd());
lemmaAnno.setValue(lemmas[n]);
lemmaAnno.addToIndexes();
t.setLemma(lemmaAnno);
n++;

Code example source: de.tudarmstadt.ukp.clarin.webanno/webanno-io-tcf

private void convertLemma(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
  if (aCorpusData.getLemmasLayer() == null) {
    return;
  }
  for (int i = 0; i < aCorpusData.getLemmasLayer().size(); i++) {
    eu.clarin.weblicht.wlfxb.tc.api.Token[] lemmaTokens = aCorpusData.getLemmasLayer()
        .getTokens(aCorpusData.getLemmasLayer().getLemma(i));
    String value = aCorpusData.getLemmasLayer().getLemma(i).getString();
    Lemma outLemma = new Lemma(aJCas);
    outLemma.setBegin(aTokens.get(lemmaTokens[0].getID()).getBegin());
    outLemma.setEnd(aTokens.get(lemmaTokens[0].getID()).getEnd());
    outLemma.setValue(value);
    outLemma.addToIndexes();
    // Set the lemma to the token
    aTokens.get(lemmaTokens[0].getID()).setLemma(outLemma);
  }
}

Code example source: UKPLab/argument-reasoning-comprehension-task

private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
  if (!source.getDocumentText().equals(target.getDocumentText())) {
    throw new IllegalArgumentException("Source and target have different content");
  }
  for (Paragraph p : JCasUtil.select(source, Paragraph.class)) {
    Paragraph paragraph = new Paragraph(target);
    paragraph.setBegin(p.getBegin());
    paragraph.setEnd(p.getEnd());
    paragraph.addToIndexes();
  }
  for (Token t : JCasUtil.select(source, Token.class)) {
    Token token = new Token(target);
    token.setBegin(t.getBegin());
    token.setEnd(t.getEnd());
    token.addToIndexes();
  }
}

Code example source: dkpro/dkpro-core

@Override
public boolean check(JCas aJCas, List<Message> aMessages)
{
  List<Token> withoutPOS = select(aJCas, Token.class).stream()
      .filter(t -> t.getPos() == null)
      .collect(Collectors.toList());
  
  for (Token t : withoutPOS) {
    aMessages.add(new Message(this, ERROR, String.format("Token has no POS: %s [%d..%d]", t
        .getType().getName(), t.getBegin(), t.getEnd())));
  }
  List<Token> withoutPOSValue = select(aJCas, Token.class).stream()
      .filter(t -> t.getPos() != null && t.getPos().getPosValue() == null)
      .collect(Collectors.toList());
  
  for (Token t : withoutPOSValue) {
    aMessages.add(new Message(this, ERROR, String.format(
        "Token has no POS value: %s [%d..%d]", t.getType().getName(), t.getBegin(),
        t.getEnd())));
  }
  return aMessages.stream().anyMatch(m -> m.level == ERROR);
}

Code example source: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.treetagger-asl

@Override
  public void token(Token aToken, String aPos, String aLemma)
  {
    synchronized (cas) {
      // Add the Part of Speech
      if (writePos && aPos != null) {
        Type posTag = posMappingProvider.getTagType(aPos);
        POS posAnno = (POS) cas.createAnnotation(posTag, aToken.getBegin(),
            aToken.getEnd());
        posAnno.setPosValue(aPos.intern());
        POSUtils.assignCoarseValue(posAnno);
        aToken.setPos(posAnno);
        pos[count.get()] = posAnno;
      }
      // Add the lemma
      if (writeLemma && aLemma != null) {
        Lemma lemmaAnno = new Lemma(aJCas, aToken.getBegin(), aToken.getEnd());
        lemmaAnno.setValue(aLemma.intern());
        aToken.setLemma(lemmaAnno);
        lemma[count.get()] = lemmaAnno;
      }
      count.getAndIncrement();
    }
  }
});

Code example source: webanno/webanno

// Excerpt from a test fixture: attach Lemma, MorphologicalFeatures, POS, and Stem annotations to token t1.
Lemma l1 = new Lemma(jcas, t1.getBegin(), t1.getEnd());
l1.setValue("lemma1");
l1.addToIndexes();
t1.setLemma(l1);
MorphologicalFeatures m1 = new MorphologicalFeatures(jcas, t1.getBegin(), t1.getEnd());
m1.setValue("morph");
m1.setTense("tense1");
m1.addToIndexes();
t1.setMorph(m1);
POS p1 = new POS(jcas, t1.getBegin(), t1.getEnd());
p1.setPosValue("pos1");
p1.addToIndexes();
t1.setPos(p1);
Stem s1 = new Stem(jcas, t1.getBegin(), t1.getEnd());
s1.setValue("stem1");
s1.addToIndexes();
t1.setStem(s1);

Code example source: dkpro/dkpro-similarity

public List<String> getSubstitutions(JCas jcas)
{
  List<String> tokens = new ArrayList<String>();
  List<String> postags = new ArrayList<String>();
  
  for (Token t : JCasUtil.select(jcas, Token.class))
  {
    try
    {
      tokens.add(t.getLemma().getValue().toLowerCase());
      postags.add(t.getPos().getPosValue());
    }
    catch (NullPointerException e) {
      System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
    }
  }
  
  return getSubstitutions(tokens, postags);
}

Code example source: de.tudarmstadt.ukp.dkpro.keyphrases/de.tudarmstadt.ukp.dkpro.keyphrases.decompounding-asl

@Override
public void process(final JCas aJCas)
  throws AnalysisEngineProcessException
{
  Token token;
  for (Compound compound : JCasUtil.select(aJCas, Compound.class)) {
    final Token compoundToken = JCasUtil.selectCovered(aJCas, Token.class,
        compound.getBegin(), compound.getEnd()).get(0);
    for (Split compoundPart : compound.getSplitsWithoutMorpheme(compoundSplitLevel)) {
      token = new Token(aJCas);
      token.setBegin(compoundPart.getBegin());
      token.setEnd(compoundPart.getEnd());
      token.setPos(compoundToken.getPos());
      token.addToIndexes();
    }
  }
}

Code example source: hltfbk/Excitement-Open-Platform

protected Map<String, String> indexDepTree(JCas text) {
  Map<String, String> depTree = new HashMap<String, String>();
  // format: key: 1 ### word ### pos; value: dep_rel ## 2 ### word ### pos
  // escape: .replace("#", "\\#")
  // depTree.put("1 ### The ### Det", "DET ## 2 ### dog ### N");
  // depTree.put("2 ### dog ### N", "SUBJ ## 3 ### chases ### V");
  // depTree.put("3 ### chases ### V", "ROOT ## 0 ### NULL ### NULL");
  // depTree.put("4 ### The ### Det", "DET ## 5 ### cat ### N");
  // depTree.put("5 ### cat ### N", "OBJ ## 3 ### chases ### V");
  for (Dependency dep : JCasUtil.select(text, Dependency.class)) {
    Token child = dep.getDependent();
    Token parent = dep.getGovernor();
    depTree.put(child.getBegin() + " ### "
        + child.getCoveredText().replace("#", "\\#") + " ### "
        + child.getPos().getPosValue(), dep.getDependencyType()
        + " ## " + parent.getBegin() + " ### "
        + parent.getCoveredText().replace("#", "\\#") + " ### "
        + parent.getPos().getPosValue());
  }
  return depTree;
}

Code example source: dkpro/dkpro-tc

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
  if (embedding == null) {
    return;
  }
  Collection<Token> select = JCasUtil.select(aJCas, Token.class);
  for (Token t : select) {
    if (vocab.contains(t.getCoveredText())) {
      continue;
    }
    POS pos = t.getPos();
    if (pos != null) {
      pos.removeFromIndexes();
      t.setPos(null);
    }
    t.removeFromIndexes();
    droppedVocabulary++;
  }
}

Code example source: hltfbk/Excitement-Open-Platform

protected Map<String, String> indexLemmaDepTree(JCas text) {
    Map<String, String> depTree = new HashMap<String, String>();

    for (Dependency dep : JCasUtil.select(text, Dependency.class)) {
      Token child = dep.getDependent();
      Token parent = dep.getGovernor();
      depTree.put(child.getBegin() + " ### "
          + child.getLemma().getValue().replace("#", "\\#") + " ### "
          + child.getPos().getPosValue(), dep.getDependencyType()
          + " ## " + parent.getBegin() + " ### "
          + parent.getLemma().getValue().replace("#", "\\#")
          + " ### " + parent.getPos().getPosValue());
    }

    return depTree;
  }
}

Code example source: hltfbk/Excitement-Open-Platform

@Override
public String getTokenBaseForm(Token token) {
  
  return token.getCoveredText();
  
}

Code example source: hltfbk/Excitement-Open-Platform

// Create a Token and attach a Lemma to it (the excerpt is completed below so the
// lemma covers the same span and is added to the indexes).
Token tokenAnnot = new Token(jcas1);
tokenAnnot.setBegin(begin);
tokenAnnot.setEnd(end);
tokenAnnot.addToIndexes();
Lemma lemmaAnnot = new Lemma(jcas1);
lemmaAnnot.setBegin(begin);
lemmaAnnot.setEnd(end);
lemmaAnnot.addToIndexes();
tokenAnnot.setLemma(lemmaAnnot);
