edu.stanford.nlp.ling.Word.word()方法的使用及代码示例

x33g5p2x  于2022-02-03 转载在 其他  
字(7.6k)|赞(0)|评价(0)|浏览(116)

本文整理了Java中edu.stanford.nlp.ling.Word.word()方法的一些代码示例,展示了Word.word()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Word.word()方法的具体详情如下:
包路径:edu.stanford.nlp.ling.Word
类名称:Word
方法名:word

Word.word介绍

暂无

代码示例

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display: each element that implements {@link HasWord} has its
 * {@link HasWord#word} printed, separated by single spaces; other
 * elements are skipped.
 *
 * Subclasses that maintain additional information may wish to
 * override this method.
 *
 * @return the words of this Document joined by spaces
 */
public String presentableText() {
  StringBuilder out = new StringBuilder();
  for (Word token : this) {
    // Only separate once something has actually been emitted, so no
    // leading space appears (and empty first words add no separator).
    if (out.length() > 0) {
      out.append(' ');
    }
    out.append(token.word());
  }
  return out.toString();
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Formats an array of tokens as a single space-separated string, each
 * token rendered as {@code word{begin, end}} using its character offsets.
 *
 * @param tokens the tokens to render (must be non-null)
 * @return e.g. {@code "foo{0, 3} bar{4, 7}"}; empty string for an empty array
 */
public static String tokensToString(Word [] tokens) {
  StringBuilder sb = new StringBuilder(512);
  for (int i = 0; i < tokens.length; i++) {
    if (i > 0) {
      sb.append(' ');
    }
    Word token = tokens[i];
    // Chain appends instead of concatenating into one String first:
    // the original "a + b + c" built a throwaway StringBuilder per token.
    sb.append(token.word())
      .append('{').append(token.beginPosition())
      .append(", ").append(token.endPosition())
      .append('}');
  }
  return sb.toString();
}

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Stems {@code w} and returns the stemmed {@code Word}.
 *
 * @param w the word to stem
 * @return a new {@code Word} wrapping the stemmed form
 */
public Word stem(Word w) {
  // Delegate to the String-based overload, then re-wrap.
  String stemmed = stem(w.word());
  return new Word(stemmed);
}

代码示例来源:origin: stanfordnlp/CoreNLP

boolean justInsertedNewline = false; // to prevent contiguous newlines
for (Word w : in) {
 String ws = w.word();
 if (ws.startsWith("<") && ws.endsWith(">")) {
  if (markLineBreaks && !justInsertedNewline) {

代码示例来源:origin: stanfordnlp/CoreNLP

/** Return the tokens using PTB tokenizer.
 *
 *  @param str String to tokenize
 *  @return List of tokens
 */
private String[] ptbTokenize(String str) {
  // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers
  // Lazily create the factory once and reuse it across calls.
  if (ptbFactory == null) {
    ptbFactory = PTBTokenizer.factory();
  }
  Tokenizer<Word> tokenizer = ptbFactory.getTokenizer(new StringReader(str));
  List<Word> tokens = tokenizer.tokenize();
  String[] result = new String[tokens.size()];
  int idx = 0;
  for (Word token : tokens) {
    result[idx++] = token.word();
  }
  return result;
}

代码示例来源:origin: stanfordnlp/CoreNLP

Matcher hasArabic = utf8ArabicChart.matcher(token.word());
if(hasArabic.find()) {
 token.setWord(escaper.apply(token.word()));
 token.setWord(lexMapper.map(null, token.word()));

代码示例来源:origin: stanfordnlp/CoreNLP

/**
 * Test program for demonstrating the Stemmer.  It reads text from a
 * list of files, stems each word, and writes the result to standard
 * output. Note that the word stemmed is expected to be in lower case:
 * forcing lower case must be done outside the Stemmer class.
 * Usage: Stemmer file-name file-name ...
 *
 * @param args either {@code -file <name>} to stem a tokenized file,
 *             or a list of words to stem directly
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  Stemmer s = new Stemmer();
  // Guard: the original threw ArrayIndexOutOfBoundsException on no args.
  if (args.length == 0) {
    System.err.println("Usage: Stemmer file-name file-name ...");
    return;
  }
  if (args[0].equals("-file")) {
    // try-with-resources: the original never closed this reader.
    // Charset constant instead of the string "utf-8" — no checked
    // UnsupportedEncodingException and no misspelling risk.
    try (InputStreamReader in = new InputStreamReader(
        new FileInputStream(args[1]), java.nio.charset.StandardCharsets.UTF_8)) {
      Iterator<Word> it = PTBTokenizer.newPTBTokenizer(in);
      while (it.hasNext()) {
        Word token = it.next();
        System.out.print(s.stem(token.word()));
        System.out.print(' ');
      }
    }
  } else {
    for (String arg : args) {
      System.out.print(s.stem(arg));
      System.out.print(' ');
    }
  }
  System.out.println();
}

代码示例来源:origin: stanfordnlp/CoreNLP

int numAdded = 0;
while (tok.hasNext()) {
 String s = tok.next().word();

代码示例来源:origin: stanfordnlp/CoreNLP

DFSAState<Word, Integer> fromState = tr.getSource();
Word word = tr.getInput();
if (!word.word().equals(" "))
 segmentedWords.add(0, word);
i = fromState.stateID();

代码示例来源:origin: stanfordnlp/CoreNLP

for (; ;) {
 Word word = (Word) sentIter.next();
 pw.print(word.word());
 if (sentIter.hasNext()) {
  pw.print(" ");

代码示例来源:origin: stackoverflow.com

import edu.stanford.nlp.ling.Word;

List<Word> words = ...
for (Word word : words) {
 // Bug fix: args(1) is not valid Java (it is Scala call syntax);
 // array element access is args[1].
 if (word.word().equals(args[1])) {
  System.err.println("Yes!");
 }
}

代码示例来源:origin: pilehvar/ADW

/**
 * Tokenizes {@code string} and returns the surface form of each token.
 *
 * @param string the text to tokenize
 * @return token strings in order of appearance
 */
public List<String> tokenizeString(String string)
{
  final List<String> result = new ArrayList<String>();
  for (Word token : tokenize(string)) {
    result.add(token.word());
  }
  return result;
}

代码示例来源:origin: edu.stanford.nlp/stanford-corenlp

/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. Each element that implements {@link HasWord} contributes its
 * {@link HasWord#word}, joined by single spaces; other elements are
 * skipped.
 *
 * Subclasses that maintain additional information may wish to
 * override this method.
 *
 * @return the space-joined word text of this Document
 */
public String presentableText() {
  StringBuilder buffer = new StringBuilder();
  for (Word element : this) {
    // Separator only after the first non-empty append, so the result
    // never starts with a space.
    if (buffer.length() != 0) {
      buffer.append(' ');
    }
    buffer.append(element.word());
  }
  return buffer.toString();
}

代码示例来源:origin: edu.stanford.nlp/corenlp

/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in
 * this Document separated
 * by spaces. Specifically, each element that implements {@link HasWord}
 * has its
 * {@link HasWord#word} printed, and other elements are skipped.
 * <p/>
 * <p>Subclasses that maintain additional information may wish to
 * override this method.</p>
 */
public String presentableText() {
 StringBuilder sb = new StringBuilder();
 for (Word cur : this) {
  // Append a separator only once output exists — avoids a leading space.
  if (sb.length() > 0) {
   sb.append(' ');
  }
  sb.append(cur.word());
 }
 return (sb.toString());
}

代码示例来源:origin: com.googlecode.mate-tools/srl

/**
 * Tokenizes {@code sentence} with Stanford's PTB tokenizer and returns the
 * token strings preceded by the CoNLL-2009 artificial ROOT token in slot 0.
 *
 * @param sentence the raw sentence text
 * @return array of length (number of tokens + 1), ROOT first
 */
@Override
public String[] tokenize(String sentence) {
  PTBTokenizer<Word> tokenizer =
      PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
  List<String> words = new ArrayList<String>();
  while (tokenizer.hasNext()) {
    words.add(tokenizer.next().word());
  }
  // Slot 0 carries the artificial ROOT expected by the CoNLL-2009 reader.
  String[] result = new String[words.size() + 1];
  result[0] = is2.io.CONLLReader09.ROOT;
  int pos = 1;
  for (String w : words) {
    result[pos++] = w;
  }
  return result;
}

代码示例来源:origin: microth/PathLSTM

/**
 * Tokenizes {@code sentence} with the PTB tokenizer; the returned array has
 * the CoNLL-2009 artificial ROOT token at index 0 followed by the tokens.
 *
 * @param sentence the raw sentence text
 * @return array of length (token count + 1), ROOT first
 */
@Override
public String[] tokenize(String sentence) {
  Reader reader = new StringReader(sentence);
  PTBTokenizer<Word> tok = PTBTokenizer.newPTBTokenizer(reader);
  List<String> pieces = new ArrayList<>();
  while (tok.hasNext()) {
    pieces.add(tok.next().word());
  }
  // Index 0 holds the artificial ROOT required by the CoNLL-2009 reader.
  String[] out = new String[pieces.size() + 1];
  out[0] = is2.io.CONLLReader09.ROOT;
  for (int i = 0; i < pieces.size(); i++) {
    out[i + 1] = pieces.get(i);
  }
  return out;
}

代码示例来源:origin: edu.stanford.nlp/stanford-corenlp

/** Return the tokens using PTB tokenizer.
 *
 *  @param str String to tokenize
 *  @return List of tokens
 */
private String[] ptbTokenize(String str) {
  // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers
  if (ptbFactory == null) {
    ptbFactory = PTBTokenizer.factory();  // lazy init, reused across calls
  }
  List<Word> tokenList =
      ptbFactory.getTokenizer(new StringReader(str)).tokenize();
  int n = tokenList.size();
  String[] out = new String[n];
  for (int i = 0; i < n; i++) {
    out[i] = tokenList.get(i).word();
  }
  return out;
}

代码示例来源:origin: edu.stanford.nlp/corenlp

/**
 * Returns the next segmented word, pulling and segmenting a fresh token
 * from {@code tok} whenever the current segment iterator is exhausted.
 * Returns {@code null} when the underlying tokenizer runs out of tokens
 * (or yields a null word).
 */
@Override
protected Word getNext() {
 // Advance until we hold a segment iterator with remaining elements.
 while (wordIter == null || ! wordIter.hasNext()) {
  if ( ! tok.hasNext()) {
   return null;
  }
  String s = tok.next().word();
  if (s == null) {
   return null;
  }
  // Segment the raw token into words and start iterating over them.
  ArrayList<Word> se = segmentWords(s);
  wordIter = se.iterator();
 }
 return wordIter.next();
}

代码示例来源:origin: microth/PathLSTM

/**
 * Tokenizes {@code sentence} and returns each token with character offsets
 * shifted by the running document offset {@code startpos}; the offset is
 * then advanced past this sentence (+1 for the separator).
 *
 * @param sentence the raw sentence text
 * @return one {@code StringInText} per token (no ROOT entry)
 */
@Override
  public StringInText[] tokenizeplus(String sentence) {
    // Removed: an unused "Reader r = new StringReader(sentence)" local —
    // tokenize(sentence) is called on the String directly.
    List<StringInText> spans = new ArrayList<>();
    for (String token : tokenize(sentence)) {
      Word w = new Word(token);
      // NOTE(review): Word(String) is not given offsets here, so
      // beginPosition()/endPosition() may be sentinel values — confirm
      // against Word's constructor before relying on these spans.
      spans.add(new StringInText(w.word(), w.beginPosition() + startpos,
          w.endPosition() + startpos));
    }
    // toArray replaces the original manual index-copy loop.
    StringInText[] tok = spans.toArray(new StringInText[0]);

    startpos += (1 + sentence.length());

    return tok;
  }
}

代码示例来源:origin: microth/PathLSTM

/**
 * Tokenizes {@code sentence} with the PTB tokenizer and returns the tokens
 * with document-level character offsets ({@code startpos}-shifted), preceded
 * by a ROOT entry at index 0. Advances {@code startpos} past this sentence.
 *
 * @param sentence the raw sentence text
 * @return array of length (token count + 1), ROOT first
 */
public StringInText[] tokenizeplus(String sentence) {
  Reader reader = new StringReader(sentence);
  PTBTokenizer<Word> tok = PTBTokenizer.newPTBTokenizer(reader);
  List<StringInText> collected = new ArrayList<>();
  while (tok.hasNext()) {
    Word word = tok.next();
    collected.add(new StringInText(word.word(),
        word.beginPosition() + startpos, word.endPosition() + startpos));
  }
  // Index 0 carries the artificial ROOT token at offset (0, 0).
  StringInText[] result = new StringInText[collected.size() + 1];
  result[0] = new StringInText(is2.io.CONLLReader09.ROOT, 0, 0);
  int idx = 1;
  for (StringInText s : collected) {
    result[idx++] = s;
  }
  // +1 accounts for the sentence separator in the running offset.
  startpos += (1 + sentence.length());
  return result;
}

相关文章