本文整理了Java中edu.stanford.nlp.ling.Word.word()
方法的一些代码示例,展示了Word.word()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Word.word()
方法的具体详情如下:
包路径:edu.stanford.nlp.ling.Word
类名称:Word
方法名:word
暂无
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in this
 * Document separated by spaces. Specifically, each element that implements
 * {@link HasWord} has its {@link HasWord#word} printed, and other elements
 * are skipped.
 *
 * Subclasses that maintain additional information may wish to override
 * this method.
 */
public String presentableText() {
  StringBuilder text = new StringBuilder();
  String separator = "";  // empty before the first word, a space afterwards
  for (Word token : this) {
    text.append(separator).append(token.word());
    separator = " ";
  }
  return text.toString();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Renders an array of tokens as a single space-separated string, where each
 * token is printed as {@code word{begin, end}} using its character offsets.
 *
 * @param tokens the tokens to render (must be non-null; may be empty)
 * @return the formatted string, e.g. {@code "the{0, 3} cat{4, 7}"}
 */
public static String tokensToString(Word[] tokens) {
  StringBuilder sb = new StringBuilder(512);
  for (int i = 0; i < tokens.length; i++) {
    if (i > 0) sb.append(' ');
    Word token = tokens[i];
    // Chain appends instead of concatenating a temporary String first;
    // the original built "w + "{" + ... + "}"" and appended the result,
    // which defeats the point of using a StringBuilder.
    sb.append(token.word())
      .append('{')
      .append(token.beginPosition())
      .append(", ")
      .append(token.endPosition())
      .append('}');
  }
  return sb.toString();
}
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Stems {@code w} and returns the stemmed {@code Word}.
 *
 * @param w the word to stem
 * @return a new {@code Word} wrapping the stemmed surface form
 */
public Word stem(Word w) {
  String stemmed = stem(w.word());
  return new Word(stemmed);
}
代码示例来源:origin: stanfordnlp/CoreNLP
boolean justInsertedNewline = false; // to prevent contiguous newlines
for (Word w : in) {
String ws = w.word();
if (ws.startsWith("<") && ws.endsWith(">")) {
if (markLineBreaks && !justInsertedNewline) {
代码示例来源:origin: stanfordnlp/CoreNLP
/** Return the tokens using PTB tokenizer.
 *
 * @param str String to tokenize
 * @return List of tokens
 */
private String[] ptbTokenize(String str) {
  // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers
  if (ptbFactory == null) {
    ptbFactory = PTBTokenizer.factory();  // lazily created, then reused
  }
  Tokenizer<Word> tokenizer = ptbFactory.getTokenizer(new StringReader(str));
  List<Word> tokens = tokenizer.tokenize();
  String[] result = new String[tokens.size()];
  int idx = 0;
  for (Word token : tokens) {
    result[idx++] = token.word();
  }
  return result;
}
代码示例来源:origin: stanfordnlp/CoreNLP
Matcher hasArabic = utf8ArabicChart.matcher(token.word());
if(hasArabic.find()) {
token.setWord(escaper.apply(token.word()));
token.setWord(lexMapper.map(null, token.word()));
代码示例来源:origin: stanfordnlp/CoreNLP
/**
 * Test program for demonstrating the Stemmer. It reads text from a
 * list of files, stems each word, and writes the result to standard
 * output. Note that the word stemmed is expected to be in lower case:
 * forcing lower case must be done outside the Stemmer class.
 * Usage: Stemmer file-name file-name ...
 *
 * @param args either {@code -file <path>} to tokenize and stem a file,
 *             or a list of words to stem directly
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  Stemmer s = new Stemmer();
  if (args[0].equals("-file")) {
    // try-with-resources: the original leaked the FileInputStream.
    // The reader must stay open while the tokenizer's iterator is consumed,
    // since PTBTokenizer reads from it lazily.
    try (InputStreamReader reader =
             new InputStreamReader(new FileInputStream(args[1]), "utf-8")) {
      Iterator<Word> it = PTBTokenizer.newPTBTokenizer(reader);
      while (it.hasNext()) {
        Word token = it.next();
        System.out.print(s.stem(token.word()));
        System.out.print(' ');
      }
    }
  } else {
    for (String arg : args) {
      System.out.print(s.stem(arg));
      System.out.print(' ');
    }
  }
  System.out.println();
}
代码示例来源:origin: stanfordnlp/CoreNLP
int numAdded = 0;
while (tok.hasNext()) {
String s = tok.next().word();
代码示例来源:origin: stanfordnlp/CoreNLP
DFSAState<Word, Integer> fromState = tr.getSource();
Word word = tr.getInput();
if (!word.word().equals(" "))
segmentedWords.add(0, word);
i = fromState.stateID();
代码示例来源:origin: stanfordnlp/CoreNLP
for (; ;) {
Word word = (Word) sentIter.next();
pw.print(word.word());
if (sentIter.hasNext()) {
pw.print(" ");
代码示例来源:origin: stackoverflow.com
import edu.stanford.nlp.ling.Word;
List<Word> words = ...
for (Word word : words) {
  // args is a String[]; index it with [], not () — "args(1)" does not compile.
  if (word.word().equals(args[1])) {
    System.err.println("Yes!");
  }
}
代码示例来源:origin: pilehvar/ADW
/**
 * Tokenizes the given string and returns the surface form of each token.
 *
 * @param string the text to tokenize
 * @return the token strings, in order
 */
public List<String> tokenizeString(String string)
{
  final List<String> result = new ArrayList<String>();
  for (Word token : tokenize(string)) {
    result.add(token.word());
  }
  return result;
}
代码示例来源:origin: edu.stanford.nlp/stanford-corenlp
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in this
 * Document separated by spaces. Specifically, each element that implements
 * {@link HasWord} has its {@link HasWord#word} printed, and other elements
 * are skipped.
 *
 * Subclasses that maintain additional information may wish to override
 * this method.
 */
public String presentableText() {
  StringBuilder out = new StringBuilder();
  boolean first = true;
  for (Word w : this) {
    if (!first) {
      out.append(' ');  // single space between consecutive words
    }
    out.append(w.word());
    first = false;
  }
  return out.toString();
}
代码示例来源:origin: edu.stanford.nlp/corenlp
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in
 * this Document separated by spaces. Specifically, each element that
 * implements {@link HasWord} has its {@link HasWord#word} printed, and
 * other elements are skipped.
 * <p/>
 * <p>Subclasses that maintain additional information may wish to
 * override this method.</p>
 */
public String presentableText() {
  StringBuilder rendered = new StringBuilder();
  Iterator<Word> it = this.iterator();
  while (it.hasNext()) {
    if (rendered.length() > 0) {
      rendered.append(' ');  // separate from the previous word
    }
    rendered.append(it.next().word());
  }
  return rendered.toString();
}
代码示例来源:origin: com.googlecode.mate-tools/srl
/**
 * Tokenizes a sentence with the PTB tokenizer and prepends the CoNLL-09
 * artificial ROOT token, as the downstream parser expects.
 *
 * @param sentence the raw sentence text
 * @return token array of length (number of tokens + 1), with ROOT at index 0
 */
@Override
public String[] tokenize(String sentence) {
  PTBTokenizer<Word> tokenizer =
      PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
  List<String> pieces = new ArrayList<String>();
  while (tokenizer.hasNext()) {
    pieces.add(tokenizer.next().word());
  }
  // Index 0 is reserved for the artificial ROOT token.
  String[] tok = new String[pieces.size() + 1];
  tok[0] = is2.io.CONLLReader09.ROOT;
  int i = 1;
  for (String piece : pieces) {
    tok[i++] = piece;
  }
  return tok;
}
代码示例来源:origin: microth/PathLSTM
/**
 * PTB-tokenizes a sentence and returns its tokens with the CoNLL-09 ROOT
 * marker prepended at index 0.
 *
 * @param sentence the raw sentence text
 * @return token array of length (number of tokens + 1)
 */
@Override
public String[] tokenize(String sentence) {
  List<String> words = new ArrayList<>();
  PTBTokenizer<Word> tok = PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
  while (tok.hasNext()) {
    words.add(tok.next().word());
  }
  // Slot 0 holds the artificial ROOT; real tokens start at index 1.
  String[] result = new String[words.size() + 1];
  result[0] = is2.io.CONLLReader09.ROOT;
  for (int i = 0; i < words.size(); i++) {
    result[i + 1] = words.get(i);
  }
  return result;
}
代码示例来源:origin: edu.stanford.nlp/stanford-corenlp
/** Return the tokens using PTB tokenizer.
 *
 * @param str String to tokenize
 * @return List of tokens
 */
private String[] ptbTokenize(String str) {
  // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers
  if (ptbFactory == null) {
    ptbFactory = PTBTokenizer.factory();  // created once, cached in the field
  }
  List<Word> words =
      ptbFactory.getTokenizer(new StringReader(str)).tokenize();
  int n = words.size();
  String[] out = new String[n];
  for (int i = 0; i < n; i++) {
    out[i] = words.get(i).word();
  }
  return out;
}
代码示例来源:origin: edu.stanford.nlp/corenlp
/**
 * Returns the next segmented word, refilling the segment buffer from the
 * underlying tokenizer as needed; returns {@code null} when exhausted or
 * when the tokenizer yields a {@code null} token string.
 */
@Override
protected Word getNext() {
  // Keep refilling wordIter until it can supply a segment.
  while (wordIter == null || !wordIter.hasNext()) {
    if (!tok.hasNext()) {
      return null;  // underlying tokenizer is exhausted
    }
    String text = tok.next().word();
    if (text == null) {
      return null;
    }
    wordIter = segmentWords(text).iterator();
  }
  return wordIter.next();
}
代码示例来源:origin: microth/PathLSTM
/**
 * Tokenizes a sentence and returns each token together with its character
 * offsets shifted by the running document position {@code startpos}.
 *
 * @param sentence the raw sentence text
 * @return one {@code StringInText} per token (no ROOT element prepended)
 */
@Override
public StringInText[] tokenizeplus(String sentence) {
  // Removed an unused local from the original:
  //   Reader r = new StringReader(sentence);
  // It was never passed to anything.
  List<StringInText> l = new ArrayList<>();
  for (String s : tokenize(sentence)) {
    Word w = new Word(s);
    // NOTE(review): w is built from a bare String, so beginPosition()/
    // endPosition() carry no offsets from the original sentence — confirm
    // this is the intended behavior (compare the sibling overload that
    // takes offsets from the PTBTokenizer directly).
    l.add(new StringInText(w.word(), w.beginPosition() + startpos, w
        .endPosition() + startpos));
  }
  StringInText[] tok = new StringInText[l.size()];
  // tok[0]=new StringInText(is2.io.CONLLReader09.ROOT,0,0);
  int i = 0;
  for (StringInText s : l)
    tok[i++] = s;
  // Advance the document cursor past this sentence plus one separator char.
  startpos += (1 + sentence.length());
  return tok;
}
}
代码示例来源:origin: microth/PathLSTM
/**
 * PTB-tokenizes a sentence, returning each token with document-level
 * character offsets (token offsets shifted by {@code startpos}) and the
 * CoNLL-09 ROOT marker at index 0.
 *
 * @param sentence the raw sentence text
 * @return array of length (number of tokens + 1)
 */
public StringInText[] tokenizeplus(String sentence) {
  PTBTokenizer<Word> tokenizer =
      PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
  List<StringInText> spans = new ArrayList<>();
  while (tokenizer.hasNext()) {
    Word token = tokenizer.next();
    spans.add(new StringInText(token.word(),
        token.beginPosition() + startpos,
        token.endPosition() + startpos));
  }
  // Index 0 is reserved for the artificial ROOT token.
  StringInText[] tok = new StringInText[spans.size() + 1];
  tok[0] = new StringInText(is2.io.CONLLReader09.ROOT, 0, 0);
  int i = 1;
  for (StringInText span : spans) {
    tok[i++] = span;
  }
  // Advance the document cursor past this sentence plus one separator char.
  startpos += (1 + sentence.length());
  return tok;
}
内容来源于网络,如有侵权,请联系作者删除!