本文整理了Java中opennlp.tools.util.Span.getCoveredText()
方法的一些代码示例,展示了Span.getCoveredText()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Span.getCoveredText()
方法的具体详情如下:
包路径:opennlp.tools.util.Span
类名称:Span
方法名:getCoveredText
[英]Retrieves the string covered by the current span of the specified text.
[中]检索指定文本的当前范围所覆盖的字符串。
代码示例来源:origin: apache/opennlp
/**
 * Converts an array of {@link Span}s to an array of {@link String}s.
 *
 * @param spans the spans whose covered text should be extracted
 * @param s the text the spans refer to
 * @return the substrings of {@code s} covered by the given spans, in the same order
 */
public static String[] spansToStrings(Span[] spans, CharSequence s) {
  String[] tokens = new String[spans.length];
  for (int si = 0, sl = spans.length; si < sl; si++) {
    tokens[si] = spans[si].getCoveredText(s).toString();
  }
  return tokens;
}
代码示例来源:origin: apache/opennlp
@Override
public String toString() {
  // Render each sentence on its own line; embedded line breaks are made
  // visible as <CR>/<LF> markers so the output stays one-sentence-per-line.
  StringBuilder sb = new StringBuilder();
  for (Span sentenceSpan : sentences) {
    String sentence = sentenceSpan.getCoveredText(document).toString();
    sb.append(sentence.replace("\r", "<CR>").replace("\n", "<LF>"));
    sb.append("\n");
  }
  return sb.toString();
}
代码示例来源:origin: apache/opennlp
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // Pull the raw fields out of the line first, then build the annotation.
  String id = tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
  String type = tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
  String arg1Text = tokens[ARG1_OFFSET].getCoveredText(line).toString();
  String arg2Text = tokens[ARG2_OFFSET].getCoveredText(line).toString();
  return new RelationAnnotation(id, type, parseArg(arg1Text), parseArg(arg2Text));
}
}
代码示例来源:origin: apache/opennlp
@Override
BratAnnotation parse(Span[] values, CharSequence line) throws IOException {
  // An attribute line carries either 3 fields (no value) or 4 fields (with value).
  if (values.length != 3 && values.length != 4) {
    throw new InvalidFormatException("Line must have 3 or 4 fields");
  }
  String value = (values.length == 4)
      ? values[VALUE_OFFSET].getCoveredText(line).toString()
      : null;
  return new AttributeAnnotation(values[ID_OFFSET].getCoveredText(line).toString(),
      values[TYPE_OFFSET].getCoveredText(line).toString(),
      values[ATTACHED_TO_OFFSET].getCoveredText(line).toString(), value);
}
}
代码示例来源:origin: apache/opennlp
/**
 * Auxiliary method to print span errors.
 *
 * @param falsePositives the false positive spans
 * @param falseNegatives the false negative spans
 * @param doc the document text the spans refer to
 */
private void printErrors(List<Span> falsePositives,
    List<Span> falseNegatives, String doc) {
  printStream.println("False positives: {");
  for (Span fp : falsePositives) {
    printStream.println(fp.getCoveredText(doc));
  }
  printStream.println("} False negatives: {");
  for (Span fn : falseNegatives) {
    printStream.println(fn.getCoveredText(doc));
  }
  printStream.println("}\n");
}
代码示例来源:origin: apache/opennlp
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // The note text stretches from the first value token to the end of the last token.
  Span noteSpan = new Span(tokens[START_VALUE_OFFSET].getStart(),
      tokens[tokens.length - 1].getEnd());
  String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  String attachedTo = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  return new AnnotatorNoteAnnotation(id, attachedTo,
      noteSpan.getCoveredText(line).toString());
}
}
代码示例来源:origin: apache/opennlp
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // The type field has the form "type:trigger".
  String[] typeParts = tokens[TYPE_OFFSET].getCoveredText(line).toString().split(":");
  if (typeParts.length != 2) {
    throw new InvalidFormatException(String.format(
        "Failed to parse [%s], type part must be in the format type:trigger", line));
  }
  // Every token after the type field is an "argument:value" pair.
  Map<String, String> arguments = new HashMap<>();
  for (int i = TYPE_OFFSET + 1; i < tokens.length; i++) {
    String[] argParts = tokens[i].getCoveredText(line).toString().split(":");
    if (argParts.length != 2) {
      throw new InvalidFormatException(String.format(
          "Failed to parse [%s], argument parts must be in form argument:value", line));
    }
    arguments.put(argParts[0], argParts[1]);
  }
  return new EventAnnotation(tokens[ID_OFFSET].getCoveredText(line).toString(),
      typeParts[0], typeParts[1], arguments);
}
}
代码示例来源:origin: apache/opennlp
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 *
 * @return A string array containing individual sentences as elements.
 */
public String[] sentDetect(String s) {
  Span[] spans = sentPosDetect(s);
  // No detected sentence positions yields an empty result.
  if (spans.length == 0) {
    return new String[] {};
  }
  String[] sentences = new String[spans.length];
  for (int i = 0; i < spans.length; i++) {
    sentences[i] = spans[i].getCoveredText(s).toString();
  }
  return sentences;
}
代码示例来源:origin: apache/opennlp
@Override
public String toString() {
  StringBuilder sentence = new StringBuilder();
  int previousEnd = -1;
  for (Span token : tokenSpans) {
    if (previousEnd != -1) {
      // Tokens that were adjacent in the original text get the separator
      // chars re-inserted between them; otherwise a plain space is used.
      sentence.append(previousEnd == token.getStart() ? separatorChars : " ");
    }
    sentence.append(token.getCoveredText(text));
    previousEnd = token.getEnd();
  }
  return sentence.toString();
}
代码示例来源:origin: apache/opennlp
public String read() throws IOException {
  TokenSample tokenSample = samples.read();
  // Propagate end-of-stream.
  if (tokenSample == null) {
    return null;
  }
  StringBuilder tokenString = new StringBuilder();
  for (Span token : tokenSample.getTokenSpans()) {
    tokenString.append(token.getCoveredText(tokenSample.getText())).append(' ');
  }
  // Drop the trailing space, if any token was appended.
  if (tokenString.length() > 0) {
    tokenString.setLength(tokenString.length() - 1);
  }
  return tokenString.toString();
}
}
代码示例来源:origin: apache/opennlp
String type = values[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
int beginIndex = parseInt(values[BEGIN_OFFSET].getCoveredText(line).toString());
if (values[i].getCoveredText(line).toString().contains(";")) {
String[] parts = values[i].getCoveredText(line).toString().split(";");
endOffset = parseInt(parts[0]);
fragments.add(new Span(beginIndex, endOffset, type));
endOffset = parseInt(values[i].getCoveredText(line).toString());
firstTextTokenIndex = i + 1;
fragments.add(new Span(beginIndex, endOffset, type));
String id = values[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
代码示例来源:origin: apache/opennlp
String whitespaceToken = whiteSpaceTokenSpan.getCoveredText(sampleString).toString();
代码示例来源:origin: apache/opennlp
@Override
protected Iterator<Event> createEvents(SentenceSample sample) {
  Collection<Event> events = new ArrayList<>();
  for (Span sentenceSpan : sample.getSentences()) {
    String sentenceString = sentenceSpan.getCoveredText(sample.getDocument()).toString();
    Iterator<Integer> candidates = scanner.getPositions(sentenceString).iterator();
    while (candidates.hasNext()) {
      int candidate = candidates.next();
      // Only the last candidate position within a sentence is an actual split.
      String type = candidates.hasNext()
          ? SentenceDetectorME.NO_SPLIT : SentenceDetectorME.SPLIT;
      events.add(new Event(type,
          cg.getContext(sample.getDocument(), sentenceSpan.getStart() + candidate)));
    }
  }
  return events.iterator();
}
}
代码示例来源:origin: apache/opennlp
@Override
protected final Parse processSample(final Parse reference) {
  // Rebuild the sentence text from the reference token spans.
  List<String> tokens = new ArrayList<>();
  for (Parse token : reference.getTokenNodes()) {
    tokens.add(token.getSpan().getCoveredText(reference.getText()).toString());
  }
  Parse[] predictions = ParserTool.parseLine(String.join(" ", tokens), parser, 1);
  if (predictions.length == 0) {
    return null;
  }
  Parse prediction = predictions[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(prediction));
  return prediction;
}
代码示例来源:origin: apache/opennlp
String tokenString = tokens[i].getCoveredText(sentence).toString();
String escapedToken = escape(tokenString);
tokenList[i] = escapedToken;
代码示例来源:origin: apache/opennlp
@Test
public void testTrim() {
  // trim() strips leading and trailing whitespace from the covered range.
  String text = " 12 34 ";
  Span fullSpan = new Span(0, text.length());
  Assert.assertEquals("12 34", fullSpan.trim(text).getCoveredText(text));
}
代码示例来源:origin: apache/opennlp
@Test
public void testTrimWhitespaceSpan() {
  // Trimming a whitespace-only span collapses it to an empty covered text.
  String text = " ";
  Span fullSpan = new Span(0, text.length());
  Assert.assertEquals("", fullSpan.trim(text).getCoveredText(text));
}
代码示例来源:origin: apache/opennlp
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by a whitespace.
 *
 */
@Test
public void testParsingWhitespaceSeparatedTokens() throws IOException {
  String sampleTokens = "Slave to the wage";
  // try-with-resources closes the stream and avoids a resource leak,
  // matching the style of the other parsing tests in this class.
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {
    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();
    Assert.assertEquals(4, tokenSpans.length);
    Assert.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
    Assert.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
    Assert.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
    Assert.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
  }
}
代码示例来源:origin: apache/opennlp
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by whitespace and by the split chars.
 *
 */
@Test
public void testParsingWhitespaceAndSeparatedString() throws IOException {
  String sampleTokens = "a b<SPLIT>c d<SPLIT>e";
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {
    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();
    // Both whitespace and <SPLIT> markers produce token boundaries.
    String[] expected = {"a", "b", "c", "d", "e"};
    Assert.assertEquals(expected.length, tokenSpans.length);
    for (int i = 0; i < expected.length; i++) {
      Assert.assertEquals(expected[i],
          tokenSpans[i].getCoveredText(tokenSample.getText()));
    }
  }
}
}
代码示例来源:origin: apache/opennlp
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by the split chars.
 *
 */
@Test
public void testParsingSeparatedString() throws IOException {
  String sampleTokens = "a<SPLIT>b<SPLIT>c<SPLIT>d";
  // try-with-resources closes the stream and avoids a resource leak,
  // matching the style of the other parsing tests in this class.
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {
    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();
    Assert.assertEquals(4, tokenSpans.length);
    Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(0,1), tokenSpans[0]);
    Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(1,2), tokenSpans[1]);
    Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(2,3), tokenSpans[2]);
    Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(3,4), tokenSpans[3]);
  }
}
内容来源于网络,如有侵权,请联系作者删除!