Usage of the opennlp.tools.util.Span.getCoveredText() method, with code examples


This article collects a number of code examples for the Java method opennlp.tools.util.Span.getCoveredText() and shows how it is used in practice. The examples were extracted from selected projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should serve as a useful reference. Details of Span.getCoveredText() are as follows:
Package path: opennlp.tools.util.Span
Class name: Span
Method name: getCoveredText

About Span.getCoveredText

Retrieves the string covered by the current span of the specified text.
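A Span stores character offsets into a piece of text: getStart() is inclusive and getEnd() is exclusive, and getCoveredText returns the sub-sequence between the two. A minimal usage sketch (the sentence and the offsets below are made up for illustration):

String text = "The quick brown fox";
Span span = new Span(4, 9);                        // character offsets [4, 9)
CharSequence covered = span.getCoveredText(text);  // sub-sequence "quick"
System.out.println(covered);                       // prints: quick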

Code Examples

Code example source: apache/opennlp

/**
 * Converts an array of {@link Span}s to an array of {@link String}s.
 *
 * @param spans the spans that mark the substrings to extract
 * @param s the text the spans refer to
 * @return the strings
 */
public static String[] spansToStrings(Span[] spans, CharSequence s) {
 String[] tokens = new String[spans.length];
 for (int si = 0, sl = spans.length; si < sl; si++) {
  tokens[si] = spans[si].getCoveredText(s).toString();
 }
 return tokens;
}
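For reference, a short sketch of how this helper might be called with spans produced by OpenNLP's WhitespaceTokenizer (the sample sentence is made up; spansToStrings is the static method shown above):

// assumes: import opennlp.tools.tokenize.WhitespaceTokenizer;
String text = "OpenNLP makes Span handling easy";
Span[] spans = WhitespaceTokenizer.INSTANCE.tokenizePos(text);  // token positions as Spans
String[] tokens = Span.spansToStrings(spans, text);             // back to plain strings
// tokens -> [OpenNLP, makes, Span, handling, easy]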

Code example source: apache/opennlp

@Override
public String toString() {
 StringBuilder documentBuilder = new StringBuilder();
 for (Span sentSpan : sentences) {
  documentBuilder.append(sentSpan.getCoveredText(document).toString()
    .replace("\r", "<CR>").replace("\n", "<LF>"));
  documentBuilder.append("\n");
 }
 return documentBuilder.toString();
}

Code example source: apache/opennlp

@Override
 BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  return new RelationAnnotation(tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString(),
    tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString(),
    parseArg(tokens[ARG1_OFFSET].getCoveredText(line).toString()),
    parseArg(tokens[ARG2_OFFSET].getCoveredText(line).toString()));
 }
}

Code example source: apache/opennlp

@Override
 BratAnnotation parse(Span[] values, CharSequence line) throws IOException {
  if (values.length == 3 || values.length == 4) {
   String value = null;
   if (values.length == 4) {
    value = values[VALUE_OFFSET].getCoveredText(line).toString();
   }
   return new AttributeAnnotation(values[ID_OFFSET].getCoveredText(line).toString(),
     values[TYPE_OFFSET].getCoveredText(line).toString(),
     values[ATTACHED_TO_OFFSET].getCoveredText(line).toString(), value);
  }
  else {
   throw new InvalidFormatException("Line must have 3 or 4 fields");
  }
 }
}

Code example source: apache/opennlp

/**
 * Auxiliary method to print span errors
 *
 * @param falsePositives
 *          the false positive spans
 * @param falseNegatives
 *          the false negative spans
 * @param doc
 *          the document text
 */
private void printErrors(List<Span> falsePositives,
  List<Span> falseNegatives, String doc) {
 printStream.println("False positives: {");
 for (Span span : falsePositives) {
  printStream.println(span.getCoveredText(doc));
 }
 printStream.println("} False negatives: {");
 for (Span span : falseNegatives) {
  printStream.println(span.getCoveredText(doc));
 }
 printStream.println("}\n");
}

Code example source: apache/opennlp

@Override
 BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  
  Span noteSpan = new Span(tokens[START_VALUE_OFFSET].getStart(),
    tokens[tokens.length - 1].getEnd());
  return new AnnotatorNoteAnnotation(tokens[ID_OFFSET].getCoveredText(line).toString(),
    tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString(),
    noteSpan.getCoveredText(line).toString());
 }
}

Code example source: apache/opennlp

@Override
 BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  String[] typeParts = tokens[TYPE_OFFSET].getCoveredText(line).toString().split(":");
  if (typeParts.length != 2) {
   throw new InvalidFormatException(String.format(
     "Failed to parse [%s], type part must be in the format type:trigger", line));
  }
  String type = typeParts[0];
  String eventTrigger = typeParts[1];
  Map<String, String> arguments = new HashMap<>();
  for (int i = TYPE_OFFSET + 1; i < tokens.length; i++) {
   String[] parts = tokens[i].getCoveredText(line).toString().split(":");
   if (parts.length != 2) {
    throw new InvalidFormatException(String.format(
      "Failed to parse [%s], argument parts must be in form argument:value", line));
   }
   arguments.put(parts[0], parts[1]);
  }
   return new EventAnnotation(tokens[ID_OFFSET].getCoveredText(line).toString(), type, eventTrigger,
    arguments);
 }
}

Code example source: apache/opennlp

/**
 * Detect sentences in a String.
 *
 * @param s  The string to be processed.
 *
 * @return   A string array containing individual sentences as elements.
 */
public String[] sentDetect(String s) {
 Span[] spans = sentPosDetect(s);
 String[] sentences;
 if (spans.length != 0) {
  sentences = new String[spans.length];
  for (int si = 0; si < spans.length; si++) {
   sentences[si] = spans[si].getCoveredText(s).toString();
  }
 }
 else {
  sentences = new String[] {};
 }
 return sentences;
}
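For context, a rough sketch of how such sentence spans are typically obtained and resolved back to text with getCoveredText (SentenceDetectorME and SentenceModel are OpenNLP classes; the "en-sent.bin" model path is only a placeholder):

// assumes: import java.io.*; import opennlp.tools.sentdetect.*;
String text = "First sentence. Second sentence.";
try (InputStream modelIn = new FileInputStream("en-sent.bin")) {  // placeholder model file
 SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelIn));
 for (Span sentence : detector.sentPosDetect(text)) {
  System.out.println(sentence.getCoveredText(text));  // prints each detected sentence
 }
}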

Code example source: apache/opennlp

@Override
public String toString() {
 StringBuilder sentence = new StringBuilder();
 int lastEndIndex = -1;
 for (Span token : tokenSpans) {
  if (lastEndIndex != -1) {
   // If there are no chars between last token
   // and this token insert the separator chars
   // otherwise insert a space
   String separator;
   if (lastEndIndex == token.getStart())
    separator = separatorChars;
   else
    separator = " ";
   sentence.append(separator);
  }
  sentence.append(token.getCoveredText(text));
  lastEndIndex = token.getEnd();
 }
 return sentence.toString();
}

Code example source: apache/opennlp

public String read() throws IOException {
  TokenSample tokenSample = samples.read();

  if (tokenSample != null) {
   StringBuilder whitespaceSeparatedTokenString = new StringBuilder();

   for (Span token : tokenSample.getTokenSpans()) {
    whitespaceSeparatedTokenString.append(
      token.getCoveredText(tokenSample.getText()));
    whitespaceSeparatedTokenString.append(' ');
   }

   // Shorten string by one to get rid of last space
   if (whitespaceSeparatedTokenString.length() > 0) {
    whitespaceSeparatedTokenString.setLength(
      whitespaceSeparatedTokenString.length() - 1 );
   }

   return whitespaceSeparatedTokenString.toString();
  }

  return null;
 }
}

Code example source: apache/opennlp

// Non-contiguous excerpt from the parser for BRAT span (text-bound) annotations;
// the enclosing method and the loop over the offset fields are omitted here.
String type = values[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
int beginIndex = parseInt(values[BEGIN_OFFSET].getCoveredText(line).toString());
// ...
 if (values[i].getCoveredText(line).toString().contains(";")) {
  // a ";" inside an offset field marks a discontinuous span fragment
  String[] parts = values[i].getCoveredText(line).toString().split(";");
  endOffset = parseInt(parts[0]);
  fragments.add(new Span(beginIndex, endOffset, type));
  // ...
  endOffset = parseInt(values[i].getCoveredText(line).toString());
  firstTextTokenIndex = i + 1;
  fragments.add(new Span(beginIndex, endOffset, type));
// ...
String id = values[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();

Code example source: apache/opennlp

String whitespaceToken = whiteSpaceTokenSpan.getCoveredText(sampleString).toString();

Code example source: apache/opennlp

@Override
 protected Iterator<Event> createEvents(SentenceSample sample) {

  Collection<Event> events = new ArrayList<>();

  for (Span sentenceSpan : sample.getSentences()) {
   String sentenceString = sentenceSpan.getCoveredText(sample.getDocument()).toString();

   for (Iterator<Integer> it = scanner.getPositions(
     sentenceString).iterator(); it.hasNext();) {

    int candidate = it.next();
    String type = SentenceDetectorME.NO_SPLIT;
    if (!it.hasNext()) {
     type = SentenceDetectorME.SPLIT;
    }

    events.add(new Event(type, cg.getContext(sample.getDocument(),
      sentenceSpan.getStart() + candidate)));
   }
  }

  return events.iterator();
 }
}

Code example source: apache/opennlp

@Override
protected final Parse processSample(final Parse reference) {
 List<String> tokens = new ArrayList<>();
 for (Parse token : reference.getTokenNodes()) {
  tokens.add(token.getSpan().getCoveredText(reference.getText()).toString());
 }
 Parse[] predictions = ParserTool.parseLine(String.join(" ", tokens), parser, 1);
 Parse prediction = null;
 if (predictions.length > 0) {
  prediction = predictions[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(prediction));
 }
 return prediction;
}

Code example source: apache/opennlp

String tokenString = tokens[i].getCoveredText(sentence).toString();
String escapedToken = escape(tokenString);
tokenList[i] = escapedToken;

Code example source: apache/opennlp

@Test
public void testTrim() {
 String string1 = "  12 34  ";
 Span span1 = new Span(0, string1.length());
 Assert.assertEquals("12 34", span1.trim(string1).getCoveredText(string1));
}

Code example source: apache/opennlp

@Test
public void testTrimWhitespaceSpan() {
 String string1 = "              ";
 Span span1 = new Span(0, string1.length());
 Assert.assertEquals("", span1.trim(string1).getCoveredText(string1));
}

Code example source: apache/opennlp

/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by whitespace.
 *
 */
@Test
public void testParsingWhitespaceSeparatedTokens() throws IOException {
 String sampleTokens = "Slave to the wage";
 ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
   ObjectStreamUtils.createObjectStream(sampleTokens));
 TokenSample tokenSample = sampleTokenStream.read();
 Span[] tokenSpans = tokenSample.getTokenSpans();
 Assert.assertEquals(4, tokenSpans.length);
 Assert.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
 Assert.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
 Assert.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
 Assert.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
}

Code example source: apache/opennlp

/**
  * Tests if the {@link TokenSample} correctly tokenizes tokens which
  * are separated by whitespace and by the split chars.
  *
  */
 @Test
 public void testParsingWhitespaceAndSeparatedString() throws IOException {
  String sampleTokens = "a b<SPLIT>c d<SPLIT>e";

  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
    ObjectStreamUtils.createObjectStream(sampleTokens))) {
   TokenSample tokenSample = sampleTokenStream.read();

   Span[] tokenSpans = tokenSample.getTokenSpans();

   Assert.assertEquals(5, tokenSpans.length);

   Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
   Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
   Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
   Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
   Assert.assertEquals("e", tokenSpans[4].getCoveredText(tokenSample.getText()));
  }
 }
}

Code example source: apache/opennlp

/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by the split chars.
 *
 */
@Test
public void testParsingSeparatedString() throws IOException {
 String sampleTokens = "a<SPLIT>b<SPLIT>c<SPLIT>d";
 ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
   ObjectStreamUtils.createObjectStream(sampleTokens));
 TokenSample tokenSample = sampleTokenStream.read();
 Span[] tokenSpans = tokenSample.getTokenSpans();
 Assert.assertEquals(4, tokenSpans.length);
 Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
 Assert.assertEquals(new Span(0,1), tokenSpans[0]);
 Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
 Assert.assertEquals(new Span(1,2), tokenSpans[1]);
 Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
 Assert.assertEquals(new Span(2,3), tokenSpans[2]);
 Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
 Assert.assertEquals(new Span(3,4), tokenSpans[3]);
}
