org.apache.tika.metadata.Metadata.names()方法的使用及代码示例

x33g5p2x  于2022-01-24 转载在 其他  
字(8.3k)|赞(0)|评价(0)|浏览(102)

本文整理了Java中org.apache.tika.metadata.Metadata.names()方法的一些代码示例,展示了Metadata.names()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Metadata.names()方法的具体详情如下:
包路径:org.apache.tika.metadata.Metadata
类名称:Metadata
方法名:names

Metadata.names介绍

[英]Returns an array of the names contained in the metadata.
[中]返回元数据中包含的名称数组。

代码示例

代码示例来源:origin: apache/nifi

final Pattern metadataKeyFilter = metadataKeyFilterRef.get();
final StringBuilder dataBuilder = new StringBuilder();
for (final String key : metadata.names()) {
  if (metadataKeyFilter != null && !metadataKeyFilter.matcher(key).matches()) {
    continue;

代码示例来源:origin: apache/tika

/**
   * Supplies the metadata names in serialization order. This default
   * implementation sorts them with {@link Arrays#sort(Object[])}; subclasses
   * may override it to impose a custom order or to filter names.
   *
   * @param metadata metadata from which to grab names
   * @return list of names in the order in which they should be serialized
   */
  protected String[] getNames(Metadata metadata) {
    final String[] sortedNames = metadata.names();
    // Sorted in place; the array obtained from names() is the one returned.
    Arrays.sort(sortedNames);
    return sortedNames;
  }
}

代码示例来源:origin: apache/tika

@Override
  public String[] getNames(Metadata m) {
    // Order the keys with a PrettyMetadataKeyComparator rather than natural ordering.
    final String[] orderedKeys = m.names();
    Arrays.sort(orderedKeys, new PrettyMetadataKeyComparator());
    return orderedKeys;
  }
}

代码示例来源:origin: apache/tika

/**
 * Counts every value held by the given metadata, summed over all names.
 *
 * @param m metadata to inspect; may be null
 * @return total number of values, or 0 when {@code m} is null
 */
int countMetadataValues(Metadata m) {
  int total = 0;
  if (m != null) {
    for (String name : m.names()) {
      total += m.getValues(name).length;
    }
  }
  return total;
}

代码示例来源:origin: apache/tika

/**
 * Outputs the metadata sorted by name, flushes the writer and records
 * that the metadata has been output.
 */
@Override
public void endDocument() {
  final String[] sortedNames = metadata.names();
  Arrays.sort(sortedNames);
  outputMetadata(sortedNames);
  writer.flush();
  metOutput = true;
}

代码示例来源:origin: apache/tika

@Override
  @SuppressWarnings("resource")
  public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
            MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
      WebApplicationException {

    // Only a metadata object holding exactly one name can be written as plain text.
    final boolean singleEntry = metadata.names().length == 1;
    if (!singleEntry) {
      throw new WebApplicationException("Metadata object must only have one entry!");
    }

    Writer out = new OutputStreamWriter(entityStream, UTF_8);
    String[] entryNames = metadata.names();
    for (int i = 0; i < entryNames.length; i++) {
      out.write(metadata.get(entryNames[i]));
    }

    // Flush only; the entity stream is deliberately left open.
    out.flush();
  }
}

代码示例来源:origin: apache/tika

/**
 * Renders every name/value pair as {@code name=value}, with pairs separated
 * by a single space. A name with several values appears once per value.
 *
 * @return the space-separated pairs; empty when there are no values
 */
public String toString() {
  // The buffer never leaves this method, so StringBuffer's synchronization is not needed.
  StringBuilder buf = new StringBuilder();
  for (String name : names()) {
    for (String value : _getValues(name)) {
      if (buf.length() > 0) {
        buf.append(' ');
      }
      buf.append(name).append('=').append(value);
    }
  }
  return buf.toString();
}

代码示例来源:origin: apache/tika

/**
 * Searches {@code metadataListB} for an entry that shares a digest with
 * {@code metadata}. Only names starting with {@code DIGEST_KEY_PREFIX} are
 * compared, and a match requires the same name to carry an equal, non-null
 * value on both sides.
 *
 * @param metadata metadata whose digest entries are looked up
 * @param metadataListB candidates to search
 * @return index in {@code metadataListB} of the first match found, or -1 if none
 */
private int findMatchingDigests(Metadata metadata, List<Metadata> metadataListB) {
  for (String n : metadata.names()) {
    if (!n.startsWith(DIGEST_KEY_PREFIX)) {
      continue;
    }
    String digestA = metadata.get(n);
    if (digestA == null) {
      // A null digest can never match, so there is no point scanning the list.
      continue;
    }
    for (int i = 0; i < metadataListB.size(); i++) {
      if (digestA.equals(metadataListB.get(i).get(n))) {
        return i;
      }
    }
  }
  return -1;
}

代码示例来源:origin: apache/tika

/**
 * Writes the metadata as CSV, one row per name: the name in the first
 * column followed by each of its values.
 *
 * @param metadata metadata to serialize
 * @param outputStream destination; the CSV writer wrapping it is closed before returning
 * @throws IOException if writing or closing fails
 */
public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
  // try-with-resources closes the writer even when a row fails to write.
  try (CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, UTF_8))) {
    for (String name : metadata.names()) {
      String[] values = metadata.getValues(name);
      ArrayList<String> row = new ArrayList<>(values.length + 1);
      row.add(name);
      row.addAll(Arrays.asList(values));
      // Ask for a fresh array; the values array is always one slot too short for the row.
      writer.writeNext(row.toArray(new String[0]));
    }
  }
}

代码示例来源:origin: apache/tika

@Override
  @SuppressWarnings("resource")
  public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations,
            MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException,
      WebApplicationException {

    CSVWriter csv = new CSVWriter(new OutputStreamWriter(entityStream, UTF_8));

    // One CSV row per metadata name: the name first, then each of its values.
    String[] names = metadata.names();
    for (int i = 0; i < names.length; i++) {
      String[] values = metadata.getValues(names[i]);
      ArrayList<String> row = new ArrayList<>(values.length + 1);
      row.add(names[i]);
      row.addAll(Arrays.asList(values));
      csv.writeNext(row.toArray(new String[0]));
    }

    // Flush only; the entity stream is deliberately left open.
    csv.flush();
  }
}

代码示例来源:origin: apache/tika

/**
 * Parses the file with Tika and indexes its metadata as one document:
 * every metadata value becomes a stored text field named after its key.
 *
 * @param file file to parse and index
 * @throws Exception if the file cannot be opened, parsed or indexed
 */
public void indexContentSpecificMet(File file) throws Exception {
  Metadata met = new Metadata();
  try (InputStream is = new FileInputStream(file)) {
    tika.parse(is, met);
    Document document = new Document();
    for (String key : met.names()) {
      String[] values = met.getValues(key);
      for (String val : values) {
        document.add(new TextField(key, val, Store.YES));
      }
    }
    // Add the document once, after all fields are collected. Adding it inside
    // the key loop indexed the same, still-growing document once per metadata name.
    writer.addDocument(document);
  }
}

代码示例来源:origin: apache/tika

@Override
  public List<RecognisedObject> recognise(InputStream stream, ContentHandler handler,
                      Metadata metadata, ParseContext context)
      throws IOException, SAXException, TikaException {
    // Parse into a scratch Metadata; each entry is then read as name -> numeric confidence.
    Metadata parsed = new Metadata();
    parse(stream, handler, parsed, context);

    List<RecognisedObject> recognised = new ArrayList<>();
    String[] labels = parsed.names();
    for (int i = 0; i < labels.length; i++) {
      String label = labels[i];
      double score = Double.parseDouble(parsed.get(label));
      recognised.add(new RecognisedObject(label, "eng", label, score));
    }
    return recognised;
  }
}

代码示例来源:origin: apache/tika

/**
   * Prints every metadata value to standard output, one {@code name : value} line each.
   *
   * @param metadata metadata to dump
   */
  public static void debug(Metadata metadata) {
    for (String name : metadata.names()) {
      String[] values = metadata.getValues(name);
      for (int i = 0; i < values.length; i++) {
        System.out.println(name + " : " + values[i]);
      }
    }
  }
}

代码示例来源:origin: apache/tika

/**
 * Prints every value of every metadata object to standard output as
 * {@code index: name : value}, where index is the object's position in the list.
 *
 * @param list metadata objects to dump
 */
public static void debug(List<Metadata> list) {
  int position = 0;
  for (Metadata entry : list) {
    // The position prefix is the same for every line of this entry.
    String prefix = position + ": ";
    for (String name : entry.names()) {
      for (String value : entry.getValues(name)) {
        System.out.println(prefix + name + " : " + value);
      }
    }
    position++;
  }
}

代码示例来源:origin: vector4wang/spring-boot-quick

/**
 * Parses the given bytes with Tika and returns the extracted metadata keyed
 * by name, iterated in sorted name order. For each name only the value
 * returned by {@code Metadata.get(name)} is kept.
 *
 * @param file raw content to parse
 * @return metadata name to value, in sorted name order
 * @throws Exception if parsing fails
 */
public static Map<String, String> handleStreamMetaDate(byte[] file)
    throws Exception {
  Metadata md = new Metadata();
  // try-with-resources releases the TikaInputStream even when parsing throws.
  try (TikaInputStream input = TikaInputStream.get(file, md)) {
    StringWriter textBuffer = new StringWriter();
    ContentHandler handler = new TeeContentHandler(
        getTextContentHandler(textBuffer)
    );
    parser.parse(input, handler, md, context);
  }
  String[] names = md.names();
  Arrays.sort(names);
  // An insertion-ordered map keeps the sort above visible to callers; a plain
  // HashMap would discard it. Fully qualified so no new import is required.
  Map<String, String> meta = new java.util.LinkedHashMap<>();
  for (String name : names) {
    meta.put(name, md.get(name));
  }
  return meta;
}

代码示例来源:origin: apache/tika

/**
   * Builds a UTF-8 text dump: one {@code name : value} line per metadata value,
   * followed by the handler's string form unless the handler's class is exactly
   * DefaultHandler.
   */
  private byte[] toString(ContentHandler contentHandler, Metadata metadata) {
    StringBuilder text = new StringBuilder();
    for (String name : metadata.names()) {
      for (String value : metadata.getValues(name)) {
        text.append(name);
        text.append(" : ");
        text.append(value);
        text.append("\n");
      }
    }
    boolean plainDefaultHandler = contentHandler.getClass().equals(DefaultHandler.class);
    if (!plainDefaultHandler) {
      text.append("\n")
          .append("CONTENT: " + contentHandler.toString())
          .append("\n\n");
    }
    return text.toString().getBytes(StandardCharsets.UTF_8);
  }
}

代码示例来源:origin: apache/tika

/**
 * Posts the PDF at {@code filePath} to the GROBID header-processing endpoint
 * as multipart form data and copies the metadata parsed from the response into
 * {@code metadata}, prefixing each key with {@code grobid:header_}.
 * A failure to read or parse the response is logged, not rethrown.
 *
 * @param filePath path of the PDF file to send
 * @param handler not used by this method
 * @param metadata receives the prefixed header metadata
 * @param context passed to the TEI parser
 * @throws FileNotFoundException if the file cannot be opened
 */
public void parse(String filePath, ContentHandler handler, Metadata metadata,
         ParseContext context) throws FileNotFoundException {
  File pdfFile = new File(filePath);
  ContentDisposition cd = new ContentDisposition(
      "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");
  // Opened outside the try so a FileNotFoundException still reaches the caller unchanged.
  FileInputStream pdfStream = new FileInputStream(pdfFile);
  try {
    Attachment att = new Attachment("input", pdfStream, cd);
    MultipartBody body = new MultipartBody(att);
    Response response = WebClient
        .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
        .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
        .post(body);
    try {
      String resp = response.readEntity(String.class);
      Metadata teiMet = new TEIDOMParser().parse(resp, context);
      for (String key : teiMet.names()) {
        metadata.add("grobid:header_" + key, teiMet.get(key));
      }
    } catch (Exception e) {
      LOG.warn("Couldn't read response", e);
    }
  } finally {
    // Release the file handle in every case; the original never closed it.
    try {
      pdfStream.close();
    } catch (java.io.IOException e) {
      LOG.warn("Couldn't close " + pdfFile, e);
    }
  }
}

代码示例来源:origin: apache/tika

/** Checks that <code>names</code> reflects each name as it is added. */
@Test
public void testNames() {
  Metadata meta = new Metadata();
  // A fresh Metadata has no names.
  assertEquals(0, meta.names().length);

  meta.add("name-one", "value");
  String[] afterFirstAdd = meta.names();
  assertEquals(1, afterFirstAdd.length);
  assertEquals("name-one", afterFirstAdd[0]);

  meta.add("name-two", "value");
  assertEquals(2, meta.names().length);
}

代码示例来源:origin: apache/tika

/**
 * Copies every name and all of its values into a new Metadata object.
 *
 * @param m metadata to copy
 * @return a new Metadata holding the same names and values
 */
public static Metadata cloneMetadata(Metadata m) {
  Metadata copy = new Metadata();

  for (String name : m.names()) {
    if (m.isMultiValued(name)) {
      // Multi-valued names are re-added value by value.
      for (String value : m.getValues(name)) {
        copy.add(name, value);
      }
    } else {
      copy.set(name, m.get(name));
    }
  }
  return copy;
}

代码示例来源:origin: apache/tika

/**
 * Requests only the creator field as JSON for the first 12000 bytes of the
 * test document and expects exactly that single metadata entry back.
 */
@Test
public void testGetField_Author_JSON_Partial_Found() throws Exception {
  InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
  String creatorUrl = endPoint + META_PATH + "/" + TikaCoreProperties.CREATOR.getName();
  Response response = WebClient.create(creatorUrl)
      .type("application/msword")
      .accept(MediaType.APPLICATION_JSON)
      .put(copy(stream, 12000));
  Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());

  InputStream entity = (InputStream) response.getEntity();
  Metadata metadata = JsonMetadata.fromJson(new InputStreamReader(entity, UTF_8));
  assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR));
  assertEquals(1, metadata.names().length);
}

相关文章