Usage of the org.kitesdk.data.Dataset class, with code examples


This article collects code examples of the Java class org.kitesdk.data.Dataset and shows how the class is used in practice. The examples are drawn from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should serve as useful references. Details of the Dataset class:

Package: org.kitesdk.data
Class: Dataset

Dataset overview

A logical representation of a set of data entities.

Logically, all datasets have two generic properties: a name, and a descriptor that holds information such as the dataset's schema and its partitioning information. Concrete implementations of Dataset can support additional properties, mandatory or otherwise, as needed. Datasets are not normally instantiated directly, but managed by a repository (also implementation-specific).

Implementations of Dataset are immutable.
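Before the examples, a minimal sketch of the typical lifecycle may help: a dataset is created or loaded through a repository URI, its descriptor exposes the schema, and newWriter()/newReader() move entities in and out. The URI dataset:file:/tmp/users, the schema resource user.avsc, and its username field are hypothetical placeholders for this sketch; the Kite calls themselves (Datasets.create, Datasets.load, Dataset.newWriter, Dataset.newReader) are the standard API.

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;

public class DatasetExample {
  public static void main(String[] args) throws Exception {
    // The descriptor holds the dataset's schema (and, optionally, its
    // partition strategy); "resource:user.avsc" is a hypothetical schema file.
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .build();

    // Datasets are not instantiated directly; they are created and
    // loaded through a repository URI.
    Dataset<GenericRecord> users = Datasets.create(
        "dataset:file:/tmp/users", descriptor, GenericRecord.class);

    // Entities must conform to the dataset's schema; the "username"
    // field is assumed to exist in the hypothetical schema above.
    GenericRecord user = new GenericData.Record(users.getDescriptor().getSchema());
    user.put("username", "alice");

    DatasetWriter<GenericRecord> writer = users.newWriter();
    try {
      writer.write(user);
    } finally {
      writer.close();
    }

    DatasetReader<GenericRecord> reader = users.newReader();
    try {
      for (GenericRecord record : reader) {
        System.out.println(record);
      }
    } finally {
      reader.close();
    }
  }
}

Calling Datasets.load("dataset:file:/tmp/users", GenericRecord.class) would reopen the same dataset later, which is the pattern most of the examples below rely on.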

Code examples

Code example source: apache/nifi

// resolve a schema from a dataset/view URI, or from a classpath resource
if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
  return Datasets.load(uri).getDataset().getDescriptor().getSchema();
} else if ("resource".equals(uri.getScheme())) {
  try (InputStream in = Resources.getResource(uri.getSchemeSpecificPart()).openStream()) {
    return parseSchema(uri, in); // parseSchema: helper in the surrounding class
  }
}

Code example source: apache/flume

DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());
this.syncOnBatch = context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
    DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));
this.datasetName = view.getDataset().getName();

Code example source: kite-sdk/kite

boolean deleteAllUnsafe(boolean useTrash) {
 boolean deleted = false;
 if (dataset.getDescriptor().isPartitioned()) {
  for (StorageKey key : partitionIterator()) {
   deleted = (useTrash ? FileSystemUtil.cleanlyMoveToTrash(fs, root, key.getPath())
     : FileSystemUtil.cleanlyDelete(fs, root, key.getPath())) || deleted;
   if (listener != null) {
    // the relative path is the partition name, so we can simply delete it
    // in Hive
    listener.partitionDeleted(dataset.getNamespace(),
      dataset.getName(), key.getPath().toString());
   }
  }
 } else {
  for (Path path : pathIterator()) {
   deleted = (useTrash ? FileSystemUtil.cleanlyMoveToTrash(fs, root, path)
     : FileSystemUtil.cleanlyDelete(fs, root, path)) || deleted;
  }
 }
 return deleted;
}

Code example source: org.springframework.data/spring-data-hadoop-store

@Override
protected DatasetWriter<GenericRecord> createWriter() {
  if (Formats.PARQUET.getName().equals(getDatasetDefinition().getFormat().getName())) {
    Dataset<GenericRecord> dataset =
        DatasetUtils.getOrCreateDataset(getDatasetRepositoryFactory(), getDatasetDefinition(), getEntityClass(), GenericRecord.class);
    schema = dataset.getDescriptor().getSchema();
    return dataset.newWriter();
  } else {
    throw new StoreException("Invalid format " + getDatasetDefinition().getFormat() +
        " specified, you must use 'parquet' with " + this.getClass().getSimpleName() + ".");
  }
}

Code example source: org.kitesdk/kite-tools

Schema schema = target.getDataset().getDescriptor().getSchema();
// the rest of the snippet is truncated in the source; the orphaned arguments
// below are apparently part of a call that takes the dataset's namespace and
// a random name:
//     target.getDataset().getNamespace(),
//     UUID.randomUUID().toString());

Code example source: org.kitesdk/kite-tools

private static void printInfo(Logger console, Dataset<?> dataset) {
 DatasetDescriptor desc = dataset.getDescriptor();
 String schema = ColumnMappingParser.removeEmbeddedMapping(
   PartitionStrategyParser.removeEmbeddedStrategy(desc.getSchema()))
   .toString(true);
 Collection<String> properties = desc.listProperties();
 console.info("\nDataset \"{}\":", dataset.getName());
 console.info("\tURI: \"{}\"", dataset.getUri());
 console.info("\tSchema: {}", indent(schema));
 if (desc.isPartitioned()) {
  console.info("\tPartition strategy: {}",
    indent(desc.getPartitionStrategy().toString(true)));
 } else {
  console.info("\tNot partitioned");
 }
 if (desc.isColumnMapped()) {
  console.info("\tColumn mapping: {}",
    indent(desc.getColumnMapping().toString(true)));
 }
 if (!properties.isEmpty()) {
  StringBuilder sb = new StringBuilder();
  for (String prop : properties) {
   sb.append("\n\t\t").append(prop).append("=")
     .append(desc.getProperty(prop));
  }
  console.info("\tProperties:{}", sb.toString());
 }
}

Code example source: kite-sdk/kite

// anonymous Runnable passed to a test helper; it opens a reader on the dataset
new Runnable() {
  @Override
  public void run() {
    dataset.newReader();
  }
});

Code example source: org.springframework.data/spring-data-hadoop-store

@Override
protected DatasetReader<GenericRecord> createReader() {
  Dataset<GenericRecord> dataset = DatasetUtils.getOrCreateDataset(getDatasetRepositoryFactory(),
      getDatasetDefinition(), getEntityClass(), GenericRecord.class);
  schema = dataset.getDescriptor().getSchema();
  return dataset.newReader();
}

Code example source: kite-sdk/kite

protected AbstractRefinableView(AbstractRefinableView<?> view, Schema schema, Class<E> type) {
 if (view.dataset instanceof AbstractDataset) {
  this.dataset = ((AbstractDataset<?>) view.dataset).asType(type);
 } else {
  this.dataset = Datasets.load(view.dataset.getUri(), type);
 }
 this.comparator = view.comparator;
 this.constraints = view.constraints;
 // thread-safe, so okay to reuse when views share a partition strategy
 this.keys = view.keys;
 // Resolve our type according to the given schema
 this.accessor = DataModelUtil.accessor(type, schema);
 this.entityTest = constraints.toEntityPredicate(accessor);
 Schema datasetSchema = dataset.getDescriptor().getSchema();
 this.canRead = SchemaValidationUtil.canRead(
   datasetSchema, accessor.getReadSchema());
 this.canWrite = SchemaValidationUtil.canRead(
   accessor.getWriteSchema(), datasetSchema);
 IncompatibleSchemaException.check(canRead || canWrite,
   "The type cannot be used to read from or write to the dataset:\n" +
   "Type schema: %s\nDataset schema: %s",
   getSchema(), datasetSchema);
}

Code example source: org.springframework.data/spring-data-hadoop-store

@Override
protected DatasetWriter<T> createWriter() {
  if (Formats.AVRO.getName().equals(getDatasetDefinition().getFormat().getName())) {
    Dataset<T> dataset = DatasetUtils.getOrCreateDataset(getDatasetRepositoryFactory(), getDatasetDefinition(),
        getEntityClass(), getEntityClass());
    return dataset.newWriter();
  } else {
    throw new StoreException("Invalid format " + getDatasetDefinition().getFormat()
        + " specified, you must use 'avro' with " + this.getClass().getSimpleName() + ".");
  }
}

Code example source: kite-sdk/kite

@Test
public void testSimpleViews() {
 assertViewUriEquivalent("dataset",
   "dataset:file:/tmp/test_name", test);
 assertViewUriEquivalent("to constraint",
   "view:file:/tmp/test_name?timestamp=(,0]",
   test.to("timestamp", 0L));
 assertViewUriEquivalent("View with toBefore constraint",
   "view:file:/tmp/test_name?timestamp=(,0)",
   test.toBefore("timestamp", 0L));
 assertViewUriEquivalent("View with from constraint",
   "view:file:/tmp/test_name?timestamp=[0,)",
   test.from("timestamp", 0L));
 assertViewUriEquivalent("View with fromAfter constraint",
   "view:file:/tmp/test_name?timestamp=(0,)",
   test.fromAfter("timestamp", 0L));
 assertViewUriEquivalent("View with in(\"\") constraint",
   "view:file:/tmp/test_name?color=in()",
   test.with("color", ""));
 assertViewUriEquivalent("View with in constraint",
   "view:file:/tmp/test_name?color=orange,red",
   test.with("color", "orange", "red"));
 assertViewUriEquivalent("View with exists constraint",
   "view:file:/tmp/test_name?id=",
   test.with("id"));
}

Code example source: stackoverflow.com

// note: Dataset here is a Jackson-deserializable application POJO,
// not the Kite interface
ObjectMapper mapper = new ObjectMapper();
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
JsonNode node = mapper.readTree(new URL(url));
node = node.get("duObjects");
TypeReference<List<Dataset>> typeRef = new TypeReference<List<Dataset>>() {};
List<Dataset> list = mapper.readValue(node.traverse(), typeRef);
for (int i = 0; i < list.size(); i++) {
  Dataset dataSet = list.get(i);
  System.out.println(dataSet.getName());
}

Code example source: kite-sdk/kite

@Test
public void testMixedProjection() throws IOException {
 Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
   new DatasetDescriptor.Builder()
     .schema(StandardEvent.class)
     .build(), StandardEvent.class);
 DatasetWriter<StandardEvent> writer = null;
 try {
  writer = original.newWriter();
  writer.write(sepEvent);
  writer.write(octEvent);
  writer.write(novEvent);
 } finally {
  Closeables.close(writer, false);
 }
 Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
   ReflectSmallEvent.class);
 Set<ReflectSmallEvent> expected = Sets.newHashSet(
   new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
   new ReflectSmallEvent(novEvent));
 assertContentEquals(expected, dataset);
}

Code example source: kite-sdk/kite-examples

writer = hellos.newWriter();
reader = hellos.newReader();

Code example source: org.kitesdk/kite-tools

private URI getLegacyRepoUri(Dataset<GenericRecord> dataset) {
 return FlumeConfigCommand.this.getLegacyRepoUri(dataset.getUri(), dataset.getNamespace());
}

Code example source: kite-sdk/kite

@Test
 public void testRefineIdentity() throws Exception {
   PartitionStrategy strategy = new PartitionStrategy.Builder()
       .identity("user_id")
       .build();

   DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
       .schemaUri("resource:standard_event.avsc")
       .partitionStrategy(strategy)
       .build();

   // Create a separate dataset to avoid conflicts with the above.
   Dataset<StandardEvent> identityDataset = repo.create(
     "ns", "test_identity", descriptor);

   DatasetWriter<StandardEvent> writer = null;

   try {
     writer = identityDataset.newWriter();
     writer.write(sepEvent);
     writer.write(octEvent);
     writer.write(novEvent);
   } finally {
     Closeables.close(writer, false);
   }

   assertContentEquals(Sets.newHashSet(sepEvent, novEvent),
       identityDataset.with("user_id", 0L));
 }

Code example source: kite-sdk/kite

private static DatasetRepository getDatasetRepository(JobContext jobContext) {
 Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
 DatasetRepository repo = DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
 if (repo instanceof TemporaryDatasetRepositoryAccessor) {
  Dataset<Object> dataset = load(jobContext).getDataset();
  String namespace = dataset.getNamespace();
  repo = ((TemporaryDatasetRepositoryAccessor) repo)
    .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
 }
 return repo;
}

Code example source: kite-sdk/kite

@Override
public URI getUri() {
 URIBuilder builder = new URIBuilder(dataset.getUri());
 for (Map.Entry<String, String> entry : constraints.toQueryMap().entrySet()) {
  builder.with(entry.getKey(), entry.getValue());
 }
 return builder.build();
}

Code example source: kite-sdk/kite-examples

eventsToProcess = eventsDataset.toBefore("timestamp", currentMinute);

Code example source: kite-sdk/kite

@Test
public void testRelative() {
 DatasetRepository repo = DatasetRepositories.repositoryFor("repo:file:target/data");
 repo.delete("ns", "test");
 repo.create("ns", "test", descriptor);
 Dataset<Record> ds = Datasets.<Record, Dataset<Record>>
   load("dataset:file:target/data/ns/test", Record.class);
 Assert.assertNotNull("Should load dataset", ds);
 Assert.assertTrue(ds instanceof FileSystemDataset);
 Path cwd = localFS.makeQualified(new Path("."));
 Assert.assertEquals("Locations should match",
   new Path(cwd, "target/data/ns/test").toUri(), ds.getDescriptor().getLocation());
 Assert.assertEquals("Descriptors should match",
   repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
 Assert.assertEquals("Should report correct namespace",
   "ns", ds.getNamespace());
 Assert.assertEquals("Should report correct name",
   "test", ds.getName());
 repo.delete("ns", "test");
}
