Usage of org.kitesdk.data.Dataset.getDescriptor() with Code Examples


This article collects a number of Java code examples that demonstrate how the org.kitesdk.data.Dataset.getDescriptor() method is used. The examples are drawn from selected open-source projects on GitHub, Stack Overflow, Maven, and similar sources, and serve as practical references. The method's details are as follows:
Package: org.kitesdk.data
Class: Dataset
Method: getDescriptor

About Dataset.getDescriptor

Get the DatasetDescriptor associated with this dataset.
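
Before the project excerpts, a minimal self-contained sketch of the typical call pattern may help: load a dataset, take its DatasetDescriptor, and inspect the schema, storage format, and partition strategy. The URI "dataset:hdfs:/data/events" is a hypothetical placeholder, not taken from any example below.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Formats;

public class DescriptorExample {
  public static void main(String[] args) {
    // Hypothetical URI; any existing Kite dataset URI would work here.
    Dataset<GenericRecord> dataset =
        Datasets.load("dataset:hdfs:/data/events", GenericRecord.class);

    // The descriptor bundles the dataset's structural metadata.
    DatasetDescriptor descriptor = dataset.getDescriptor();
    Schema schema = descriptor.getSchema();

    System.out.println("Schema: " + schema.getFullName());
    System.out.println("Parquet: " + Formats.PARQUET.equals(descriptor.getFormat()));
    if (descriptor.isPartitioned()) {
      System.out.println("Partitioning: " + descriptor.getPartitionStrategy());
    }
  }
}

The excerpts below exercise these same accessors (getSchema(), getFormat(), isPartitioned(), getLocation()) in real projects.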

Code Examples

Example source: apache/nifi — resolving an Avro schema from a dataset or resource URI:

// Excerpt: schema resolution by URI scheme (surrounding branches assumed).
if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
  return Datasets.load(uri).getDataset().getDescriptor().getSchema();
} else if ("resource".equals(uri.getScheme())) {
  // Load the schema from a classpath resource instead.
  try (InputStream in = Resources.getResource(uri.getSchemeSpecificPart()).openStream()) {
    return parseSchema(uri, in); // helper assumed from the surrounding class
  }
}

Example source: apache/flume — on sync(), the writer must be closed when the dataset format is Parquet:

@Override
public void sync() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    if (Formats.PARQUET.equals(
        dataset.getDataset().getDescriptor().getFormat())) {
      // We need to close the writer on sync if we're writing to a Parquet
      // dataset
      close();
    } else {
      if (writer instanceof Syncable) {
        ((Syncable) writer).sync();
      }
    }
  }
}

Example source: apache/flume — validating that a dataset's format is among the allowed formats:

DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName()); // message assumed; the original excerpt is truncated here

Example source: apache/nifi — fetching the schema of the target dataset:

final Schema schema = target.getDataset().getDescriptor().getSchema();

Example source: kite-sdk/kite — building a CSV record parser from a view's dataset schema:

public CSVRecordParser(CSVProperties props, View<E> view,
                       @Nullable List<String> header) {
  this(props, view.getDataset().getDescriptor().getSchema(), view.getType(),
      header);
}

Example source: kite-sdk/kite — checking that the view's schema can read the dataset's data:

protected void checkSchemaForRead() {
  IncompatibleSchemaException.check(canRead,
      "Cannot read data with this view's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      dataset.getDescriptor().getSchema(), getSchema());
}

Example source: kite-sdk/kite — deriving path conversions from the dataset schema:

public DatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
  this.conf = conf;
}

Example source: kite-sdk/kite — checking that data written with the view's schema stays readable with the dataset's schema:

protected void checkSchemaForWrite() {
  IncompatibleSchemaException.check(canWrite,
      "Cannot write data with this view's schema, " +
      "it cannot be read with the dataset's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      getSchema(), dataset.getDescriptor().getSchema());
}

Example source: kite-sdk/kite — the same pattern in the incremental writer cache loader:

public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view,
                                           ConfAccessor conf) {
  this.view = view;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
  this.conf = conf;
}

Example source: org.kitesdk/kite-tools — mapping a view to a Crunch Avro PType, using the dataset schema for generic records:

@SuppressWarnings("unchecked")
private static <T> AvroType<T> ptype(View<T> view) {
  Class<T> recordClass = view.getType();
  if (GenericRecord.class.isAssignableFrom(recordClass)) {
    return (AvroType<T>) Avros.generics(
        view.getDataset().getDescriptor().getSchema());
  } else {
    return Avros.records(recordClass);
  }
}

Example source: kite-sdk/kite — choosing a path iterator based on whether the dataset is partitioned:

PathIterator pathIterator() {
  if (dataset.getDescriptor().isPartitioned()) {
    return new PathIterator(fs, root, partitionIterator());
  } else {
    return new PathIterator(fs, root, null);
  }
}

Example source: org.kitesdk/kite-data-mapreduce — selecting the delegate input format for partitioned datasets:

@Override
public void setConf(Configuration configuration) {
  conf = configuration;
  View<E> view = load(configuration);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (view.getDataset().getDescriptor().isPartitioned() && partitionDir != null) {
    delegate = getDelegateInputFormatForPartition(view.getDataset(), partitionDir, conf);
  } else {
    delegate = getDelegateInputFormat(view, conf);
  }
}

Example source: kite-sdk/kite — caching the dataset schema in a record writer:

public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset().getDescriptor().getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(
      view.getType());
  this.copyRecords = copyRecords;
}

Example source: kite-sdk/kite — creating a reader with the dataset's descriptor:

@Override
public DatasetReader<E> newReader() {
  checkSchemaForRead();
  AbstractDatasetReader<E> reader = new MultiFileDatasetReader<E>(fs,
      pathIterator(), dataset.getDescriptor(), constraints, getAccessor());
  reader.initialize();
  return reader;
}

Example source: org.springframework.data/spring-data-hadoop-store — the writer requires the Parquet format:

@Override
protected DatasetWriter<GenericRecord> createWriter() {
  if (Formats.PARQUET.getName().equals(getDatasetDefinition().getFormat().getName())) {
    Dataset<GenericRecord> dataset =
        DatasetUtils.getOrCreateDataset(getDatasetRepositoryFactory(), getDatasetDefinition(), getEntityClass(), GenericRecord.class);
    schema = dataset.getDescriptor().getSchema();
    return dataset.newWriter();
  } else {
    throw new StoreException("Invalid format " + getDatasetDefinition().getFormat() +
        " specified, you must use 'parquet' with " + this.getClass().getSimpleName() + ".");
  }
}

Example source: kite-sdk/kite — projecting a view to a type; for specific types the schema comes from the descriptor:

@Override
public <T> View<T> asType(Class<T> type) {
  if (DataModelUtil.isGeneric(type)) {
    // if the type is generic, don't reset the schema
    return project(getSchema(), type);
  }
  // otherwise, the type determines the schema
  return project(getDataset().getDescriptor().getSchema(), type);
}

Example source: org.springframework.data/spring-data-hadoop-store — creating a reader and caching the dataset schema:

@Override
protected DatasetReader<GenericRecord> createReader() {
  Dataset<GenericRecord> dataset = DatasetUtils.getOrCreateDataset(getDatasetRepositoryFactory(),
      getDatasetDefinition(), getEntityClass(), GenericRecord.class);
  schema = dataset.getDescriptor().getSchema();
  return dataset.newReader();
}

Example source: kite-sdk/kite — reusing a job dataset's descriptor for a task-attempt dataset:

private static <E> Dataset<E> loadOrCreateTaskAttemptDataset(TaskAttemptContext taskContext) {
  String taskAttemptDatasetName = getTaskAttemptDatasetName(taskContext);
  DatasetRepository repo = getDatasetRepository(taskContext);
  Dataset<E> jobDataset = loadJobDataset(taskContext);
  if (repo.exists(TEMP_NAMESPACE, taskAttemptDatasetName)) {
    return repo.load(TEMP_NAMESPACE, taskAttemptDatasetName);
  } else {
    return repo.create(TEMP_NAMESPACE, taskAttemptDatasetName,
        copy(jobDataset.getDescriptor()));
  }
}

Example source: kite-sdk/kite — test that deleting a dataset removes its data path:

@Test
public void testDeleteRemovesDatasetPath() throws IOException {
  ensureCreated();
  Dataset<Record> dataset = repo.load(NAMESPACE, NAME);
  Path dataPath = new Path(dataset.getDescriptor().getLocation());
  Assert.assertTrue(fileSystem.exists(dataPath));
  repo.delete(NAMESPACE, NAME);
  Assert.assertFalse(fileSystem.exists(dataPath));
}

Example source: kite-sdk/kite — test that loading a dataset preserves its name and schema:

@Test
public void testLoad() {
  ensureCreated();
  Dataset dataset = repo.load(NAMESPACE, NAME);
  Assert.assertNotNull("Dataset is loaded and produced", dataset);
  Assert.assertEquals("Dataset name is propagated",
      NAME, dataset.getName());
  Assert.assertEquals("Dataset schema is loaded",
      testSchema, dataset.getDescriptor().getSchema());
}
