Usage and code examples of the parquet.schema.MessageType.getColumns() method

x33g5p2x · reposted on 2022-01-25 · category: Other

This article collects Java code examples for the parquet.schema.MessageType.getColumns() method and shows how it is used in practice. The examples are taken from selected open-source projects published on platforms such as GitHub, Stack Overflow, and Maven, and should serve as a useful reference. Details of MessageType.getColumns() are as follows:

Package: parquet.schema
Class: MessageType
Method: getColumns

About MessageType.getColumns

getColumns() flattens the schema into its primitive (leaf) columns and returns one ColumnDescriptor per leaf, in declaration order. Each descriptor carries the column path, its primitive type, and its maximum repetition and definition levels, which is why the method is typically used to drive per-column readers and writers, as the examples below show.
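
Before turning to the project excerpts, here is a minimal, self-contained sketch of calling getColumns() on a parsed schema. It assumes the pre-Apache parquet-column artifact (classes under the parquet.* packages, as used throughout this article); the schema string and the class name GetColumnsSketch are illustrative only.

import java.util.Arrays;
import java.util.List;

import parquet.column.ColumnDescriptor;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class GetColumnsSketch {
  public static void main(String[] args) {
    // Parse a small schema; getColumns() flattens it into one
    // ColumnDescriptor per primitive leaf, nested fields included.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example {\n"
            + "  required int64 id;\n"
            + "  optional group name {\n"
            + "    optional binary first;\n"
            + "    optional binary last;\n"
            + "  }\n"
            + "}");

    List<ColumnDescriptor> columns = schema.getColumns();
    for (ColumnDescriptor column : columns) {
      // Each descriptor exposes the column path, primitive type,
      // and maximum repetition/definition levels of one leaf column.
      System.out.println(Arrays.toString(column.getPath())
          + " type=" + column.getType()
          + " maxRep=" + column.getMaxRepetitionLevel()
          + " maxDef=" + column.getMaxDefinitionLevel());
    }
  }
}

For this schema the loop prints three leaves: [id], [name, first], and [name, last].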

Code examples

Code example source: origin: com.facebook.presto.hive/hive-apache

public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
 for (ColumnDescriptor path : schema.getColumns()) {
  ColumnChunkPageWriter pageWriter = writers.get(path);
  pageWriter.writeToFileWriter(writer);
 }
}

Code example source: origin: com.facebook.presto.hive/hive-apache

public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, int pageSize) {
 this.schema = schema;
 for (ColumnDescriptor path : schema.getColumns()) {
  writers.put(path,  new ColumnChunkPageWriter(path, compressor, pageSize));
 }
}

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

private void initializeColumnReaders()
{
    for (ColumnDescriptor column : requestedSchema.getColumns()) {
        columnReadersMap.put(column, ParquetColumnReader.createReader(column));
    }
}

Code example source: origin: org.apache.tajo/tajo-storage

public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
 List<ColumnDescriptor> columns = schema.getColumns();
 for (ColumnDescriptor columnDescriptor : columns) {
  ColumnChunkPageWriter pageWriter = writers.get(columnDescriptor);
  pageWriter.writeToFileWriter(writer);
 }
}

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

private static int lookupParquetColumn(HiveColumnHandle column, MessageType fileSchema)
{
  // map column has more than one primitive columns in parquet file
  // the column ordinal number does not always equal to hive column index
  // need to do a look up in parquet file schema columns
  int parquetFieldIndex = 0;
  for (; parquetFieldIndex < fileSchema.getColumns().size(); parquetFieldIndex++) {
    String[] path = fileSchema.getColumns().get(parquetFieldIndex).getPath();
    String columnName = path[path.length - 1];
    if (column.getName().equals(columnName)) {
      break;
    }
  }
  return parquetFieldIndex;
}

Code example source: origin: com.facebook.presto.hive/hive-apache

public ColumnWriteStoreV2(
  MessageType schema,
  PageWriteStore pageWriteStore,
  int pageSizeThreshold,
  ParquetProperties parquetProps) {
 super();
 this.pageSizeThreshold = pageSizeThreshold;
 this.thresholdTolerance = (long)(pageSizeThreshold * THRESHOLD_TOLERANCE_RATIO);
 Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
 for (ColumnDescriptor path : schema.getColumns()) {
  PageWriter pageWriter = pageWriteStore.getPageWriter(path);
  mcolumns.put(path, new ColumnWriterV2(path, pageWriter, parquetProps, pageSizeThreshold));
 }
 this.columns = unmodifiableMap(mcolumns);
 this.writers = this.columns.values();
}

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

public int nextBatch()
    throws IOException, InterruptedException
{
  if (nextRowInGroup >= currentGroupRowCount) {
    if (!advanceToNextRowGroup()) {
      return -1;
    }
  }
  int batchSize = Ints.checkedCast(Math.min(MAX_VECTOR_LENGTH, currentGroupRowCount - nextRowInGroup));
  nextRowInGroup += batchSize;
  currentPosition += batchSize;
  for (ColumnDescriptor column : requestedSchema.getColumns()) {
    ParquetColumnReader columnReader = columnReadersMap.get(column);
    columnReader.prepareNextRead(batchSize);
  }
  return batchSize;
}

Code example source: origin: asakusafw/asakusafw

private ParquetFileReader createFileReader(ParquetMetadata meta, List<BlockMetaData> blocks) throws IOException {
  FileMetaData fileMetaData = meta.getFileMetaData();
  if (FILE_READER_NEWER_CTOR != null) {
    try {
      return FILE_READER_NEWER_CTOR.newInstance(
          hadoopConfiguration,
          fileMetaData,
          path,
          blocks,
          fileMetaData.getSchema().getColumns());
    } catch (ReflectiveOperationException | IllegalArgumentException | SecurityException e) {
      LOG.debug("failed ParquetFileReader.<init>", e);
    }
  }
  return new ParquetFileReader(
      hadoopConfiguration,
      path,
      blocks,
      fileMetaData.getSchema().getColumns());
}

Code example source: origin: com.facebook.presto.hive/hive-apache

maxColCount = Math.max(w.getSchema().getColumns().size(), maxColCount);

Code example source: origin: org.apache.tajo/tajo-storage

private void initStore() {
 // we don't want this number to be too small
 // ideally we divide the block equally across the columns
 // it is unlikely all columns are going to be the same size.
 int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / schema.getColumns().size() / 5);
 pageStore = new ColumnChunkPageWriteStore(compressor, schema, initialBlockBufferSize);
 // we don't want this number to be too small either
 // ideally, slightly bigger than the page size, but not bigger than the block buffer
 int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
 store = new ColumnWriteStoreImpl(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion);
 MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
 writeSupport.prepareForWrite(columnIO.getRecordWriter(store));
}

Code example source: origin: asakusafw/asakusafw

private static List<Mapping> computeMappingByPosition(
    DataModelDescriptor target, MessageType source) {
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Mapping columns by their position: model={0}", //$NON-NLS-1$
        target.getDataModelClass().getName()));
  }
  List<ColumnDescriptor> sources = source.getColumns();
  List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors();
  List<Mapping> mappings = new ArrayList<>();
  int limit = Math.min(sources.size(), targets.size());
  for (int i = 0; i < limit; i++) {
    ColumnDescriptor s = sources.get(i);
    Type sType = source.getType(s.getPath());
    PropertyDescriptor t = targets.get(i);
    mappings.add(new Mapping(s, sType, t));
  }
  for (int i = limit, n = sources.size(); i < n; i++) {
    ColumnDescriptor s = sources.get(i);
    Type sType = source.getType(s.getPath());
    mappings.add(new Mapping(s, sType, null));
  }
  for (int i = limit, n = targets.size(); i < n; i++) {
    mappings.add(new Mapping(null, null, targets.get(i)));
  }
  return mappings;
}

Code example source: origin: com.facebook.presto.hive/hive-apache

private SchemaCompatibilityValidator(MessageType schema) {
 for (ColumnDescriptor cd : schema.getColumns()) {
  ColumnPath columnPath = ColumnPath.get(cd.getPath());
  columnsAccordingToSchema.put(columnPath, cd);
  OriginalType ot = schema.getType(cd.getPath()).getOriginalType();
  if (ot != null) {
   originalTypes.put(columnPath, ot);
  }
 }
}

Code example source: origin: asakusafw/asakusafw

private static List<Mapping> computeMappingByName(
    DataModelDescriptor target, MessageType source) {
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Mapping columns by their name: model={0}", //$NON-NLS-1$
        target.getDataModelClass().getName()));
  }
  Set<PropertyDescriptor> rest = new LinkedHashSet<>(target.getPropertyDescriptors());
  List<Mapping> mappings = new ArrayList<>();
  for (ColumnDescriptor s : source.getColumns()) {
    String name = s.getPath()[0];
    Type sType = source.getType(s.getPath());
    PropertyDescriptor t = target.findPropertyDescriptor(name);
    if (t != null) {
      mappings.add(new Mapping(s, sType, t));
      rest.remove(t);
    } else {
      mappings.add(new Mapping(s, sType, null));
    }
  }
  for (PropertyDescriptor t : rest) {
    mappings.add(new Mapping(null, null, t));
  }
  return mappings;
}

Code example source: origin: org.apache.tajo/tajo-storage

public void initialize(MessageType requestedSchema, MessageType fileSchema,
            Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
            Path file, List<BlockMetaData> blocks, Configuration configuration)
  throws IOException {
 this.requestedSchema = requestedSchema;
 this.fileSchema = fileSchema;
 this.file = file;
 this.columnCount = this.requestedSchema.getPaths().size();
 this.recordConverter = readSupport.prepareForRead(
   configuration, extraMetadata, fileSchema,
   new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
 List<ColumnDescriptor> columns = requestedSchema.getColumns();
 reader = new ParquetFileReader(configuration, file, blocks, columns);
 for (BlockMetaData block : blocks) {
  total += block.getRowCount();
 }
 LOG.info("RecordReader initialized will read a total of " + total + " records.");
}

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(fieldIndex);
blocks[fieldId] = new LazyBlock(batchSize, new ParquetBlockLoader(columnDescriptor, type));

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

ColumnChunkMetaData columnChunkMetaData = blockMetadata.getColumns().get(ordinal);
for (int i = 0; i < requestedSchema.getColumns().size(); i++) {
  ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i);
  if (isColumnPredicate(columnDescriptor, effectivePredicate) &&
      columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath())) &&

Code example source: origin: com.facebook.presto.hive/hive-apache

public void initialize(MessageType fileSchema,
  Map<String, String> fileMetadata,
  Path file, List<BlockMetaData> blocks, Configuration configuration)
  throws IOException {
 // initialize a ReadContext for this file
 ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
   configuration, toSetMultiMap(fileMetadata), fileSchema));
 this.requestedSchema = readContext.getRequestedSchema();
 this.fileSchema = fileSchema;
 this.file = file;
 this.columnCount = requestedSchema.getPaths().size();
 this.recordConverter = readSupport.prepareForRead(
   configuration, fileMetadata, fileSchema, readContext);
 this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
 List<ColumnDescriptor> columns = requestedSchema.getColumns();
 reader = new ParquetFileReader(configuration, file, blocks, columns);
 for (BlockMetaData block : blocks) {
  total += block.getRowCount();
 }
 LOG.info("RecordReader initialized will read a total of " + total + " records.");
}

Code example source: origin: uk.co.nichesolutions.presto/presto-hive

Iterator<?> iterator = expectedValues.iterator();
for (int batchSize = parquetReader.nextBatch(); batchSize >= 0; batchSize = parquetReader.nextBatch()) {
  ColumnDescriptor columnDescriptor = fileSchema.getColumns().get(0);
  Block block = parquetReader.readBlock(columnDescriptor, type);
  for (int i = 0; i < batchSize; i++) {

Code example source: origin: uber/hudi

@Test
 public void testMultiPartitionKeySync()
   throws IOException, InitializationError, URISyntaxException, TException,
   InterruptedException {
  String commitTime = "100";
  TestUtil.createCOWDataset(commitTime, 5);

  HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
  hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
  hiveSyncConfig.tableName = "multi_part_key";
  hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day");
  TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);

  HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig,
    TestUtil.getHiveConf(), TestUtil.fileSystem);
  assertFalse("Table " + hiveSyncConfig.tableName + " should not exist initially",
    hiveClient.doesTableExist());
  // Lets do the sync
  HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
  tool.syncHoodieTable();
  assertTrue("Table " + hiveSyncConfig.tableName + " should exist after sync completes",
    hiveClient.doesTableExist());
  assertEquals("Hive Schema should match the dataset schema + partition fields",
    hiveClient.getTableSchema().size(), hiveClient.getDataSchema().getColumns().size() + 3);
  assertEquals("Table partitions should match the number of partitions we wrote", 5,
    hiveClient.scanTablePartitions().size());
  assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
    commitTime, hiveClient.getLastCommitTimeSynced().get());
 }

Code example source: origin: uber/hudi

@Test
public void testBasicSync()
  throws IOException, InitializationError, URISyntaxException, TException,
  InterruptedException {
 String commitTime = "100";
 TestUtil.createCOWDataset(commitTime, 5);
 HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig,
   TestUtil.getHiveConf(), TestUtil.fileSystem);
 assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
   hiveClient.doesTableExist());
 // Lets do the sync
 HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
   TestUtil.fileSystem);
 tool.syncHoodieTable();
 assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
   hiveClient.doesTableExist());
 assertEquals("Hive Schema should match the dataset schema + partition field",
   hiveClient.getTableSchema().size(), hiveClient.getDataSchema().getColumns().size() + 1);
 assertEquals("Table partitions should match the number of partitions we wrote", 5,
   hiveClient.scanTablePartitions().size());
 assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
   commitTime, hiveClient.getLastCommitTimeSynced().get());
}
