This article collects code examples of the Java method parquet.schema.MessageType.getColumns() and shows how MessageType.getColumns() is used in practice. The examples are drawn mainly from platforms such as GitHub, Stack Overflow, and Maven, and were extracted from a number of curated projects, so they should serve as a useful reference. Details of the MessageType.getColumns() method are as follows:

Package path: parquet.schema.MessageType
Class name: MessageType
Method name: getColumns
Method description: none available
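Before the project snippets, here is a minimal, self-contained sketch of getColumns() itself. It is not taken from any of the projects below; the schema string and the GetColumnsDemo class name are invented for illustration. The point it demonstrates is that getColumns() flattens a schema into one ColumnDescriptor per primitive (leaf) column:

import java.util.Arrays;
import java.util.List;

import parquet.column.ColumnDescriptor;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class GetColumnsDemo {
  public static void main(String[] args) {
    // Parse a small schema from its textual representation.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; optional binary name; }");
    // getColumns() returns one ColumnDescriptor per primitive leaf column.
    List<ColumnDescriptor> columns = schema.getColumns();
    for (ColumnDescriptor column : columns) {
      // getPath() is the path from the root to the leaf; getType() is its primitive type.
      System.out.println(Arrays.toString(column.getPath()) + " -> " + column.getType());
    }
  }
}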
Code example source: origin: com.facebook.presto.hive/hive-apache

public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
  for (ColumnDescriptor path : schema.getColumns()) {
    ColumnChunkPageWriter pageWriter = writers.get(path);
    pageWriter.writeToFileWriter(writer);
  }
}
Code example source: origin: com.facebook.presto.hive/hive-apache

public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, int pageSize) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, pageSize));
  }
}
Code example source: origin: uk.co.nichesolutions.presto/presto-hive

private void initializeColumnReaders()
{
  for (ColumnDescriptor column : requestedSchema.getColumns()) {
    columnReadersMap.put(column, ParquetColumnReader.createReader(column));
  }
}
Code example source: origin: org.apache.tajo/tajo-storage

public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
  List<ColumnDescriptor> columns = schema.getColumns();
  for (ColumnDescriptor columnDescriptor : columns) {
    ColumnChunkPageWriter pageWriter = writers.get(columnDescriptor);
    pageWriter.writeToFileWriter(writer);
  }
}
Code example source: origin: uk.co.nichesolutions.presto/presto-hive

private static int lookupParquetColumn(HiveColumnHandle column, MessageType fileSchema)
{
  // a map column is stored as more than one primitive column in the parquet file,
  // so the column ordinal does not always equal the hive column index;
  // we need to look the column up by name in the parquet file schema columns
  int parquetFieldIndex = 0;
  for (; parquetFieldIndex < fileSchema.getColumns().size(); parquetFieldIndex++) {
    String[] path = fileSchema.getColumns().get(parquetFieldIndex).getPath();
    String columnName = path[path.length - 1];
    if (column.getName().equals(columnName)) {
      break;
    }
  }
  return parquetFieldIndex;
}
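To make the comment above concrete: a single Hive column of type map<string,string> is stored in the Parquet file as two primitive columns (one for the keys, one for the values), so every Hive column after it sees its ordinal shifted relative to fileSchema.getColumns(). Scanning the flattened columns for a matching name, as the loop above does, restores the association. (The map<string,string> type is an illustrative assumption, not taken from the snippet.)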
Code example source: origin: com.facebook.presto.hive/hive-apache

public ColumnWriteStoreV2(
    MessageType schema,
    PageWriteStore pageWriteStore,
    int pageSizeThreshold,
    ParquetProperties parquetProps) {
  super();
  this.pageSizeThreshold = pageSizeThreshold;
  this.thresholdTolerance = (long) (pageSizeThreshold * THRESHOLD_TOLERANCE_RATIO);
  Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
  for (ColumnDescriptor path : schema.getColumns()) {
    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
    mcolumns.put(path, new ColumnWriterV2(path, pageWriter, parquetProps, pageSizeThreshold));
  }
  this.columns = unmodifiableMap(mcolumns);
  this.writers = this.columns.values();
}
Code example source: origin: uk.co.nichesolutions.presto/presto-hive

public int nextBatch()
    throws IOException, InterruptedException
{
  if (nextRowInGroup >= currentGroupRowCount) {
    if (!advanceToNextRowGroup()) {
      return -1;
    }
  }
  int batchSize = Ints.checkedCast(Math.min(MAX_VECTOR_LENGTH, currentGroupRowCount - nextRowInGroup));
  nextRowInGroup += batchSize;
  currentPosition += batchSize;
  for (ColumnDescriptor column : requestedSchema.getColumns()) {
    ParquetColumnReader columnReader = columnReadersMap.get(column);
    columnReader.prepareNextRead(batchSize);
  }
  return batchSize;
}
Code example source: origin: asakusafw/asakusafw

private ParquetFileReader createFileReader(ParquetMetadata meta, List<BlockMetaData> blocks) throws IOException {
  FileMetaData fileMetaData = meta.getFileMetaData();
  if (FILE_READER_NEWER_CTOR != null) {
    try {
      return FILE_READER_NEWER_CTOR.newInstance(
          hadoopConfiguration,
          fileMetaData,
          path,
          blocks,
          fileMetaData.getSchema().getColumns());
    } catch (ReflectiveOperationException | IllegalArgumentException | SecurityException e) {
      LOG.debug("failed ParquetFileReader.<init>", e);
    }
  }
  return new ParquetFileReader(
      hadoopConfiguration,
      path,
      blocks,
      fileMetaData.getSchema().getColumns());
}
Code example source: origin: com.facebook.presto.hive/hive-apache
maxColCount = Math.max(w.getSchema().getColumns().size(), maxColCount);
Code example source: origin: org.apache.tajo/tajo-storage

private void initStore() {
  // we don't want this number to be too small
  // ideally we divide the block equally across the columns
  // it is unlikely all columns are going to be the same size.
  int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / schema.getColumns().size() / 5);
  pageStore = new ColumnChunkPageWriteStore(compressor, schema, initialBlockBufferSize);
  // we don't want this number to be too small either
  // ideally, slightly bigger than the page size, but not bigger than the block buffer
  int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
  store = new ColumnWriteStoreImpl(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion);
  MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
  writeSupport.prepareForWrite(columnIO.getRecordWriter(store));
}
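As a worked example of the buffer sizing above (with made-up numbers, not from the snippet): given a 128 MiB block, a schema whose getColumns() yields 10 primitive columns, and a MINIMUM_BUFFER_SIZE of 64 KiB, initialBlockBufferSize = max(64 KiB, 128 MiB / 10 / 5) ≈ 2.6 MiB, i.e. each column chunk initially buffers about a fifth of its equal share of the block.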
Code example source: origin: asakusafw/asakusafw

private static List<Mapping> computeMappingByPosition(
    DataModelDescriptor target, MessageType source) {
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Mapping columns by their position: model={0}", //$NON-NLS-1$
        target.getDataModelClass().getName()));
  }
  List<ColumnDescriptor> sources = source.getColumns();
  List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors();
  List<Mapping> mappings = new ArrayList<>();
  int limit = Math.min(sources.size(), targets.size());
  for (int i = 0; i < limit; i++) {
    ColumnDescriptor s = sources.get(i);
    Type sType = source.getType(s.getPath());
    PropertyDescriptor t = targets.get(i);
    mappings.add(new Mapping(s, sType, t));
  }
  for (int i = limit, n = sources.size(); i < n; i++) {
    ColumnDescriptor s = sources.get(i);
    Type sType = source.getType(s.getPath());
    mappings.add(new Mapping(s, sType, null));
  }
  for (int i = limit, n = targets.size(); i < n; i++) {
    mappings.add(new Mapping(null, null, targets.get(i)));
  }
  return mappings;
}
Code example source: origin: com.facebook.presto.hive/hive-apache

private SchemaCompatibilityValidator(MessageType schema) {
  for (ColumnDescriptor cd : schema.getColumns()) {
    ColumnPath columnPath = ColumnPath.get(cd.getPath());
    columnsAccordingToSchema.put(columnPath, cd);
    OriginalType ot = schema.getType(cd.getPath()).getOriginalType();
    if (ot != null) {
      originalTypes.put(columnPath, ot);
    }
  }
}
Code example source: origin: asakusafw/asakusafw

private static List<Mapping> computeMappingByName(
    DataModelDescriptor target, MessageType source) {
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Mapping columns by their name: model={0}", //$NON-NLS-1$
        target.getDataModelClass().getName()));
  }
  Set<PropertyDescriptor> rest = new LinkedHashSet<>(target.getPropertyDescriptors());
  List<Mapping> mappings = new ArrayList<>();
  for (ColumnDescriptor s : source.getColumns()) {
    String name = s.getPath()[0];
    Type sType = source.getType(s.getPath());
    PropertyDescriptor t = target.findPropertyDescriptor(name);
    if (t != null) {
      mappings.add(new Mapping(s, sType, t));
      rest.remove(t);
    } else {
      mappings.add(new Mapping(s, sType, null));
    }
  }
  for (PropertyDescriptor t : rest) {
    mappings.add(new Mapping(null, null, t));
  }
  return mappings;
}
Code example source: origin: org.apache.tajo/tajo-storage

public void initialize(MessageType requestedSchema, MessageType fileSchema,
    Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  this.requestedSchema = requestedSchema;
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, extraMetadata, fileSchema,
      new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
Code example source: origin: uk.co.nichesolutions.presto/presto-hive
ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(fieldIndex);
blocks[fieldId] = new LazyBlock(batchSize, new ParquetBlockLoader(columnDescriptor, type));
Code example source: origin: uk.co.nichesolutions.presto/presto-hive

ColumnChunkMetaData columnChunkMetaData = blockMetadata.getColumns().get(ordinal);
for (int i = 0; i < requestedSchema.getColumns().size(); i++) {
  ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i);
  if (isColumnPredicate(columnDescriptor, effectivePredicate) &&
      columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath())) &&
Code example source: origin: com.facebook.presto.hive/hive-apache

public void initialize(MessageType fileSchema,
    Map<String, String> fileMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
Code example source: origin: uk.co.nichesolutions.presto/presto-hive

Iterator<?> iterator = expectedValues.iterator();
for (int batchSize = parquetReader.nextBatch(); batchSize >= 0; batchSize = parquetReader.nextBatch()) {
  ColumnDescriptor columnDescriptor = fileSchema.getColumns().get(0);
  Block block = parquetReader.readBlock(columnDescriptor, type);
  for (int i = 0; i < batchSize; i++) {
Code example source: origin: uber/hudi

@Test
public void testMultiPartitionKeySync()
    throws IOException, InitializationError, URISyntaxException, TException,
    InterruptedException {
  String commitTime = "100";
  TestUtil.createCOWDataset(commitTime, 5);
  HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
  hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
  hiveSyncConfig.tableName = "multi_part_key";
  hiveSyncConfig.partitionFields = Lists.newArrayList("year", "month", "day");
  TestUtil.getCreatedTablesSet().add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
  HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig,
      TestUtil.getHiveConf(), TestUtil.fileSystem);
  assertFalse("Table " + hiveSyncConfig.tableName + " should not exist initially",
      hiveClient.doesTableExist());
  // Let's do the sync
  HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
  tool.syncHoodieTable();
  assertTrue("Table " + hiveSyncConfig.tableName + " should exist after sync completes",
      hiveClient.doesTableExist());
  assertEquals("Hive Schema should match the dataset schema + partition fields",
      hiveClient.getTableSchema().size(), hiveClient.getDataSchema().getColumns().size() + 3);
  assertEquals("Table partitions should match the number of partitions we wrote", 5,
      hiveClient.scanTablePartitions().size());
  assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES",
      commitTime, hiveClient.getLastCommitTimeSynced().get());
}
Code example source: origin: uber/hudi

@Test
public void testBasicSync()
    throws IOException, InitializationError, URISyntaxException, TException,
    InterruptedException {
  String commitTime = "100";
  TestUtil.createCOWDataset(commitTime, 5);
  HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig,
      TestUtil.getHiveConf(), TestUtil.fileSystem);
  assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
      hiveClient.doesTableExist());
  // Let's do the sync
  HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
      TestUtil.fileSystem);
  tool.syncHoodieTable();
  assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes",
      hiveClient.doesTableExist());
  assertEquals("Hive Schema should match the dataset schema + partition field",
      hiveClient.getTableSchema().size(), hiveClient.getDataSchema().getColumns().size() + 1);
  assertEquals("Table partitions should match the number of partitions we wrote", 5,
      hiveClient.scanTablePartitions().size());
  assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES",
      commitTime, hiveClient.getLastCommitTimeSynced().get());
}