parquet.schema.MessageType类的使用及代码示例

x33g5p2x  于2022-01-25 转载在 其他  
字(8.6k)|赞(0)|评价(0)|浏览(251)

本文整理了Java中parquet.schema.MessageType类的一些代码示例,展示了MessageType类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。MessageType类的具体详情如下:
包路径:parquet.schema.MessageType
类名称:MessageType

MessageType介绍

[英]The root of a schema
[中]模式的根

代码示例

代码示例来源:origin: prestodb/presto

/**
 * Builds the root Parquet schema from Hive column metadata.
 *
 * @param columnNames names of the Hive columns
 * @param columnTypes Hive type descriptors, parallel to {@code columnNames}
 * @return a {@link MessageType} rooted at "hive_schema"
 */
public static MessageType convert(final List<String> columnNames, final List<TypeInfo> columnTypes)
{
    final String rootName = "hive_schema";
    return new MessageType(rootName, convertTypes(columnNames, columnTypes));
}

代码示例来源:origin: prestodb/presto

/**
 * Looks up a field in a Parquet schema by column name.
 *
 * @return the matching field, or {@code null} when no field matches
 */
public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType)
{
    // Fast path: exact (case-sensitive) lookup.
    if (messageType.containsField(columnName)) {
        return messageType.getType(columnName);
    }
    // Parquet names are case-sensitive while Hive lower-cases all columns,
    // so fall back to a case-insensitive scan over the fields.
    for (parquet.schema.Type candidate : messageType.getFields()) {
        if (candidate.getName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }
    return null;
}

代码示例来源:origin: prestodb/presto

.collect(toList());
MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);

代码示例来源:origin: prestodb/presto

/**
 * Resolves the positional index of a field, tolerating case differences
 * between the Hive column name and the Parquet field name.
 *
 * @return the field index, or -1 when the field is absent under any casing
 */
public static int getFieldIndex(MessageType fileSchema, String name)
{
    String lowered = name.toLowerCase(Locale.ENGLISH);
    try {
        return fileSchema.getFieldIndex(lowered);
    }
    catch (InvalidRecordException ignored) {
        // No lower-cased match; scan for a case-insensitive one instead.
        for (parquet.schema.Type candidate : fileSchema.getFields()) {
            if (candidate.getName().equalsIgnoreCase(name)) {
                return fileSchema.getFieldIndex(candidate.getName());
            }
        }
        return -1;
    }
}

代码示例来源:origin: prestodb/presto

/**
 * Resolves the Parquet type for a Hive column, either by name or by ordinal.
 *
 * @param useParquetColumnNames when true, match on the column name;
 *                              otherwise match on the Hive column index
 * @return the matching Parquet type, or {@code null} when absent from the schema
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
  {
    if (useParquetColumnNames) {
      return getParquetTypeByName(column.getName(), messageType);
    }

    int ordinal = column.getHiveColumnIndex();
    return (ordinal < messageType.getFieldCount()) ? messageType.getType(ordinal) : null;
  }
}

代码示例来源:origin: julienledem/redelm

/** Flattens this schema into one {@link ColumnDescriptor} per leaf (primitive) path. */
public List<ColumnDescriptor> getColumns() {
 List<String[]> leafPaths = this.getPaths(0);
 List<ColumnDescriptor> descriptors = new ArrayList<ColumnDescriptor>(leafPaths.size());
 for (String[] leafPath : leafPaths) {
  // TODO: optimize this
  descriptors.add(new ColumnDescriptor(
    leafPath,
    getType(leafPath).asPrimitiveType().getPrimitiveTypeName(),
    getMaxRepetitionLevel(leafPath),
    getMaxDefinitionLevel(leafPath)));
 }
 return descriptors;
}

代码示例来源:origin: com.twitter/parquet-cascading

/**
 * Computes the intersection between the columns present in a Parquet file
 * and the fields requested by the Cascading flow, preserving file order.
 *
 * @param fileSchema      the full schema read from the Parquet file
 * @param requestedFields fields the caller wants; UNKNOWN is treated as ALL
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
 // Fields.UNKNOWN is a sentinel instance, so identity comparison is intentional.
 if (requestedFields == Fields.UNKNOWN) {
  requestedFields = Fields.ALL;
 }
 Fields newFields = Fields.NONE;
 List<Type> newSchemaFields = new ArrayList<Type>();
 int schemaSize = fileSchema.getFieldCount();
 for (int i = 0; i < schemaSize; i++) {
  Type type = fileSchema.getType(i);
  Fields name = new Fields(type.getName());
  // Keep only the file columns that were actually requested.
  if (requestedFields.contains(name)) {
   newFields = newFields.append(name);
   newSchemaFields.add(type);
  }
 }
 this.sourceFields = newFields;
 this.requestedSchema = new MessageType(fileSchema.getName(), newSchemaFields);
}

代码示例来源:origin: com.twitter/parquet-cascading

/**
 * Serializes one tuple as a Parquet record, skipping null-valued fields.
 * Only primitive field types are supported; a complex field raises
 * UnsupportedOperationException.
 */
@Override
public void write(TupleEntry record) {
 recordConsumer.startMessage();
 final List<Type> schemaFields = rootSchema.getFields();
 int index = 0;
 for (Type schemaField : schemaFields) {
  // A null record or a missing value means the (optional) field is omitted.
  Object value = (record == null) ? null : record.getObject(schemaField.getName());
  if (value != null) {
   recordConsumer.startField(schemaField.getName(), index);
   if (schemaField.isPrimitive()) {
    writePrimitive(record, schemaField.asPrimitiveType());
   } else {
    throw new UnsupportedOperationException("Complex type not implemented");
   }
   recordConsumer.endField(schemaField.getName(), index);
  }
  index++;
 }
 recordConsumer.endMessage();
}

代码示例来源:origin: prestodb/presto

.toArray(String[]::new);
ColumnPath columnPath = ColumnPath.get(path);
PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName();
ColumnChunkMetaData column = ColumnChunkMetaData.get(
    columnPath,

代码示例来源:origin: uber/hudi

/**
 * Converts a Parquet schema into the equivalent Hive table schema,
 * keyed by Hive-compatible column name with the Hive type string as value.
 *
 * @param messageType Parquet schema read from a parquet file
 * @return insertion-ordered map of column name to Hive type string
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
  throws IOException {
 Map<String, String> hiveSchema = Maps.newLinkedHashMap();
 for (Type field : messageType.getFields()) {
  StringBuilder hiveType = new StringBuilder();
  // REPEATED fields map to Hive arrays; everything else converts directly.
  if (field.isRepetition(Type.Repetition.REPEATED)) {
   hiveType.append(createHiveArray(field, ""));
  } else {
   hiveType.append(convertField(field));
  }
  hiveSchema.put(hiveCompatibleFieldName(field.getName(), false), hiveType.toString());
 }
 return hiveSchema;
}

代码示例来源:origin: uk.co.nichesolutions.presto/presto-hive

/** Creates one ParquetColumnReader per requested column and caches it by descriptor. */
private void initializeColumnReaders()
  {
    for (ColumnDescriptor descriptor : requestedSchema.getColumns()) {
      columnReadersMap.put(descriptor, ParquetColumnReader.createReader(descriptor));
    }
  }
}

代码示例来源:origin: prestodb/presto

String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));

代码示例来源:origin: com.twitter/parquet-thrift

/**
 * Wraps the converted Thrift structure into a Parquet {@link MessageType} root.
 *
 * @return the schema root; an empty message when nothing has been converted yet
 */
public MessageType getConvertedMessageType() {
 // Nothing converted yet: emit an empty message so callers always get a valid root.
 if (currentType == null) {
  return new MessageType(currentName, new ArrayList<Type>());
 }
 // the root should be a GroupType; re-root its children under a MessageType
 GroupType rootType = currentType.asGroupType();
 return new MessageType(currentName, rootType.getFields());
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Indexes the schema's leaf columns by path, also remembering each column's
 * OriginalType (logical type) when one is declared.
 */
private SchemaCompatibilityValidator(MessageType schema) {
 for (ColumnDescriptor descriptor : schema.getColumns()) {
  String[] path = descriptor.getPath();
  ColumnPath key = ColumnPath.get(path);
  columnsAccordingToSchema.put(key, descriptor);
  OriginalType logicalType = schema.getType(path).getOriginalType();
  if (logicalType != null) {
   originalTypes.put(key, logicalType);
  }
 }
}

代码示例来源:origin: asakusafw/asakusafw

/**
 * Pairs the i-th Parquet column with the i-th model property by position.
 * Unmatched entries on either side are mapped against {@code null} so that
 * callers can detect the size mismatch.
 */
private static List<Mapping> computeMappingByPosition(
    DataModelDescriptor target, MessageType source) {
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Mapping columns by their position: model={0}", //$NON-NLS-1$
        target.getDataModelClass().getName()));
  }
  List<ColumnDescriptor> sources = source.getColumns();
  List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors();
  int total = Math.max(sources.size(), targets.size());
  List<Mapping> mappings = new ArrayList<>(total);
  // At most one side has leftovers, so a single loop preserves the original
  // order: paired entries first, then the unmatched tail.
  for (int i = 0; i < total; i++) {
    ColumnDescriptor column = (i < sources.size()) ? sources.get(i) : null;
    Type columnType = (column == null) ? null : source.getType(column.getPath());
    PropertyDescriptor property = (i < targets.size()) ? targets.get(i) : null;
    mappings.add(new Mapping(column, columnType, property));
  }
  return mappings;
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Merges this schema with another, keeping this schema's name as the root name.
 *
 * @param toMerge the schema to union with this one
 * @param strict  when true, incompatible field definitions fail the merge
 * @return a new MessageType containing the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
 return new MessageType(getName(), mergeFields(toMerge, strict));
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * {@inheritDoc}
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
 // Header: message <name> [(originalType)] {
 sb.append("message ").append(getName());
 OriginalType originalType = getOriginalType();
 if (originalType != null) {
  sb.append(" (").append(originalType).append(")");
 }
 sb.append(" {\n");
 // NOTE(review): the indent parameter is not used here — members are always
 // rendered with a fixed two-space indent, matching the original behavior.
 membersDisplayString(sb, "  ");
 sb.append("}\n");
}

代码示例来源:origin: org.apache.tajo/tajo-storage

/**
 * Prepares this record reader: records the schemas, builds the record
 * converter, opens the underlying file reader, and totals the row count
 * across all row groups.
 *
 * @throws IOException when the Parquet file reader cannot be opened
 */
public void initialize(MessageType requestedSchema, MessageType fileSchema,
            Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
            Path file, List<BlockMetaData> blocks, Configuration configuration)
  throws IOException {
 this.requestedSchema = requestedSchema;
 this.fileSchema = fileSchema;
 this.file = file;
 this.columnCount = requestedSchema.getPaths().size();
 this.recordConverter = readSupport.prepareForRead(
   configuration, extraMetadata, fileSchema,
   new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
 reader = new ParquetFileReader(configuration, file, blocks, requestedSchema.getColumns());
 // Total row count is the sum over all row groups (blocks) in the file.
 for (BlockMetaData blockMetaData : blocks) {
  total += blockMetaData.getRowCount();
 }
 LOG.info("RecordReader initialized will read a total of " + total + " records.");
}

代码示例来源:origin: julienledem/redelm

MessageType parsed = MessageTypeParser.parseMessageType(example);
MessageType manuallyMade =
  new MessageType("Document",
    new PrimitiveType(REQUIRED, INT64, "DocId"),
    new GroupType(OPTIONAL, "Links",
assertEquals(manuallyMade, parsed);
MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
  new MessageType("m",
    new GroupType(REQUIRED, "a",
      new PrimitiveType(REQUIRED, BINARY, "b")),
parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());

代码示例来源:origin: com.twitter/parquet-tools

/** Prints a detailed description of every top-level field in the schema. */
public static void showDetails(PrettyPrintWriter out, MessageType type) {
 // The container path list is shared across fields so nested calls can
 // push/pop path segments as they descend.
 List<String> containerPath = new ArrayList<String>();
 for (Type field : type.getFields()) {
  showDetails(out, field, 0, type, containerPath);
 }
}

相关文章