本文整理了Java中parquet.schema.MessageType
类的一些代码示例,展示了MessageType
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。MessageType
类的具体详情如下:
包路径:parquet.schema.MessageType
类名称:MessageType
[英]The root of a schema
[中]模式的根
代码示例来源:origin: prestodb/presto
/**
 * Builds a Parquet {@code MessageType} named {@code "hive_schema"} from parallel
 * lists of Hive column names and type infos.
 *
 * @param columnNames Hive column names, positionally aligned with {@code columnTypes}
 * @param columnTypes Hive type infos for each column
 * @return a Parquet message whose fields are the converted Hive columns
 */
public static MessageType convert(final List<String> columnNames, final List<TypeInfo> columnTypes)
{
return new MessageType("hive_schema", convertTypes(columnNames, columnTypes));
}
代码示例来源:origin: prestodb/presto
/**
 * Looks up a field in a Parquet message by name, first exactly and then
 * case-insensitively.
 *
 * @param columnName name to resolve
 * @param messageType schema to search
 * @return the matching field type, or {@code null} if no field matches
 */
public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType)
{
    // Fast path: exact (case-sensitive) match.
    if (messageType.containsField(columnName)) {
        return messageType.getType(columnName);
    }
    // Parquet is case-sensitive but Hive is not; Hive lower-cases all column
    // names, so fall back to a case-insensitive scan over the schema fields.
    for (parquet.schema.Type field : messageType.getFields()) {
        if (field.getName().equalsIgnoreCase(columnName)) {
            return field;
        }
    }
    return null;
}
代码示例来源:origin: prestodb/presto
.collect(toList());
MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
代码示例来源:origin: prestodb/presto
/**
 * Resolves the positional index of a named field in the file schema.
 * Tries the lower-cased name first (Hive convention), then falls back to a
 * case-insensitive scan.
 *
 * @param fileSchema schema to search
 * @param name column name (any case)
 * @return the field index, or {@code -1} if the field is absent
 */
public static int getFieldIndex(MessageType fileSchema, String name)
{
    try {
        return fileSchema.getFieldIndex(name.toLowerCase(Locale.ENGLISH));
    }
    catch (InvalidRecordException ignored) {
        // No lower-case match; scan all fields case-insensitively before giving up.
        for (parquet.schema.Type candidate : fileSchema.getFields()) {
            if (candidate.getName().equalsIgnoreCase(name)) {
                return fileSchema.getFieldIndex(candidate.getName());
            }
        }
        return -1;
    }
}
代码示例来源:origin: prestodb/presto
/**
 * Resolves the Parquet type for a Hive column, either by name or by ordinal
 * position, depending on configuration.
 *
 * @param column Hive column handle
 * @param messageType Parquet file schema
 * @param useParquetColumnNames when {@code true}, match by name; otherwise by index
 * @return the resolved Parquet type, or {@code null} if not present in the schema
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
{
    if (useParquetColumnNames) {
        // Name-based lookup (with case-insensitive fallback).
        return getParquetTypeByName(column.getName(), messageType);
    }
    // Position-based lookup; out-of-range indices resolve to null.
    int hiveIndex = column.getHiveColumnIndex();
    return hiveIndex < messageType.getFieldCount() ? messageType.getType(hiveIndex) : null;
}
}
代码示例来源:origin: julienledem/redelm
/**
 * Enumerates every leaf column of this schema as a {@link ColumnDescriptor},
 * carrying its path, primitive type, and max repetition/definition levels.
 *
 * @return descriptors for all leaf columns, in schema order
 */
public List<ColumnDescriptor> getColumns() {
    List<String[]> leafPaths = this.getPaths(0);
    List<ColumnDescriptor> descriptors = new ArrayList<ColumnDescriptor>(leafPaths.size());
    for (String[] leafPath : leafPaths) {
        // TODO: optimize this
        descriptors.add(new ColumnDescriptor(
                leafPath,
                getType(leafPath).asPrimitiveType().getPrimitiveTypeName(),
                getMaxRepetitionLevel(leafPath),
                getMaxDefinitionLevel(leafPath)));
    }
    return descriptors;
}
代码示例来源:origin: com.twitter/parquet-cascading
/**
 * Computes the intersection between a Parquet file schema and the Cascading
 * fields requested by the caller, preserving file-schema field order.
 *
 * @param fileSchema schema of the Parquet file
 * @param requestedFields fields the caller wants; {@code Fields.UNKNOWN} is
 *        treated as "all fields"
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
    // An unknown request means the caller wants everything.
    if (requestedFields == Fields.UNKNOWN) {
        requestedFields = Fields.ALL;
    }
    Fields intersectedFields = Fields.NONE;
    List<Type> keptTypes = new ArrayList<Type>();
    // Keep each file-schema field (in file order) that was requested.
    for (int i = 0, fieldCount = fileSchema.getFieldCount(); i < fieldCount; i++) {
        Type fieldType = fileSchema.getType(i);
        Fields fieldName = new Fields(fieldType.getName());
        if (requestedFields.contains(fieldName)) {
            intersectedFields = intersectedFields.append(fieldName);
            keptTypes.add(fieldType);
        }
    }
    this.sourceFields = intersectedFields;
    this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
}
代码示例来源:origin: com.twitter/parquet-cascading
/**
 * Writes one tuple as a Parquet record. Fields whose value is absent (or a
 * {@code null} record) are skipped entirely rather than written as null.
 * Only primitive fields are supported.
 *
 * @param record tuple to write; may be {@code null}
 */
@Override
public void write(TupleEntry record) {
    recordConsumer.startMessage();
    final List<Type> schemaFields = rootSchema.getFields();
    for (int index = 0; index < schemaFields.size(); index++) {
        Type field = schemaFields.get(index);
        // Absent values are simply not emitted for this record.
        if (record == null || record.getObject(field.getName()) == null) {
            continue;
        }
        recordConsumer.startField(field.getName(), index);
        if (field.isPrimitive()) {
            writePrimitive(record, field.asPrimitiveType());
        }
        else {
            // Group/nested fields are not handled by this writer.
            throw new UnsupportedOperationException("Complex type not implemented");
        }
        recordConsumer.endField(field.getName(), index);
    }
    recordConsumer.endMessage();
}
代码示例来源:origin: prestodb/presto
.toArray(String[]::new);
ColumnPath columnPath = ColumnPath.get(path);
PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName();
ColumnChunkMetaData column = ColumnChunkMetaData.get(
columnPath,
代码示例来源:origin: uber/hudi
/**
 * Returns the equivalent Hive table schema read from a Parquet file schema.
 *
 * @param messageType Parquet schema to convert
 * @return ordered map of Hive column name to Hive type string
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
    throws IOException {
  // LinkedHashMap keeps columns in Parquet field order.
  Map<String, String> hiveSchema = Maps.newLinkedHashMap();
  for (Type parquetField : messageType.getFields()) {
    StringBuilder hiveType = new StringBuilder();
    if (parquetField.isRepetition(Type.Repetition.REPEATED)) {
      // Repeated Parquet fields map to Hive array types.
      hiveType.append(createHiveArray(parquetField, ""));
    } else {
      hiveType.append(convertField(parquetField));
    }
    hiveSchema.put(hiveCompatibleFieldName(parquetField.getName(), false), hiveType.toString());
  }
  return hiveSchema;
}
代码示例来源:origin: uk.co.nichesolutions.presto/presto-hive
/**
 * Creates one {@code ParquetColumnReader} per column of the projected
 * (requested) schema and registers it in {@code columnReadersMap}.
 */
private void initializeColumnReaders()
{
    for (ColumnDescriptor columnDescriptor : requestedSchema.getColumns()) {
        columnReadersMap.put(columnDescriptor, ParquetColumnReader.createReader(columnDescriptor));
    }
}
}
代码示例来源:origin: prestodb/presto
String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
代码示例来源:origin: com.twitter/parquet-thrift
/**
 * Builds the converted Parquet message from the conversion state.
 *
 * @return a message named {@code currentName}; empty when no type was converted
 */
public MessageType getConvertedMessageType() {
    // The root of a Parquet schema must be a GroupType; an absent current
    // type yields an empty message.
    if (currentType == null) {
        return new MessageType(currentName, new ArrayList<Type>());
    }
    return new MessageType(currentName, currentType.asGroupType().getFields());
}
代码示例来源:origin: com.facebook.presto.hive/hive-apache
/**
 * Indexes every leaf column of the schema by its column path, recording the
 * original (logical) type where one is declared.
 *
 * @param schema Parquet schema to validate against
 */
private SchemaCompatibilityValidator(MessageType schema) {
    for (ColumnDescriptor descriptor : schema.getColumns()) {
        ColumnPath path = ColumnPath.get(descriptor.getPath());
        columnsAccordingToSchema.put(path, descriptor);
        // Only some columns carry an original type; skip the rest.
        OriginalType originalType = schema.getType(descriptor.getPath()).getOriginalType();
        if (originalType != null) {
            originalTypes.put(path, originalType);
        }
    }
}
代码示例来源:origin: asakusafw/asakusafw
/**
 * Maps Parquet columns onto data-model properties purely by position.
 * Unmatched columns map to a {@code null} property and unmatched properties
 * map to a {@code null} column.
 *
 * @param target descriptor of the target data model
 * @param source Parquet schema to map from
 * @return positional mappings covering every column and every property
 */
private static List<Mapping> computeMappingByPosition(
        DataModelDescriptor target, MessageType source) {
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "Mapping columns by their position: model={0}", //$NON-NLS-1$
                target.getDataModelClass().getName()));
    }
    List<ColumnDescriptor> sourceColumns = source.getColumns();
    List<? extends PropertyDescriptor> targetProperties = target.getPropertyDescriptors();
    List<Mapping> mappings = new ArrayList<>();
    int paired = Math.min(sourceColumns.size(), targetProperties.size());
    // Pair columns with properties positionally while both sides have entries.
    for (int i = 0; i < paired; i++) {
        ColumnDescriptor column = sourceColumns.get(i);
        mappings.add(new Mapping(column, source.getType(column.getPath()), targetProperties.get(i)));
    }
    // Leftover source columns have no corresponding property.
    for (int i = paired, n = sourceColumns.size(); i < n; i++) {
        ColumnDescriptor column = sourceColumns.get(i);
        mappings.add(new Mapping(column, source.getType(column.getPath()), null));
    }
    // Leftover target properties have no corresponding column.
    for (int i = paired, n = targetProperties.size(); i < n; i++) {
        mappings.add(new Mapping(null, null, targetProperties.get(i)));
    }
    return mappings;
}
代码示例来源:origin: com.facebook.presto.hive/hive-apache
/**
 * Merges this schema with another into a new message that keeps this
 * schema's name; field merging is delegated to {@code mergeFields}.
 *
 * @param toMerge schema to merge into this one
 * @param strict whether merging should reject incompatible fields
 * @return a new merged message named after this schema
 */
public MessageType union(MessageType toMerge, boolean strict) {
return new MessageType(this.getName(), mergeFields(toMerge, strict));
}
代码示例来源:origin: com.facebook.presto.hive/hive-apache
/**
 * {@inheritDoc}
 *
 * Renders this message as {@code message <name> [(originalType)] { ... }}.
 * NOTE(review): the {@code indent} argument is ignored here — members are
 * always rendered with a fixed two-space indent; presumably a message root
 * always prints at top level, but confirm this is intentional.
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
sb.append("message ")
.append(getName())
// Only append the original-type suffix when one is set.
.append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
.append(" {\n");
membersDisplayString(sb, "  ");
sb.append("}\n");
}
代码示例来源:origin: org.apache.tajo/tajo-storage
/**
 * Prepares this record reader: stores the schemas and file reference, builds
 * the record converter via the read support, opens the file reader over the
 * requested columns, and sums the total row count across all blocks.
 *
 * @param requestedSchema projected schema the caller wants to read
 * @param fileSchema full schema of the Parquet file
 * @param extraMetadata extra key-value metadata from the file footer
 * @param readSupportMetadata metadata passed through to the read support
 * @param file path of the Parquet file being read
 * @param blocks row-group metadata for the file
 * @param configuration Hadoop configuration
 * @throws IOException if opening the file reader fails
 */
public void initialize(MessageType requestedSchema, MessageType fileSchema,
Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
Path file, List<BlockMetaData> blocks, Configuration configuration)
throws IOException {
this.requestedSchema = requestedSchema;
this.fileSchema = fileSchema;
this.file = file;
// Column count reflects the projection, not the full file schema.
this.columnCount = this.requestedSchema.getPaths().size();
this.recordConverter = readSupport.prepareForRead(
configuration, extraMetadata, fileSchema,
new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
List<ColumnDescriptor> columns = requestedSchema.getColumns();
reader = new ParquetFileReader(configuration, file, blocks, columns);
// Total records = sum of row counts over every row group.
for (BlockMetaData block : blocks) {
total += block.getRowCount();
}
LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
代码示例来源:origin: julienledem/redelm
MessageType parsed = MessageTypeParser.parseMessageType(example);
MessageType manuallyMade =
new MessageType("Document",
new PrimitiveType(REQUIRED, INT64, "DocId"),
new GroupType(OPTIONAL, "Links",
assertEquals(manuallyMade, parsed);
MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
new MessageType("m",
new GroupType(REQUIRED, "a",
new PrimitiveType(REQUIRED, BINARY, "b")),
parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
代码示例来源:origin: com.twitter/parquet-tools
/**
 * Pretty-prints the details of every top-level field of a Parquet message.
 *
 * @param out writer to print to
 * @param type message whose fields are shown
 */
public static void showDetails(PrettyPrintWriter out, MessageType type) {
    // A single shared path accumulator is threaded through all fields.
    List<String> columnPath = new ArrayList<String>();
    for (Type field : type.getFields()) {
        showDetails(out, field, 0, type, columnPath);
    }
}
内容来源于网络,如有侵权,请联系作者删除!