parquet.schema.Type.isRepetition()方法的使用及代码示例

x33g5p2x  于2022-01-30 转载在 其他  
字(9.5k)|赞(0)|评价(0)|浏览(203)

本文整理了Java中parquet.schema.Type.isRepetition()方法的一些代码示例,展示了Type.isRepetition()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Type.isRepetition()方法的具体详情如下:
包路径:parquet.schema.Type
类名称:Type
方法名:isRepetition

Type.isRepetition介绍

暂无

代码示例

代码示例来源:origin: apache/incubator-gobblin

/**
 * Appends {@code value} to the value list stored for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index used to look up both the field type in the schema and its value list
 * @param value the value to append
 * @throws IllegalStateException if the field is not REPEATED and its list already holds a value
 */
public void add(int fieldIndex, Primitive value) {
 Type fieldType = this.schema.getType(fieldIndex);
 List<Object> values = this.data[fieldIndex];
 // A REPEATED field takes any number of values; any other field only takes its first one.
 if (fieldType.isRepetition(REPEATED) || values.isEmpty()) {
  values.add(value);
  return;
 }
 throw new IllegalStateException(
   "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + values);
}

代码示例来源:origin: prestodb/presto

/**
 * Builds a group type that wraps a single repeated nested type.
 *
 * @param repetition repetition of the resulting group
 * @param alias name of the resulting group
 * @param originalType original type annotation passed to the resulting group
 * @param nested the nested field; must be REPEATED
 * @return the new wrapping group type
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
  boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (nestedIsRepeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}

代码示例来源:origin: prestodb/presto

/**
 * Creates the group type wrapping a repeated nested field.
 *
 * @param repetition repetition given to the wrapper group
 * @param alias name given to the wrapper group
 * @param originalType original type annotation given to the wrapper group
 * @param nested the field to wrap; it has to be REPEATED
 * @return a group type containing {@code nested}
 * @throws IllegalArgumentException when {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
  if (nested.isRepetition(Repetition.REPEATED)) {
    GroupType wrapper = new GroupType(repetition, alias, originalType, nested);
    return wrapper;
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}

代码示例来源:origin: prestodb/presto

/**
 * Locates the column that holds the elements of an array column.
 *
 * @param columnIO the column to start the search from
 * @return the element column
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO)
{
  // Walk down through first children until reaching a repeated group or a non-group column.
  ColumnIO current = columnIO;
  while (current instanceof GroupColumnIO) {
    if (current.getType().isRepetition(REPEATED)) {
      break;
    }
    current = ((GroupColumnIO) current).getChild(0);
  }

  /* Backward-compatibility support for 2-level arrays where the repeated field is not a group:
   *   optional group my_list (LIST) {
   *      repeated int32 element;
   *   }
   */
  if (!(current instanceof GroupColumnIO)) {
    return current;
  }

  /* Standard 3-level structure, where the middle level is a repeated group with a single field:
   *  optional group my_list (LIST) {
   *     repeated group element {
   *        required binary str (UTF8);
   *     };
   *  }
   * In that case the single child is the element column.
   */
  GroupColumnIO group = (GroupColumnIO) current;
  if (current.getType().getOriginalType() != null) {
    return current;
  }
  if (group.getChildrenCount() != 1) {
    return current;
  }
  String name = current.getName();
  if (name.equals("array")) {
    return current;
  }
  if (name.equals(current.getParent().getName() + "_tuple")) {
    return current;
  }
  return group.getChild(0);
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Wraps a repeated nested field in a group, so that an empty list and a null
 * list stay distinguishable when the group is optional.
 *
 * @param repetition   repetition of the wrapping group
 * @param alias        name of the field
 * @param originalType original type annotation of the wrapping group
 * @param nested       the nested repeated field
 * @return a group type
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
 final boolean repeatedNested = nested.isRepetition(Repetition.REPEATED);
 if (repeatedNested) {
  return new GroupType(repetition, alias, originalType, nested);
 }
 throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Records {@code value} for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index used to look up both the field type in the schema and its value list
 * @param value the value to record
 * @throws IllegalStateException if the field is not REPEATED and already has a value
 */
private void add(int fieldIndex, Primitive value) {
 Type fieldType = schema.getType(fieldIndex);
 List<Object> values = data[fieldIndex];
 // Only REPEATED fields may accumulate more than one value.
 if (fieldType.isRepetition(Type.Repetition.REPEATED) || values.isEmpty()) {
  values.add(value);
 } else {
  throw new IllegalStateException("field "+fieldIndex+" (" + fieldType.getName() + ") can not have more than one value: " + values);
 }
}

代码示例来源:origin: org.apache.gobblin/gobblin-parquet

/**
 * Adds {@code value} to the values held for the field at {@code fieldIndex}.
 *
 * @param fieldIndex index used to look up both the field type in the schema and its value list
 * @param value the value to add
 * @throws IllegalStateException if a non-REPEATED field would receive a second value
 */
public void add(int fieldIndex, Primitive value) {
 Type fieldType = this.schema.getType(fieldIndex);
 List<Object> current = this.data[fieldIndex];
 boolean acceptsValue = fieldType.isRepetition(REPEATED) || current.isEmpty();
 if (!acceptsValue) {
  throw new IllegalStateException(
    "field " + fieldIndex + " (" + fieldType.getName() + ") can not have more than one value: " + current);
 }
 current.add(value);
}

代码示例来源:origin: org.apache.tajo/tajo-storage

/**
 * Converts a list of Parquet fields into a schema, one column per field.
 *
 * @param parquetFields the Parquet fields to convert, in order
 * @return a schema built from the converted columns
 * @throws RuntimeException if any of the fields is REPEATED
 */
private Schema convertFields(List<Type> parquetFields) {
 List<Column> converted = new ArrayList<Column>();
 for (Type field : parquetFields) {
  // A bare REPEATED field is rejected here.
  if (field.isRepetition(Type.Repetition.REPEATED)) {
   throw new RuntimeException("REPEATED not supported outside LIST or MAP. Type: " + field);
  }
  converted.add(convertField(field));
 }
 return new Schema(converted.toArray(new Column[converted.size()]));
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Checks the fields lying strictly between the previously seen field and
 * {@code index}, and fails if any of them is REQUIRED.
 *
 * @param index index of the field being visited now (exclusive upper bound of the check)
 * @throws InvalidRecordException if a skipped field is REQUIRED
 */
private void validateMissingFields(int index) {
 int skippedIndex = previousField.peek() + 1;
 while (skippedIndex < index) {
  Type skipped = types.peek().asGroupType().getType(skippedIndex);
  if (skipped.isRepetition(Repetition.REQUIRED)) {
   throw new InvalidRecordException("required field is missing " + skipped);
  }
  skippedIndex++;
 }
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Sets the levels and paths of this column, then propagates them to every child
 * with the child's own name, index and level increments applied.
 *
 * @param r repetition level of this column
 * @param d definition level of this column
 * @param fieldPath field names leading to this column
 * @param indexFieldPath field indexes leading to this column
 * @param repetition the repeated columns leading to this column
 * @param path the columns leading to this column
 */
@Override
void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) {
 super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path);
 for (ColumnIO child : this.children) {
  // Extend both paths by one slot and fill it with the child's name / index.
  String[] childFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1);
  int[] childIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1);
  childFieldPath[fieldPath.length] = child.getType().getName();
  childIndexFieldPath[indexFieldPath.length] = child.getIndex();

  // A repeated child gets its own copy of the repetition list with itself appended.
  boolean childRepeated = child.getType().isRepetition(REPEATED);
  List<ColumnIO> childRepetition = repetition;
  if (childRepeated) {
   childRepetition = new ArrayList<ColumnIO>(repetition);
   childRepetition.add(child);
  }

  List<ColumnIO> childPath = new ArrayList<ColumnIO>(path);
  childPath.add(child);

  // the type repetition level increases whenever there's a possible repetition
  int childR = childRepeated ? r + 1 : r;
  // the type definition level increases whenever a field can be missing (not required)
  int childD = child.getType().isRepetition(REQUIRED) ? d : d + 1;

  child.setLevels(childR, childD, childFieldPath, childIndexFieldPath, childRepetition, childPath);
 }
}

代码示例来源:origin: uber/hudi

/**
 * Builds the Hive table schema that corresponds to a Parquet schema.
 *
 * <p>Each top-level Parquet field yields one entry, in field order: the key is the
 * Hive-compatible field name, the value is the textual Hive type.
 *
 * @param messageType : Parquet Schema
 * @return : Hive Table schema as an insertion-ordered MAP[String,String]
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType)
  throws IOException {
 Map<String, String> hiveSchema = Maps.newLinkedHashMap();
 for (Type field : messageType.getFields()) {
  StringBuilder hiveType = new StringBuilder();
  String fieldName = field.getName();
  // A repeated top-level field is rendered as a Hive array; anything else is converted directly.
  boolean repeated = field.isRepetition(Type.Repetition.REPEATED);
  if (!repeated) {
   hiveType.append(convertField(field));
  } else {
   hiveType.append(createHiveArray(field, ""));
  }
  hiveSchema.put(hiveCompatibleFieldName(fieldName, false), hiveType.toString());
 }
 return hiveSchema;
}

代码示例来源:origin: com.facebook.presto/presto-hive

/**
 * Produces a group type whose only content is the given repeated nested type.
 *
 * @param repetition repetition of the produced group
 * @param alias name of the produced group
 * @param originalType original type annotation of the produced group
 * @param nested the nested type, required to be REPEATED
 * @return the produced group type
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
  boolean repeated = nested.isRepetition(Repetition.REPEATED);
  if (repeated) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}

代码示例来源:origin: com.facebook.presto/presto-hive

/**
 * Wraps {@code nested} in a new group type after checking that it is REPEATED.
 *
 * @param repetition repetition of the new group
 * @param alias name of the new group
 * @param originalType original type annotation of the new group
 * @param nested the nested type to wrap
 * @return the new group type
 * @throws IllegalArgumentException if {@code nested} is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
{
  if (nested.isRepetition(Repetition.REPEATED)) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  else {
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
  }
}

代码示例来源:origin: com.twitter/parquet-pig

/**
 * Converts a list of Parquet fields into a schema.
 *
 * <p>A REPEATED field is wrapped into a BAG field whose schema contains the
 * converted field; any other field is added as converted.
 *
 * @param parquetFields the Parquet fields to convert, in order
 * @return the schema made of the converted fields
 * @throws SchemaConversionException if converting a field raises a FrontendException
 */
private Schema convertFields(List<Type> parquetFields) {
 List<FieldSchema> converted = new ArrayList<Schema.FieldSchema>();
 for (Type parquetType : parquetFields) {
  FieldSchema toAdd;
  try {
   FieldSchema inner = getFieldSchema(parquetType);
   if (!parquetType.isRepetition(Repetition.REPEATED)) {
    toAdd = inner;
   } else {
    Schema bagSchema = new Schema(Arrays.asList(inner));
    toAdd = new FieldSchema(null, bagSchema, DataType.BAG);
   }
  } catch (FrontendException fe) {
   throw new SchemaConversionException("can't convert "+ parquetType, fe);
  }
  converted.add(toAdd);
 }
 return new Schema(converted);
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Picks the converter for a field.
 *
 * <p>A REPEATED field gets a repeated primitive or repeated group converter, which is
 * also registered in {@code repeatedConverters}; any other field is delegated to
 * {@code getConverterFromDescription}.
 *
 * @param type the field type
 * @param fieldIndex index of the field, passed on to the created converter
 * @return the converter for the field
 */
private Converter getFieldConverter(Type type, int fieldIndex) {
 if (!type.isRepetition(Type.Repetition.REPEATED)) {
  return getConverterFromDescription(type, fieldIndex, this);
 }
 Converter repeatedConverter;
 if (type.isPrimitive()) {
  repeatedConverter = new Repeated.RepeatedPrimitiveConverter(
    type.asPrimitiveType(), this, fieldIndex);
 } else {
  repeatedConverter = new Repeated.RepeatedGroupConverter(
    type.asGroupType(), this, fieldIndex);
 }
 repeatedConverters.add((Repeated) repeatedConverter);
 return repeatedConverter;
}

代码示例来源:origin: com.facebook.presto.hive/hive-apache

/**
 * Finds the column at repetition level {@code r} by walking up the parent chain.
 *
 * @param r the repetition level to look for
 * @return this column when it is REPEATED and its repetition level equals {@code r};
 *         otherwise the result of asking the parent
 * @throws InvalidRecordException if there is no parent, or the parent's definition
 *         level is below {@code r}
 */
ColumnIO getParent(int r) {
 boolean isTarget = getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED);
 if (isTarget) {
  return this;
 }
 // Delegate upwards only while a parent exists and its definition level is at least r.
 if (getParent() != null && getParent().getDefinitionLevel() >= r) {
  return getParent().getParent(r);
 }
 throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath()));
}

代码示例来源:origin: org.apache.tajo/tajo-storage

/**
 * Writes the fields of one tuple to the record consumer.
 *
 * @param schema the Parquet group type whose fields are written
 * @param tajoSchema the Tajo schema describing the tuple's columns
 * @param tuple the tuple holding the values
 * @throws RuntimeException if a REQUIRED field has a null value in the tuple
 */
private void writeRecordFields(GroupType schema, Schema tajoSchema,
                Tuple tuple) {
 List<Type> parquetFields = schema.getFields();
 // Parquet ignores Tajo NULL_TYPE columns, so the Parquet index may lag behind the Tajo one.
 int parquetIndex = 0;
 for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
  Column column = tajoSchema.getColumn(tajoIndex);
  boolean isNullTypeColumn = column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE;
  if (!isNullTypeColumn) {
   Datum datum = tuple.get(tajoIndex);
   Type fieldType = parquetFields.get(parquetIndex);
   if (tuple.isNull(tajoIndex)) {
    // A null is only acceptable when the field is not REQUIRED; then nothing is written.
    if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
     throw new RuntimeException("Null-value for required field: " +
       column.getSimpleName());
    }
   } else {
    recordConsumer.startField(fieldType.getName(), parquetIndex);
    writeValue(fieldType, column, datum);
    recordConsumer.endField(fieldType.getName(), parquetIndex);
   }
   ++parquetIndex;
  }
 }
}

代码示例来源:origin: com.twitter/parquet-pig

int i = 0;
for (Type field : parquetSchema.getFields()) {
 if (field.isPrimitive() && field.isRepetition(Repetition.OPTIONAL)) {
  PrimitiveType primitiveType = field.asPrimitiveType();
  switch (primitiveType.getPrimitiveTypeName()) {

代码示例来源:origin: uber/hudi

if (!elementType.isRepetition(Type.Repetition.REPEATED)) {
 throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);

相关文章