org.apache.parquet.column.Dictionary.getMaxId()方法的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(10.7k)|赞(0)|评价(0)|浏览(116)

本文整理了Java中org.apache.parquet.column.Dictionary.getMaxId()方法的一些代码示例,展示了Dictionary.getMaxId()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Dictionary.getMaxId()方法的具体详情如下:
包路径:org.apache.parquet.column.Dictionary
类名称:Dictionary
方法名:getMaxId

Dictionary.getMaxId介绍

暂无

代码示例

代码示例来源:origin: apache/hive

/**
 * Rebuilds {@code lookupTable} from the given dictionary by converting every
 * entry, in id order, from id {@code 0} up to and including
 * {@code dictionary.getMaxId()}.
 *
 * @param dictionary the dictionary whose binary entries are decoded and converted
 */
@Override
public void setDictionary(Dictionary dictionary) {
 int length = dictionary.getMaxId() + 1;
 // The final size is known up front, so presize the list to avoid repeated regrowth.
 lookupTable = new ArrayList<T>(length);
 for (int i = 0; i < length; i++) {
  lookupTable.add(convert(dictionary.decodeToBinary(i)));
 }
}

代码示例来源:origin: org.apache.parquet/parquet-avro

/**
 * Fills {@code dict} with the converted form of every dictionary entry,
 * indexed by dictionary id (ids {@code 0} through {@code getMaxId()} inclusive).
 *
 * @param dictionary the dictionary whose binary entries are decoded and converted
 */
@Override
@SuppressWarnings("unchecked")
public void setDictionary(Dictionary dictionary) {
 final int maxId = dictionary.getMaxId();
 // Generic arrays cannot be created directly, hence the Object[] allocation and cast.
 dict = (T[]) new Object[maxId + 1];
 for (int id = 0; id <= maxId; id++) {
  dict[id] = convert(dictionary.decodeToBinary(id));
 }
}

代码示例来源:origin: org.apache.parquet/parquet-protobuf

/**
 * Fills {@code dict} with one translated enum value descriptor per dictionary
 * entry, indexed by dictionary id (ids {@code 0} through {@code getMaxId()} inclusive).
 *
 * @param dictionary the dictionary whose binary entries are translated
 */
@Override
public void setDictionary(Dictionary dictionary) {
 final int maxId = dictionary.getMaxId();
 dict = new Descriptors.EnumValueDescriptor[maxId + 1];
 for (int id = 0; id <= maxId; id++) {
  dict[id] = translateEnumValue(dictionary.decodeToBinary(id));
 }
}

代码示例来源:origin: ai.h2o/h2o-parquet-parser

/**
 * Fills {@code _dict} with the UTF-8 string form of every dictionary entry,
 * indexed by dictionary id (ids {@code 0} through {@code getMaxId()} inclusive).
 *
 * @param dictionary the dictionary whose binary entries are decoded to strings
 */
@Override
public void setDictionary(Dictionary dictionary) {
 final int maxId = dictionary.getMaxId();
 _dict = new String[maxId + 1];
 for (int id = 0; id <= maxId; id++) {
  final Binary entry = dictionary.decodeToBinary(id);
  _dict[id] = entry.toStringUsingUTF8();
 }
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Prints the contents of a column's local dictionary to standard output: a
 * header line naming the column, then one {@code id: value} line per entry.
 * The decode method used for each entry is chosen from the column's type;
 * entries of any other type are skipped without output.
 *
 * @param columnDescriptor descriptor of the column the dictionary belongs to
 * @param localDictionary  dictionary whose entries are printed
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // getMaxId() is the largest valid id, so the bound must be inclusive; the
  // previous exclusive bound (i < getMaxId()) never printed the last entry.
  final int maxId = localDictionary.getMaxId();
  for (int i = 0; i <= maxId; ++i) {
   switch (columnDescriptor.getType()) {
    case INT32:
     System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
     break;
    case INT64:
     System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
     break;
    case INT96:
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
     // NOTE(review): the bytes are decoded with the platform default charset — confirm this is intended.
     System.out.println(format("%d: %s", i, new String(localDictionary.decodeToBinary(i).getBytesUnsafe())));
     break;
    case FLOAT:
     System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
     break;
    case DOUBLE:
     System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
     break;
    case BOOLEAN:
     System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
     break;
    default:
     break;
   }
  }
 }
}

代码示例来源:origin: Netflix/iceberg

for (int i=0; i<=dict.getMaxId(); i++) {
 switch (col.getType()) {
  case BINARY: dictSet.add((T) conversion.apply(dict.decodeToBinary(i)));

代码示例来源:origin: dremio/dremio-oss

localIdToGlobalId = new int[pageReader.dictionary.getMaxId() + 1];
final VectorContainer vectorContainer = globalDictionaries.getDictionaries().get(schemaElement.getName());
switch (schemaElement.getType()) {
   valueLookup.put(intVector.get(i), i);
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToInt(i));
   valueLookup.put(longVector.get(i), i);
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToLong(i));
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToBinary(i));
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToFloat(i));
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToDouble(i));
  for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
   localIdToGlobalId[i] = pageReader.dictionary.decodeToBoolean(i) ? 1 : 0;
this.dictionaryWidthBits =  BytesUtils.getWidthFromMaxInt(pageReader.dictionary.getMaxId() - 1);

代码示例来源:origin: org.apache.parquet/parquet-hadoop

/**
 * Reads the dictionary page of a column chunk and collects its decoded
 * values into a set.
 *
 * <p>BINARY, INT32, INT64, FLOAT and DOUBLE columns are decoded; for any other
 * type a warning is logged per dictionary entry and nothing is added.
 *
 * @param meta metadata of the column chunk whose dictionary is expanded
 * @return the set of decoded dictionary values, or {@code null} when the
 *         chunk has no dictionary page
 * @throws IOException declared for the dictionary page read
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
 ColumnDescriptor descriptor = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
 DictionaryPage dictPage = dictionaries.readDictionaryPage(descriptor);
 if (dictPage == null) {
  // No dictionary page: the chunk may not be dictionary-encoded.
  return null;
 }

 Dictionary dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
 Set values = new HashSet<T>();
 final int maxId = dictionary.getMaxId();
 for (int id = 0; id <= maxId; id++) {
  switch (meta.getType()) {
   case BINARY:
    values.add(dictionary.decodeToBinary(id));
    break;
   case INT32:
    values.add(dictionary.decodeToInt(id));
    break;
   case INT64:
    values.add(dictionary.decodeToLong(id));
    break;
   case FLOAT:
    values.add(dictionary.decodeToFloat(id));
    break;
   case DOUBLE:
    values.add(dictionary.decodeToDouble(id));
    break;
   default:
    LOG.warn("Unknown dictionary type{}", meta.getType());
  }
 }
 return (Set<T>) values;
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-hadoop

for (int i = 0; i <= dict.getMaxId(); i++) {
 dictSet.add((T) dictValueProvider.apply(i));

代码示例来源:origin: dremio/dremio-oss

/**
 * Builds a container with a single {@code BigIntVector} holding the sorted,
 * de-duplicated union of the long values decoded from all given dictionaries
 * and, when supplied, the values already present in {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose entries are decoded as longs
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor column descriptor; its path is used to name the field
 * @param bufferAllocator  allocator passed to the new container
 * @return the populated container, with record count set and schema built
 */
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
 final Field field = new Field(fieldName, true, new ArrowType.Int(64, true), null);
 final VectorContainer container = new VectorContainer(bufferAllocator);
 final BigIntVector vector = container.addOrGet(field);
 vector.allocateNew();

 // A sorted set both orders the values and removes duplicates across dictionaries.
 final SortedSet<Long> merged = Sets.newTreeSet();
 for (Dictionary dictionary : dictionaries) {
  final int maxId = dictionary.getMaxId();
  for (int id = 0; id <= maxId; ++id) {
   merged.add(dictionary.decodeToLong(id));
  }
 }
 if (existingDict != null) {
  final BigIntVector previous = existingDict.getValueAccessorById(BigIntVector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); ++row) {
   merged.add(previous.get(row));
  }
 }

 int written = 0;
 for (Long value : merged) {
  vector.setSafe(written++, value);
 }
 vector.setValueCount(written);
 container.setRecordCount(written);
 container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return container;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Builds a container with a single {@code IntVector} holding the sorted,
 * de-duplicated union of the int values decoded from all given dictionaries
 * and, when supplied, the values already present in {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose entries are decoded as ints
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor column descriptor; its path is used to name the field
 * @param bufferAllocator  allocator passed to the new container
 * @return the populated container, with record count set and schema built
 */
private static VectorContainer buildIntegerGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
 final Field field = new Field(fieldName, true, new ArrowType.Int(32, true), null);
 final VectorContainer container = new VectorContainer(bufferAllocator);
 final IntVector vector = container.addOrGet(field);
 vector.allocateNew();

 // A sorted set both orders the values and removes duplicates across dictionaries.
 final SortedSet<Integer> merged = Sets.newTreeSet();
 for (Dictionary dictionary : dictionaries) {
  final int maxId = dictionary.getMaxId();
  for (int id = 0; id <= maxId; ++id) {
   merged.add(dictionary.decodeToInt(id));
  }
 }
 if (existingDict != null) {
  final IntVector previous = existingDict.getValueAccessorById(IntVector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); ++row) {
   merged.add(previous.get(row));
  }
 }

 int written = 0;
 for (Integer value : merged) {
  vector.setSafe(written++, value);
 }
 vector.setValueCount(written);
 container.setRecordCount(written);
 container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return container;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Builds a container with a single {@code Float4Vector} holding the sorted,
 * de-duplicated union of the float values decoded from all given dictionaries
 * and, when supplied, the values already present in {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose entries are decoded as floats
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor column descriptor; its path is used to name the field
 * @param bufferAllocator  allocator passed to the new container
 * @return the populated container, with record count set and schema built
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
 final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null);
 final VectorContainer container = new VectorContainer(bufferAllocator);
 final Float4Vector vector = container.addOrGet(field);
 vector.allocateNew();

 // A sorted set both orders the values and removes duplicates across dictionaries.
 final SortedSet<Float> merged = Sets.newTreeSet();
 for (Dictionary dictionary : dictionaries) {
  final int maxId = dictionary.getMaxId();
  for (int id = 0; id <= maxId; ++id) {
   merged.add(dictionary.decodeToFloat(id));
  }
 }
 if (existingDict != null) {
  final Float4Vector previous = existingDict.getValueAccessorById(Float4Vector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); ++row) {
   merged.add(previous.get(row));
  }
 }

 int written = 0;
 for (Float value : merged) {
  vector.setSafe(written++, value);
 }
 vector.setValueCount(written);
 container.setRecordCount(written);
 container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return container;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Builds a container with a single {@code Float8Vector} holding the sorted,
 * de-duplicated union of the double values decoded from all given dictionaries
 * and, when supplied, the values already present in {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose entries are decoded as doubles
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor column descriptor; its path is used to name the field
 * @param bufferAllocator  allocator passed to the new container
 * @return the populated container, with record count set and schema built
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
 final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
 final VectorContainer container = new VectorContainer(bufferAllocator);
 final Float8Vector vector = container.addOrGet(field);
 vector.allocateNew();

 // A sorted set both orders the values and removes duplicates across dictionaries.
 final SortedSet<Double> merged = Sets.newTreeSet();
 for (Dictionary dictionary : dictionaries) {
  final int maxId = dictionary.getMaxId();
  for (int id = 0; id <= maxId; ++id) {
   merged.add(dictionary.decodeToDouble(id));
  }
 }
 if (existingDict != null) {
  final Float8Vector previous = existingDict.getValueAccessorById(Float8Vector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); ++row) {
   merged.add(previous.get(row));
  }
 }

 int written = 0;
 for (Double value : merged) {
  vector.setSafe(written++, value);
 }
 vector.setValueCount(written);
 container.setRecordCount(written);
 container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return container;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Builds a container with a single {@code VarBinaryVector} holding the sorted,
 * de-duplicated union of the binary values decoded from all given dictionaries
 * and, when supplied, the values already present in {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose entries are decoded as binaries
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor column descriptor; its path is used to name the field
 * @param bufferAllocator  allocator passed to the new container
 * @return the populated container, with record count set and schema built
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
 final Field field = new Field(fieldName, true, new ArrowType.Binary(), null);
 final VectorContainer container = new VectorContainer(bufferAllocator);
 final VarBinaryVector vector = container.addOrGet(field);
 vector.allocateNew();

 // A sorted set both orders the values and removes duplicates across dictionaries.
 final SortedSet<Binary> merged = new TreeSet<>();
 for (Dictionary dictionary : dictionaries) {
  final int maxId = dictionary.getMaxId();
  for (int id = 0; id <= maxId; ++id) {
   merged.add(dictionary.decodeToBinary(id));
  }
 }
 if (existingDict != null) {
  final VarBinaryVector previous = existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); ++row) {
   merged.add(Binary.fromConstantByteArray(previous.get(row)));
  }
 }

 int written = 0;
 for (Binary value : merged) {
  final byte[] bytes = value.getBytes();
  vector.setSafe(written++, bytes, 0, bytes.length);
 }
 vector.setValueCount(written);
 container.setRecordCount(written);
 container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return container;
}

相关文章