Usage and code examples of the org.apache.hadoop.hive.ql.io.orc.Reader class


This article collects Java code examples for the org.apache.hadoop.hive.ql.io.orc.Reader class and shows how it is used in practice. The snippets were extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, and should be useful reference material. Details of the Reader class:
Package: org.apache.hadoop.hive.ql.io.orc
Class name: Reader

About Reader

The interface for reading ORC files. One Reader can support multiple concurrent RecordReaders.
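
Before the project-specific snippets below, here is a minimal, self-contained sketch of typical Reader usage. It is not taken from any of the quoted projects; the file path /tmp/example.orc and the class name OrcReaderExample are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class OrcReaderExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Opening the file reads only the footer; row data is not scanned yet.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"), // hypothetical path
        OrcFile.readerOptions(conf));
    System.out.println("rows: " + reader.getNumberOfRows());
    System.out.println("compression: " + reader.getCompression());
    System.out.println("schema: " + reader.getObjectInspector().getTypeName());
    // Each rows() call returns an independent RecordReader, so several of
    // them can iterate over the same file concurrently.
    RecordReader rows = reader.rows();
    Object row = null;
    while (rows.hasNext()) {
      row = rows.next(row); // passing the previous row back allows object reuse
    }
    rows.close();
  }
}

Note that next(previous) may reuse the object passed in; this is why the snippets below pass null to obtain a fresh row on the first call.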

Code examples

Code example source: apache/hive

writerOptions.compress(reader.getCompression())
    .version(reader.getFileVersion())
    .rowIndexStride(reader.getRowIndexStride())
    .inspector(reader.getObjectInspector());
if (reader.getCompression() != org.apache.hadoop.hive.ql.io.orc.CompressionKind.NONE) {
  writerOptions.bufferSize(reader.getCompressionSize()).enforceBufferSize();
}
List<StripeInformation> stripes = reader.getStripes();
List<StripeStatistics> stripeStats = reader.getOrcProtoStripeStatistics();
long lastRow = reader.getNumberOfRows() - 1;
// Seek to the last row and read it back.
try (RecordReader rr = reader.rows()) {
  rr.seekToRow(lastRow);
  OrcStruct row = (OrcStruct) rr.next(null);
  StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
  // ... (lines elided in this excerpt)
}
// Copy user metadata, except the ACID key index, to the new writer.
for (String metadataKey : reader.getMetadataKeys()) {
  if (!metadataKey.equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
    writer.addUserMetadata(metadataKey, reader.getMetadataValue(metadataKey));
  }
}

Code example source: apache/hive

private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
 //create reader, look at footer
 //no need to check side file since it can only be in a streaming ingest delta
 Reader orcReader = OrcFile.createReader(bucket.getPath(), OrcFile.readerOptions(fs.getConf()).filesystem(fs));
 if (orcReader.hasMetadataValue(ACID_STATS)) {
  try {
   ByteBuffer val = orcReader.getMetadataValue(ACID_STATS).duplicate();
   String acidStats = utf8Decoder.decode(val).toString();
   String[] parts = acidStats.split(",");
   long updates = Long.parseLong(parts[1]);
   long deletes = Long.parseLong(parts[2]);
   return deletes > 0 || updates > 0;
  } catch (CharacterCodingException e) {
   throw new IllegalArgumentException("Bad string encoding for " + ACID_STATS, e);
  }
 } else {
  throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
 }
}

Code example source: apache/drill

if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
  keyWrapper.setInputPath(path);
  keyWrapper.setIsIncompatFile(true);
  // ... (lines elided in this excerpt)
  keyWrapper.setCompression(reader.getCompressionKind());
  keyWrapper.setCompressBufferSize(reader.getCompressionSize());
  keyWrapper.setVersion(reader.getFileVersion());
  keyWrapper.setRowIndexStride(reader.getRowIndexStride());
  keyWrapper.setTypes(reader.getTypes());
} else {
  stripeIdx++;
  // ...
}

Code example source: apache/hive

static boolean isAcidKeyIndexValid(Reader reader) {
 if (reader.getNumberOfRows() == 0) {
  return true;
 }
 // The number of stripes should match the key index count
 List<StripeInformation> stripes = reader.getStripes();
 RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
 if (keyIndex == null) {
  return false;
 }
 for (int idx = 0; idx < keyIndex.length; ++idx) {
  if (keyIndex[idx] == null) {
   LOG.info("*** keyIndex[" + idx + "] is null");
   return false;
  }
 }
 return stripes.size() == keyIndex.length;
}

Code example source: apache/hive

@Override
 public SerDeStats getStats() {
  stats.setRawDataSize(file.getRawDataSize());
  stats.setRowCount(file.getNumberOfRows());
  return stats;
 }
}

Code example source: com.facebook.presto/presto-raptor

Reader reader = createReader(fileSystem, path(input));
if (reader.getNumberOfRows() < rowsToDelete.length()) {
  throw new IOException("File has fewer rows than deletion vector");
}
if (reader.getNumberOfRows() == deleteRowCount) {
  return new OrcFileInfo(0, 0);
}
if (reader.getNumberOfRows() >= Integer.MAX_VALUE) {
  throw new IOException("File has too many rows");
}
int inputRowCount = toIntExact(reader.getNumberOfRows());
// writerOptions is built earlier in this method (elided); the excerpt keeps
// only the settings copied from the reader:
writerOptions.compress(reader.getCompression())
    .inspector(reader.getObjectInspector());
try (Closer<RecordReader, IOException> recordReader = closer(reader.rows(), RecordReader::close);
    Closer<Writer, IOException> writer = closer(createWriter(path(output), writerOptions), Writer::close)) {
  if (reader.hasMetadataValue(OrcFileMetadata.KEY)) {
    ByteBuffer orcFileMetadata = reader.getMetadataValue(OrcFileMetadata.KEY);
    writer.get().addUserMetadata(OrcFileMetadata.KEY, orcFileMetadata);
  }
  // ...
}

Code example source: apache/hive

private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
 org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
 Reader reader = OrcFile.createReader(orcFile,
     OrcFile.readerOptions(conf).filesystem(fs));
 RecordReader rows = reader.rows();
 StructObjectInspector inspector = (StructObjectInspector) reader
     .getObjectInspector();
 System.out.format("Found Bucket File : %s \n", orcFile.getName());
 ArrayList<SampleRec> result = new ArrayList<SampleRec>();
 while (rows.hasNext()) {
  Object row = rows.next(null);
  SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
  result.add(rec);
 }
 return result;
}

Code example source: com.facebook.presto.hive/hive-apache

Path path = new Path(filename);
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
System.out.println("File Version: " + reader.getFileVersion().getName() +
    " with " + reader.getWriterVersion());
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
if (reader.getCompression() != CompressionKind.NONE) {
  System.out.println("Compression size: " + reader.getCompressionSize());
}
System.out.println("Type: " + reader.getObjectInspector().getTypeName());
System.out.println("\nStripe Statistics:");
Metadata metadata = reader.getMetadata();
for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
  System.out.println("  Stripe " + (n + 1) + ":");
  // ... per-column stripe statistics output elided
}
ColumnStatistics[] stats = reader.getStatistics();
int colCount = stats.length;
System.out.println("\nFile Statistics:");
// ... per-column file statistics output elided
for (StripeInformation stripe : reader.getStripes()) {
  ++stripeIx;
  long stripeStart = stripe.getOffset();
  // ...
}

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath, // first line reconstructed; truncated in source
    OrcFile.readerOptions(conf).filesystem(fs));
StructObjectInspector readerInspector =
    (StructObjectInspector) reader.getObjectInspector();
List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector.
    getStructFieldRef("dec").getFieldObjectInspector();
RecordReader rows = reader.rows();
while (rows.hasNext()) {
  Object row = rows.next(null);
  // ... per-row decimal assertions elided
}
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[0].getNumberOfValues());
assertEquals(0, stats[1].getNumberOfValues());

Code example source: apache/hive

assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5077, reader.getNumberOfRows());
DecimalColumnStatistics stats =
    (DecimalColumnStatistics) reader.getStatistics()[5];
assertEquals(71, stats.getNumberOfValues());
assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
int stripeCount = 0; // declaration added; elided in the source excerpt
int rowCount = 0;
long currentOffset = -1;
for (StripeInformation stripe : reader.getStripes()) {
  stripeCount += 1;
  rowCount += stripe.getNumberOfRows();
  // ... bookkeeping for currentOffset elided
}
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(2, stripeCount);
assertEquals(reader.getContentLength(), currentOffset);
RecordReader rows = reader.rows();
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
    inspector.getTypeName());
// ... row-content assertions elided
assertEquals(false, rows.hasNext());
assertEquals(1.0, rows.getProgress(), 0.00001);
assertEquals(reader.getNumberOfRows(), rows.getRowNumber());

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
  OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(4, reader.getNumberOfRows());
assertEquals(273, reader.getRawDataSize());
assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
assertEquals("count: 4 hasNull: false", stats[0].toString());
StructObjectInspector readerInspector =
    (StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT,
  readerInspector.getCategory());
StringObjectInspector st = (StringObjectInspector) readerInspector.
  getStructFieldRef("string1").getFieldObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);

Code example source: apache/hive

Reader reader = OrcFile.createReader(outputFilePath,
  OrcFile.readerOptions(conf).filesystem(localFs));
assertTrue(reader.getNumberOfRows() == rownum);
assertEquals(reader.getCompression(), CompressionKind.ZLIB);
StructObjectInspector soi =
  (StructObjectInspector)reader.getObjectInspector();
StructTypeInfo ti =
  (StructTypeInfo)TypeInfoUtils.getTypeInfoFromObjectInspector(soi);
// ... field-category assertion elided; it checks PrimitiveObjectInspector.PrimitiveCategory.STRING
RecordReader rows = reader.rows();
Object row = rows.next(null);

Code example source: apache/hive

int stripeCount = 0; // declaration added; elided in the source excerpt
int rowCount = 0;
long currentOffset = -1;
for (StripeInformation stripe : reader.getStripes()) {
  stripeCount += 1;
  rowCount += stripe.getNumberOfRows();
  // ... offset bookkeeping elided
}
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(2, stripeCount);
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(7500, stats[1].getNumberOfValues());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
StructObjectInspector readerInspector = (StructObjectInspector) reader
    .getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
// ... type-name assertion elided; the expected "struct<boolean1:boolean,byte1:tinyint,short1:smallint,..." string is truncated in the source
StringObjectInspector mk = (StringObjectInspector) ma
    .getMapKeyObjectInspector(); // "ma" is a MapObjectInspector obtained in elided code
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);

Code example source: apache/hive

@Test
public void emptyFile() throws Exception {
 ObjectInspector inspector;
 synchronized (TestOrcFile.class) {
  inspector = ObjectInspectorFactory.getReflectionObjectInspector
    (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
 }
 Writer writer = OrcFile.createWriter(testFilePath,
                    OrcFile.writerOptions(conf)
                    .inspector(inspector)
                    .stripeSize(1000)
                    .compress(CompressionKind.NONE)
                    .bufferSize(100));
 writer.close();
 Reader reader = OrcFile.createReader(testFilePath,
   OrcFile.readerOptions(conf).filesystem(fs));
 assertEquals(false, reader.rows().hasNext());
 assertEquals(CompressionKind.NONE, reader.getCompression());
 assertEquals(0, reader.getNumberOfRows());
 assertEquals(0, reader.getCompressionSize());
 assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
 assertEquals(3, reader.getContentLength());
 assertEquals(false, reader.getStripes().iterator().hasNext());
}

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
    OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(COUNT, reader.getNumberOfRows());
RecordReader rows = reader.rows();
OrcStruct row = null;
for (int i = COUNT - 1; i >= 0; --i) {
  // ... seek to each row in reverse order and check its contents (elided)
}
reader.getStripes().iterator(); // the iterator is used in elided offset computation
long offsetOfStripe2 = 0;
long offsetOfStripe4 = 0;
// ... stripe offsets computed from the stripe iterator (elided)
boolean[] columns = new boolean[reader.getStatistics().length];
columns[5] = true; // long column
columns[9] = true; // text column
rows = reader.rowsOptions(new Reader.Options()
    .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
    .include(columns));

Code example source: apache/hive

// Excerpt: tail of a writer-options builder chain (its receiver is elided in the source)
    .version(fileVersion)
    .rowIndexStride(rowIndexStride)
    .inspector(reader.getObjectInspector());

Code example source: apache/hive

types.add(typeBuilder.build());
Mockito.when(reader.getTypes()).thenReturn(types);
Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class)))
  .thenReturn(recordReader);
Mockito.when(recordReader.next(row3)).thenReturn(row5);
Mockito.when(reader.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
  .thenReturn(true);
Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
  .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61"
    .getBytes("UTF-8")));
Mockito.when(reader.getStripes())
  .thenReturn(createStripes(2, 2, 1));

Code example source: apache/hive

ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
// ... assertion on stats[3].toString() elided
StripeStatistics ss = reader.getStripeStatistics().get(0);
assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
StructObjectInspector readerInspector =
    (StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT,
    readerInspector.getCategory());
StringObjectInspector mk = (StringObjectInspector)
    ma.getMapKeyObjectInspector(); // "ma" is a MapObjectInspector obtained in elided code
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);

Code example source: org.apache.spark/spark-hive_2.10

public SparkOrcNewRecordReader(Reader file, Configuration conf,
  long offset, long length) throws IOException {
 List<OrcProto.Type> types = file.getTypes();
 numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
 value = new OrcStruct(numColumns);
 this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
   length);
 this.objectInspector = file.getObjectInspector();
}

Code example source: apache/hive

if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
  keyWrapper.setInputPath(path);
  keyWrapper.setIsIncompatFile(true);
  // ... (lines elided in this excerpt)
  keyWrapper.setCompression(reader.getCompressionKind());
  keyWrapper.setCompressBufferSize(reader.getCompressionSize());
  keyWrapper.setFileVersion(reader.getFileVersion());
  keyWrapper.setWriterVersion(reader.getWriterVersion());
  keyWrapper.setRowIndexStride(reader.getRowIndexStride());
  keyWrapper.setFileSchema(reader.getSchema());
} else {
  stripeIdx++;
  // ...
}
