本文整理了Java中org.apache.hadoop.hive.ql.io.orc.Reader类的一些代码示例,展示了Reader类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Reader类的具体详情如下:
包路径:org.apache.hadoop.hive.ql.io.orc.Reader
类名称:Reader
[英]The interface for reading ORC files. One Reader can support multiple concurrent RecordReader.
[中]用于读取ORC文件的接口。一个Reader可以支持多个并发的RecordReader。
代码示例来源:origin: apache/hive
.compress(reader.getCompression())
.version(reader.getFileVersion())
.rowIndexStride(reader.getRowIndexStride())
.inspector(reader.getObjectInspector());
if (reader.getCompression() != org.apache.hadoop.hive.ql.io.orc.CompressionKind.NONE) {
writerOptions.bufferSize(reader.getCompressionSize()).enforceBufferSize();
List<StripeInformation> stripes = reader.getStripes();
List<StripeStatistics> stripeStats = reader.getOrcProtoStripeStatistics();
long lastRow = reader.getNumberOfRows() - 1;
try (RecordReader rr = reader.rows()) {
rr.seekToRow(lastRow);
OrcStruct row = (OrcStruct) rr.next(null);
StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
for (String metadataKey : reader.getMetadataKeys()) {
if (!metadataKey.equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
writer.addUserMetadata(metadataKey, reader.getMetadataValue(metadataKey));
代码示例来源:origin: apache/hive
/**
 * Decides whether a bucket file requires compaction by reading the ACID
 * statistics stored in its ORC footer metadata.
 *
 * @param bucket the bucket file to inspect
 * @param fs the filesystem holding the bucket file
 * @return true if the file records any updates or deletes
 * @throws IOException if the ORC reader cannot be created
 * @throws IllegalStateException if the footer lacks the ACID_STATS entry
 * @throws IllegalArgumentException if the stats value is not valid UTF-8
 */
private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
  // Only the footer is consulted; side files are irrelevant here because
  // they can only occur in a streaming-ingest delta.
  Reader orcReader = OrcFile.createReader(bucket.getPath(), OrcFile.readerOptions(fs.getConf()).filesystem(fs));
  if (!orcReader.hasMetadataValue(ACID_STATS)) {
    throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
  }
  try {
    ByteBuffer statsBuffer = orcReader.getMetadataValue(ACID_STATS).duplicate();
    String[] fields = utf8Decoder.decode(statsBuffer).toString().split(",");
    // Fields appear to be ordered inserts,updates,deletes — compaction is
    // needed as soon as any row was updated or deleted.
    long updateCount = Long.parseLong(fields[1]);
    long deleteCount = Long.parseLong(fields[2]);
    return deleteCount > 0 || updateCount > 0;
  } catch (CharacterCodingException e) {
    throw new IllegalArgumentException("Bad string encoding for " + ACID_STATS, e);
  }
}
代码示例来源:origin: apache/drill
if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
keyWrapper.setInputPath(path);
keyWrapper.setIsIncompatFile(true);
keyWrapper.setCompression(reader.getCompressionKind());
keyWrapper.setCompressBufferSize(reader.getCompressionSize());
keyWrapper.setVersion(reader.getFileVersion());
keyWrapper.setRowIndexStride(reader.getRowIndexStride());
keyWrapper.setTypes(reader.getTypes());
} else {
stripeIdx++;
代码示例来源:origin: apache/hive
/**
 * Validates the ACID key index of an ORC file: the index must parse, contain
 * no null entries, and have exactly one entry per stripe.
 *
 * @param reader the ORC reader whose key index is checked
 * @return true if the key index is present and consistent with the stripes
 */
static boolean isAcidKeyIndexValid(Reader reader) {
  // An empty file trivially has a valid (empty) key index.
  if (reader.getNumberOfRows() == 0) {
    return true;
  }
  List<StripeInformation> stripes = reader.getStripes();
  RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
  if (keyIndex == null) {
    return false;
  }
  // Any missing entry invalidates the whole index.
  for (int i = 0; i < keyIndex.length; i++) {
    if (keyIndex[i] == null) {
      LOG.info("*** keyIndex[" + i + "] is null");
      return false;
    }
  }
  // Each stripe must be covered by exactly one key-index entry.
  return keyIndex.length == stripes.size();
}
代码示例来源:origin: apache/hive
/**
 * Reports serde-level statistics taken from the underlying ORC reader's
 * footer: the raw (uncompressed) data size and the total row count.
 *
 * @return the shared SerDeStats instance, refreshed from the file footer
 */
@Override
public SerDeStats getStats() {
  final long rawDataSize = file.getRawDataSize();
  stats.setRawDataSize(rawDataSize);
  final long rowCount = file.getNumberOfRows();
  stats.setRowCount(rowCount);
  return stats;
}
}
代码示例来源:origin: com.facebook.presto/presto-raptor
Reader reader = createReader(fileSystem, path(input));
if (reader.getNumberOfRows() < rowsToDelete.length()) {
throw new IOException("File has fewer rows than deletion vector");
if (reader.getNumberOfRows() == deleteRowCount) {
return new OrcFileInfo(0, 0);
if (reader.getNumberOfRows() >= Integer.MAX_VALUE) {
throw new IOException("File has too many rows");
int inputRowCount = toIntExact(reader.getNumberOfRows());
.compress(reader.getCompression())
.inspector(reader.getObjectInspector());
try (Closer<RecordReader, IOException> recordReader = closer(reader.rows(), RecordReader::close);
Closer<Writer, IOException> writer = closer(createWriter(path(output), writerOptions), Writer::close)) {
if (reader.hasMetadataValue(OrcFileMetadata.KEY)) {
ByteBuffer orcFileMetadata = reader.getMetadataValue(OrcFileMetadata.KEY);
writer.get().addUserMetadata(OrcFileMetadata.KEY, orcFileMetadata);
代码示例来源:origin: apache/hive
/**
 * Reads every row of the given ACID bucket file and collects the payload
 * records it contains.
 *
 * @param orcFile path of the bucket file to dump
 * @return the SampleRec payloads of all rows, in file order
 * @throws IOException if the file cannot be opened or read
 */
private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
  org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
  Reader reader = OrcFile.createReader(orcFile,
      OrcFile.readerOptions(conf).filesystem(fs));
  StructObjectInspector inspector = (StructObjectInspector) reader
      .getObjectInspector();
  System.out.format("Found Bucket File : %s \n", orcFile.getName());
  ArrayList<SampleRec> result = new ArrayList<SampleRec>();
  // FIX: the original never closed the RecordReader, leaking it on every
  // call; try-with-resources guarantees it is released even on error.
  try (RecordReader rows = reader.rows()) {
    while (rows.hasNext()) {
      Object row = rows.next(null);
      // Index 5 of the deserialized ACID struct is taken as the payload
      // record — presumably the "row" field; confirm against
      // deserializeDeltaFileRow.
      SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
      result.add(rec);
    }
  }
  return result;
}
代码示例来源:origin: com.facebook.presto.hive/hive-apache
Path path = new Path(filename);
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
System.out.println("File Version: " + reader.getFileVersion().getName() +
" with " + reader.getWriterVersion());
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
if (reader.getCompression() != CompressionKind.NONE) {
System.out.println("Compression size: " + reader.getCompressionSize());
System.out.println("Type: " + reader.getObjectInspector().getTypeName());
System.out.println("\nStripe Statistics:");
Metadata metadata = reader.getMetadata();
for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
System.out.println(" Stripe " + (n + 1) + ":");
ColumnStatistics[] stats = reader.getStatistics();
int colCount = stats.length;
System.out.println("\nFile Statistics:");
for (StripeInformation stripe : reader.getStripes()) {
++stripeIx;
long stripeStart = stripe.getOffset();
代码示例来源:origin: apache/hive
OrcFile.readerOptions(conf).filesystem(fs));
StructObjectInspector readerInspector =
(StructObjectInspector) reader.getObjectInspector();
List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector.
getStructFieldRef("dec").getFieldObjectInspector();
RecordReader rows = reader.rows();
while (rows.hasNext()) {
Object row = rows.next(null);
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[0].getNumberOfValues());
assertEquals(0, stats[1].getNumberOfValues());
代码示例来源:origin: apache/hive
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5077, reader.getNumberOfRows());
DecimalColumnStatistics stats =
(DecimalColumnStatistics) reader.getStatistics()[5];
assertEquals(71, stats.getNumberOfValues());
assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
int rowCount = 0;
long currentOffset = -1;
for(StripeInformation stripe: reader.getStripes()) {
stripeCount += 1;
rowCount += stripe.getNumberOfRows();
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(2, stripeCount);
assertEquals(reader.getContentLength(), currentOffset);
RecordReader rows = reader.rows();
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
inspector.getTypeName());
assertEquals(false, rows.hasNext());
assertEquals(1.0, rows.getProgress(), 0.00001);
assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
代码示例来源:origin: apache/hive
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(4, reader.getNumberOfRows());
assertEquals(273, reader.getRawDataSize());
assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(4, stats[0].getNumberOfValues());
assertEquals("count: 4 hasNull: false", stats[0].toString());
(StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT,
readerInspector.getCategory());
StringObjectInspector st = (StringObjectInspector) readerInspector.
getStructFieldRef("string1").getFieldObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
代码示例来源:origin: apache/hive
Reader reader = OrcFile.createReader(outputFilePath,
OrcFile.readerOptions(conf).filesystem(localFs));
assertTrue(reader.getNumberOfRows() == rownum);
assertEquals(reader.getCompression(), CompressionKind.ZLIB);
StructObjectInspector soi =
(StructObjectInspector)reader.getObjectInspector();
StructTypeInfo ti =
(StructTypeInfo)TypeInfoUtils.getTypeInfoFromObjectInspector(soi);
PrimitiveObjectInspector.PrimitiveCategory.STRING);
RecordReader rows = reader.rows();
Object row = rows.next(null);
代码示例来源:origin: apache/hive
int rowCount = 0;
long currentOffset = -1;
for(StripeInformation stripe : reader.getStripes()) {
stripeCount += 1;
rowCount += stripe.getNumberOfRows();
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(2, stripeCount);
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(7500, stats[1].getNumberOfValues());
assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
StringObjectInspector mk = (StringObjectInspector) ma
.getMapKeyObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
代码示例来源:origin: apache/hive
/**
 * Verifies that a file written with zero rows reads back as truly empty:
 * no rows, no stripes, no user metadata, and minimal content length.
 */
@Test
public void emptyFile() throws Exception {
  // Build the reflection-based inspector under the class lock, matching the
  // convention used by the other tests in this suite.
  ObjectInspector bigRowInspector;
  synchronized (TestOrcFile.class) {
    bigRowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  // Create the writer and close it immediately so no rows are written.
  Writer emptyWriter = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(bigRowInspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  emptyWriter.close();

  // Everything about the resulting file should look empty.
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(false, reader.rows().hasNext());
  assertEquals(CompressionKind.NONE, reader.getCompression());
  assertEquals(0, reader.getNumberOfRows());
  assertEquals(0, reader.getCompressionSize());
  assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, reader.getContentLength());
  assertEquals(false, reader.getStripes().iterator().hasNext());
}
代码示例来源:origin: apache/hive
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(COUNT, reader.getNumberOfRows());
RecordReader rows = reader.rows();
OrcStruct row = null;
for(int i=COUNT-1; i >= 0; --i) {
reader.getStripes().iterator();
long offsetOfStripe2 = 0;
long offsetOfStripe4 = 0;
boolean[] columns = new boolean[reader.getStatistics().length];
columns[5] = true; // long colulmn
columns[9] = true; // text column
rows = reader.rowsOptions(new Reader.Options()
.range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
.include(columns));
代码示例来源:origin: apache/hive
.version(fileVersion)
.rowIndexStride(rowIndexStride)
.inspector(reader.getObjectInspector());
代码示例来源:origin: apache/hive
types.add(typeBuilder.build());
Mockito.when(reader.getTypes()).thenReturn(types);
Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class)))
.thenReturn(recordReader);
Mockito.when(recordReader.next(row3)).thenReturn(row5);
Mockito.when(reader.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
.thenReturn(true);
Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
.thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61"
.getBytes("UTF-8")));
Mockito.when(reader.getStripes())
.thenReturn(createStripes(2, 2, 1));
代码示例来源:origin: apache/hive
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
stats[3].toString());
StripeStatistics ss = reader.getStripeStatistics().get(0);
assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
(StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT,
readerInspector.getCategory());
StringObjectInspector mk = (StringObjectInspector)
ma.getMapKeyObjectInspector();
RecordReader rows = reader.rows();
Object row = rows.next(null);
assertNotNull(row);
代码示例来源:origin: org.apache.spark/spark-hive_2.10
/**
 * Builds a record reader over a byte range of the given ORC file.
 *
 * @param file the already-opened ORC file reader
 * @param conf the Hadoop configuration used to create the inner reader
 * @param offset start of the byte range to read
 * @param length length of the byte range to read
 * @throws IOException if the underlying record reader cannot be created
 */
public SparkOrcNewRecordReader(Reader file, Configuration conf,
long offset, long length) throws IOException {
// Column count comes from the root type's subtype count; an empty type
// list yields zero columns.
List<OrcProto.Type> types = file.getTypes();
numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
// Reusable value holder sized to the column count.
value = new OrcStruct(numColumns);
this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset,
length);
this.objectInspector = file.getObjectInspector();
}
代码示例来源:origin: apache/hive
if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
keyWrapper.setInputPath(path);
keyWrapper.setIsIncompatFile(true);
keyWrapper.setCompression(reader.getCompressionKind());
keyWrapper.setCompressBufferSize(reader.getCompressionSize());
keyWrapper.setFileVersion(reader.getFileVersion());
keyWrapper.setWriterVersion(reader.getWriterVersion());
keyWrapper.setRowIndexStride(reader.getRowIndexStride());
keyWrapper.setFileSchema(reader.getSchema());
} else {
stripeIdx++;
内容来源于网络,如有侵权,请联系作者删除!