本文整理了Java中org.apache.hadoop.hive.ql.io.orc.Reader.getNumberOfRows
方法的一些代码示例,展示了Reader.getNumberOfRows
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Reader.getNumberOfRows
方法的具体详情如下:
包路径:org.apache.hadoop.hive.ql.io.orc.Reader
类名称:Reader
方法名:getNumberOfRows
[英]Get the number of rows in the file.
[中]获取文件中的行数。
代码示例来源:origin: apache/hive
/**
 * Checks whether the ACID key index stored in the file footer is usable:
 * every index entry must be non-null and there must be exactly one entry
 * per stripe. An empty file is trivially valid.
 */
static boolean isAcidKeyIndexValid(Reader reader) {
  // Nothing to validate when the file holds no rows.
  if (reader.getNumberOfRows() == 0) {
    return true;
  }
  // The number of stripes should match the key index count
  List<StripeInformation> stripes = reader.getStripes();
  RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
  if (keyIndex == null) {
    return false;
  }
  // A single null entry means the index is corrupt/incomplete.
  for (int i = 0; i < keyIndex.length; ++i) {
    if (keyIndex[i] == null) {
      LOG.info("*** keyIndex[" + i + "] is null");
      return false;
    }
  }
  return stripes.size() == keyIndex.length;
}
代码示例来源:origin: apache/hive
@Override
public SerDeStats getStats() {
  // Refresh the shared stats holder from the ORC file's footer metadata
  // before handing it back to the caller.
  stats.setRowCount(file.getNumberOfRows());
  stats.setRawDataSize(file.getRawDataSize());
  return stats;
}
}
代码示例来源:origin: apache/drill
@Override
public SerDeStats getStats() {
  // Populate the cached stats object with the row count and raw data size
  // reported by the underlying ORC file, then return it.
  stats.setRowCount(file.getNumberOfRows());
  stats.setRawDataSize(file.getRawDataSize());
  return stats;
}
}
代码示例来源:origin: apache/hive
@Override
// Reads the next record into 'next', transparently advancing across the
// list of original files: when the current file is exhausted, the finished
// file's row count is folded into rowIdOffset and reading resumes in the
// next file. NOTE(review): rowIdOffset presumably keeps synthetic row ids
// monotonically increasing across files — confirm against callers.
public void next(OrcStruct next) throws IOException {
while(true) {
// Fast path: the currently open file still has records.
if(nextFromCurrentFile(next)) {
return;
} else {
if (originalFiles.size() <= nextFileIndex) {
//no more original files to read
nextRecord = null;
recordReader.close();
return;
} else {
// Current file exhausted: account for all of its rows before
// switching readers (must happen before advanceToNextFile()).
rowIdOffset += reader.getNumberOfRows();
recordReader.close();
reader = advanceToNextFile();
// advanceToNextFile() signals "nothing left" with null.
if(reader == null) {
nextRecord = null;
return;
}
recordReader = reader.rowsOptions(options, conf);
// Loop again to pull the first record from the new file.
}
}
}
}
/**
代码示例来源:origin: apache/hive
// Wraps a delete-delta file reader: opens a record reader over the delta,
// eagerly pulls the first batch, and records how many delete events the
// file contains (taken from the ORC footer row count).
DeleteReaderValue(Reader deleteDeltaReader, Path deleteDeltaFile,
Reader.Options readerOptions, int bucket, ValidWriteIdList validWriteIdList,
boolean isBucketedTable, final JobConf conf,
OrcRawRecordMerger.KeyInterval keyInterval, OrcSplit orcSplit)
throws IOException {
this.reader = deleteDeltaReader;
this.deleteDeltaFile = deleteDeltaFile;
this.recordReader = deleteDeltaReader.rowsOptions(readerOptions, conf);
this.bucketForSplit = bucket;
// Choose the batch flavor that matches the configured vectorized-input
// support level ("decimal_64" enables the Decimal64 column vectors).
final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
if (useDecimal64ColumnVector) {
this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
} else {
this.batch = deleteDeltaReader.getSchema().createRowBatch();
}
if (!recordReader.nextBatch(batch)) { // Read the first batch.
this.batch = null; // Oh! the first batch itself was null. Close the reader.
}
this.indexPtrInBatch = 0;
this.validWriteIdList = validWriteIdList;
this.isBucketedTable = isBucketedTable;
if(batch != null) {
checkBucketId();//check 1st batch
}
this.keyInterval = keyInterval;
this.orcSplit = orcSplit;
// Footer row count == number of delete events in this delta file.
this.numEvents = deleteDeltaReader.getNumberOfRows();
LOG.debug("Num events stats({},x,x)", numEvents);
}
代码示例来源:origin: apache/hive
if (deleteDeltaReader.getNumberOfRows() <= 0) {
continue; // just a safe check to ensure that we are not reading empty delete files.
totalDeleteEventCount += deleteDeltaReader.getNumberOfRows();
DeleteReaderValue deleteReaderValue = new DeleteReaderValue(deleteDeltaReader,
deleteDeltaFile, readerOptions, bucket, validWriteIdList, isBucketedTable, conf,
代码示例来源:origin: apache/hive
Reader reader = OrcFile.createReader(outputFilePath,
OrcFile.readerOptions(conf).filesystem(localFs));
assertTrue(reader.getNumberOfRows() == rownum);
assertEquals(reader.getCompression(), CompressionKind.ZLIB);
StructObjectInspector soi =
代码示例来源:origin: apache/hive
rowIdOffset += reader.getNumberOfRows();
代码示例来源:origin: apache/hive
if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
keyWrapper.setInputPath(path);
keyWrapper.setIsIncompatFile(true);
代码示例来源:origin: apache/drill
if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
keyWrapper.setInputPath(path);
keyWrapper.setIsIncompatFile(true);
代码示例来源:origin: apache/hive
@Test
public void emptyFile() throws Exception {
  // Build an inspector for BigRow; the factory cache is not thread-safe,
  // hence the synchronized block.
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // Open a writer and close it immediately without adding any rows.
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  writer.close();
  // A zero-row file must report empty everything: no rows, no stripes,
  // no metadata, and only the 3-byte "ORC" header as content.
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(false, reader.rows().hasNext());
  assertEquals(CompressionKind.NONE, reader.getCompression());
  assertEquals(0, reader.getNumberOfRows());
  assertEquals(0, reader.getCompressionSize());
  assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
  assertEquals(3, reader.getContentLength());
  assertEquals(false, reader.getStripes().iterator().hasNext());
}
代码示例来源:origin: apache/hive
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(6300000, reader.getRawDataSize());
assertEquals(2, stripeCount);
代码示例来源:origin: apache/hive
OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(1000, reader.getNumberOfRows());
assertEquals(44500, reader.getRawDataSize());
assertEquals(1500, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
代码示例来源:origin: apache/hive
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(50000, reader.getNumberOfRows());
assertEquals(0, reader.getRowIndexStride());
StripeInformation stripe = reader.getStripes().iterator().next();
代码示例来源:origin: apache/hive
@Test
public void testOrcSerDeStatsMap() throws Exception {
  // The reflection-inspector factory cache is guarded by the test class lock.
  ObjectInspector inspector;
  synchronized (TestOrcSerDeStats.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(10000)
          .bufferSize(10000));
  // Write 1000 rows, each carrying a 10-entry map.
  for (int r = 0; r < 1000; r++) {
    Map<String, Double> entries = new HashMap<String, Double>();
    for (int k = 0; k < 10; k++) {
      entries.put("hi" + k, 2.0);
    }
    writer.addRow(new MapStruct(entries));
  }
  writer.close();
  // stats from writer
  assertEquals(1000, writer.getNumberOfRows());
  assertEquals(950000, writer.getRawDataSize());
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  // stats from reader — must agree with what the writer reported.
  assertEquals(1000, reader.getNumberOfRows());
  assertEquals(950000, reader.getRawDataSize());
  assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1")));
}
代码示例来源:origin: apache/hive
OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(2, reader.getNumberOfRows());
assertEquals(1668, reader.getRawDataSize());
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));
代码示例来源:origin: apache/hive
@Test
public void testOrcSerDeStatsList() throws Exception {
  // The reflection-inspector factory cache is guarded by the test class lock.
  ObjectInspector inspector;
  synchronized (TestOrcSerDeStats.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(10000)
          .bufferSize(10000));
  // Write 5000 rows, each carrying a list of 1000 identical strings.
  for (int r = 0; r < 5000; r++) {
    List<String> values = new ArrayList<String>();
    for (int j = 0; j < 1000; j++) {
      values.add("hi");
    }
    writer.addRow(new ListStruct(values));
  }
  writer.close();
  // Writer-side statistics.
  assertEquals(5000, writer.getNumberOfRows());
  assertEquals(430000000, writer.getRawDataSize());
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  // stats from reader — must agree with what the writer reported.
  assertEquals(5000, reader.getNumberOfRows());
  assertEquals(430000000, reader.getRawDataSize());
  assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1")));
}
代码示例来源:origin: apache/hive
assertEquals(2500, reader.getNumberOfRows());
代码示例来源:origin: apache/hive
assertEquals(2500, reader.getNumberOfRows());
代码示例来源:origin: apache/hive
assertEquals(1, reader.getNumberOfRows());
reader = OrcFile.createReader(bucketPath,
new OrcFile.ReaderOptions(conf).filesystem(fs));
assertEquals(2, reader.getNumberOfRows());
内容来源于网络,如有侵权,请联系作者删除!