Usage and code examples of the org.apache.hadoop.hive.ql.io.orc.Reader.getNumberOfRows() method


This article collects Java code examples of the org.apache.hadoop.hive.ql.io.orc.Reader.getNumberOfRows method and shows how it is used in practice. The examples were extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, so they make solid references. Details of the Reader.getNumberOfRows method:
Package: org.apache.hadoop.hive.ql.io.orc
Class: Reader
Method: getNumberOfRows

About Reader.getNumberOfRows

Get the number of rows in the file.
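The snippets below all follow the same basic call pattern. Here is a minimal, self-contained sketch of that pattern (the path /tmp/example.orc and the bare Configuration are placeholder assumptions, not taken from any of the projects below): open the file with OrcFile.createReader and read the row count, which comes from the file's footer metadata rather than from scanning row data.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;

public class RowCountExample {
 public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Path orcPath = new Path("/tmp/example.orc"); // placeholder path (assumption)
  FileSystem fs = orcPath.getFileSystem(conf);
  // Opening a reader parses only the footer/metadata; no row data is read.
  Reader reader = OrcFile.createReader(orcPath,
    OrcFile.readerOptions(conf).filesystem(fs));
  long numRows = reader.getNumberOfRows();
  System.out.println("Rows in file: " + numRows);
 }
}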

Code examples

Code example source: apache/hive

static boolean isAcidKeyIndexValid(Reader reader) {
 if (reader.getNumberOfRows() == 0) {
  return true;
 }
 // The number of stripes should match the key index count
 List<StripeInformation> stripes = reader.getStripes();
 RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
 if (keyIndex == null) {
  return false;
 }
 for (int idx = 0; idx < keyIndex.length; ++idx) {
  if (keyIndex[idx] == null) {
   LOG.info("*** keyIndex[" + idx + "] is null");
   return false;
  }
 }
 return stripes.size() == keyIndex.length;
}

Code example source: apache/hive

@Override
 public SerDeStats getStats() {
  stats.setRawDataSize(file.getRawDataSize());
  stats.setRowCount(file.getNumberOfRows());
  return stats;
 }
}

Code example source: apache/drill

@Override
 public SerDeStats getStats() {
  stats.setRawDataSize(file.getRawDataSize());
  stats.setRowCount(file.getNumberOfRows());
  return stats;
 }
}

Code example source: apache/hive

@Override
public void next(OrcStruct next) throws IOException {
 while(true) {
  if(nextFromCurrentFile(next)) {
   return;
  } else {
   if (originalFiles.size() <= nextFileIndex) {
    //no more original files to read
    nextRecord = null;
    recordReader.close();
    return;
   } else {
    rowIdOffset += reader.getNumberOfRows();
    recordReader.close();
    reader = advanceToNextFile();
    if(reader == null) {
     nextRecord = null;
     return;
    }
    recordReader = reader.rowsOptions(options, conf);
   }
  }
 }
}

Code example source: apache/hive

DeleteReaderValue(Reader deleteDeltaReader, Path deleteDeltaFile,
  Reader.Options readerOptions, int bucket, ValidWriteIdList validWriteIdList,
  boolean isBucketedTable, final JobConf conf,
  OrcRawRecordMerger.KeyInterval keyInterval, OrcSplit orcSplit)
  throws IOException {
 this.reader = deleteDeltaReader;
 this.deleteDeltaFile = deleteDeltaFile;
 this.recordReader  = deleteDeltaReader.rowsOptions(readerOptions, conf);
 this.bucketForSplit = bucket;
 final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
  .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
 if (useDecimal64ColumnVector) {
  this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
 } else {
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
 }
 if (!recordReader.nextBatch(batch)) { // Read the first batch.
  this.batch = null; // Oh! the first batch itself was null. Close the reader.
 }
 this.indexPtrInBatch = 0;
 this.validWriteIdList = validWriteIdList;
 this.isBucketedTable = isBucketedTable;
 if(batch != null) {
  checkBucketId();//check 1st batch
 }
 this.keyInterval = keyInterval;
 this.orcSplit = orcSplit;
 this.numEvents = deleteDeltaReader.getNumberOfRows();
 LOG.debug("Num events stats({},x,x)", numEvents);
}

Code example source: apache/hive

if (deleteDeltaReader.getNumberOfRows() <= 0) {
 continue; // just a safe check to ensure that we are not reading empty delete files.
}
totalDeleteEventCount += deleteDeltaReader.getNumberOfRows();
DeleteReaderValue deleteReaderValue = new DeleteReaderValue(deleteDeltaReader,
  deleteDeltaFile, readerOptions, bucket, validWriteIdList, isBucketedTable, conf,
  keyInterval, orcSplit);

Code example source: apache/hive

Reader reader = OrcFile.createReader(outputFilePath,
  OrcFile.readerOptions(conf).filesystem(localFs));
assertTrue(reader.getNumberOfRows() == rownum);
assertEquals(reader.getCompression(), CompressionKind.ZLIB);

Code example source: apache/hive

rowIdOffset += reader.getNumberOfRows();

Code example source: apache/hive

if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
 keyWrapper.setInputPath(path);
 keyWrapper.setIsIncompatFile(true);
}

Code example source: apache/drill

if ((stripeStatistics == null || stripeStatistics.isEmpty()) && reader.getNumberOfRows() > 0) {
 keyWrapper.setInputPath(path);
 keyWrapper.setIsIncompatFile(true);
}

Code example source: apache/hive

@Test
public void emptyFile() throws Exception {
 ObjectInspector inspector;
 synchronized (TestOrcFile.class) {
  inspector = ObjectInspectorFactory.getReflectionObjectInspector
    (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
 }
 Writer writer = OrcFile.createWriter(testFilePath,
                    OrcFile.writerOptions(conf)
                    .inspector(inspector)
                    .stripeSize(1000)
                    .compress(CompressionKind.NONE)
                    .bufferSize(100));
 writer.close();
 Reader reader = OrcFile.createReader(testFilePath,
   OrcFile.readerOptions(conf).filesystem(fs));
 assertEquals(false, reader.rows().hasNext());
 assertEquals(CompressionKind.NONE, reader.getCompression());
 assertEquals(0, reader.getNumberOfRows());
 assertEquals(0, reader.getCompressionSize());
 assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
 assertEquals(3, reader.getContentLength());
 assertEquals(false, reader.getStripes().iterator().hasNext());
}

Code example source: apache/hive

assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(6300000, reader.getRawDataSize());
assertEquals(2, stripeCount);

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
  OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(1000, reader.getNumberOfRows());
assertEquals(44500, reader.getRawDataSize());
assertEquals(1500, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
  OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(50000, reader.getNumberOfRows());
assertEquals(0, reader.getRowIndexStride());
StripeInformation stripe = reader.getStripes().iterator().next();

Code example source: apache/hive

@Test
public void testOrcSerDeStatsMap() throws Exception {
 ObjectInspector inspector;
 synchronized (TestOrcSerDeStats.class) {
  inspector = ObjectInspectorFactory.getReflectionObjectInspector
    (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
 }
 Writer writer = OrcFile.createWriter(testFilePath,
   OrcFile.writerOptions(conf)
     .inspector(inspector)
     .stripeSize(10000)
     .bufferSize(10000));
 for (int row = 0; row < 1000; row++) {
  Map<String, Double> test = new HashMap<String, Double>();
  for (int i = 0; i < 10; i++) {
   test.put("hi" + i, 2.0);
  }
  writer.addRow(new MapStruct(test));
 }
 writer.close();
 // stats from writer
 assertEquals(1000, writer.getNumberOfRows());
 assertEquals(950000, writer.getRawDataSize());
 Reader reader = OrcFile.createReader(testFilePath,
   OrcFile.readerOptions(conf).filesystem(fs));
 // stats from reader
 assertEquals(1000, reader.getNumberOfRows());
 assertEquals(950000, reader.getRawDataSize());
 assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1")));
}

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
  OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(2, reader.getNumberOfRows());
assertEquals(1668, reader.getRawDataSize());
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));

Code example source: apache/hive

@Test
public void testOrcSerDeStatsList() throws Exception {
 ObjectInspector inspector;
 synchronized (TestOrcSerDeStats.class) {
  inspector = ObjectInspectorFactory.getReflectionObjectInspector
    (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
 }
 Writer writer = OrcFile.createWriter(testFilePath,
   OrcFile.writerOptions(conf)
     .inspector(inspector)
     .stripeSize(10000)
     .bufferSize(10000));
 for (int row = 0; row < 5000; row++) {
  List<String> test = new ArrayList<String>();
  for (int i = 0; i < 1000; i++) {
   test.add("hi");
  }
  writer.addRow(new ListStruct(test));
 }
 writer.close();
 assertEquals(5000, writer.getNumberOfRows());
 assertEquals(430000000, writer.getRawDataSize());
 Reader reader = OrcFile.createReader(testFilePath,
   OrcFile.readerOptions(conf).filesystem(fs));
 // stats from reader
 assertEquals(5000, reader.getNumberOfRows());
 assertEquals(430000000, reader.getRawDataSize());
 assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1")));
}

Code example source: apache/hive

assertEquals(2500, reader.getNumberOfRows());

Code example source: apache/hive

assertEquals(1, reader.getNumberOfRows());
reader = OrcFile.createReader(bucketPath,
 new OrcFile.ReaderOptions(conf).filesystem(fs));
assertEquals(2, reader.getNumberOfRows());
