Use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.
Class AbstractOrcRecordReader, method advanceToNextRowGroup.
/**
 * Positions the reader on the next row group, moving on to following stripes
 * while the current stripe has no further row groups.
 *
 * <p>If a row group was already being read and row-group statistics validation
 * is enabled, the statistics gathered for it are validated and reset first.
 *
 * @return {@code true} when a row group was selected and handed to every
 *         non-null column reader; {@code false} when no row group remains
 * @throws IOException if advancing to a stripe or starting a row group fails
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Check the statistics accumulated for the row group that was just finished.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedGroupStatistics = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(
                orcDataSource.getId(),
                stripeOffset,
                currentRowGroup,
                finishedGroupStatistics.build().get());
        finishedGroupStatistics.reset();
    }

    // Keep loading stripes until one yields a row group or the stripes are exhausted.
    while (!rowGroups.hasNext() && currentStripe < stripes.size()) {
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    if (!rowGroups.hasNext()) {
        // Nothing left to read.
        currentGroupRowCount = 0;
        return false;
    }

    currentRowGroup++;
    RowGroup upcomingGroup = rowGroups.next();
    currentGroupRowCount = upcomingGroup.getRowCount();

    // Shrink the batch size so that a batch stays within maxBlockBytes (never below one row).
    if (upcomingGroup.getMinAverageRowBytes() > 0) {
        long rowsPerBlock = max(1, maxBlockBytes / upcomingGroup.getMinAverageRowBytes());
        maxBatchSize = toIntExact(min(maxBatchSize, rowsPerBlock));
    }

    currentPosition = currentStripePosition + upcomingGroup.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + upcomingGroup.getRowOffset();

    // Hand the row group's data streams to each column reader.
    InputStreamSources groupStreams = upcomingGroup.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }

        boolean isCachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (isCachingReader) {
            // Caching readers additionally receive metadata describing this row group.
            StreamSourceMeta groupMeta = new StreamSourceMeta();
            groupMeta.setDataSourceId(orcDataSource.getId());
            groupMeta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            groupMeta.setStripeOffset(stripes.get(currentStripe).getOffset());
            groupMeta.setRowGroupOffset(upcomingGroup.getRowOffset());
            groupMeta.setRowCount(upcomingGroup.getRowCount());
            groupStreams.setStreamSourceMeta(groupMeta);
        }

        reader.startRowGroup(groupStreams);
    }
    return true;
}
Use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.
Class AbstractOrcRecordReader, method close.
/**
 * Closes the ORC data source and every non-null column reader, then runs the
 * write-validation checks (row count, per-column hashes, stripe hash and file
 * statistics) for whichever validation builders are present.
 *
 * @throws IOException if closing a resource or a validation step fails
 */
@Override
public void close() throws IOException {
    // Register everything with a Closer so all resources are closed when the try block exits.
    try (Closer resources = Closer.create()) {
        resources.register(orcDataSource);
        for (AbstractColumnReader reader : columnReaders) {
            if (reader == null) {
                continue;
            }
            resources.register(reader::close);
        }
    }

    if (writeChecksumBuilder.isPresent()) {
        OrcWriteValidation.WriteChecksum computed = writeChecksumBuilder.get().build();

        validateWrite(
                validation -> validation.getChecksum().getTotalRowCount() == computed.getTotalRowCount(),
                "Invalid row count");

        List<Long> computedHashes = computed.getColumnHashes();
        int hashCount = computedHashes.size();
        for (int i = 0; i < hashCount; i++) {
            // Lambdas can only capture effectively final locals, so copy the loop state.
            int column = i;
            Long computedHash = computedHashes.get(column);
            validateWrite(
                    validation -> validation.getChecksum().getColumnHashes().get(column).equals(computedHash),
                    "Invalid checksum for column %s",
                    column);
        }

        validateWrite(
                validation -> validation.getChecksum().getStripeHash() == computed.getStripeHash(),
                "Invalid stripes checksum");
    }

    if (fileStatisticsValidation.isPresent()) {
        Optional<ColumnMetadata<ColumnStatistics>> fileStatistics = fileStatisticsValidation.get().build();
        writeValidation.get().validateFileStatistics(orcDataSource.getId(), fileStatistics);
    }
}
Use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.
Class AbstractOrcRecordReader, method advanceToNextStripe.
/**
 * Moves to the next stripe: replaces the per-stripe memory context, validates
 * the statistics of the stripe just finished (when stripe statistics
 * validation is enabled), reads the next stripe and starts it on every
 * non-null column reader.
 *
 * <p>{@code rowGroups} is reset to an empty iterator up front and is replaced
 * with the new stripe's row groups only when the stripe reader returns a
 * non-null stripe.
 *
 * @throws IOException if reading the stripe or starting it on a reader fails
 */
private void advanceToNextStripe() throws IOException {
    // Close the previous stripe's memory context and start a fresh one.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    // Validate the statistics collected for the stripe that was just completed.
    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedStripeStatistics = stripeStatisticsValidation.get();
        long finishedStripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(
                orcDataSource.getId(),
                finishedStripeOffset,
                finishedStripeStatistics.build().get());
        finishedStripeStatistics.reset();
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        // Past the last stripe; rowGroups stays empty.
        return;
    }

    // Accumulate the row count of the preceding stripe into the running position.
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation nextStripeInfo = stripes.get(currentStripe);
    validateWriteStripe(nextStripeInfo.getNumberOfRows());

    Stripe loadedStripe = stripeReader.readStripe(nextStripeInfo, currentStripeSystemMemoryContext);
    if (loadedStripe == null) {
        return;
    }

    // Give readers access to dictionary streams
    InputStreamSources dictionaryStreams = loadedStripe.getDictionaryStreamSources();
    ColumnMetadata<ColumnEncoding> encodings = loadedStripe.getColumnEncodings();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }
        ZoneId stripeTimeZone = loadedStripe.getFileTimeZone();
        reader.startStripe(stripeTimeZone, dictionaryStreams, encodings);
    }

    rowGroups = loadedStripe.getRowGroups().iterator();
}
Aggregations