Search in sources :

Example 1 with AbstractColumnReader

use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method advanceToNextRowGroup.

/**
 * Positions the reader on the next row group, moving on to later stripes
 * while the current one has no row groups left.
 *
 * @return {@code true} if a row group was handed to the column readers,
 *         {@code false} if no row group remains
 * @throws IOException propagated from advancing the stripe, validating statistics,
 *         or starting a column reader
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // A row group was already in progress: validate its collected statistics before leaving it.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedGroupStatistics = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(
                orcDataSource.getId(),
                stripeOffset,
                currentRowGroup,
                finishedGroupStatistics.build().get());
        finishedGroupStatistics.reset();
    }

    // Keep opening stripes until one offers a row group; give up once the stripes are exhausted.
    while (!rowGroups.hasNext()) {
        if (currentStripe >= stripes.size()) {
            currentGroupRowCount = 0;
            return false;
        }
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    currentRowGroup += 1;
    RowGroup upcomingRowGroup = rowGroups.next();
    currentGroupRowCount = upcomingRowGroup.getRowCount();

    // Cap the batch size at maxBlockBytes divided by the group's minimum average row size (at least 1 row).
    if (upcomingRowGroup.getMinAverageRowBytes() > 0) {
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxBlockBytes / upcomingRowGroup.getMinAverageRowBytes())));
    }

    currentPosition = currentStripePosition + upcomingRowGroup.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + upcomingRowGroup.getRowOffset();

    // Hand the row group's data streams to every configured column reader.
    InputStreamSources groupStreams = upcomingRowGroup.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }

        boolean cachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (cachingReader) {
            // These reader types additionally receive metadata describing the source,
            // stripe and row group that the streams belong to.
            StreamSourceMeta meta = new StreamSourceMeta();
            meta.setDataSourceId(orcDataSource.getId());
            meta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            meta.setStripeOffset(stripes.get(currentStripe).getOffset());
            meta.setRowGroupOffset(upcomingRowGroup.getRowOffset());
            meta.setRowCount(upcomingRowGroup.getRowCount());
            groupStreams.setStreamSourceMeta(meta);
        }
        reader.startRowGroup(groupStreams);
    }
    return true;
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader)

Example 2 with AbstractColumnReader

use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method close.

/**
 * Closes the data source and all column readers, then runs the write-validation
 * checks (checksum and file statistics) that are configured.
 *
 * @throws IOException propagated from closing the resources or from the validation calls
 */
@Override
public void close()
        throws IOException
{
    // The Closer closes everything registered on it when the try block exits.
    try (Closer resources = Closer.create()) {
        resources.register(orcDataSource);
        for (AbstractColumnReader reader : columnReaders) {
            if (reader == null) {
                continue;
            }
            resources.register(reader::close);
        }
    }

    if (writeChecksumBuilder.isPresent()) {
        OrcWriteValidation.WriteChecksum computedChecksum = writeChecksumBuilder.get().build();

        validateWrite(
                validation -> validation.getChecksum().getTotalRowCount() == computedChecksum.getTotalRowCount(),
                "Invalid row count");

        // Compare the hash of every column against the expected checksum, one column at a time.
        List<Long> computedHashes = computedChecksum.getColumnHashes();
        int columnCount = computedHashes.size();
        for (int position = 0; position < columnCount; position++) {
            // lambdas may only capture effectively final locals, so copy the loop index
            int columnIndex = position;
            Long computedHash = computedHashes.get(columnIndex);
            validateWrite(
                    validation -> validation.getChecksum().getColumnHashes().get(columnIndex).equals(computedHash),
                    "Invalid checksum for column %s",
                    columnIndex);
        }

        validateWrite(
                validation -> validation.getChecksum().getStripeHash() == computedChecksum.getStripeHash(),
                "Invalid stripes checksum");
    }

    if (!fileStatisticsValidation.isPresent()) {
        return;
    }
    Optional<ColumnMetadata<ColumnStatistics>> fileStatistics = fileStatisticsValidation.get().build();
    writeValidation.get().validateFileStatistics(orcDataSource.getId(), fileStatistics);
}
Also used : Closer(com.google.common.io.Closer) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Comparator.comparingLong(java.util.Comparator.comparingLong) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader)

Example 3 with AbstractColumnReader

use of io.prestosql.orc.reader.AbstractColumnReader in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method advanceToNextStripe.

/**
 * Moves the reader to the next stripe: resets the per-stripe memory context,
 * validates the statistics of the stripe being left, reads the new stripe and
 * starts it on every column reader.
 *
 * <p>{@code rowGroups} is left as an empty iterator when there is no further
 * stripe or when the stripe reader returns {@code null}.
 *
 * @throws IOException propagated from the validation, stripe reading or column reader calls
 */
private void advanceToNextStripe()
        throws IOException
{
    // Release the memory accounted to the previous stripe and start a fresh context.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    // A stripe was already being read: validate its collected statistics before leaving it.
    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedStripeStatistics = stripeStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(
                orcDataSource.getId(),
                stripeOffset,
                finishedStripeStatistics.build().get());
        finishedStripeStatistics.reset();
    }

    currentStripe += 1;
    if (currentStripe >= stripes.size()) {
        return;
    }

    // The new stripe's first row position is the previous start plus the previous stripe's row count.
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation nextStripeInformation = stripes.get(currentStripe);
    validateWriteStripe(nextStripeInformation.getNumberOfRows());

    Stripe loadedStripe = stripeReader.readStripe(nextStripeInformation, currentStripeSystemMemoryContext);
    if (loadedStripe == null) {
        // no stripe was returned; rowGroups stays empty
        return;
    }

    // Give readers access to dictionary streams and column encodings.
    InputStreamSources dictionaryStreams = loadedStripe.getDictionaryStreamSources();
    ColumnMetadata<ColumnEncoding> encodings = loadedStripe.getColumnEncodings();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }
        reader.startStripe(loadedStripe.getFileTimeZone(), dictionaryStreams, encodings);
    }
    rowGroups = loadedStripe.getRowGroups().iterator();
}
Also used : ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ZoneId(java.time.ZoneId) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation)

Aggregations

AbstractColumnReader (io.prestosql.orc.reader.AbstractColumnReader): 3; InputStreamSources (io.prestosql.orc.stream.InputStreamSources): 2; Closer (com.google.common.io.Closer): 1; ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding): 1; ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata): 1; StripeInformation (io.prestosql.orc.metadata.StripeInformation): 1; CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader): 1; DataCachingSelectiveColumnReader (io.prestosql.orc.reader.DataCachingSelectiveColumnReader): 1; ResultCachingSelectiveColumnReader (io.prestosql.orc.reader.ResultCachingSelectiveColumnReader): 1; StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta): 1; ZoneId (java.time.ZoneId): 1; Comparator.comparingLong (java.util.Comparator.comparingLong): 1