Search in sources :

Example 1 with StatisticsValidation

use of io.trino.orc.OrcWriteValidation.StatisticsValidation in project trino by trinodb.

From the class OrcRecordReader, method advanceToNextStripe.

/**
 * Moves this reader from the current stripe to the next one.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>closes the current stripe memory context, replaces it with a fresh one obtained
 *       from {@code memoryUsage}, and resets {@code rowGroups} to an empty iterator;</li>
 *   <li>if a stripe was already active ({@code currentStripe >= 0}) and stripe statistics
 *       validation is present, validates the collected statistics against that stripe's
 *       offset and resets the collector;</li>
 *   <li>increments {@code currentStripe} and returns early when it has moved past the
 *       last entry of {@code stripes};</li>
 *   <li>adds the previous stripe's row count to {@code currentStripePosition};</li>
 *   <li>reads the new stripe and, when the read yields a non-null stripe, passes its
 *       time zone, dictionary stream sources and column encodings to every non-null
 *       column reader and exposes its row groups through {@code rowGroups};</li>
 *   <li>records the data source's retained size in {@code orcDataSourceMemoryUsage}.</li>
 * </ol>
 *
 * @throws IOException declared by this method; presumably raised while reading the stripe
 *         or starting the column readers (the callees are not visible here)
 */
private void advanceToNextStripe() throws IOException {
    // Swap the per-stripe memory context and drop any row groups left from the old stripe.
    currentStripeMemoryContext.close();
    currentStripeMemoryContext = memoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    // Statistics can only be validated for a stripe that was actually active.
    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        StatisticsValidation finishedStripeStatistics = stripeStatisticsValidation.get();
        long finishedStripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(orcDataSource.getId(), finishedStripeOffset, finishedStripeStatistics.build().get());
        finishedStripeStatistics.reset();
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        // No stripe left; rowGroups stays empty.
        return;
    }

    // The new stripe starts right after all rows of the stripe that precedes it.
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation nextStripeInformation = stripes.get(currentStripe);
    validateWriteStripe(nextStripeInformation.getNumberOfRows());
    Stripe nextStripe = stripeReader.readStripe(nextStripeInformation, currentStripeMemoryContext);
    if (nextStripe != null) {
        // Give readers access to dictionary streams
        InputStreamSources dictionaries = nextStripe.getDictionaryStreamSources();
        ColumnMetadata<ColumnEncoding> encodings = nextStripe.getColumnEncodings();
        ZoneId stripeTimeZone = nextStripe.getFileTimeZone();
        for (ColumnReader reader : columnReaders) {
            if (reader == null) {
                continue;
            }
            reader.startStripe(stripeTimeZone, dictionaries, encodings);
        }
        rowGroups = nextStripe.getRowGroups().iterator();
    }

    // Runs whether or not a stripe was obtained above.
    orcDataSourceMemoryUsage.setBytes(orcDataSource.getRetainedSize());
}
Also used : ColumnEncoding(io.trino.orc.metadata.ColumnEncoding) InputStreamSources(io.trino.orc.stream.InputStreamSources) ZoneId(java.time.ZoneId) StatisticsValidation(io.trino.orc.OrcWriteValidation.StatisticsValidation) ColumnReader(io.trino.orc.reader.ColumnReader) ColumnReaders.createColumnReader(io.trino.orc.reader.ColumnReaders.createColumnReader) StripeInformation(io.trino.orc.metadata.StripeInformation)

Example 2 with StatisticsValidation

use of io.trino.orc.OrcWriteValidation.StatisticsValidation in project trino by trinodb.

From the class OrcRecordReader, method advanceToNextRowGroup.

/**
 * Moves this reader to the next row group, advancing through stripes as needed.
 *
 * <p>Resets {@code nextRowInGroup} to zero. If a row group was already active
 * ({@code currentRowGroup >= 0}) and row group statistics validation is present, the
 * collected statistics are validated against the current stripe offset and row group
 * index, then the collector is reset. Stripes are then advanced until one supplies a row
 * group or {@code currentStripe} reaches {@code stripes.size()}.
 *
 * <p>On success the row group's row count, the read positions and, when the row group
 * reports a positive minimum average row size, a possibly reduced {@code maxBatchSize}
 * are stored, and every non-null column reader is started on the row group's streams.
 *
 * @return {@code true} if a row group was selected; {@code false} if none remain, in
 *         which case {@code currentGroupRowCount} is set to zero
 * @throws IOException declared by this method; presumably raised by stripe advancement or
 *         by the column readers (the callees are not visible here)
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Validate statistics gathered for the row group being left, if there was one.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        StatisticsValidation finishedGroupStatistics = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripeOffset, currentRowGroup, finishedGroupStatistics.build().get());
        finishedGroupStatistics.reset();
    }

    // Keep moving to later stripes until one supplies a row group or stripes run out.
    while (!rowGroups.hasNext() && currentStripe < stripes.size()) {
        advanceToNextStripe();
        // Row group numbering restarts for each stripe.
        this.currentRowGroup = -1;
    }

    if (!rowGroups.hasNext()) {
        currentGroupRowCount = 0;
        return false;
    }

    // The field holds the row group index; the local below holds the row group itself.
    this.currentRowGroup++;
    RowGroup nextGroup = rowGroups.next();
    currentGroupRowCount = nextGroup.getRowCount();

    // Only ever lowers maxBatchSize, and never below one row.
    if (nextGroup.getMinAverageRowBytes() > 0) {
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxBlockBytes / nextGroup.getMinAverageRowBytes())));
    }

    currentPosition = currentStripePosition + nextGroup.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + nextGroup.getRowOffset();

    // give reader data streams from row group
    InputStreamSources groupStreams = nextGroup.getStreamSources();
    for (ColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }
        reader.startRowGroup(groupStreams);
    }
    return true;
}
Also used : InputStreamSources(io.trino.orc.stream.InputStreamSources) StatisticsValidation(io.trino.orc.OrcWriteValidation.StatisticsValidation) ColumnReader(io.trino.orc.reader.ColumnReader) ColumnReaders.createColumnReader(io.trino.orc.reader.ColumnReaders.createColumnReader)

Aggregations

StatisticsValidation (io.trino.orc.OrcWriteValidation.StatisticsValidation): 2, ColumnReader (io.trino.orc.reader.ColumnReader): 2, ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader): 2, InputStreamSources (io.trino.orc.stream.InputStreamSources): 2, ColumnEncoding (io.trino.orc.metadata.ColumnEncoding): 1, StripeInformation (io.trino.orc.metadata.StripeInformation): 1, ZoneId (java.time.ZoneId): 1