Search in sources :

Example 16 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class DoubleColumnReader method readBlock.

/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions.
 * <p>
 * The row group is opened first if it is not open yet, and any pending
 * {@code readOffset} is consumed before the batch is read. Both {@code readOffset}
 * and {@code nextBatchSize} are reset to zero once the block has been produced.
 *
 * @return a run-length-encoded block created with a {@code null} value when there is
 *         no data stream or every position of the batch is unset in the present
 *         stream; otherwise the result of {@code readNonNullBlock()} or
 *         {@code readNullBlock(...)}
 * @throws OrcCorruptionException if values have to be skipped but the data stream is
 *         missing, or if both the data stream and the present stream are missing
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // When a present stream exists, the pending offset is first replaced by the
    // number of set bits it spans; that count is what is skipped in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block result;
    if (dataStream != null && presentStream == null) {
        // No present stream: no position of the batch is flagged as null.
        result = readNonNullBlock();
    } else if (dataStream != null) {
        // Both streams exist: let the present stream flag the null positions.
        boolean[] nullFlags = new boolean[nextBatchSize];
        int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            result = RunLengthEncodedBlock.create(DOUBLE, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    } else {
        // No data stream: the batch can only consist of nulls.
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(DOUBLE, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Block(io.trino.spi.block.Block) LongArrayBlock(io.trino.spi.block.LongArrayBlock) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 17 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongColumnReader method readBlock.

/**
 * Produces the block for the pending batch of {@code nextBatchSize} positions.
 * <p>
 * The row group is opened first if it is not open yet, and any pending
 * {@code readOffset} is consumed before the batch is read. Both {@code readOffset}
 * and {@code nextBatchSize} are reset to zero once the block has been produced.
 *
 * @return a run-length-encoded block of {@code type} created with a {@code null}
 *         value when there is no data stream or every position of the batch is unset
 *         in the present stream; otherwise the result of {@code readNonNullBlock()}
 *         or {@code readNullBlock(...)}
 * @throws OrcCorruptionException if values have to be skipped but the data stream is
 *         missing, or if both the data stream and the present stream are missing
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // When a present stream exists, the pending offset is first replaced by the
    // number of set bits it spans; that count is what is skipped in the data stream.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        dataStream.skip(readOffset);
    }

    Block result;
    if (dataStream != null && presentStream == null) {
        // No present stream: no position of the batch is flagged as null.
        result = readNonNullBlock();
    } else if (dataStream != null) {
        // Both streams exist: let the present stream flag the null positions.
        boolean[] nullFlags = new boolean[nextBatchSize];
        int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags, nextBatchSize - nulls);
        }
    } else {
        // No data stream: the batch can only consist of nulls.
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(type, null, nextBatchSize);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : IntArrayBlock(io.trino.spi.block.IntArrayBlock) Block(io.trino.spi.block.Block) LongArrayBlock(io.trino.spi.block.LongArrayBlock) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) ShortArrayBlock(io.trino.spi.block.ShortArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 18 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class MapColumnReader method readBlock.

/**
 * Reads the pending batch of {@code nextBatchSize} map positions and assembles them
 * into a single block via {@code type.createBlockFromKeyValue(...)}.
 * <p>
 * Steps, in order: open the row group if needed; consume any pending
 * {@code readOffset}; read one length per non-null position; read the summed number
 * of entries from the key and value readers; build the key/value blocks; convert the
 * lengths to offsets in place. {@code readOffset} and {@code nextBatchSize} are reset
 * to zero before returning.
 *
 * @return the block built from the optional null vector, the offset vector and the
 *         key and value blocks
 * @throws OrcCorruptionException if lengths are required (for skipping or for
 *         non-null positions) but the length stream is missing
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            // NOTE: the message says "data stream", but the stream checked here is the length stream.
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            // The sum of the skipped lengths is the number of key/value entries to pass over.
            long entrySkipSize = lengthStream.sum(readOffset);
            // Presumably registers the skip with the nested readers ahead of the
            // prepareNextRead(entryCount) calls below; confirm against prepareNextRead.
            keyColumnReader.prepareNextRead(toIntExact(entrySkipSize));
            valueColumnReader.prepareNextRead(toIntExact(entrySkipSize));
        }
    }
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    // Stays null when there is no present stream, which yields an empty Optional at the return.
    boolean[] nullVector = null;
    if (presentStream == null) {
        if (lengthStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        // No present stream: one length is read for every position of the batch.
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        nullVector = new boolean[nextBatchSize];
        int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector);
        // If every position is null, nothing is read and offsetVector keeps its zero-initialized values.
        if (nullValues != nextBatchSize) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            // Only the non-null positions have a length in the stream.
            lengthStream.next(offsetVector, nextBatchSize - nullValues);
            // Presumably moves the densely read lengths to their positions in the batch,
            // leaving zero for null positions; confirm against unpackLengthNulls.
            unpackLengthNulls(offsetVector, nullVector, nextBatchSize - nullValues);
        }
    }
    // Calculate the entryCount. Note that the values in the offsetVector are still length values now.
    // The loop covers the first nextBatchSize slots; the extra trailing slot is excluded.
    int entryCount = 0;
    for (int i = 0; i < offsetVector.length - 1; i++) {
        entryCount += offsetVector[i];
    }
    Block keys;
    Block values;
    if (entryCount > 0) {
        keyColumnReader.prepareNextRead(entryCount);
        valueColumnReader.prepareNextRead(entryCount);
        keys = keyColumnReader.readBlock();
        // Values are obtained through blockFactory with the reader passed as a method reference,
        // presumably so loading can be deferred; confirm against blockFactory.createBlock.
        values = blockFactory.createBlock(entryCount, valueColumnReader::readBlock, true);
    } else {
        // No entries at all: use blocks built from fresh builders with nothing appended.
        // NOTE(review): the second builder argument is 0 for keys and 1 for values.
        keys = type.getKeyType().createBlockBuilder(null, 0).build();
        values = type.getValueType().createBlockBuilder(null, 1).build();
    }
    // createKeyValueBlock is called while offsetVector still holds lengths; the in-place
    // conversion to offsets happens only afterwards, so this order must be kept.
    Block[] keyValueBlock = createKeyValueBlock(nextBatchSize, keys, values, offsetVector);
    convertLengthVectorToOffsetVector(offsetVector);
    readOffset = 0;
    nextBatchSize = 0;
    return type.createBlockFromKeyValue(Optional.ofNullable(nullVector), offsetVector, keyValueBlock[0], keyValueBlock[1]);
}
Also used : Block(io.trino.spi.block.Block) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 19 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class SliceDictionaryColumnReader method openRowGroup.

/**
 * Opens the current row group: loads the dictionary if it is not open yet, publishes
 * it through {@code setDictionaryBlockData}, and opens the present and data streams.
 * <p>
 * Sets both {@code dictionaryOpen} and {@code rowGroupOpen} to {@code true}.
 *
 * @throws OrcCorruptionException if the dictionary is non-empty but its length stream
 *         is missing
 * @throws IOException declared for the underlying stream operations
 */
private void openRowGroup() throws IOException {
    if (!dictionaryOpen) {
        if (dictionarySize <= 0) {
            // Empty dictionary: fall back to the shared empty constants.
            dictionaryData = EMPTY_DICTIONARY_DATA;
            dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        } else {
            // Grow the reusable lengths buffer only when it is too small.
            if (dictionarySize > dictionaryLength.length) {
                dictionaryLength = new int[dictionarySize];
            }

            // Read one length per dictionary entry.
            LongInputStream entryLengths = dictionaryLengthStreamSource.openStream();
            if (entryLengths == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is missing");
            }
            entryLengths.next(dictionaryLength, dictionarySize);

            // Total size of the dictionary values, accumulated as a long before narrowing.
            long totalBytes = 0;
            int entry = 0;
            while (entry < dictionarySize) {
                totalBytes += dictionaryLength[entry];
                entry++;
            }

            // A fresh data array is always allocated because the previous dictionary
            // may still be referenced.
            dictionaryData = new byte[toIntExact(totalBytes)];
            // Sized with one extra entry for null.
            dictionaryOffsetVector = new int[dictionarySize + 2];

            // Read the dictionary values themselves.
            ByteArrayInputStream valueBytes = dictionaryDataStreamSource.openStream();
            readDictionary(valueBytes, dictionarySize, dictionaryLength, 0, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
        }
    }
    dictionaryOpen = true;

    setDictionaryBlockData(dictionaryData, dictionaryOffsetVector, dictionarySize + 1);

    presentStream = presentStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(io.trino.orc.stream.ByteArrayInputStream) OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongInputStream(io.trino.orc.stream.LongInputStream)

Example 20 with OrcCorruptionException

use of io.trino.orc.OrcCorruptionException in project trino by trinodb.

the class LongInputStreamV1 method next.

/**
 * Decodes the next {@code items} values into {@code values}, starting at index 0.
 * <p>
 * Values are taken from the current run and a new run is loaded with
 * {@code readValues()} whenever the current one is exhausted. In a repeat run the
 * value at run position {@code p} is {@code literals[0] + p * delta}; otherwise it is
 * {@code literals[p]}.
 *
 * @param values destination array; indexes {@code 0} to {@code items - 1} are written
 * @param items number of values to decode
 * @throws OrcCorruptionException if a decoded value does not fit in a 32-bit integer
 * @throws IOException declared for the underlying stream operations
 */
@Override
public void next(int[] values, int items) throws IOException {
    int written = 0;
    while (items > 0) {
        // Current run fully consumed: load the next one.
        if (used == numLiterals) {
            numLiterals = 0;
            used = 0;
            readValues();
        }

        int batch = min(numLiterals - used, items);
        for (int i = 0; i < batch; i++) {
            // The run kind does not change inside this loop, so one loop serves both kinds.
            long decoded = repeat
                    ? literals[0] + ((used + i) * delta)
                    : literals[used + i];
            // Reject values that would be altered by narrowing to int.
            if (decoded != (int) decoded) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
            }
            values[written + i] = (int) decoded;
        }

        used += batch;
        written += batch;
        items -= batch;
    }
}
Also used : OrcCorruptionException(io.trino.orc.OrcCorruptionException) LongStreamV1Checkpoint(io.trino.orc.checkpoint.LongStreamV1Checkpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint)

Aggregations

OrcCorruptionException (io.trino.orc.OrcCorruptionException)33 Block (io.trino.spi.block.Block)13 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)11 LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)7 DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint)4 InputStreamCheckpoint.createInputStreamCheckpoint (io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)4 LongStreamV2Checkpoint (io.trino.orc.checkpoint.LongStreamV2Checkpoint)4 ByteArrayBlock (io.trino.spi.block.ByteArrayBlock)4 LongArrayBlock (io.trino.spi.block.LongArrayBlock)4 Slice (io.airlift.slice.Slice)3 LongStreamV1Checkpoint (io.trino.orc.checkpoint.LongStreamV1Checkpoint)3 ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint)2 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 IntArrayBlock (io.trino.spi.block.IntArrayBlock)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowBlock (io.trino.spi.block.RowBlock)2 VariableWidthBlock (io.trino.spi.block.VariableWidthBlock)2 RowType (io.trino.spi.type.RowType)2 Type (io.trino.spi.type.Type)2 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1