Search in sources :

Example 41 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class BooleanBatchStreamReader method readBlock.

/**
 * Reads the next {@code nextBatchSize} boolean values of the current row group as a block.
 *
 * <p>The row group is opened first if it is not already open. Any pending {@code readOffset}
 * is consumed by skipping the present stream (when there is one) and then skipping the
 * corresponding number of non-null values in the data stream. On normal return both
 * {@code readOffset} and {@code nextBatchSize} are reset to zero.
 *
 * @return a run-length encoded null block when the data stream is absent or every value in
 *         the batch is null; otherwise the result of {@code readNonNullBlock()} or
 *         {@code readNullBlock(...)}
 * @throws OrcCorruptionException if non-null values must be skipped but there is no data
 *         stream, or if neither a data stream nor a present stream exists
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the data reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (dataStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
            }
            dataStream.skip(readOffset);
        }
    }
    // A single decision tree covers every stream combination. The original had an extra
    // early-return for "no data stream, present stream exists" which duplicated the first
    // branch below and made part of it unreachable.
    Block block;
    if (dataStream == null) {
        // without a data stream every value in the batch has to be null
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        block = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
    } else if (presentStream == null) {
        // no present stream: no nulls in this batch
        block = readNonNullBlock();
    } else {
        boolean[] isNull = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNull);
        if (nullCount == 0) {
            block = readNonNullBlock();
        } else if (nullCount != nextBatchSize) {
            // mixed batch: only (nextBatchSize - nullCount) values are stored in the data stream
            block = readNullBlock(isNull, nextBatchSize - nullCount);
        } else {
            // every value is null, nothing to read from the data stream
            block = RunLengthEncodedBlock.create(BOOLEAN, null, nextBatchSize);
        }
    }
    readOffset = 0;
    nextBatchSize = 0;
    return block;
}
Also used : RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) ByteArrayBlock(com.facebook.presto.common.block.ByteArrayBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 42 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class SliceDictionaryBatchStreamReader method openRowGroup.

/**
 * Prepares this reader for the current row group.
 *
 * <p>If the stripe dictionary has not been loaded yet ({@code stripeDictionaryOpen} is false)
 * it is read first: the entry lengths, then the entry bytes and offsets. After that the
 * optional row group dictionary is read and appended after the stripe dictionary entries,
 * the dictionary block data is installed via {@code setDictionaryBlockData}, and the
 * present / in-dictionary / data streams are opened.
 *
 * @throws OrcCorruptionException if the stripe dictionary is non-empty but its length stream
 *         is missing
 * @throws IOException declared for the underlying stream operations
 */
private void openRowGroup() throws IOException {
    // read the dictionary
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize > 0) {
            // resize the dictionary lengths array if necessary
            if (stripeDictionaryLength.length < stripeDictionarySize) {
                stripeDictionaryLength = new int[stripeDictionarySize];
            }
            // read the lengths
            LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            lengthStream.next(stripeDictionaryLength, stripeDictionarySize);
            long dataLength = 0;
            for (int i = 0; i < stripeDictionarySize; i++) {
                dataLength += stripeDictionaryLength[i];
            }
            // we must always create a new dictionary array because the previous dictionary may still be referenced
            stripeDictionaryData = new byte[toIntExact(dataLength)];
            // add one extra entry for null
            stripeDictionaryOffsetVector = new int[stripeDictionarySize + 2];
            // setBytes records an absolute value rather than a delta, so reporting each array
            // separately left only the last one accounted for. Report the combined footprint
            // of all three stripe dictionary arrays in a single call.
            systemMemoryContext.setBytes(sizeOf(stripeDictionaryLength) + sizeOf(stripeDictionaryData) + sizeOf(stripeDictionaryOffsetVector));
            // read dictionary values
            // NOTE(review): unlike the length stream, the data stream is not null-checked here;
            // presumably readDictionary tolerates a missing stream - confirm.
            ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, stripeDictionaryData, stripeDictionaryOffsetVector, maxCodePointCount, isCharType);
        } else {
            stripeDictionaryData = EMPTY_DICTIONARY_DATA;
            stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
    }
    stripeDictionaryOpen = true;
    // read row group dictionary
    RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
    if (dictionaryLengthStream != null) {
        int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();
        // resize the dictionary lengths array if necessary
        if (rowGroupDictionaryLength.length < rowGroupDictionarySize) {
            rowGroupDictionaryLength = new int[rowGroupDictionarySize];
        }
        // read the lengths
        dictionaryLengthStream.next(rowGroupDictionaryLength, rowGroupDictionarySize);
        long dataLength = 0;
        for (int i = 0; i < rowGroupDictionarySize; i++) {
            dataLength += rowGroupDictionaryLength[i];
        }
        // We must always create a new dictionary array because the previous dictionary may still be referenced
        // The first elements of the dictionary are from the stripe dictionary, then the row group dictionary elements, and then a null
        byte[] rowGroupDictionaryData = Arrays.copyOf(stripeDictionaryData, stripeDictionaryOffsetVector[stripeDictionarySize] + toIntExact(dataLength));
        int[] rowGroupDictionaryOffsetVector = Arrays.copyOf(stripeDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 2);
        // read dictionary values
        ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, rowGroupDictionaryData, rowGroupDictionaryOffsetVector, maxCodePointCount, isCharType);
        // position count = stripe entries + row group entries + one trailing null entry
        setDictionaryBlockData(rowGroupDictionaryData, rowGroupDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 1);
    } else {
        // there is no row group dictionary so use the stripe dictionary
        setDictionaryBlockData(stripeDictionaryData, stripeDictionaryOffsetVector, stripeDictionarySize + 1);
    }
    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) RowGroupDictionaryLengthInputStream(com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Example 43 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class SliceDictionaryBatchStreamReader method readBlock.

/**
 * Produces a {@code DictionaryBlock} for the next {@code nextBatchSize} positions.
 *
 * <p>Opens the row group on demand, consumes any pending {@code readOffset} by skipping the
 * present, in-dictionary and data streams, then fills an id vector pointing into
 * {@code dictionaryBlock}. Null positions map to the last dictionary entry; ids whose
 * in-dictionary bit is unset are shifted by {@code stripeDictionarySize}. Both
 * {@code readOffset} and {@code nextBatchSize} are zeroed before returning.
 *
 * @return the dictionary block for this batch
 * @throws OrcCorruptionException if a non-null value is expected but no data stream exists
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Translate the pending row offset into a count of non-null values to skip.
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        if (inDictionaryStream != null) {
            inDictionaryStream.skip(readOffset);
        }
        dataStream.skip(readOffset);
    }

    final int batchSize = nextBatchSize;
    int[] ids = new int[batchSize];

    if (dataStream == null) {
        if (presentStream == null) {
            // no nulls are possible, so the values must exist somewhere
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
        // A missing data stream is only legal when the whole batch is null. The ids are
        // left at their default of 0 in that case, exactly as before.
        int unsetBits = presentStream.getUnsetBits(batchSize);
        if (unsetBits != batchSize) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
        }
    } else if (presentStream == null && inDictionaryStream == null) {
        // plain case: every position has a value and all ids refer to the stripe dictionary
        dataStream.next(ids, batchSize);
    } else {
        for (int position = 0; position < batchSize; position++) {
            if (presentStream != null && !presentStream.nextBit()) {
                // null is the last entry in the slice dictionary
                ids[position] = dictionaryBlock.getPositionCount() - 1;
                continue;
            }
            int id = toIntExact(dataStream.next());
            if (inDictionaryStream != null && !inDictionaryStream.nextBit()) {
                // row group dictionary elements are after the main dictionary
                id += stripeDictionarySize;
            }
            ids[position] = id;
        }
    }

    Block result = new DictionaryBlock(batchSize, dictionaryBlock, ids);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : DictionaryBlock(com.facebook.presto.common.block.DictionaryBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 44 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class TimestampBatchStreamReader method readBlock.

/**
 * Reads the next {@code nextBatchSize} timestamp values of the current row group as a block.
 *
 * <p>The row group is opened on demand. A pending {@code readOffset} is first converted to a
 * count of non-null values via the present stream (when there is one) and then skipped in
 * both the seconds and nanos streams. Both {@code readOffset} and {@code nextBatchSize} are
 * zeroed before returning.
 *
 * @return a run-length encoded null block when both value streams are absent or the whole
 *         batch is null; otherwise the result of {@code readNonNullBlock()} or
 *         {@code readNullBlock(...)}
 * @throws OrcCorruptionException if values must be skipped but a value stream is missing, or
 *         if neither value streams nor a present stream exist
 * @throws IOException declared for the underlying stream operations
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // skip ahead the present bit reader, but count the set bits
    // and use this as the skip size for the data readers
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (secondsStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but seconds stream is missing");
        }
        if (nanosStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but nanos stream is missing");
        }
        secondsStream.skip(readOffset);
        nanosStream.skip(readOffset);
    }

    Block result;
    boolean valueStreamsAbsent = secondsStream == null && nanosStream == null;
    if (valueStreamsAbsent) {
        // with no value streams the batch can only consist of nulls
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        result = RunLengthEncodedBlock.create(TIMESTAMP, null, nextBatchSize);
    } else if (presentStream != null) {
        boolean[] nullFlags = new boolean[nextBatchSize];
        int nulls = presentStream.getUnsetBits(nextBatchSize, nullFlags);
        // The zero-null check comes first so that an empty batch takes the same path as before.
        if (nulls == 0) {
            result = readNonNullBlock();
        } else if (nulls == nextBatchSize) {
            result = RunLengthEncodedBlock.create(TIMESTAMP, null, nextBatchSize);
        } else {
            result = readNullBlock(nullFlags);
        }
    } else {
        // no present stream: every position carries a value
        result = readNonNullBlock();
    }

    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) LongArrayBlock(com.facebook.presto.common.block.LongArrayBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException)

Example 45 with OrcCorruptionException

use of com.facebook.presto.orc.OrcCorruptionException in project presto by prestodb.

the class LongDictionaryProvider method loadDictionary.

/**
 * Reads {@code items} dictionary values from the given stream source into a buffer.
 *
 * <p>The supplied {@code dictionaryBuffer} is reused when it is non-null and has room for
 * {@code items} entries; otherwise a new array of exactly {@code items} longs is allocated.
 *
 * @param streamDescriptor descriptor used for the data source id and the error message
 * @param dictionaryDataStream source of the dictionary value stream
 * @param dictionaryBuffer candidate buffer to reuse, may be {@code null}
 * @param items number of dictionary entries to read
 * @return a {@code DictionaryResult} wrapping the filled buffer, constructed with {@code true}
 *         as its second argument
 * @throws OrcCorruptionException if the source yields no stream
 * @throws IOException declared for the underlying stream operations
 */
private DictionaryResult loadDictionary(StreamDescriptor streamDescriptor, InputStreamSource<LongInputStream> dictionaryDataStream, long[] dictionaryBuffer, int items) throws IOException {
    // The stream is opened exactly once per stream descriptor per stripe, so it is not cached.
    LongInputStream values = dictionaryDataStream.openStream();
    if (values == null) {
        throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but data stream is not present for %s", streamDescriptor);
    }

    boolean reusable = dictionaryBuffer != null && dictionaryBuffer.length >= items;
    long[] target = reusable ? dictionaryBuffer : new long[items];

    values.next(target, items);
    return new DictionaryResult(target, true);
}
Also used : OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)53 Block (com.facebook.presto.common.block.Block)12 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)10 BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus)10 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)9 BlockBuilder (com.facebook.presto.spi.block.BlockBuilder)8 LongStreamV2Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV2Checkpoint)6 InputStreamCheckpoint.createInputStreamCheckpoint (com.facebook.presto.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint)5 Slice (io.airlift.slice.Slice)5 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)4 LongStreamV1Checkpoint (com.facebook.presto.orc.checkpoint.LongStreamV1Checkpoint)4 ByteStreamCheckpoint (com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint)3 LongInputStream (com.facebook.presto.orc.stream.LongInputStream)3 Block (com.facebook.presto.spi.block.Block)3 ByteArrayBlock (com.facebook.presto.common.block.ByteArrayBlock)2 LongArrayBlock (com.facebook.presto.common.block.LongArrayBlock)2 VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock)2 DecimalStreamCheckpoint (com.facebook.presto.orc.checkpoint.DecimalStreamCheckpoint)2 DwrfProto (com.facebook.presto.orc.proto.DwrfProto)2 ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream)2