Search in sources :

Example 1 with RowGroupDictionaryLengthInputStream

Use of com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream in the project presto by prestodb.

From the class SliceDictionarySelectiveReader, method openRowGroup.

/**
 * Prepares the reader for a new row group.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the stripe dictionary has not been opened yet, read its lengths and values into
 *       {@code dictionaryData} / {@code dictionaryOffsetVector} (or install the shared empty
 *       arrays when the stripe dictionary has no entries).</li>
 *   <li>If a row group dictionary length stream is present, append the row group dictionary
 *       after the stripe dictionary entries.</li>
 *   <li>Record the resulting dictionary size, reset the evaluation status, and open the
 *       present, in-dictionary and data streams.</li>
 * </ol>
 *
 * @throws IOException if a stream cannot be opened or read; an {@code OrcCorruptionException}
 *         is thrown when the stripe dictionary is non-empty but has no length stream
 */
private void openRowGroup() throws IOException {
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize <= 0) {
            // nothing to load for this stripe: fall back to the shared empty arrays
            dictionaryData = EMPTY_DICTIONARY_DATA;
            dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        } else {
            // grow the scratch array holding per-entry lengths when it is too small
            if (stripeDictionarySize > stripeDictionaryLength.length) {
                stripeDictionaryLength = new int[stripeDictionarySize];
            }

            LongInputStream stripeLengths = stripeDictionaryLengthStreamSource.openStream();
            if (stripeLengths == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            stripeLengths.nextIntVector(stripeDictionarySize, stripeDictionaryLength, 0);

            // total number of value bytes across all stripe dictionary entries
            long stripeValueBytes = 0;
            int entry = 0;
            while (entry < stripeDictionarySize) {
                stripeValueBytes += stripeDictionaryLength[entry];
                entry++;
            }

            dictionaryData = ensureCapacity(dictionaryData, toIntExact(stripeValueBytes));
            // two slots more than the number of stripe entries
            dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector, stripeDictionarySize + 2);

            ByteArrayInputStream stripeValues = stripeDictionaryDataStreamSource.openStream();
            readDictionary(stripeValues, stripeDictionarySize, stripeDictionaryLength, 0, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
        }
        // Without a row group dictionary the stripe dictionary only has to be wrapped once per
        // stripe, because wrapping a dictionary is very expensive.
        dictionaryWrapped = false;
    }

    // number of entries contributed by the row group dictionary; stays 0 when there is none
    int rowGroupEntryCount = 0;
    RowGroupDictionaryLengthInputStream rowGroupLengths = rowGroupDictionaryLengthStreamSource.openStream();
    if (rowGroupLengths != null) {
        rowGroupEntryCount = rowGroupLengths.getEntryCount();
        rowGroupDictionaryLength = ensureCapacity(rowGroupDictionaryLength, rowGroupEntryCount);
        rowGroupLengths.nextIntVector(rowGroupEntryCount, rowGroupDictionaryLength, 0);

        long rowGroupValueBytes = 0;
        int entry = 0;
        while (entry < rowGroupEntryCount) {
            rowGroupValueBytes += rowGroupDictionaryLength[entry];
            entry++;
        }

        // the row group values are written after the stripe values, so the existing contents
        // of both arrays are preserved while growing them
        int requiredDataBytes = dictionaryOffsetVector[stripeDictionarySize] + toIntExact(rowGroupValueBytes);
        dictionaryData = ensureCapacity(dictionaryData, requiredDataBytes, MEDIUM, PRESERVE);
        dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector, stripeDictionarySize + rowGroupEntryCount + 2, MEDIUM, PRESERVE);
        dictionaryWrapped = false;

        ByteArrayInputStream rowGroupValues = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(rowGroupValues, rowGroupEntryCount, rowGroupDictionaryLength, stripeDictionarySize, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
    }

    // stripe entries, plus row group entries (if any), plus one extra entry
    currentDictionarySize = stripeDictionarySize + rowGroupEntryCount + 1;
    initiateEvaluationStatus(currentDictionarySize);

    // the trailing offset slot repeats the offset that precedes it
    dictionaryOffsetVector[currentDictionarySize] = dictionaryOffsetVector[currentDictionarySize - 1];
    stripeDictionaryOpen = true;

    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) RowGroupDictionaryLengthInputStream(com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Example 2 with RowGroupDictionaryLengthInputStream

Use of com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream in the project presto by prestodb.

From the class SliceDictionaryBatchStreamReader, method openRowGroup.

/**
 * Prepares the reader for a new row group.
 *
 * <p>If the stripe dictionary has not been opened yet, its lengths and values are read into
 * freshly allocated arrays (or the shared empty arrays are installed when the stripe
 * dictionary has no entries). If a row group dictionary length stream is present, a combined
 * dictionary (stripe entries, then row group entries, then a null entry) is built in new
 * arrays; otherwise the stripe dictionary is used directly. Finally the present,
 * in-dictionary and data streams are opened.
 *
 * @throws IOException if a stream cannot be opened or read; an {@code OrcCorruptionException}
 *         is thrown when the stripe dictionary is non-empty but has no length stream
 */
private void openRowGroup() throws IOException {
    // read the dictionary
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize > 0) {
            // resize the dictionary lengths array if necessary
            if (stripeDictionaryLength.length < stripeDictionarySize) {
                stripeDictionaryLength = new int[stripeDictionarySize];
            }
            // read the lengths
            LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            lengthStream.next(stripeDictionaryLength, stripeDictionarySize);
            long dataLength = 0;
            for (int i = 0; i < stripeDictionarySize; i++) {
                dataLength += stripeDictionaryLength[i];
            }
            // we must always create a new dictionary array because the previous dictionary may still be referenced
            stripeDictionaryData = new byte[toIntExact(dataLength)];
            // add one extra entry for null
            stripeDictionaryOffsetVector = new int[stripeDictionarySize + 2];
            // Report the three stripe-level arrays in a single call. Calling setBytes once per
            // array left only the last value (the offset vector) tracked, so the dictionary
            // data was missing from the memory accounting.
            // NOTE(review): relies on setBytes replacing, not adding to, the tracked value - confirm.
            systemMemoryContext.setBytes(sizeOf(stripeDictionaryLength) + sizeOf(stripeDictionaryData) + sizeOf(stripeDictionaryOffsetVector));
            // read dictionary values
            ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, stripeDictionaryData, stripeDictionaryOffsetVector, maxCodePointCount, isCharType);
        } else {
            stripeDictionaryData = EMPTY_DICTIONARY_DATA;
            stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
    }
    stripeDictionaryOpen = true;
    // read row group dictionary
    RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
    if (dictionaryLengthStream != null) {
        int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();
        // resize the dictionary lengths array if necessary
        if (rowGroupDictionaryLength.length < rowGroupDictionarySize) {
            rowGroupDictionaryLength = new int[rowGroupDictionarySize];
        }
        // read the lengths
        dictionaryLengthStream.next(rowGroupDictionaryLength, rowGroupDictionarySize);
        long dataLength = 0;
        for (int i = 0; i < rowGroupDictionarySize; i++) {
            dataLength += rowGroupDictionaryLength[i];
        }
        // We must always create a new dictionary array because the previous dictionary may still be referenced
        // The first elements of the dictionary are from the stripe dictionary, then the row group dictionary elements, and then a null
        byte[] rowGroupDictionaryData = Arrays.copyOf(stripeDictionaryData, stripeDictionaryOffsetVector[stripeDictionarySize] + toIntExact(dataLength));
        int[] rowGroupDictionaryOffsetVector = Arrays.copyOf(stripeDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 2);
        // read dictionary values
        ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, rowGroupDictionaryData, rowGroupDictionaryOffsetVector, maxCodePointCount, isCharType);
        setDictionaryBlockData(rowGroupDictionaryData, rowGroupDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 1);
    } else {
        // there is no row group dictionary so use the stripe dictionary
        setDictionaryBlockData(stripeDictionaryData, stripeDictionaryOffsetVector, stripeDictionarySize + 1);
    }
    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) RowGroupDictionaryLengthInputStream(com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Aggregations

OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)2 ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream)2 LongInputStream (com.facebook.presto.orc.stream.LongInputStream)2 RowGroupDictionaryLengthInputStream (com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream)2