Search in sources:

Example 1 with ByteArrayInputStream

use of com.facebook.presto.orc.stream.ByteArrayInputStream in project presto by prestodb.

The class TestSliceDictionaryColumnWriter, method getDictionaryKeys.

/**
 * Writes {@code values} through a dictionary column writer, one row group per batch of up to
 * 10,000 entries, and then decodes the dictionary the writer produced.
 *
 * @param values strings to write, in order
 * @param orcEncoding encoding handed to the writer factory and to the dictionary length stream lookup
 * @param sortDictionaryKeys handed through to the writer factory
 * @return the dictionary entries in the order they are read from the DICTIONARY_DATA stream
 * @throws IOException if writing or reading one of the streams fails
 */
private List<String> getDictionaryKeys(List<String> values, OrcEncoding orcEncoding, boolean sortDictionaryKeys) throws IOException {
    DictionaryColumnWriter columnWriter = getDictionaryColumnWriter(orcEncoding, sortDictionaryKeys);

    // Feed the values to the writer batch by batch; each batch becomes its own row group.
    int position = 0;
    while (position < values.size()) {
        int batchEnd = Math.min(position + 10_000, values.size());
        BlockBuilder batch = VARCHAR.createBlockBuilder(null, 10_000);
        for (; position < batchEnd; position++) {
            VARCHAR.writeSlice(batch, utf8Slice(values.get(position)));
        }
        columnWriter.beginRowGroup();
        columnWriter.writeBlock(batch);
        columnWriter.finishRowGroup();
    }
    columnWriter.close();

    // Read the dictionary back: the length stream gives the byte count of each key in the data stream.
    List<StreamDataOutput> dataStreams = columnWriter.getDataStreams();
    int keyCount = columnWriter.getColumnEncodings().get(COLUMN_ID).getDictionarySize();
    ByteArrayInputStream keyBytes = new ByteArrayInputStream(getOrcInputStream(dataStreams, DICTIONARY_DATA));
    LongInputStream keyLengths = getDictionaryLengthStream(dataStreams, orcEncoding);

    List<String> keys = new ArrayList<>(keyCount);
    while (keys.size() < keyCount) {
        int byteCount = toIntExact(keyLengths.next());
        keys.add(new String(keyBytes.next(byteCount), UTF_8));
    }
    return keys;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) ArrayList(java.util.ArrayList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Example 2 with ByteArrayInputStream

use of com.facebook.presto.orc.stream.ByteArrayInputStream in project presto by prestodb.

The class SliceDictionarySelectiveReader, method openRowGroup.

/**
 * Prepares this reader for a new row group.
 * <p>
 * If the stripe dictionary has not been opened yet, its lengths and bytes are read into
 * {@code dictionaryData} / {@code dictionaryOffsetVector}. If a row-group dictionary length stream
 * is present, the row-group dictionary is then read into the same arrays, with
 * {@code stripeDictionarySize} passed to {@code readDictionary} as the starting position.
 * Finally the present, in-dictionary and data streams are opened.
 *
 * @throws IOException if a stream cannot be opened or read
 * @throws OrcCorruptionException if the stripe dictionary is non-empty but has no length stream
 * @throws ArithmeticException if the total dictionary byte length does not fit in an {@code int}
 */
private void openRowGroup() throws IOException {
    // read the dictionary
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize > 0) {
            // resize the dictionary lengths array if necessary
            if (stripeDictionaryLength.length < stripeDictionarySize) {
                stripeDictionaryLength = new int[stripeDictionarySize];
            }
            // read the lengths
            LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            lengthStream.nextIntVector(stripeDictionarySize, stripeDictionaryLength, 0);
            // accumulate in a long so the sum itself cannot wrap; toIntExact below rejects totals beyond int range
            long dataLength = 0;
            for (int i = 0; i < stripeDictionarySize; i++) {
                dataLength += stripeDictionaryLength[i];
            }
            dictionaryData = ensureCapacity(dictionaryData, toIntExact(dataLength));
            // two slots beyond the entry count; presumably the closing offset plus the extra entry set after the row-group branch - confirm
            dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector, stripeDictionarySize + 2);
            // read dictionary values
            ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
        } else {
            dictionaryData = EMPTY_DICTIONARY_DATA;
            dictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
        // If there is no rowgroup dictionary, we only need to wrap the stripe dictionary once per stripe because wrapping dictionary is very expensive.
        dictionaryWrapped = false;
    }
    // read row group dictionary
    RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
    if (dictionaryLengthStream != null) {
        int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();
        rowGroupDictionaryLength = ensureCapacity(rowGroupDictionaryLength, rowGroupDictionarySize);
        // read the lengths
        dictionaryLengthStream.nextIntVector(rowGroupDictionarySize, rowGroupDictionaryLength, 0);
        long dataLength = 0;
        for (int i = 0; i < rowGroupDictionarySize; i++) {
            dataLength += rowGroupDictionaryLength[i];
        }
        // Add the stripe dictionary's end offset and the row-group byte count as a long and narrow once,
        // so a combined size beyond int range fails in toIntExact instead of silently wrapping negative.
        dictionaryData = ensureCapacity(dictionaryData, toIntExact(dictionaryOffsetVector[stripeDictionarySize] + dataLength), MEDIUM, PRESERVE);
        dictionaryOffsetVector = ensureCapacity(dictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 2, MEDIUM, PRESERVE);
        // the dictionary contents changed, so any previously wrapped dictionary is stale
        dictionaryWrapped = false;
        // read dictionary values
        ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, dictionaryData, dictionaryOffsetVector, maxCodePointCount, isCharType);
        currentDictionarySize = stripeDictionarySize + rowGroupDictionarySize + 1;
        initiateEvaluationStatus(stripeDictionarySize + rowGroupDictionarySize + 1);
    } else {
        // there is no row group dictionary so use the stripe dictionary
        currentDictionarySize = stripeDictionarySize + 1;
        initiateEvaluationStatus(stripeDictionarySize + 1);
    }
    // Repeat the previous offset so the extra last entry has zero length (presumably the null entry - confirm).
    dictionaryOffsetVector[currentDictionarySize] = dictionaryOffsetVector[currentDictionarySize - 1];
    stripeDictionaryOpen = true;
    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) RowGroupDictionaryLengthInputStream(com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Example 3 with ByteArrayInputStream

use of com.facebook.presto.orc.stream.ByteArrayInputStream in project presto by prestodb.

The class SliceDictionaryBatchStreamReader, method openRowGroup.

/**
 * Prepares this reader for a new row group.
 * <p>
 * If the stripe dictionary has not been opened yet, it is read into freshly allocated
 * {@code stripeDictionaryData} / {@code stripeDictionaryOffsetVector} arrays. If a row-group
 * dictionary length stream is present, a combined copy (stripe entries followed by row-group
 * entries) is built and installed via {@code setDictionaryBlockData}; otherwise the stripe
 * dictionary arrays are installed directly. Finally the present, in-dictionary and data streams
 * are opened.
 *
 * @throws IOException if a stream cannot be opened or read
 * @throws OrcCorruptionException if the stripe dictionary is non-empty but has no length stream
 * @throws ArithmeticException if the total dictionary byte length does not fit in an {@code int}
 */
private void openRowGroup() throws IOException {
    // read the dictionary
    if (!stripeDictionaryOpen) {
        if (stripeDictionarySize > 0) {
            // resize the dictionary lengths array if necessary
            if (stripeDictionaryLength.length < stripeDictionarySize) {
                stripeDictionaryLength = new int[stripeDictionarySize];
                // NOTE(review): setBytes is called up to three times in this branch, each with the size of a
                // single array. If setBytes replaces the tracked value rather than adding to it, only the
                // last call's size is retained - confirm this is the intended accounting.
                systemMemoryContext.setBytes(sizeOf(stripeDictionaryLength));
            }
            // read the lengths
            LongInputStream lengthStream = stripeDictionaryLengthStreamSource.openStream();
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Dictionary is not empty but dictionary length stream is not present");
            }
            lengthStream.next(stripeDictionaryLength, stripeDictionarySize);
            // accumulate in a long so the sum itself cannot wrap; toIntExact below rejects totals beyond int range
            long dataLength = 0;
            for (int i = 0; i < stripeDictionarySize; i++) {
                dataLength += stripeDictionaryLength[i];
            }
            // we must always create a new dictionary array because the previous dictionary may still be referenced
            stripeDictionaryData = new byte[toIntExact(dataLength)];
            systemMemoryContext.setBytes(sizeOf(stripeDictionaryData));
            // add one extra entry for null
            stripeDictionaryOffsetVector = new int[stripeDictionarySize + 2];
            systemMemoryContext.setBytes(sizeOf(stripeDictionaryOffsetVector));
            // read dictionary values
            ByteArrayInputStream dictionaryDataStream = stripeDictionaryDataStreamSource.openStream();
            readDictionary(dictionaryDataStream, stripeDictionarySize, stripeDictionaryLength, 0, stripeDictionaryData, stripeDictionaryOffsetVector, maxCodePointCount, isCharType);
        } else {
            stripeDictionaryData = EMPTY_DICTIONARY_DATA;
            stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;
        }
    }
    stripeDictionaryOpen = true;
    // read row group dictionary
    RowGroupDictionaryLengthInputStream dictionaryLengthStream = rowGroupDictionaryLengthStreamSource.openStream();
    if (dictionaryLengthStream != null) {
        int rowGroupDictionarySize = dictionaryLengthStream.getEntryCount();
        // resize the dictionary lengths array if necessary
        if (rowGroupDictionaryLength.length < rowGroupDictionarySize) {
            rowGroupDictionaryLength = new int[rowGroupDictionarySize];
        }
        // read the lengths
        dictionaryLengthStream.next(rowGroupDictionaryLength, rowGroupDictionarySize);
        long dataLength = 0;
        for (int i = 0; i < rowGroupDictionarySize; i++) {
            dataLength += rowGroupDictionaryLength[i];
        }
        // We must always create a new dictionary array because the previous dictionary may still be referenced
        // The first elements of the dictionary are from the stripe dictionary, then the row group dictionary elements, and then a null
        // Add the stripe dictionary's end offset and the row-group byte count as a long and narrow once,
        // so a combined size beyond int range fails in toIntExact instead of wrapping to a negative array length.
        byte[] rowGroupDictionaryData = Arrays.copyOf(stripeDictionaryData, toIntExact(stripeDictionaryOffsetVector[stripeDictionarySize] + dataLength));
        int[] rowGroupDictionaryOffsetVector = Arrays.copyOf(stripeDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 2);
        // read dictionary values
        ByteArrayInputStream dictionaryDataStream = rowGroupDictionaryDataStreamSource.openStream();
        readDictionary(dictionaryDataStream, rowGroupDictionarySize, rowGroupDictionaryLength, stripeDictionarySize, rowGroupDictionaryData, rowGroupDictionaryOffsetVector, maxCodePointCount, isCharType);
        setDictionaryBlockData(rowGroupDictionaryData, rowGroupDictionaryOffsetVector, stripeDictionarySize + rowGroupDictionarySize + 1);
    } else {
        // there is no row group dictionary so use the stripe dictionary
        setDictionaryBlockData(stripeDictionaryData, stripeDictionaryOffsetVector, stripeDictionarySize + 1);
    }
    presentStream = presentStreamSource.openStream();
    inDictionaryStream = inDictionaryStreamSource.openStream();
    dataStream = dataStreamSource.openStream();
    rowGroupOpen = true;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) RowGroupDictionaryLengthInputStream(com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Aggregations

ByteArrayInputStream (com.facebook.presto.orc.stream.ByteArrayInputStream): 3, LongInputStream (com.facebook.presto.orc.stream.LongInputStream): 3, OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException): 2, RowGroupDictionaryLengthInputStream (com.facebook.presto.orc.stream.RowGroupDictionaryLengthInputStream): 2, BlockBuilder (com.facebook.presto.common.block.BlockBuilder): 1, StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput): 1, ArrayList (java.util.ArrayList): 1