Search in sources :

Example 1 with StreamDataOutput

Use of com.facebook.presto.orc.stream.StreamDataOutput in project presto by prestodb.

In the class ListColumnWriter, method getIndexStreams:

/**
 * Produces the index streams for this column: a single ROW_INDEX stream assembled from the
 * per-row-group statistics and stream checkpoints, followed by every index stream reported
 * by the element writer.
 *
 * @return the ROW_INDEX stream of this column, then the element writer's index streams
 * @throws IOException declared by the signature; propagated from the calls made here
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    // Guard on the closed flag before any checkpoint is read.
    checkState(closed);

    List<LongStreamCheckpoint> lengthPositions = lengthStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexEntries = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint lengthPosition = lengthPositions.get(current);
        // Present checkpoints are optional; pick this row group's entry only when they exist.
        Optional<BooleanStreamCheckpoint> presentPosition = presentPositions.map(all -> all.get(current));
        indexEntries.add(new RowGroupIndex(
                createArrayColumnPositionList(compressed, lengthPosition, presentPosition),
                statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexEntries.build());
    StreamDataOutput rowIndexOutput = new StreamDataOutput(
            rowIndexSlice,
            new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false));

    // This column's row index comes first, then whatever the nested element writer produces.
    return ImmutableList.<StreamDataOutput>builder()
            .add(rowIndexOutput)
            .addAll(elementWriter.getIndexStreams())
            .build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) LongOutputStream(com.facebook.presto.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(com.facebook.presto.orc.stream.LongOutputStream.createLengthOutputStream) Stream(com.facebook.presto.orc.metadata.Stream)

Example 2 with StreamDataOutput

Use of com.facebook.presto.orc.stream.StreamDataOutput in project presto by prestodb.

In the class MapColumnWriter, method getIndexStreams:

/**
 * Produces the index streams for this column: a single ROW_INDEX stream assembled from the
 * per-row-group statistics and stream checkpoints, followed by the index streams of the key
 * writer and then those of the value writer.
 *
 * @return the ROW_INDEX stream of this column, then the key writer's and value writer's index streams
 * @throws IOException declared by the signature; propagated from the calls made here
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    // Guard on the closed flag before any checkpoint is read.
    checkState(closed);

    List<LongStreamCheckpoint> lengthPositions = lengthStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexEntries = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint lengthPosition = lengthPositions.get(current);
        // Present checkpoints are optional; pick this row group's entry only when they exist.
        Optional<BooleanStreamCheckpoint> presentPosition = presentPositions.map(all -> all.get(current));
        indexEntries.add(new RowGroupIndex(
                createArrayColumnPositionList(compressed, lengthPosition, presentPosition),
                statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexEntries.build());
    StreamDataOutput rowIndexOutput = new StreamDataOutput(
            rowIndexSlice,
            new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false));

    // Order matters to callers: own row index, then key streams, then value streams.
    return ImmutableList.<StreamDataOutput>builder()
            .add(rowIndexOutput)
            .addAll(keyWriter.getIndexStreams())
            .addAll(valueWriter.getIndexStreams())
            .build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) LongOutputStream(com.facebook.presto.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(com.facebook.presto.orc.stream.LongOutputStream.createLengthOutputStream) Stream(com.facebook.presto.orc.metadata.Stream)

Example 3 with StreamDataOutput

Use of com.facebook.presto.orc.stream.StreamDataOutput in project presto by prestodb.

In the class SliceDirectColumnWriter, method getIndexStreams:

/**
 * Produces the index stream for this column: a single ROW_INDEX stream assembled from the
 * per-row-group statistics together with the length, data and (optional) present checkpoints.
 *
 * @return a one-element list holding the ROW_INDEX stream of this column
 * @throws IOException declared by the signature; propagated from the calls made here
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    // Guard on the closed flag before any checkpoint is read.
    checkState(closed);

    List<LongStreamCheckpoint> lengthPositions = lengthStream.getCheckpoints();
    List<ByteArrayStreamCheckpoint> dataPositions = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexEntries = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint lengthPosition = lengthPositions.get(current);
        ByteArrayStreamCheckpoint dataPosition = dataPositions.get(current);
        // Present checkpoints are optional; pick this row group's entry only when they exist.
        Optional<BooleanStreamCheckpoint> presentPosition = presentPositions.map(all -> all.get(current));
        indexEntries.add(new RowGroupIndex(
                createSliceColumnPositionList(compressed, lengthPosition, dataPosition, presentPosition),
                statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexEntries.build());
    Stream rowIndexStream = new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false);
    StreamDataOutput rowIndexOutput = new StreamDataOutput(rowIndexSlice, rowIndexStream);
    return ImmutableList.of(rowIndexOutput);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ByteArrayStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteArrayStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) ByteArrayStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteArrayStreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) LongOutputStream(com.facebook.presto.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(com.facebook.presto.orc.stream.LongOutputStream.createLengthOutputStream) Stream(com.facebook.presto.orc.metadata.Stream) ByteArrayOutputStream(com.facebook.presto.orc.stream.ByteArrayOutputStream)

Example 4 with StreamDataOutput

Use of com.facebook.presto.orc.stream.StreamDataOutput in project presto by prestodb.

In the class ByteColumnWriter, method getIndexStreams:

/**
 * Produces the index stream for this column: a single ROW_INDEX stream assembled from the
 * per-row-group statistics together with the data and (optional) present checkpoints.
 *
 * @return a one-element list holding the ROW_INDEX stream of this column
 * @throws IOException declared by the signature; propagated from the calls made here
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    // Guard on the closed flag before any checkpoint is read.
    checkState(closed);

    List<ByteStreamCheckpoint> dataPositions = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentPositions = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexEntries = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int current = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(current);
        ByteStreamCheckpoint dataPosition = dataPositions.get(current);
        // Present checkpoints are optional; pick this row group's entry only when they exist.
        Optional<BooleanStreamCheckpoint> presentPosition = presentPositions.map(all -> all.get(current));
        indexEntries.add(new RowGroupIndex(
                createByteColumnPositionList(compressed, dataPosition, presentPosition),
                statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexEntries.build());
    Stream rowIndexStream = new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false);
    StreamDataOutput rowIndexOutput = new StreamDataOutput(rowIndexSlice, rowIndexStream);
    return ImmutableList.of(rowIndexOutput);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ByteStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) ByteStreamCheckpoint(com.facebook.presto.orc.checkpoint.ByteStreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) Stream(com.facebook.presto.orc.metadata.Stream) ByteOutputStream(com.facebook.presto.orc.stream.ByteOutputStream)

Example 5 with StreamDataOutput

Use of com.facebook.presto.orc.stream.StreamDataOutput in project presto by prestodb.

In the class TestSliceDictionaryColumnWriter, method getDictionaryKeys:

/**
 * Writes {@code values} through a dictionary column writer, one row group per batch of at most
 * 10,000 values, then reads the dictionary entries back from the writer's data streams.
 *
 * @param values the strings to write, in order
 * @param orcEncoding encoding passed to the writer factory and to the length-stream reader
 * @param sortDictionaryKeys flag forwarded to the writer factory
 * @return the dictionary keys decoded as UTF-8 strings, in the order they are read from the streams
 * @throws IOException declared by the signature; propagated from the writer and stream calls
 */
private List<String> getDictionaryKeys(List<String> values, OrcEncoding orcEncoding, boolean sortDictionaryKeys) throws IOException {
    DictionaryColumnWriter writer = getDictionaryColumnWriter(orcEncoding, sortDictionaryKeys);

    // Feed the values in consecutive batches; each batch becomes its own row group.
    int position = 0;
    while (position < values.size()) {
        int batchEnd = Math.min(position + 10_000, values.size());
        BlockBuilder batch = VARCHAR.createBlockBuilder(null, 10_000);
        for (String value : values.subList(position, batchEnd)) {
            VARCHAR.writeSlice(batch, utf8Slice(value));
        }
        position = batchEnd;

        writer.beginRowGroup();
        writer.writeBlock(batch);
        writer.finishRowGroup();
    }
    writer.close();

    List<StreamDataOutput> streams = writer.getDataStreams();
    int dictionarySize = writer.getColumnEncodings().get(COLUMN_ID).getDictionarySize();
    ByteArrayInputStream dictionaryData = new ByteArrayInputStream(getOrcInputStream(streams, DICTIONARY_DATA));
    LongInputStream dictionaryLengths = getDictionaryLengthStream(streams, orcEncoding);

    // Each key is read as a length from the length stream, then that many bytes from the data stream.
    List<String> keys = new ArrayList<>(dictionarySize);
    while (keys.size() < dictionarySize) {
        int keyLength = toIntExact(dictionaryLengths.next());
        keys.add(new String(dictionaryData.next(keyLength), UTF_8));
    }
    return keys;
}
Also used : ByteArrayInputStream(com.facebook.presto.orc.stream.ByteArrayInputStream) ArrayList(java.util.ArrayList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Aggregations

StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput)18 ArrayList (java.util.ArrayList)16 Slice (io.airlift.slice.Slice)14 Stream (com.facebook.presto.orc.metadata.Stream)13 ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)13 ImmutableList (com.google.common.collect.ImmutableList)13 List (java.util.List)13 BooleanStreamCheckpoint (com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint)12 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)12 PresentOutputStream (com.facebook.presto.orc.stream.PresentOutputStream)12 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)7 LongOutputStream (com.facebook.presto.orc.stream.LongOutputStream)7 LongOutputStream.createLengthOutputStream (com.facebook.presto.orc.stream.LongOutputStream.createLengthOutputStream)3 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)2 Collectors.toList (java.util.stream.Collectors.toList)2 Page (com.facebook.presto.common.Page)1 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)1 DataOutput (com.facebook.presto.common.io.DataOutput)1 DataOutput.createDataOutput (com.facebook.presto.common.io.DataOutput.createDataOutput)1 DataSink (com.facebook.presto.common.io.DataSink)1