Search in sources:

Example 1 with BooleanStreamCheckpoint

Use of io.trino.orc.checkpoint.BooleanStreamCheckpoint in project trino by trinodb.

In the class ListColumnWriter, method getIndexStreams:

/**
 * Produces the index streams for this column; the writer must already be closed.
 *
 * <p>One {@link RowGroupIndex} is created per entry of {@code rowGroupColumnStatistics}. Each
 * index pairs that row group's statistics with the position list computed from its length
 * checkpoint and, when the present stream exposes checkpoints, its present checkpoint. The
 * serialized row index comes first in the result, followed by the element writer's index
 * streams and then its bloom filters.
 *
 * @param metadataWriter serializes the row group indexes and is passed on to the element writer
 * @return this column's {@code ROW_INDEX} stream followed by the element writer's streams
 * @throws IOException propagated from the writer calls made by this method
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    List<LongStreamCheckpoint> lengths = lengthStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presents = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexes = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below can only capture an effectively final local.
        final int current = rowGroup;
        ColumnStatistics groupStatistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint length = lengths.get(current);
        Optional<BooleanStreamCheckpoint> present = presents.map(all -> all.get(current));
        indexes.add(new RowGroupIndex(createArrayColumnPositionList(compressed, length, present), groupStatistics));
    }

    Slice rowIndexData = metadataWriter.writeRowIndexes(indexes.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, rowIndexData.length(), false);
    return ImmutableList.<StreamDataOutput>builder()
            .add(new StreamDataOutput(rowIndexData, rowIndexStream))
            .addAll(elementWriter.getIndexStreams(metadataWriter))
            .addAll(elementWriter.getBloomFilters(metadataWriter))
            .build();
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(io.trino.orc.stream.PresentOutputStream) Stream(io.trino.orc.metadata.Stream) LongOutputStream(io.trino.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(io.trino.orc.stream.LongOutputStream.createLengthOutputStream)

Example 2 with BooleanStreamCheckpoint

Use of io.trino.orc.checkpoint.BooleanStreamCheckpoint in project trino by trinodb.

In the class SliceDirectColumnWriter, method getIndexStreams:

/**
 * Produces the row-index stream for this column; the writer must already be closed.
 *
 * <p>One {@link RowGroupIndex} is created per entry of {@code rowGroupColumnStatistics}. Each
 * index pairs that row group's statistics with the position list computed from its length
 * checkpoint, its data checkpoint and, when the present stream exposes checkpoints, its
 * present checkpoint.
 *
 * @param metadataWriter serializes the collected row group indexes
 * @return a single-element list holding this column's {@code ROW_INDEX} stream
 * @throws IOException propagated from the writer calls made by this method
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    List<LongStreamCheckpoint> lengths = lengthStream.getCheckpoints();
    List<ByteArrayStreamCheckpoint> payloads = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presents = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexes = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below can only capture an effectively final local.
        final int current = rowGroup;
        ColumnStatistics groupStatistics = rowGroupColumnStatistics.get(current);
        LongStreamCheckpoint length = lengths.get(current);
        ByteArrayStreamCheckpoint payload = payloads.get(current);
        Optional<BooleanStreamCheckpoint> present = presents.map(all -> all.get(current));
        indexes.add(new RowGroupIndex(
                createSliceColumnPositionList(compressed, length, payload, present),
                groupStatistics));
    }

    Slice rowIndexData = metadataWriter.writeRowIndexes(indexes.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, rowIndexData.length(), false);
    return ImmutableList.of(new StreamDataOutput(rowIndexData, rowIndexStream));
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ByteArrayStreamCheckpoint(io.trino.orc.checkpoint.ByteArrayStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ByteArrayStreamCheckpoint(io.trino.orc.checkpoint.ByteArrayStreamCheckpoint) LongStreamCheckpoint(io.trino.orc.checkpoint.LongStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) PresentOutputStream(io.trino.orc.stream.PresentOutputStream) Stream(io.trino.orc.metadata.Stream) LongOutputStream(io.trino.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(io.trino.orc.stream.LongOutputStream.createLengthOutputStream) ByteArrayOutputStream(io.trino.orc.stream.ByteArrayOutputStream)

Example 3 with BooleanStreamCheckpoint

Use of io.trino.orc.checkpoint.BooleanStreamCheckpoint in project trino by trinodb.

In the class BooleanOutputStream, method getCheckpoints:

/**
 * Returns one {@link BooleanStreamCheckpoint} per recorded bit offset; the stream must
 * already be closed.
 *
 * <p>The checkpoint at position {@code i} combines the {@code i}-th entry of
 * {@code checkpointBitOffsets} with the {@code i}-th checkpoint reported by the underlying
 * byte output stream.
 *
 * @return the checkpoints, in the order of {@code checkpointBitOffsets}
 */
@Override
public List<BooleanStreamCheckpoint> getCheckpoints() {
    checkState(closed);

    List<ByteStreamCheckpoint> byteCheckpoints = byteOutputStream.getCheckpoints();
    ImmutableList.Builder<BooleanStreamCheckpoint> result = ImmutableList.builder();

    int group = 0;
    while (group < checkpointBitOffsets.size()) {
        int bitOffset = checkpointBitOffsets.get(group);
        result.add(new BooleanStreamCheckpoint(bitOffset, byteCheckpoints.get(group)));
        group++;
    }
    return result.build();
}
Also used : BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint)

Example 4 with BooleanStreamCheckpoint

Use of io.trino.orc.checkpoint.BooleanStreamCheckpoint in project trino by trinodb.

In the class ByteColumnWriter, method getIndexStreams:

/**
 * Produces the row-index stream for this column; the writer must already be closed.
 *
 * <p>One {@link RowGroupIndex} is created per entry of {@code rowGroupColumnStatistics}. Each
 * index pairs that row group's statistics with the position list computed from its data
 * checkpoint and, when the present stream exposes checkpoints, its present checkpoint.
 *
 * @param metadataWriter serializes the collected row group indexes
 * @return a single-element list holding this column's {@code ROW_INDEX} stream
 * @throws IOException propagated from the writer calls made by this method
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    checkState(closed);

    List<ByteStreamCheckpoint> payloads = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presents = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexes = ImmutableList.builder();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // The lambda below can only capture an effectively final local.
        final int current = rowGroup;
        ColumnStatistics groupStatistics = rowGroupColumnStatistics.get(current);
        ByteStreamCheckpoint payload = payloads.get(current);
        Optional<BooleanStreamCheckpoint> present = presents.map(all -> all.get(current));
        indexes.add(new RowGroupIndex(createByteColumnPositionList(compressed, payload, present), groupStatistics));
    }

    Slice rowIndexData = metadataWriter.writeRowIndexes(indexes.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, rowIndexData.length(), false);
    return ImmutableList.of(new StreamDataOutput(rowIndexData, rowIndexStream));
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint) StreamDataOutput(io.trino.orc.stream.StreamDataOutput) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(io.trino.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) PresentOutputStream(io.trino.orc.stream.PresentOutputStream) Stream(io.trino.orc.metadata.Stream) ByteOutputStream(io.trino.orc.stream.ByteOutputStream)

Example 5 with BooleanStreamCheckpoint

Use of io.trino.orc.checkpoint.BooleanStreamCheckpoint in project trino by trinodb.

In the class TestBooleanOutputStream, method testWriteBoolean:

/**
 * Checks that writing runs of {@code true} through {@code writeBooleans} yields the same
 * checkpoints and the same output bytes as writing each value through {@code writeBoolean}.
 *
 * <p>Each scenario is a list of run lengths, and a checkpoint is recorded after every run.
 * The same buffer and stream are reset and reused for the second pass.
 */
@Test
public void testWriteBoolean() {
    List<List<Integer>> scenarios = ImmutableList.of(
            ImmutableList.of(149, 317, 2),
            ImmutableList.of(2),
            ImmutableList.of(1, 2, 4, 0, 8),
            ImmutableList.of(1, 4, 8, 1024, 10000),
            ImmutableList.of(14000, 1, 2));

    for (List<Integer> runLengths : scenarios) {
        OrcOutputBuffer buffer = new OrcOutputBuffer(NONE, 1024);
        BooleanOutputStream output = new BooleanOutputStream(buffer);

        // First pass: write each run with a single bulk call.
        for (int runLength : runLengths) {
            output.writeBooleans(runLength, true);
            output.recordCheckpoint();
        }
        output.close();
        List<BooleanStreamCheckpoint> bulkCheckpoints = output.getCheckpoints();
        DynamicSliceOutput bulkSink = new DynamicSliceOutput(128);
        buffer.writeDataTo(bulkSink);
        Slice bulkBytes = bulkSink.slice();

        // Second pass: reset both objects, then write the same runs one value at a time.
        buffer.reset();
        output.reset();
        for (int runLength : runLengths) {
            for (int written = 0; written < runLength; written++) {
                output.writeBoolean(true);
            }
            output.recordCheckpoint();
        }
        output.close();
        List<BooleanStreamCheckpoint> perValueCheckpoints = output.getCheckpoints();
        DynamicSliceOutput perValueSink = new DynamicSliceOutput(128);
        buffer.writeDataTo(perValueSink);
        Slice perValueBytes = perValueSink.slice();

        // Both passes must agree on checkpoint count, every checkpoint, and the raw bytes.
        assertEquals(bulkCheckpoints.size(), perValueCheckpoints.size());
        for (int position = 0; position < bulkCheckpoints.size(); position++) {
            assertTrue(checkpointsEqual(bulkCheckpoints.get(position), perValueCheckpoints.get(position)));
        }
        assertEquals(bulkBytes, perValueBytes);
    }
}
Also used : OrcOutputBuffer(io.trino.orc.OrcOutputBuffer) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) Slice(io.airlift.slice.Slice) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) BooleanStreamCheckpoint(io.trino.orc.checkpoint.BooleanStreamCheckpoint) ByteStreamCheckpoint(io.trino.orc.checkpoint.ByteStreamCheckpoint) Test(org.testng.annotations.Test)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)14 BooleanStreamCheckpoint (io.trino.orc.checkpoint.BooleanStreamCheckpoint)14 Slice (io.airlift.slice.Slice)13 List (java.util.List)13 RowGroupIndex (io.trino.orc.metadata.RowGroupIndex)12 Stream (io.trino.orc.metadata.Stream)12 ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics)12 PresentOutputStream (io.trino.orc.stream.PresentOutputStream)12 StreamDataOutput (io.trino.orc.stream.StreamDataOutput)12 ArrayList (java.util.ArrayList)12 LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)7 LongOutputStream (io.trino.orc.stream.LongOutputStream)7 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)6 LongOutputStream.createLengthOutputStream (io.trino.orc.stream.LongOutputStream.createLengthOutputStream)4 ByteStreamCheckpoint (io.trino.orc.checkpoint.ByteStreamCheckpoint)3 ByteArrayOutputStream (io.trino.orc.stream.ByteArrayOutputStream)2 DynamicSliceOutput (io.airlift.slice.DynamicSliceOutput)1 OrcOutputBuffer (io.trino.orc.OrcOutputBuffer)1 ByteArrayStreamCheckpoint (io.trino.orc.checkpoint.ByteArrayStreamCheckpoint)1 DecimalStreamCheckpoint (io.trino.orc.checkpoint.DecimalStreamCheckpoint)1