Search in sources :

Example 11 with Stream

use of io.prestosql.orc.metadata.Stream in project hetu-core by openlookeng.

the class AbstractTestValueStream method testWriteValue.

/**
 * Writes the supplied value groups to the output stream under test, then verifies the
 * resulting stream metadata, the recorded checkpoints, and the values read back.
 *
 * <p>The whole cycle is executed three times against the same output stream instance,
 * which is reset at the start of every pass. Values are read back twice: once
 * sequentially, and once by seeking to each group's checkpoint in reverse group order.
 *
 * @param groups value groups to write; a checkpoint is recorded before each group
 * @throws IOException declared for the stream operations invoked by this test
 */
protected void testWriteValue(List<List<T>> groups) throws IOException {
    W outputStream = createValueOutputStream();
    for (int pass = 0; pass < 3; pass++) {
        outputStream.reset();

        // Write phase: retained size must never shrink as more groups are appended.
        long previousRetainedBytes = 0;
        for (List<T> group : groups) {
            outputStream.recordCheckpoint();
            for (T value : group) {
                writeValue(outputStream, value);
            }
            assertTrue(outputStream.getRetainedBytes() >= previousRetainedBytes);
            previousRetainedBytes = outputStream.getRetainedBytes();
        }
        outputStream.close();

        // Serialize the stream and check the metadata it reports about itself.
        DynamicSliceOutput serialized = new DynamicSliceOutput(1000);
        StreamDataOutput dataOutput = outputStream.getStreamDataOutput(new OrcColumnId(33));
        dataOutput.writeData(serialized);

        Stream streamMetadata = dataOutput.getStream();
        assertEquals(streamMetadata.getStreamKind(), StreamKind.DATA);
        assertEquals(streamMetadata.getColumnId(), new OrcColumnId(33));
        assertEquals(streamMetadata.getLength(), serialized.size());

        List<C> checkpoints = outputStream.getCheckpoints();
        assertEquals(checkpoints.size(), groups.size());

        // Sequential read-back of every group in write order.
        R valueStream = createValueStream(serialized.slice());
        for (List<T> group : groups) {
            int position = 0;
            for (T expected : group) {
                position++;
                T actual = readValue(valueStream);
                if (actual.equals(expected)) {
                    continue;
                }
                // Only reached on a mismatch, so the message is built lazily.
                assertEquals(actual, expected, "index=" + position);
            }
        }

        // Random-access read-back: seek to each checkpoint, last group first.
        int groupIndex = groups.size();
        while (groupIndex > 0) {
            groupIndex--;
            valueStream.seekToCheckpoint(checkpoints.get(groupIndex));
            for (T expected : groups.get(groupIndex)) {
                T actual = readValue(valueStream);
                if (actual.equals(expected)) {
                    continue;
                }
                assertEquals(actual, expected);
            }
        }
    }
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) Stream(io.prestosql.orc.metadata.Stream) StreamCheckpoint(io.prestosql.orc.checkpoint.StreamCheckpoint)

Example 12 with Stream

use of io.prestosql.orc.metadata.Stream in project hetu-core by openlookeng.

the class ByteColumnWriter method getIndexStreams.

/**
 * Produces the row-index stream for this byte column.
 *
 * <p>One {@link RowGroupIndex} is built per entry of {@code rowGroupColumnStatistics},
 * combining that row group's statistics with the position list derived from its data
 * checkpoint and, when present checkpoints exist, its present checkpoint. The indexes
 * are serialized by {@code metadataWriter} and wrapped in a single
 * {@link StreamDataOutput} of kind {@code ROW_INDEX}.
 *
 * @param metadataWriter writer used to serialize the row group indexes into a slice
 * @return a one-element list holding the row-index stream output
 * @throws IOException declared for the metadata serialization
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    // Guarded by the writer's closed flag before any index work is done.
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    List<ByteStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    for (int counter = 0; counter < rowGroupColumnStatistics.size(); counter++) {
        // Lambdas can only capture effectively final locals, hence the copy of the counter.
        final int rowGroup = counter;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        ByteStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(list -> list.get(rowGroup));
        indexBuilder.add(new RowGroupIndex(
                createByteColumnPositionList(compressed, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    return ImmutableList.of(new StreamDataOutput(
            rowIndexes,
            new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false)));
}
Also used : ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) ByteStreamCheckpoint(io.prestosql.orc.checkpoint.ByteStreamCheckpoint) StreamDataOutput(io.prestosql.orc.stream.StreamDataOutput) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) ByteStreamCheckpoint(io.prestosql.orc.checkpoint.ByteStreamCheckpoint) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ByteOutputStream(io.prestosql.orc.stream.ByteOutputStream) PresentOutputStream(io.prestosql.orc.stream.PresentOutputStream) Stream(io.prestosql.orc.metadata.Stream)

Example 13 with Stream

use of io.prestosql.orc.metadata.Stream in project hetu-core by openlookeng.

the class LongColumnWriter method getIndexStreams.

/**
 * Produces the row-index stream for this long column.
 *
 * <p>One {@link RowGroupIndex} is built per entry of {@code rowGroupColumnStatistics},
 * combining that row group's statistics with the position list derived from its data
 * checkpoint and, when present checkpoints exist, its present checkpoint. The indexes
 * are serialized by {@code metadataWriter} and wrapped in a single
 * {@link StreamDataOutput} of kind {@code ROW_INDEX}.
 *
 * @param metadataWriter writer used to serialize the row group indexes into a slice
 * @return a one-element list holding the row-index stream output
 * @throws IOException declared for the metadata serialization
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    // Guarded by the writer's closed flag before any index work is done.
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    List<LongStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    for (int counter = 0; counter < rowGroupColumnStatistics.size(); counter++) {
        // Lambdas can only capture effectively final locals, hence the copy of the counter.
        final int rowGroup = counter;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        LongStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(list -> list.get(rowGroup));
        indexBuilder.add(new RowGroupIndex(
                createLongColumnPositionList(compressed, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    return ImmutableList.of(new StreamDataOutput(
            rowIndexes,
            new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false)));
}
Also used : ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(io.prestosql.orc.stream.StreamDataOutput) LongStreamCheckpoint(io.prestosql.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) LongStreamCheckpoint(io.prestosql.orc.checkpoint.LongStreamCheckpoint) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) LongOutputStream(io.prestosql.orc.stream.LongOutputStream) PresentOutputStream(io.prestosql.orc.stream.PresentOutputStream) Stream(io.prestosql.orc.metadata.Stream)

Example 14 with Stream

use of io.prestosql.orc.metadata.Stream in project hetu-core by openlookeng.

the class SliceDictionaryColumnWriter method getIndexStreams.

/**
 * Produces the row-index stream for this dictionary-encoded slice column.
 *
 * <p>When the writer is in direct-encoded mode the request is forwarded unchanged to
 * {@code directColumnWriter}. Otherwise one {@link RowGroupIndex} is built per entry of
 * {@code rowGroups}, combining that row group's statistics with the position list
 * derived from its data checkpoint and, when present checkpoints exist, its present
 * checkpoint. The indexes are serialized by {@code metadataWriter} and wrapped in a
 * single {@link StreamDataOutput} of kind {@code ROW_INDEX}.
 *
 * @param metadataWriter writer used to serialize the row group indexes into a slice
 * @return the direct writer's index streams, or a one-element list holding the
 *         row-index stream output
 * @throws IOException declared for the metadata serialization
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    // Guarded by the writer's closed flag before any index work is done.
    checkState(closed);

    if (directEncoded) {
        return directColumnWriter.getIndexStreams(metadataWriter);
    }

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    List<LongStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    for (int counter = 0; counter < rowGroups.size(); counter++) {
        // Lambdas can only capture effectively final locals, hence the copy of the counter.
        final int rowGroup = counter;
        ColumnStatistics statistics = rowGroups.get(rowGroup).getColumnStatistics();
        LongStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(list -> list.get(rowGroup));
        // The first argument is true for any compression setting other than NONE.
        indexBuilder.add(new RowGroupIndex(
                createSliceColumnPositionList(compression != NONE, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    return ImmutableList.of(new StreamDataOutput(
            rowIndexes,
            new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false)));
}
Also used : ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(io.prestosql.orc.stream.StreamDataOutput) LongStreamCheckpoint(io.prestosql.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) LongStreamCheckpoint(io.prestosql.orc.checkpoint.LongStreamCheckpoint) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) LongOutputStream(io.prestosql.orc.stream.LongOutputStream) LongOutputStream.createLengthOutputStream(io.prestosql.orc.stream.LongOutputStream.createLengthOutputStream) PresentOutputStream(io.prestosql.orc.stream.PresentOutputStream) ByteArrayOutputStream(io.prestosql.orc.stream.ByteArrayOutputStream) Stream(io.prestosql.orc.metadata.Stream)

Example 15 with Stream

use of io.prestosql.orc.metadata.Stream in project hetu-core by openlookeng.

the class BooleanColumnWriter method getIndexStreams.

/**
 * Produces the row-index stream for this boolean column.
 *
 * <p>One {@link RowGroupIndex} is built per entry of {@code rowGroupColumnStatistics},
 * combining that row group's statistics with the position list derived from its data
 * checkpoint and, when present checkpoints exist, its present checkpoint. The indexes
 * are serialized by {@code metadataWriter} and wrapped in a single
 * {@link StreamDataOutput} of kind {@code ROW_INDEX}.
 *
 * @param metadataWriter writer used to serialize the row group indexes into a slice
 * @return a one-element list holding the row-index stream output
 * @throws IOException declared for the metadata serialization
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter) throws IOException {
    // Guarded by the writer's closed flag before any index work is done.
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    List<BooleanStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    for (int counter = 0; counter < rowGroupColumnStatistics.size(); counter++) {
        // Lambdas can only capture effectively final locals, hence the copy of the counter.
        final int rowGroup = counter;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        BooleanStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(list -> list.get(rowGroup));
        indexBuilder.add(new RowGroupIndex(
                createBooleanColumnPositionList(compressed, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    return ImmutableList.of(new StreamDataOutput(
            rowIndexes,
            new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false)));
}
Also used : ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(io.prestosql.orc.stream.StreamDataOutput) BooleanStreamCheckpoint(io.prestosql.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(io.prestosql.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(io.prestosql.orc.stream.PresentOutputStream) BooleanOutputStream(io.prestosql.orc.stream.BooleanOutputStream) Stream(io.prestosql.orc.metadata.Stream)

Aggregations

Stream (io.prestosql.orc.metadata.Stream): 27, ArrayList (java.util.ArrayList): 20, List (java.util.List): 20, ImmutableList (com.google.common.collect.ImmutableList): 19, ColumnStatistics (io.prestosql.orc.metadata.statistics.ColumnStatistics): 18, Slice (io.airlift.slice.Slice): 17, RowGroupIndex (io.prestosql.orc.metadata.RowGroupIndex): 16, BooleanStreamCheckpoint (io.prestosql.orc.checkpoint.BooleanStreamCheckpoint): 14, StreamDataOutput (io.prestosql.orc.stream.StreamDataOutput): 14, PresentOutputStream (io.prestosql.orc.stream.PresentOutputStream): 12, OrcColumnId (io.prestosql.orc.metadata.OrcColumnId): 11, OrcInputStream (io.prestosql.orc.stream.OrcInputStream): 9, InputStream (java.io.InputStream): 9, ImmutableMap (com.google.common.collect.ImmutableMap): 8, ValueInputStream (io.prestosql.orc.stream.ValueInputStream): 8, ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 7, LongStreamCheckpoint (io.prestosql.orc.checkpoint.LongStreamCheckpoint): 7, LongOutputStream (io.prestosql.orc.stream.LongOutputStream): 7, StreamCheckpoint (io.prestosql.orc.checkpoint.StreamCheckpoint): 6, HashMap (java.util.HashMap): 6