Search in sources :

Example 1 with VariableWidthBlock

use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.

the class TestGroupByHash method testProperWorkTypesSelected.

/**
 * Checks which work implementation the group-by hash selects for pages built from
 * plain, dictionary-encoded and run-length-encoded blocks, for single-channel BIGINT,
 * single-channel VARCHAR and two-channel inputs.
 */
@Test
public void testProperWorkTypesSelected() {
    Block bigintBlock = BlockAssertions.createLongsBlock(1, 2, 3, 4, 5, 6, 7, 8);
    Block bigintDictionaryBlock = BlockAssertions.createLongDictionaryBlock(0, 8);
    Block bigintRleBlock = BlockAssertions.createRLEBlock(42, 8);
    Block varcharBlock = BlockAssertions.createStringsBlock("1", "2", "3", "4", "5", "6", "7", "8");
    Block varcharDictionaryBlock = BlockAssertions.createStringDictionaryBlock(1, 8);
    Block varcharRleBlock = new RunLengthEncodedBlock(new VariableWidthBlock(1, Slices.EMPTY_SLICE, new int[] { 0, 1 }, Optional.empty()), 8);
    Block bigintBigDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 8, 1000);
    Block bigintSingletonDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 500000, 1);
    // Above Short.MAX_VALUE
    Block bigintHugeDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 500000, 66000);

    // Single BIGINT channel
    Page singleBigintPage = new Page(bigintBlock);
    assertGroupByHashWork(singleBigintPage, ImmutableList.of(BIGINT), BigintGroupByHash.GetGroupIdsWork.class);
    Page singleBigintDictionaryPage = new Page(bigintDictionaryBlock);
    assertGroupByHashWork(singleBigintDictionaryPage, ImmutableList.of(BIGINT), BigintGroupByHash.GetDictionaryGroupIdsWork.class);
    Page singleBigintRlePage = new Page(bigintRleBlock);
    assertGroupByHashWork(singleBigintRlePage, ImmutableList.of(BIGINT), BigintGroupByHash.GetRunLengthEncodedGroupIdsWork.class);

    // Single VARCHAR channel
    Page singleVarcharPage = new Page(varcharBlock);
    assertGroupByHashWork(singleVarcharPage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);
    Page singleVarcharDictionaryPage = new Page(varcharDictionaryBlock);
    assertGroupByHashWork(singleVarcharDictionaryPage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetDictionaryGroupIdsWork.class);
    Page singleVarcharRlePage = new Page(varcharRleBlock);
    assertGroupByHashWork(singleVarcharRlePage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetRunLengthEncodedGroupIdsWork.class);

    // Two dictionary channels
    Page lowCardinalityDictionaryPage = new Page(bigintDictionaryBlock, varcharDictionaryBlock);
    assertGroupByHashWork(lowCardinalityDictionaryPage, ImmutableList.of(BIGINT, VARCHAR), MultiChannelGroupByHash.GetLowCardinalityDictionaryGroupIdsWork.class);
    // Both channels of this page are BIGINT dictionary blocks, so the declared types must be (BIGINT, BIGINT).
    Page highCardinalityDictionaryPage = new Page(bigintDictionaryBlock, bigintBigDictionaryBlock);
    assertGroupByHashWork(highCardinalityDictionaryPage, ImmutableList.of(BIGINT, BIGINT), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);
    // Cardinality above Short.MAX_VALUE
    Page lowCardinalityHugeDictionaryPage = new Page(bigintSingletonDictionaryBlock, bigintHugeDictionaryBlock);
    assertGroupByHashWork(lowCardinalityHugeDictionaryPage, ImmutableList.of(BIGINT, BIGINT), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);
}
Also used : TypeTestUtils.getHashBlock(io.trino.type.TypeTestUtils.getHashBlock) BlockAssertions.createLongSequenceBlock(io.trino.block.BlockAssertions.createLongSequenceBlock) DictionaryBlock(io.trino.spi.block.DictionaryBlock) BlockAssertions.createStringSequenceBlock(io.trino.block.BlockAssertions.createStringSequenceBlock) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock) BlockAssertions.createLongsBlock(io.trino.block.BlockAssertions.createLongsBlock) Page(io.trino.spi.Page) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock) Test(org.testng.annotations.Test)

Example 2 with VariableWidthBlock

use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.

the class SliceDirectColumnReader method readBlock.

/**
 * Reads the next {@code nextBatchSize} values of this column into a block.
 * <p>
 * Any pending {@code readOffset} is first skipped in the present, length and data streams.
 * The method then returns an all-nulls block (when there is no length stream or every
 * position in the batch is null) or a {@link VariableWidthBlock} built from the length and
 * data streams. When {@code maxCodePointCount} is non-negative each value is truncated via
 * {@code computeTruncatedLength}. {@code readOffset} and {@code nextBatchSize} are reset to
 * zero before a block is returned.
 *
 * @return the block holding the values of the current batch
 * @throws IOException declared for the underlying stream operations; an
 *         {@code OrcCorruptionException} is thrown when a stream required by the data is missing
 * @throws TrinoException if the total length of the batch's values exceeds {@code ONE_GIGABYTE}
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
            }
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }
    if (lengthStream == null) {
        // Without a length stream the whole batch is treated as null, which requires a present stream.
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        Block nullValueBlock = readAllNullsBlock();
        readOffset = 0;
        nextBatchSize = 0;
        return nullValueBlock;
    }
    // From here on lengthStream is known to be non-null: the null case returned above.
    // create new isNullVector and offsetVector for VariableWidthBlock
    boolean[] isNullVector = null;
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    if (presentStream == null) {
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        isNullVector = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullCount == nextBatchSize) {
            // all nulls
            Block nullValueBlock = readAllNullsBlock();
            readOffset = 0;
            nextBatchSize = 0;
            return nullValueBlock;
        }
        if (nullCount == 0) {
            // no nulls in this batch, so the block does not need a null vector
            isNullVector = null;
            lengthStream.next(offsetVector, nextBatchSize);
        } else {
            // only non-null positions have a length entry; spread them out to their positions
            lengthStream.next(offsetVector, nextBatchSize - nullCount);
            unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
        }
    }
    // Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
    long totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        totalLength += offsetVector[i];
    }
    // Capture the batch size before resetting the reader state; everything below must use currentBatchSize.
    int currentBatchSize = nextBatchSize;
    readOffset = 0;
    nextBatchSize = 0;
    if (totalLength == 0) {
        return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
    }
    if (totalLength > ONE_GIGABYTE) {
        // Report currentBatchSize: nextBatchSize has already been reset to zero above.
        throw new TrinoException(GENERIC_INTERNAL_ERROR, format("Values in column \"%s\" are too large to process for Trino. %s column values are larger than 1GB [%s]", column.getPath(), currentBatchSize, column.getOrcDataSourceId()));
    }
    if (dataStream == null) {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }
    // allocate enough space to read
    byte[] data = new byte[toIntExact(totalLength)];
    Slice slice = Slices.wrappedBuffer(data);
    if (maxCodePointCount < 0) {
        // unbounded, simply read all data in one shot
        dataStream.next(data, 0, data.length);
        convertLengthVectorToOffsetVector(offsetVector);
    } else {
        // We do the following operations together in the for loop:
        // * truncate strings
        // * convert original length values in offsetVector into truncated offset values
        int currentLength = offsetVector[0];
        offsetVector[0] = 0;
        for (int i = 1; i <= currentBatchSize; i++) {
            // offsetVector[i] still holds the length of entry i; save it before it is overwritten with an offset
            int nextLength = offsetVector[i];
            if (isNullVector != null && isNullVector[i - 1]) {
                checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
                offsetVector[i] = offsetVector[i - 1];
                currentLength = nextLength;
                continue;
            }
            int offset = offsetVector[i - 1];
            // read data without truncation
            dataStream.next(data, offset, offset + currentLength);
            // adjust offsetVector with truncated length
            int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
            verify(truncatedLength >= 0);
            offsetVector[i] = offset + truncatedLength;
            currentLength = nextLength;
        }
    }
    // this can lead to over-retention but unlikely to happen given truncation rarely happens
    return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
Also used : Slice(io.airlift.slice.Slice) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock) TrinoException(io.trino.spi.TrinoException) OrcCorruptionException(io.trino.orc.OrcCorruptionException) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock)

Example 3 with VariableWidthBlock

use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.

the class TestVariableWidthBlock method testCompactBlock.

/**
 * Runs the compact / incompact block checks against variable width blocks built
 * from a 16 byte slice, seven offsets and six null flags.
 */
@Test
public void testCompactBlock() {
    Slice exactSlice = Slices.copyOf(createExpectedValue(16));
    // a 16 byte view over a 20 byte copy
    Slice viewOfLargerSlice = Slices.copyOf(createExpectedValue(20)).slice(0, 16);
    boolean[] nullFlags = { false, true, false, false, false, false };
    int[] positionOffsets = { 0, 1, 1, 2, 4, 8, 16 };
    int positionCount = nullFlags.length;

    // empty block
    VariableWidthBlock emptyBlock = new VariableWidthBlock(0, EMPTY_SLICE, new int[1], Optional.empty());
    testCompactBlock(emptyBlock);

    // position count equal to the number of null flags
    VariableWidthBlock fullBlock = new VariableWidthBlock(positionCount, exactSlice, positionOffsets, Optional.of(nullFlags));
    testCompactBlock(fullBlock);

    // position count one less than the number of null flags
    VariableWidthBlock shorterBlock = new VariableWidthBlock(positionCount - 1, exactSlice, positionOffsets, Optional.of(nullFlags));
    testIncompactBlock(shorterBlock);

    // underlying slice is not compact
    VariableWidthBlock slicedBlock = new VariableWidthBlock(positionCount, viewOfLargerSlice, positionOffsets, Optional.of(nullFlags));
    testIncompactBlock(slicedBlock);
}
Also used : Slice(io.airlift.slice.Slice) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock) Test(org.testng.annotations.Test)

Example 4 with VariableWidthBlock

use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.

the class TestDictionaryBlock method testNestedDictionarySizes.

/**
 * Applies {@code assertDictionarySizeMethods} to a base block, a dictionary over it and a
 * dictionary nested over another dictionary, once for a fixed width base block and once
 * for a variable width base block.
 */
@Test
public void testNestedDictionarySizes() {
    // fixed width block
    int[] intValues = IntStream.range(0, 100).toArray();
    Block fixedWidthBlock = new IntArrayBlock(100, Optional.empty(), intValues);
    assertDictionarySizeMethods(fixedWidthBlock);

    DictionaryBlock fixedWidthDictionary = new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray());
    assertDictionarySizeMethods(fixedWidthDictionary);

    // a fresh inner dictionary is built for the nested case
    DictionaryBlock fixedWidthInner = new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray());
    DictionaryBlock fixedWidthNested = new DictionaryBlock(fixedWidthInner, IntStream.range(0, 10).toArray());
    assertDictionarySizeMethods(fixedWidthNested);

    // variable width block
    Block variableWidthBlock = createSlicesBlock(createExpectedValues(100));
    assertDictionarySizeMethods(variableWidthBlock);

    DictionaryBlock variableWidthDictionary = new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray());
    assertDictionarySizeMethods(variableWidthDictionary);

    // a fresh inner dictionary is built for the nested case
    DictionaryBlock variableWidthInner = new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray());
    DictionaryBlock variableWidthNested = new DictionaryBlock(variableWidthInner, IntStream.range(0, 10).toArray());
    assertDictionarySizeMethods(variableWidthNested);
}
Also used : IntArrayBlock(io.trino.spi.block.IntArrayBlock) DictionaryBlock(io.trino.spi.block.DictionaryBlock) BlockAssertions.createSlicesBlock(io.trino.block.BlockAssertions.createSlicesBlock) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock) DictionaryBlock(io.trino.spi.block.DictionaryBlock) IntArrayBlock(io.trino.spi.block.IntArrayBlock) Block(io.trino.spi.block.Block) Test(org.testng.annotations.Test)

Example 5 with VariableWidthBlock

use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.

the class SliceData method toBlock.

/**
 * Builds a {@link VariableWidthBlock} from this object's {@code bytes}, {@code sizes} and
 * {@code nulls}.
 *
 * @param desiredType requested type; its Java type must be {@code Slice}, otherwise
 *        {@code checkArgument} rejects the call
 * @return a variable width block with {@code numberOfRecords()} positions
 */
@Override
public Block toBlock(Type desiredType) {
    checkArgument(desiredType.getJavaType() == Slice.class, "type doesn't match: %s", desiredType);

    // fall back to the empty slice when there is no byte buffer
    Slice values;
    if (bytes != null) {
        values = Slices.wrappedBuffer(bytes);
    }
    else {
        values = Slices.EMPTY_SLICE;
    }

    int positionCount = numberOfRecords();
    int[] offsets = calculateOffsets(sizes, nulls, positionCount);
    return new VariableWidthBlock(positionCount, values, offsets, Optional.ofNullable(nulls));
}
Also used : Slice(io.airlift.slice.Slice) VariableWidthBlock(io.trino.spi.block.VariableWidthBlock)

Aggregations

VariableWidthBlock (io.trino.spi.block.VariableWidthBlock)6 Slice (io.airlift.slice.Slice)3 Block (io.trino.spi.block.Block)3 Test (org.testng.annotations.Test)3 DictionaryBlock (io.trino.spi.block.DictionaryBlock)2 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)2 BlockAssertions.createLongSequenceBlock (io.trino.block.BlockAssertions.createLongSequenceBlock)1 BlockAssertions.createLongsBlock (io.trino.block.BlockAssertions.createLongsBlock)1 BlockAssertions.createSlicesBlock (io.trino.block.BlockAssertions.createSlicesBlock)1 BlockAssertions.createStringSequenceBlock (io.trino.block.BlockAssertions.createStringSequenceBlock)1 OrcCorruptionException (io.trino.orc.OrcCorruptionException)1 Page (io.trino.spi.Page)1 TrinoException (io.trino.spi.TrinoException)1 IntArrayBlock (io.trino.spi.block.IntArrayBlock)1 TypeTestUtils.getHashBlock (io.trino.type.TypeTestUtils.getHashBlock)1