Use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.
Class TestGroupByHash, method testProperWorkTypesSelected:
@Test
public void testProperWorkTypesSelected()
{
    Block bigintBlock = BlockAssertions.createLongsBlock(1, 2, 3, 4, 5, 6, 7, 8);
    Block bigintDictionaryBlock = BlockAssertions.createLongDictionaryBlock(0, 8);
    Block bigintRleBlock = BlockAssertions.createRLEBlock(42, 8);
    Block varcharBlock = BlockAssertions.createStringsBlock("1", "2", "3", "4", "5", "6", "7", "8");
    Block varcharDictionaryBlock = BlockAssertions.createStringDictionaryBlock(1, 8);
    Block varcharRleBlock = new RunLengthEncodedBlock(new VariableWidthBlock(1, Slices.wrappedBuffer(new byte[] {97}), new int[] {0, 1}, Optional.empty()), 8);
    Block bigintBigDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 8, 1000);
    Block bigintSingletonDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 500000, 1);
    // dictionary size above Short.MAX_VALUE
    Block bigintHugeDictionaryBlock = BlockAssertions.createLongDictionaryBlock(1, 500000, 66000);

    Page singleBigintPage = new Page(bigintBlock);
    assertGroupByHashWork(singleBigintPage, ImmutableList.of(BIGINT), BigintGroupByHash.GetGroupIdsWork.class);
    Page singleBigintDictionaryPage = new Page(bigintDictionaryBlock);
    assertGroupByHashWork(singleBigintDictionaryPage, ImmutableList.of(BIGINT), BigintGroupByHash.GetDictionaryGroupIdsWork.class);
    Page singleBigintRlePage = new Page(bigintRleBlock);
    assertGroupByHashWork(singleBigintRlePage, ImmutableList.of(BIGINT), BigintGroupByHash.GetRunLengthEncodedGroupIdsWork.class);
    Page singleVarcharPage = new Page(varcharBlock);
    assertGroupByHashWork(singleVarcharPage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);
    Page singleVarcharDictionaryPage = new Page(varcharDictionaryBlock);
    assertGroupByHashWork(singleVarcharDictionaryPage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetDictionaryGroupIdsWork.class);
    Page singleVarcharRlePage = new Page(varcharRleBlock);
    assertGroupByHashWork(singleVarcharRlePage, ImmutableList.of(VARCHAR), MultiChannelGroupByHash.GetRunLengthEncodedGroupIdsWork.class);

    Page lowCardinalityDictionaryPage = new Page(bigintDictionaryBlock, varcharDictionaryBlock);
    assertGroupByHashWork(lowCardinalityDictionaryPage, ImmutableList.of(BIGINT, VARCHAR), MultiChannelGroupByHash.GetLowCardinalityDictionaryGroupIdsWork.class);
    // both channels here are BIGINT dictionaries, and the combined cardinality is too
    // high for the low-cardinality dictionary path
    Page highCardinalityDictionaryPage = new Page(bigintDictionaryBlock, bigintBigDictionaryBlock);
    assertGroupByHashWork(highCardinalityDictionaryPage, ImmutableList.of(BIGINT, BIGINT), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);

    // cardinality above Short.MAX_VALUE
    Page lowCardinalityHugeDictionaryPage = new Page(bigintSingletonDictionaryBlock, bigintHugeDictionaryBlock);
    assertGroupByHashWork(lowCardinalityHugeDictionaryPage, ImmutableList.of(BIGINT, BIGINT), MultiChannelGroupByHash.GetNonDictionaryGroupIdsWork.class);
}
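The varcharRleBlock line above builds a VariableWidthBlock by hand, which makes the constructor contract visible: the offsets array has positionCount + 1 entries, and position i spans bytes [offsets[i], offsets[i + 1]) of the slice. A minimal sketch, not from the Trino source, with illustrative literals:

    // two values, "ab" and "cde", stored back to back in one slice
    Slice bytes = Slices.utf8Slice("abcde");
    int[] offsets = {0, 2, 5};  // 2 positions need 3 offsets
    Block block = new VariableWidthBlock(2, bytes, offsets, Optional.empty());
    // VARCHAR.getSlice(block, 1) would yield "cde"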
Use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.
Class SliceDirectColumnReader, method readBlock:
@Override
public Block readBlock()
        throws IOException
{
    if (!rowGroupOpen) {
        openRowGroup();
    }

    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead in the present bit reader, but count the set bits
            // and use that as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
            }
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }

    if (lengthStream == null) {
        if (presentStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        Block nullValueBlock = readAllNullsBlock();
        readOffset = 0;
        nextBatchSize = 0;
        return nullValueBlock;
    }

    // create new isNullVector and offsetVector for VariableWidthBlock
    boolean[] isNullVector = null;

    // Use the offsetVector as the buffer to read the length values from lengthStream;
    // the length values are then converted in place into an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];

    if (presentStream == null) {
        lengthStream.next(offsetVector, nextBatchSize);
    }
    else {
        isNullVector = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullCount == nextBatchSize) {
            // all nulls
            Block nullValueBlock = readAllNullsBlock();
            readOffset = 0;
            nextBatchSize = 0;
            return nullValueBlock;
        }
        if (lengthStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but length stream is missing");
        }
        if (nullCount == 0) {
            isNullVector = null;
            lengthStream.next(offsetVector, nextBatchSize);
        }
        else {
            lengthStream.next(offsetVector, nextBatchSize - nullCount);
            unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
        }
    }

    // Calculate the total length of all entries. Note that at this point the values
    // in the offsetVector are still lengths, not offsets.
    long totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        totalLength += offsetVector[i];
    }

    int currentBatchSize = nextBatchSize;
    readOffset = 0;
    nextBatchSize = 0;
    if (totalLength == 0) {
        return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
    }
    if (totalLength > ONE_GIGABYTE) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR,
                format("Values in column \"%s\" are too large to process for Trino. %s column values are larger than 1GB [%s]", column.getPath(), currentBatchSize, column.getOrcDataSourceId()));
    }
    if (dataStream == null) {
        throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }

    // allocate enough space to read
    byte[] data = new byte[toIntExact(totalLength)];
    Slice slice = Slices.wrappedBuffer(data);

    if (maxCodePointCount < 0) {
        // unbounded, simply read all data in one shot
        dataStream.next(data, 0, data.length);
        convertLengthVectorToOffsetVector(offsetVector);
    }
    else {
        // The following loop does two things at once:
        // * truncate strings
        // * convert the original length values in offsetVector into truncated offset values
        int currentLength = offsetVector[0];
        offsetVector[0] = 0;
        for (int i = 1; i <= currentBatchSize; i++) {
            int nextLength = offsetVector[i];
            if (isNullVector != null && isNullVector[i - 1]) {
                checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
                offsetVector[i] = offsetVector[i - 1];
                currentLength = nextLength;
                continue;
            }
            int offset = offsetVector[i - 1];

            // read data without truncation
            dataStream.next(data, offset, offset + currentLength);

            // adjust offsetVector with the truncated length
            int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
            verify(truncatedLength >= 0);
            offsetVector[i] = offset + truncatedLength;
            currentLength = nextLength;
        }
    }

    // this can lead to over-retention, but that is unlikely given that truncation rarely happens
    return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
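The in-place conversion noted in the comments is the key trick here: offsetVector is first filled with per-position lengths and then rewritten into offsets without a second array. A hedged sketch of what convertLengthVectorToOffsetVector presumably does; this is an illustration, not the Trino helper itself:

    static void lengthsToOffsets(int[] vector, int positionCount)
    {
        // vector[0..positionCount - 1] hold lengths on entry; on exit, vector[i] is the
        // start offset of position i and vector[positionCount] is the total length
        int currentLength = vector[0];
        vector[0] = 0;
        for (int i = 1; i <= positionCount; i++) {
            int nextLength = vector[i];
            vector[i] = vector[i - 1] + currentLength;
            currentLength = nextLength;
        }
    }

This mirrors the truncating branch of the loop above, minus the truncation and null handling.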
Use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.
Class TestVariableWidthBlock, method testCompactBlock:
@Test
public void testCompactBlock()
{
    Slice compactSlice = Slices.copyOf(createExpectedValue(16));
    Slice incompactSlice = Slices.copyOf(createExpectedValue(20)).slice(0, 16);
    int[] offsets = {0, 1, 1, 2, 4, 8, 16};
    boolean[] valueIsNull = {false, true, false, false, false, false};

    testCompactBlock(new VariableWidthBlock(0, EMPTY_SLICE, new int[1], Optional.empty()));
    testCompactBlock(new VariableWidthBlock(valueIsNull.length, compactSlice, offsets, Optional.of(valueIsNull)));
    // a block using only a prefix of the positions does not cover the whole slice
    testIncompactBlock(new VariableWidthBlock(valueIsNull.length - 1, compactSlice, offsets, Optional.of(valueIsNull)));
    // underlying slice is not compact
    testIncompactBlock(new VariableWidthBlock(valueIsNull.length, incompactSlice, offsets, Optional.of(valueIsNull)));
}
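The incompact cases follow from the compactness rule: a VariableWidthBlock is compact only when its slice is itself compact and the offsets cover every byte of the slice. With positionCount = valueIsNull.length - 1 = 5, the last used offset is offsets[5] = 8, leaving bytes 8..15 of the 16-byte slice unreferenced. A hedged sketch of the criterion, assuming airlift's Slice.isCompact():

    static boolean isCompactVariableWidth(Slice slice, int[] offsets, int positionCount)
    {
        // assumption: Slice.isCompact() reports whether the slice covers its whole backing buffer
        return slice.isCompact() && offsets[positionCount] == slice.length();
    }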
Use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.
Class TestDictionaryBlock, method testNestedDictionarySizes:
@Test
public void testNestedDictionarySizes()
{
    // fixed-width block
    Block fixedWidthBlock = new IntArrayBlock(100, Optional.empty(), IntStream.range(0, 100).toArray());
    assertDictionarySizeMethods(fixedWidthBlock);
    assertDictionarySizeMethods(new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray()));
    assertDictionarySizeMethods(new DictionaryBlock(new DictionaryBlock(fixedWidthBlock, IntStream.range(0, 50).toArray()), IntStream.range(0, 10).toArray()));

    // variable-width block
    Block variableWidthBlock = createSlicesBlock(createExpectedValues(100));
    assertDictionarySizeMethods(variableWidthBlock);
    assertDictionarySizeMethods(new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray()));
    assertDictionarySizeMethods(new DictionaryBlock(new DictionaryBlock(variableWidthBlock, IntStream.range(0, 50).toArray()), IntStream.range(0, 10).toArray()));
}
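The point of the nested wrappers is that the size methods must account only for the dictionary entries the ids actually reference, not the whole dictionary. An illustrative sketch; the inequality is an assumption about the accounting, not a quote of assertDictionarySizeMethods:

    Block dictionary = createSlicesBlock(createExpectedValues(100));
    Block half = new DictionaryBlock(dictionary, IntStream.range(0, 50).toArray());
    // half references only 50 of the 100 entries, so its reported size should be
    // roughly half the dictionary bytes plus the int ids array
    long idsSize = 50L * Integer.BYTES;
    assert half.getSizeInBytes() < dictionary.getSizeInBytes() + idsSize;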
Use of io.trino.spi.block.VariableWidthBlock in project trino by trinodb.
Class SliceData, method toBlock:
@Override
public Block toBlock(Type desiredType)
{
    checkArgument(desiredType.getJavaType() == Slice.class, "type doesn't match: %s", desiredType);
    Slice values = bytes == null ? Slices.EMPTY_SLICE : Slices.wrappedBuffer(bytes);
    int numberOfRecords = numberOfRecords();
    return new VariableWidthBlock(numberOfRecords, values, calculateOffsets(sizes, nulls, numberOfRecords), Optional.ofNullable(nulls));
}
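toBlock relies on a calculateOffsets helper to turn per-record sizes into the offsets array the VariableWidthBlock constructor expects. A hypothetical sketch of that helper; the name matches the call above, but the body and null handling are assumptions, since the real implementation lives in SliceData:

    private static int[] calculateOffsets(int[] sizes, boolean[] nulls, int numberOfRecords)
    {
        int[] offsets = new int[numberOfRecords + 1];
        for (int i = 0; i < numberOfRecords; i++) {
            // assumption: null rows contribute zero bytes, so their offset repeats the previous one
            int length = (nulls != null && nulls[i]) ? 0 : sizes[i];
            offsets[i + 1] = offsets[i] + length;
        }
        return offsets;
    }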