Use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.
Class SliceDirectBatchStreamReader, method readBlock.
/**
 * Reads the next {@code nextBatchSize} positions of this column into a block.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Opens the row group if it is not open yet.</li>
 * <li>Skips the {@code readOffset} pending positions in the present, length and data streams.</li>
 * <li>Returns the block from {@code readAllNullsBlock()} when there is no length stream or every
 * position in the batch is null.</li>
 * <li>Otherwise reads the lengths into {@code offsetVector}, reads the bytes, and converts the
 * lengths into offsets in place. When {@code maxCodePointCount >= 0} each value is read
 * individually and truncated via {@code computeTruncatedLength}.</li>
 * </ol>
 *
 * <p>{@code readOffset} and {@code nextBatchSize} are reset to 0 on every return path.
 *
 * @return the all-nulls block, or a {@code VariableWidthBlock} over the bytes read for this batch
 * @throws OrcCorruptionException if a stream required by the data is missing
 * @throws GenericInternalException if the summed value length of the batch exceeds {@code ONE_GIGABYTE}
 * @throws IOException declared by the method; presumably raised by the underlying streams
 */
@Override
public Block readBlock() throws IOException {
if (!rowGroupOpen) {
openRowGroup();
}
// Consume the positions that were skipped since the previous read.
if (readOffset > 0) {
if (presentStream != null) {
// skip ahead the present bit reader, but count the set bits
// and use this as the skip size for the length reader
readOffset = presentStream.countBitsSet(readOffset);
}
// From here on readOffset is the number of length entries to skip.
if (readOffset > 0) {
if (lengthStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present");
}
// The sum of the skipped lengths is the number of data bytes to skip.
long dataSkipSize = lengthStream.sum(readOffset);
if (dataSkipSize > 0) {
if (dataStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
}
dataStream.skip(dataSkipSize);
}
}
}
// No length stream: the batch is returned as the all-nulls block, which requires a present stream.
if (lengthStream == null) {
if (presentStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
}
presentStream.skip(nextBatchSize);
Block nullValueBlock = readAllNullsBlock();
readOffset = 0;
nextBatchSize = 0;
return nullValueBlock;
}
// create new isNullVector and offsetVector for VariableWidthBlock
boolean[] isNullVector = null;
// We will use the offsetVector as the buffer to read the length values from lengthStream,
// and the length values will be converted in-place to an offset vector.
int[] offsetVector = new int[nextBatchSize + 1];
if (presentStream == null) {
// No present stream: one length is read per position.
lengthStream.next(offsetVector, nextBatchSize);
} else {
isNullVector = new boolean[nextBatchSize];
int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
if (nullCount == nextBatchSize) {
// all nulls
Block nullValueBlock = readAllNullsBlock();
readOffset = 0;
nextBatchSize = 0;
return nullValueBlock;
}
if (nullCount == 0) {
// No nulls in this batch: drop the mask so the block is built without one.
isNullVector = null;
lengthStream.next(offsetVector, nextBatchSize);
} else {
// Only (nextBatchSize - nullCount) lengths are read; unpackLengthNulls then rearranges
// offsetVector using isNullVector.
// NOTE(review): presumably it spreads the lengths to their batch positions with 0 for nulls - confirm.
lengthStream.next(offsetVector, nextBatchSize - nullCount);
unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
}
}
// Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
long totalLength = 0;
for (int i = 0; i < nextBatchSize; i++) {
totalLength += offsetVector[i];
}
// Capture the batch size before resetting the reader state for the next call.
int currentBatchSize = nextBatchSize;
readOffset = 0;
nextBatchSize = 0;
// Nothing to read from the data stream when every entry is empty; offsetVector is passed as-is.
if (totalLength == 0) {
return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
}
if (totalLength > ONE_GIGABYTE) {
throw new GenericInternalException(format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]", streamDescriptor.getFieldName(), currentBatchSize, streamDescriptor.getOrcDataSourceId()));
}
if (dataStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
}
// allocate enough space to read
byte[] data = new byte[toIntExact(totalLength)];
Slice slice = Slices.wrappedBuffer(data);
if (maxCodePointCount < 0) {
// unbounded, simply read all data in one shot
dataStream.next(data, 0, data.length);
convertLengthVectorToOffsetVector(offsetVector);
} else {
// We do the following operations together in the for loop:
// * truncate strings
// * convert original length values in offsetVector into truncated offset values
// currentLength always holds the original length of entry (i - 1); it is saved before
// offsetVector[i - 1] is overwritten with an offset.
int currentLength = offsetVector[0];
offsetVector[0] = 0;
for (int i = 1; i <= currentBatchSize; i++) {
int nextLength = offsetVector[i];
if (isNullVector != null && isNullVector[i - 1]) {
// A null entry contributes no bytes: its end offset equals its start offset.
checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
offsetVector[i] = offsetVector[i - 1];
currentLength = nextLength;
continue;
}
int offset = offsetVector[i - 1];
// read data without truncation
// NOTE(review): the third argument here is offset + currentLength, while the unbounded branch
// passes data.length with offset 0; presumably it is an end position rather than a byte count -
// confirm against the dataStream.next contract.
dataStream.next(data, offset, offset + currentLength);
// adjust offsetVector with truncated length
int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
verify(truncatedLength >= 0);
// The next entry starts right after the truncated bytes, so any bytes cut off here are
// overwritten by the next read.
offsetVector[i] = offset + truncatedLength;
currentLength = nextLength;
}
}
// this can lead to over-retention but unlikely to happen given truncation rarely happens
return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
Use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.
Class SliceData, method toBlock.
/**
 * Wraps the accumulated bytes of this column in a {@code VariableWidthBlock}.
 *
 * <p>An empty slice is used when no byte buffer is present. Offsets are derived from
 * {@code sizes} and {@code nulls} via {@code calculateOffsets}, and {@code nulls} (which may be
 * {@code null}) becomes the block's optional null mask.
 *
 * @param desiredType the requested type; its Java type must be {@code Slice}
 * @return a block with {@code numberOfRecords()} positions
 */
@Override
public Block toBlock(Type desiredType) {
    checkArgument(desiredType.getJavaType() == Slice.class, "type doesn't match: %s", desiredType);

    Slice payload;
    if (bytes != null) {
        payload = Slices.wrappedBuffer(bytes);
    } else {
        payload = Slices.EMPTY_SLICE;
    }

    int recordCount = numberOfRecords();
    int[] offsets = calculateOffsets(sizes, nulls, recordCount);
    return new VariableWidthBlock(recordCount, payload, offsets, Optional.ofNullable(nulls));
}
Use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.
Class BinaryNestedBatchReader, method readNestedNoNull.
/**
 * Reads the next batch of a nested binary column and packs the decoded values into a single
 * {@code VariableWidthBlock} built without a null mask.
 *
 * <p>Works in three passes over the values-decoder contexts:
 * <ol>
 * <li>count, per context, the positions whose definition level equals the column's maximum
 * definition level, and record that count on the context;</li>
 * <li>ask each decoder for that many values and add up the buffer sizes;</li>
 * <li>copy every decoded buffer into one shared byte array while filling the offsets array.</li>
 * </ol>
 *
 * @return a column chunk carrying the block together with the definition and repetition levels
 * @throws IOException declared by the method; presumably raised while decoding levels or values
 */
@Override
protected ColumnChunk readNestedNoNull() throws IOException {
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(
            repetitionContext.getDLValuesDecoderContexts(),
            repetitionContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionContext.getDefinitionLevels();

    // Pass 1: count the positions at the maximum definition level in each context.
    int totalValues = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        int definedCount = 0;
        for (int position = decoderContext.getStart(); position < decoderContext.getEnd(); position++) {
            if (definitionLevels[position] == maxDefinitionLevel) {
                definedCount++;
            }
        }
        totalValues += definedCount;
        decoderContext.setNonNullCount(definedCount);
        decoderContext.setValueCount(definedCount);
    }

    // Pass 2: decode the values of each context and learn the total byte size.
    List<ValueBuffer> decodedBuffers = new ArrayList<>();
    int totalBytes = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) decoderContext.getValuesDecoder();
        ValueBuffer decoded = decoder.readNext(decoderContext.getNonNullCount());
        totalBytes += decoded.getBufferSize();
        decodedBuffers.add(decoded);
    }

    // Pass 3: copy every decoded buffer into one array; decodedBuffers is aligned with the
    // contexts by iteration order.
    byte[] bytes = new byte[totalBytes];
    int[] offsets = new int[totalValues + 1];
    int contextIndex = 0;
    int writePosition = 0;
    int offsetPosition = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        ValueBuffer decoded = decodedBuffers.get(contextIndex);
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) decoderContext.getValuesDecoder();
        writePosition = decoder.readIntoBuffer(bytes, writePosition, offsets, offsetPosition, decoded);
        offsetPosition += decoderContext.getValueCount();
        contextIndex++;
    }

    Slice packed = Slices.wrappedBuffer(bytes, 0, totalBytes);
    Block values = new VariableWidthBlock(totalValues, packed, offsets, Optional.empty());
    return new ColumnChunk(values, definitionLevels, repetitionContext.getRepetitionLevels());
}
Use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.
Class BinaryFlatBatchReader, method readWithoutNull.
/**
 * Reads the next {@code nextBatchSize} values of a flat binary column on the path where no
 * value is null, and packs them into a single {@code VariableWidthBlock}.
 *
 * <p>Values are pulled from the current page in chunks of at most {@code remainingCountInPage};
 * a new page is requested whenever the current one is exhausted. If {@code readNextPage()}
 * reports that no page is left, reading stops early with whatever was decoded so far.
 *
 * @return a column chunk holding the block, with empty definition and repetition level arrays
 * @throws IOException declared by the method; presumably raised while reading pages or values
 */
private ColumnChunk readWithoutNull() throws IOException {
    // Freshly allocated, so every entry is false; it is handed to the block as the null mask.
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    // Parameterized instead of raw so the loop below needs no unchecked conversion.
    List<ValuesDecoderContext<BinaryValuesDecoder>> valuesDecoderContexts = new ArrayList<>();
    int bufferSize = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;

    // Phase 1: decode chunk by chunk, remembering which decoder produced which buffer.
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0 && !readNextPage()) {
            break;
        }
        int readChunkSize = Math.min(remainingCountInPage, remainingInBatch);
        ValueBuffer valueBuffer = valuesDecoder.readNext(readChunkSize);
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);

        ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext =
                new ValuesDecoderContext<>(valuesDecoder, startOffset, startOffset + readChunkSize);
        valuesDecoderContext.setValueCount(readChunkSize);
        valuesDecoderContext.setNonNullCount(readChunkSize);
        valuesDecoderContexts.add(valuesDecoderContext);

        startOffset += readChunkSize;
        remainingInBatch -= readChunkSize;
        remainingCountInPage -= readChunkSize;
    }

    // Phase 2: copy every decoded buffer into one array; valueBuffers and valuesDecoderContexts
    // were filled in lock-step, so the same index addresses matching entries.
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[nextBatchSize + 1];
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (int i = 0; i < valuesDecoderContexts.size(); i++) {
        ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext = valuesDecoderContexts.get(i);
        BinaryValuesDecoder binaryValuesDecoder = valuesDecoderContext.getValuesDecoder();
        bufferIndex = binaryValuesDecoder.readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, valueBuffers.get(i));
        offsetIndex += valuesDecoderContext.getValueCount();
    }

    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    Block block = new VariableWidthBlock(nextBatchSize, buffer, offsets, Optional.of(isNull));
    return new ColumnChunk(block, new int[0], new int[0]);
}
Use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.
Class TestPinotSegmentPageSource, method testMultivaluedType.
/**
 * Checks that multi-valued Pinot columns (INT_ARRAY and STRING_ARRAY) come back from a
 * {@code PinotSegmentPageSource} as array-typed blocks whose first row holds the values that
 * were written into the data table.
 */
@Test
public void testMultivaluedType() throws IOException {
    String[] columnNames = {"col1", "col2"};
    DataSchema.ColumnDataType[] columnDataTypes = {
            DataSchema.ColumnDataType.INT_ARRAY,
            DataSchema.ColumnDataType.STRING_ARRAY};
    DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
    String[] stringArray = {"stringVal1", "stringVal2"};
    int[] intArray = {10, 34, 67};

    // A single row: one int array and one string array.
    DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
    dataTableBuilder.startRow();
    dataTableBuilder.setColumn(0, intArray);
    dataTableBuilder.setColumn(1, stringArray);
    dataTableBuilder.finishRow();
    DataTable dataTable = dataTableBuilder.build();

    PinotSessionProperties pinotSessionProperties = new PinotSessionProperties(pinotConfig);
    ConnectorSession session = new TestingConnectorSession(pinotSessionProperties.getSessionProperties());
    List<PinotColumnHandle> pinotColumnHandles = ImmutableList.of(
            new PinotColumnHandle(
                    columnNames[0],
                    PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[0], columnDataTypes[0]), false, false),
                    PinotColumnHandle.PinotColumnType.REGULAR),
            new PinotColumnHandle(
                    columnNames[1],
                    PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[1], columnDataTypes[1]), false, false),
                    PinotColumnHandle.PinotColumnType.REGULAR));
    PinotSplit mockPinotSplit = new PinotSplit(
            pinotConnectorId.toString(),
            PinotSplit.SplitType.SEGMENT,
            pinotColumnHandles,
            Optional.empty(),
            Optional.of("blah"),
            ImmutableList.of("seg"),
            Optional.of("host"),
            getGrpcPort());
    PinotSegmentPageSource pinotSegmentPageSource = getPinotSegmentPageSource(session, ImmutableList.of(dataTable), mockPinotSplit, pinotColumnHandles);
    Page page = requireNonNull(pinotSegmentPageSource.getNextPage(), "Expected a valid page");

    for (int i = 0; i < columnDataTypes.length; i++) {
        Block block = page.getBlock(i);
        Type type = PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[i], columnDataTypes[i]), false, false);
        Assert.assertTrue(type instanceof ArrayType, "presto type should be array");
        Type elementType = ((ArrayType) type).getElementType();
        if (elementType instanceof IntegerType) {
            // assertEquals reports the actual value on failure, unlike assertTrue(a == b).
            Block intElements = block.getBlock(0);
            Assert.assertEquals(intElements.getInt(0), 10, "Array element not matching");
            Assert.assertEquals(intElements.getInt(1), 34, "Array element not matching");
            Assert.assertEquals(intElements.getInt(2), 67, "Array element not matching");
        } else if (elementType instanceof VariableWidthType) {
            Assert.assertTrue(block.getBlock(0) instanceof VariableWidthBlock);
            VariableWidthBlock variableWidthBlock = (VariableWidthBlock) block.getBlock(0);
            // Decode explicitly as UTF-8 so the check does not depend on the platform default charset.
            Assert.assertEquals(
                    variableWidthBlock.getSlice(0, 0, variableWidthBlock.getSliceLength(0)).toStringUtf8(),
                    "stringVal1",
                    "Array element not matching");
            Assert.assertEquals(
                    variableWidthBlock.getSlice(1, 0, variableWidthBlock.getSliceLength(1)).toStringUtf8(),
                    "stringVal2",
                    "Array element not matching");
        }
    }
}
Aggregations