Search in sources :

Example 1 with VariableWidthBlock

use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.

the class SliceDirectBatchStreamReader method readBlock.

/**
 * Reads the next {@code nextBatchSize} values of this column into a block.
 *
 * <p>The method first applies any pending skip ({@code readOffset}), then reads the
 * per-value lengths, converts them in place into offsets, reads the value bytes, and
 * wraps everything in a {@link VariableWidthBlock}. When {@code maxCodePointCount} is
 * non-negative each value is truncated as it is read. {@code readOffset} and
 * {@code nextBatchSize} are reset to 0 before any block is returned.
 *
 * @return the block produced by {@code readAllNullsBlock()} when the batch contains only
 *         nulls, otherwise a {@link VariableWidthBlock} holding the batch
 * @throws IOException declared by this method; the stream calls below are the likely source
 * @throws OrcCorruptionException if a stream required by the data is missing
 * @throws GenericInternalException if the batch's total value length exceeds {@code ONE_GIGABYTE}
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }
    // Apply the pending skip: advance the present, length and data streams past readOffset values.
    if (readOffset > 0) {
        if (presentStream != null) {
            // skip ahead the present bit reader, but count the set bits
            // and use this as the skip size for the length reader
            readOffset = presentStream.countBitsSet(readOffset);
        }
        if (readOffset > 0) {
            if (lengthStream == null) {
                throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present");
            }
            // The sum of the skipped lengths is the number of data bytes to skip.
            long dataSkipSize = lengthStream.sum(readOffset);
            if (dataSkipSize > 0) {
                if (dataStream == null) {
                    throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is not present");
                }
                dataStream.skip(dataSkipSize);
            }
        }
    }
    // No length stream: the batch is treated as all nulls, which requires a present stream.
    if (lengthStream == null) {
        if (presentStream == null) {
            throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is null but present stream is missing");
        }
        presentStream.skip(nextBatchSize);
        Block nullValueBlock = readAllNullsBlock();
        readOffset = 0;
        nextBatchSize = 0;
        return nullValueBlock;
    }
    // create new isNullVector and offsetVector for VariableWidthBlock
    boolean[] isNullVector = null;
    // We will use the offsetVector as the buffer to read the length values from lengthStream,
    // and the length values will be converted in-place to an offset vector.
    int[] offsetVector = new int[nextBatchSize + 1];
    if (presentStream == null) {
        // No present stream: one length is read for every position in the batch.
        lengthStream.next(offsetVector, nextBatchSize);
    } else {
        isNullVector = new boolean[nextBatchSize];
        int nullCount = presentStream.getUnsetBits(nextBatchSize, isNullVector);
        if (nullCount == nextBatchSize) {
            // all nulls
            Block nullValueBlock = readAllNullsBlock();
            readOffset = 0;
            nextBatchSize = 0;
            return nullValueBlock;
        }
        if (nullCount == 0) {
            // No nulls in this batch: drop the null vector so the block is built without one.
            isNullVector = null;
            lengthStream.next(offsetVector, nextBatchSize);
        } else {
            // Only the non-null positions have a length in the stream.
            lengthStream.next(offsetVector, nextBatchSize - nullCount);
            // NOTE(review): unpackLengthNulls is not visible here; presumably it spreads the
            // packed lengths to their batch positions and leaves 0 at null positions - confirm.
            unpackLengthNulls(offsetVector, isNullVector, nextBatchSize - nullCount);
        }
    }
    // Calculate the total length for all entries. Note that the values in the offsetVector are still length values now.
    long totalLength = 0;
    for (int i = 0; i < nextBatchSize; i++) {
        totalLength += offsetVector[i];
    }
    // Capture the batch size before the reader state is reset for the next call.
    int currentBatchSize = nextBatchSize;
    readOffset = 0;
    nextBatchSize = 0;
    if (totalLength == 0) {
        // Every length is zero, so offsetVector already holds valid (all-zero) offsets.
        return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
    }
    if (totalLength > ONE_GIGABYTE) {
        throw new GenericInternalException(format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]", streamDescriptor.getFieldName(), currentBatchSize, streamDescriptor.getOrcDataSourceId()));
    }
    if (dataStream == null) {
        throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing");
    }
    // allocate enough space to read
    byte[] data = new byte[toIntExact(totalLength)];
    Slice slice = Slices.wrappedBuffer(data);
    if (maxCodePointCount < 0) {
        // unbounded, simply read all data in one shot
        dataStream.next(data, 0, data.length);
        convertLengthVectorToOffsetVector(offsetVector);
    } else {
        // We do the following operations together in the for loop:
        // * truncate strings
        // * convert original length values in offsetVector into truncated offset values
        // currentLength holds the original length of entry i - 1; offsetVector[i] is saved in
        // nextLength before it is overwritten with an offset.
        int currentLength = offsetVector[0];
        offsetVector[0] = 0;
        for (int i = 1; i <= currentBatchSize; i++) {
            int nextLength = offsetVector[i];
            if (isNullVector != null && isNullVector[i - 1]) {
                checkState(currentLength == 0, "Corruption in slice direct stream: length is non-zero for null entry");
                // A null entry occupies no bytes: its end offset equals its start offset.
                offsetVector[i] = offsetVector[i - 1];
                currentLength = nextLength;
                continue;
            }
            int offset = offsetVector[i - 1];
            // read data without truncation
            // NOTE(review): the third argument is passed as offset + currentLength here and as
            // data.length in the unbounded branch, so it looks like an end position rather than
            // a byte count - confirm against the data stream's API.
            dataStream.next(data, offset, offset + currentLength);
            // adjust offsetVector with truncated length
            int truncatedLength = computeTruncatedLength(slice, offset, currentLength, maxCodePointCount, isCharType);
            verify(truncatedLength >= 0);
            // The next entry starts right after the truncated value, so any truncated-away bytes
            // are overwritten by the next read.
            offsetVector[i] = offset + truncatedLength;
            currentLength = nextLength;
        }
    }
    // this can lead to over-retention but unlikely to happen given truncation rarely happens
    return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
}
Also used : GenericInternalException(com.facebook.presto.common.GenericInternalException) Slice(io.airlift.slice.Slice) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) OrcCorruptionException(com.facebook.presto.orc.OrcCorruptionException) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock)

Example 2 with VariableWidthBlock

use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.

the class SliceData method toBlock.

/**
 * Wraps the accumulated bytes in a {@link VariableWidthBlock}.
 *
 * @param desiredType the requested type; its Java type must be {@code Slice}
 * @return a block with one position per record, backed by {@code bytes} (or an empty
 *         slice when {@code bytes} is null)
 */
@Override
public Block toBlock(Type desiredType) {
    checkArgument(desiredType.getJavaType() == Slice.class, "type doesn't match: %s", desiredType);

    // Fall back to the shared empty slice when nothing has been buffered.
    Slice payload;
    if (bytes != null) {
        payload = Slices.wrappedBuffer(bytes);
    } else {
        payload = Slices.EMPTY_SLICE;
    }

    int recordCount = numberOfRecords();
    int[] offsets = calculateOffsets(sizes, nulls, recordCount);
    return new VariableWidthBlock(recordCount, payload, offsets, Optional.ofNullable(nulls));
}
Also used : Slice(io.airlift.slice.Slice) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock)

Example 3 with VariableWidthBlock

use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.

the class BinaryNestedBatchReader method readNestedNoNull.

/**
 * Reads the next batch of a nested binary column into a {@link ColumnChunk}.
 *
 * <p>Works in three passes over the values-decoder contexts: count the entries at the
 * maximum definition level, fetch the value buffers from each decoder, then copy all
 * buffers into one contiguous byte array while filling the offsets.
 *
 * @return a chunk holding the value block together with the definition and repetition levels
 * @throws IOException declared by this method; the level and value reads are the likely source
 */
@Override
protected ColumnChunk readNestedNoNull() throws IOException {
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionContext = readDefinitionLevels(repetitionContext.getDLValuesDecoderContexts(), repetitionContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionContext.getDefinitionLevels();

    // Pass 1: per context, count the entries sitting at the maximum definition level.
    int totalValues = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        int definedCount = 0;
        for (int level = decoderContext.getStart(); level < decoderContext.getEnd(); level++) {
            if (definitionLevels[level] == maxDefinitionLevel) {
                definedCount++;
            }
        }
        totalValues += definedCount;
        decoderContext.setNonNullCount(definedCount);
        decoderContext.setValueCount(definedCount);
    }

    // Pass 2: pull the value buffers and add up how many bytes they need in total.
    List<ValueBuffer> pendingBuffers = new ArrayList<>();
    int totalBytes = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) decoderContext.getValuesDecoder();
        ValueBuffer pending = decoder.readNext(decoderContext.getNonNullCount());
        totalBytes += pending.getBufferSize();
        pendingBuffers.add(pending);
    }

    // Pass 3: copy every buffer into one array; readIntoBuffer returns the next write position.
    byte[] bytes = new byte[totalBytes];
    int[] offsets = new int[totalValues + 1];
    int contextIndex = 0;
    int writePosition = 0;
    int offsetPosition = 0;
    for (ValuesDecoderContext decoderContext : definitionContext.getValuesDecoderContexts()) {
        ValueBuffer pending = pendingBuffers.get(contextIndex);
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) decoderContext.getValuesDecoder();
        writePosition = decoder.readIntoBuffer(bytes, writePosition, offsets, offsetPosition, pending);
        offsetPosition += decoderContext.getValueCount();
        contextIndex++;
    }

    Slice payload = Slices.wrappedBuffer(bytes, 0, totalBytes);
    Block valueBlock = new VariableWidthBlock(totalValues, payload, offsets, Optional.empty());
    return new ColumnChunk(valueBlock, definitionLevels, repetitionContext.getRepetitionLevels());
}
Also used : ArrayList(java.util.ArrayList) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 4 with VariableWidthBlock

use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.

the class BinaryFlatBatchReader method readWithoutNull.

/**
 * Reads the next {@code nextBatchSize} values of a flat binary column that has no nulls.
 *
 * <p>Values are pulled page by page until the batch is full or no further page is
 * available, then copied into one contiguous byte array with a matching offsets array.
 *
 * @return a chunk wrapping a {@link VariableWidthBlock}; the definition and repetition
 *         level arrays are empty
 * @throws IOException declared by this method; the page and value reads are the likely source
 */
private ColumnChunk readWithoutNull() throws IOException {
    // Left at its default (all false): no position in this batch is marked null.
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    // Parameterized (previously a raw List) so no unchecked conversion is needed when iterating.
    List<ValuesDecoderContext<BinaryValuesDecoder>> valuesDecoderContexts = new ArrayList<>();
    int bufferSize = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                // No more pages: stop with a partially filled batch.
                break;
            }
        }
        // Never read past the current page or past the requested batch.
        int readChunkSize = Math.min(remainingCountInPage, remainingInBatch);
        ValueBuffer valueBuffer = valuesDecoder.readNext(readChunkSize);
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
        // Diamond instead of a raw constructor call keeps the context type-checked.
        ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext = new ValuesDecoderContext<>(valuesDecoder, startOffset, startOffset + readChunkSize);
        valuesDecoderContext.setValueCount(readChunkSize);
        valuesDecoderContext.setNonNullCount(readChunkSize);
        valuesDecoderContexts.add(valuesDecoderContext);
        startOffset += readChunkSize;
        remainingInBatch -= readChunkSize;
        remainingCountInPage -= readChunkSize;
    }
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[nextBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    // Copy each chunk into the shared buffer; readIntoBuffer returns the next write position.
    for (ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext : valuesDecoderContexts) {
        BinaryValuesDecoder binaryValuesDecoder = valuesDecoderContext.getValuesDecoder();
        ValueBuffer value = valueBuffers.get(i);
        bufferIndex = binaryValuesDecoder.readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }
    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    Block block = new VariableWidthBlock(nextBatchSize, buffer, offsets, Optional.of(isNull));
    return new ColumnChunk(block, new int[0], new int[0]);
}
Also used : ArrayList(java.util.ArrayList) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 5 with VariableWidthBlock

use of com.facebook.presto.common.block.VariableWidthBlock in project presto by prestodb.

the class TestPinotSegmentPageSource method testMultivaluedType.

/**
 * Verifies that a Pinot data table with one INT_ARRAY and one STRING_ARRAY column is
 * exposed by the segment page source as array-typed blocks with the expected elements.
 */
@Test
public void testMultivaluedType() throws IOException {
    String[] columnNames = { "col1", "col2" };
    DataSchema.ColumnDataType[] columnDataTypes = { DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.STRING_ARRAY };
    DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes);
    String[] stringArray = { "stringVal1", "stringVal2" };
    int[] intArray = { 10, 34, 67 };
    // Build a single-row data table holding one int array and one string array.
    DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
    dataTableBuilder.startRow();
    dataTableBuilder.setColumn(0, intArray);
    dataTableBuilder.setColumn(1, stringArray);
    dataTableBuilder.finishRow();
    DataTable dataTable = dataTableBuilder.build();
    PinotSessionProperties pinotSessionProperties = new PinotSessionProperties(pinotConfig);
    ConnectorSession session = new TestingConnectorSession(pinotSessionProperties.getSessionProperties());
    List<PinotColumnHandle> pinotColumnHandles = ImmutableList.of(new PinotColumnHandle(columnNames[0], PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[0], columnDataTypes[0]), false, false), PinotColumnHandle.PinotColumnType.REGULAR), new PinotColumnHandle(columnNames[1], PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[1], columnDataTypes[1]), false, false), PinotColumnHandle.PinotColumnType.REGULAR));
    PinotSplit mockPinotSplit = new PinotSplit(pinotConnectorId.toString(), PinotSplit.SplitType.SEGMENT, pinotColumnHandles, Optional.empty(), Optional.of("blah"), ImmutableList.of("seg"), Optional.of("host"), getGrpcPort());
    PinotSegmentPageSource pinotSegmentPageSource = getPinotSegmentPageSource(session, ImmutableList.of(dataTable), mockPinotSplit, pinotColumnHandles);
    Page page = requireNonNull(pinotSegmentPageSource.getNextPage(), "Expected a valid page");
    for (int i = 0; i < columnDataTypes.length; i++) {
        Block block = page.getBlock(i);
        Type type = PinotColumnUtils.getPrestoTypeFromPinotType(getFieldSpec(columnNames[i], columnDataTypes[i]), false, false);
        Assert.assertTrue(type instanceof ArrayType, "presto type should be array");
        Type elementType = ((ArrayType) type).getElementType();
        if (elementType instanceof IntegerType) {
            // assertEquals reports the actual value on failure, unlike assertTrue(a == b).
            Assert.assertEquals(block.getBlock(0).getInt(0), 10, "Array element not matching");
            Assert.assertEquals(block.getBlock(0).getInt(1), 34, "Array element not matching");
            Assert.assertEquals(block.getBlock(0).getInt(2), 67, "Array element not matching");
        } else if (elementType instanceof VariableWidthType) {
            Assert.assertTrue(block.getBlock(0) instanceof VariableWidthBlock);
            VariableWidthBlock variableWidthBlock = (VariableWidthBlock) block.getBlock(0);
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            Assert.assertEquals(variableWidthBlock.getSlice(0, 0, variableWidthBlock.getSliceLength(0)).toStringUtf8(), "stringVal1", "Array element not matching");
            Assert.assertEquals(variableWidthBlock.getSlice(1, 0, variableWidthBlock.getSliceLength(1)).toStringUtf8(), "stringVal2", "Array element not matching");
        }
    }
}
Also used : DataTable(org.apache.pinot.common.utils.DataTable) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Page(com.facebook.presto.common.Page) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) DataSchema(org.apache.pinot.common.utils.DataSchema) ArrayType(com.facebook.presto.common.type.ArrayType) IntegerType(com.facebook.presto.common.type.IntegerType) VariableWidthType(com.facebook.presto.common.type.VariableWidthType) IntegerType(com.facebook.presto.common.type.IntegerType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorSession(com.facebook.presto.spi.ConnectorSession) VariableWidthType(com.facebook.presto.common.type.VariableWidthType) DataTableBuilder(org.apache.pinot.core.common.datatable.DataTableBuilder) Test(org.testng.annotations.Test)

Aggregations

VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock)13 Slice (io.airlift.slice.Slice)8 Block (com.facebook.presto.common.block.Block)7 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)7 BinaryValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)4 ValueBuffer (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer)4 ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk)4 ArrayList (java.util.ArrayList)4 Test (org.testng.annotations.Test)2 GenericInternalException (com.facebook.presto.common.GenericInternalException)1 Page (com.facebook.presto.common.Page)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 IntegerType (com.facebook.presto.common.type.IntegerType)1 Type (com.facebook.presto.common.type.Type)1 VariableWidthType (com.facebook.presto.common.type.VariableWidthType)1 OrcCorruptionException (com.facebook.presto.orc.OrcCorruptionException)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)1 DynamicSliceOutput (io.airlift.slice.DynamicSliceOutput)1 DataSchema (org.apache.pinot.common.utils.DataSchema)1