Search in sources :

Example 1 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class HiveUpdateProcessor method createMergedColumnsBlock.

/**
 * Assembles one RowBlock that holds a block for every data column, taking each
 * column either from the non-updated values nested in the ACID block or from the
 * updated values stored directly in the page.
 *
 * @param page The page holding the ACID RowBlock and the blocks of the updated columns.
 * The ACID RowBlock has three children when there are no non-updated columns, and a
 * fourth child - itself a RowBlock with the non-updated column values - otherwise.
 * @param columnValueAndRowIdChannels The page channels of the updated columns, listed in
 * the order in which the updated columns occur among all data columns. Also passed to
 * getAcidBlock to locate the ACID block.
 * @return The RowBlock for updated and non-updated columns
 */
public Block createMergedColumnsBlock(Page page, List<Integer> columnValueAndRowIdChannels) {
    requireNonNull(page, "page is null");
    ColumnarRow acidRow = getAcidBlock(page, columnValueAndRowIdChannels);
    int acidChildCount = acidRow.getFieldCount();

    // Unpack the nested row of non-updated column values, if there is one
    List<Block> untouchedBlocks;
    if (!nonUpdatedColumns.isEmpty()) {
        checkArgument(acidChildCount == 4, "The first RowBlock must contain 4 children, but instead had %s children", acidChildCount);
        Block nestedBlock = acidRow.getField(3);
        checkArgument(nestedBlock instanceof RowBlock, "The last block in the acidBlock must be a RowBlock, but instead was %s", nestedBlock);
        ColumnarRow untouchedRow = toColumnarRow(nestedBlock);
        int untouchedFieldCount = untouchedRow.getFieldCount();
        ImmutableList.Builder<Block> collected = ImmutableList.builder();
        for (int i = 0; i < untouchedFieldCount; i++) {
            collected.add(untouchedRow.getField(i));
        }
        untouchedBlocks = collected.build();
    } else {
        checkArgument(acidChildCount == 3, "The ACID block must contain 3 children, but instead had %s children", acidChildCount);
        untouchedBlocks = ImmutableList.of();
    }

    // Walk the data columns in table order, pulling each block from whichever
    // source holds it; the two cursors advance independently
    Block[] mergedBlocks = new Block[allDataColumns.size()];
    int outputChannel = 0;
    int nextUntouched = 0;
    int nextUpdated = 0;
    for (HiveColumnHandle dataColumn : allDataColumns) {
        boolean untouched = nonUpdatedColumnNames.contains(dataColumn.getName());
        mergedBlocks[outputChannel++] = untouched
                ? untouchedBlocks.get(nextUntouched++)
                : page.getBlock(columnValueAndRowIdChannels.get(nextUpdated++));
    }
    return RowBlock.fromFieldBlocks(page.getPositionCount(), Optional.empty(), mergedBlocks);
}
Also used : ColumnarRow.toColumnarRow(io.trino.spi.block.ColumnarRow.toColumnarRow) ColumnarRow(io.trino.spi.block.ColumnarRow) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Block(io.trino.spi.block.Block) RowBlock(io.trino.spi.block.RowBlock) RowBlock(io.trino.spi.block.RowBlock)

Example 2 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class UnionColumnReader method readBlock.

/**
 * Reads the next batch of union values as a RowBlock.
 *
 * Any pending skip (readOffset) is applied first: the skipped tags are read from the
 * data stream and each field reader is told how many of its own values to skip. The
 * batch is then read and both readOffset and nextBatchSize are reset to zero.
 *
 * @return a RowBlock with nextBatchSize positions
 * @throws OrcCorruptionException if non-null values must be skipped but there is no data stream
 * @throws IOException if reading from the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Translate the pending skip through the present stream when there is one
    if (readOffset > 0 && presentStream != null) {
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (readOffset > 0) {
        if (dataStream == null) {
            throw new OrcCorruptionException(column.getOrcDataSourceId(), "Value is not null but data stream is missing");
        }
        // Count the skipped values per tag; the tag is the index of the field reader
        int readerCount = fieldReaders.size();
        int[] skipPerReader = new int[readerCount];
        for (byte tag : dataStream.next(readOffset)) {
            skipPerReader[tag]++;
        }
        for (int reader = 0; reader < readerCount; reader++) {
            fieldReaders.get(reader).prepareNextRead(skipPerReader[reader]);
        }
    }

    // Without a present stream every position in the batch carries a value
    boolean[] isNull = null;
    int valueCount = nextBatchSize;
    if (presentStream != null) {
        isNull = new boolean[nextBatchSize];
        valueCount = nextBatchSize - presentStream.getUnsetBits(nextBatchSize, isNull);
    }

    Block[] fieldBlocks;
    if (isNull != null && valueCount == 0) {
        // Every position is null: use empty blocks, a TINYINT block followed by one per type parameter
        List<Type> parameterTypes = type.getTypeParameters();
        int parameterCount = parameterTypes.size();
        fieldBlocks = new Block[parameterCount + 1];
        fieldBlocks[0] = TINYINT.createBlockBuilder(null, 0).build();
        for (int parameter = 0; parameter < parameterCount; parameter++) {
            fieldBlocks[parameter + 1] = parameterTypes.get(parameter).createBlockBuilder(null, 0).build();
        }
    } else {
        fieldBlocks = getBlocks(valueCount);
    }

    // All field blocks must agree on their position count
    long distinctPositionCounts = Arrays.stream(fieldBlocks).mapToInt(Block::getPositionCount).distinct().count();
    verify(distinctPositionCounts == 1);

    Block result = RowBlock.fromFieldBlocks(nextBatchSize, Optional.ofNullable(isNull), fieldBlocks);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Type(io.trino.spi.type.Type) RowType(io.trino.spi.type.RowType) ReaderUtils.verifyStreamType(io.trino.orc.reader.ReaderUtils.verifyStreamType) LazyBlock(io.trino.spi.block.LazyBlock) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) RowBlock(io.trino.spi.block.RowBlock) ByteArrayBlock(io.trino.spi.block.ByteArrayBlock) OrcCorruptionException(io.trino.orc.OrcCorruptionException)

Example 3 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class StructColumnReader method readBlock.

/**
 * Reads the next batch of struct values as a RowBlock.
 *
 * Any pending skip (readOffset) is forwarded to the field readers first. The batch is
 * then read and both readOffset and nextBatchSize are reset to zero.
 *
 * @return a RowBlock with nextBatchSize positions
 * @throws IOException if reading from the underlying streams fails
 */
@Override
public Block readBlock() throws IOException {
    if (!rowGroupOpen) {
        openRowGroup();
    }

    // Captured up front: the field readers are prepared even if the converted offset is 0
    boolean skipPending = readOffset > 0;
    if (skipPending && presentStream != null) {
        // skip ahead the present bit reader, but count the set bits
        // and use this as the skip size for the field readers
        readOffset = presentStream.countBitsSet(readOffset);
    }
    if (skipPending) {
        for (ColumnReader fieldReader : structFields.values()) {
            fieldReader.prepareNextRead(readOffset);
        }
    }

    // Without a present stream every position in the batch carries a value
    boolean[] isNull = null;
    int valueCount = nextBatchSize;
    if (presentStream != null) {
        isNull = new boolean[nextBatchSize];
        valueCount = nextBatchSize - presentStream.getUnsetBits(nextBatchSize, isNull);
    }

    Block[] fieldBlocks;
    if (isNull != null && valueCount == 0) {
        // Every position is null: use one empty block per type parameter
        List<Type> parameterTypes = type.getTypeParameters();
        int parameterCount = parameterTypes.size();
        fieldBlocks = new Block[parameterCount];
        for (int parameter = 0; parameter < parameterCount; parameter++) {
            fieldBlocks[parameter] = parameterTypes.get(parameter).createBlockBuilder(null, 0).build();
        }
    } else {
        fieldBlocks = getBlocksForType(valueCount);
    }

    // All field blocks must agree on their position count
    long distinctPositionCounts = Arrays.stream(fieldBlocks).mapToInt(Block::getPositionCount).distinct().count();
    verify(distinctPositionCounts == 1);

    // Struct is represented as a row block
    Block result = RowBlock.fromFieldBlocks(nextBatchSize, Optional.ofNullable(isNull), fieldBlocks);
    readOffset = 0;
    nextBatchSize = 0;
    return result;
}
Also used : Type(io.trino.spi.type.Type) RowType(io.trino.spi.type.RowType) ReaderUtils.verifyStreamType(io.trino.orc.reader.ReaderUtils.verifyStreamType) Block(io.trino.spi.block.Block) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) RowBlock(io.trino.spi.block.RowBlock) ColumnReaders.createColumnReader(io.trino.orc.reader.ColumnReaders.createColumnReader)

Example 4 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method testReaderLowerCasesFieldNamesFromStream.

/**
 * Writes a struct whose field names contain upper case characters and reads it back
 * with a reader type that declares the same names in lower case, expecting every
 * written value to come back in its original position.
 */
@Test
public void testReaderLowerCasesFieldNamesFromStream() throws IOException {
    List<String> lowerCaseNames = new ArrayList<>(Arrays.asList("field_a", "field_b", "field_c"));
    List<String> mixedCaseNames = new ArrayList<>(Arrays.asList("field_A", "field_B", "field_C"));
    List<String> writtenValues = new ArrayList<>(Arrays.asList("fieldAValue", "fieldBValue", "fieldCValue"));

    Type readerType = getType(lowerCaseNames);
    Type writerType = getType(mixedCaseNames);
    write(tempFile, writerType, writtenValues);

    RowBlock rowBlock = read(tempFile, readerType);
    List<?> firstRow = (List<?>) readerType.getObjectValue(TestingConnectorSession.SESSION, rowBlock, 0);

    assertEquals(firstRow.size(), lowerCaseNames.size());
    String[] expectedValues = {"fieldAValue", "fieldBValue", "fieldCValue"};
    for (int position = 0; position < expectedValues.length; position++) {
        assertEquals(firstRow.get(position), expectedValues[position]);
    }
}
Also used : Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType) ArrayList(java.util.ArrayList) RowBlock(io.trino.spi.block.RowBlock) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test)

Example 5 with RowBlock

use of io.trino.spi.block.RowBlock in project trino by trinodb.

the class TestStructColumnReader method read.

/**
 * Reads the first page of the given ORC file using the supplied reader type and
 * returns the page's first block.
 *
 * @param tempFile the file to read
 * @param readerType the single type the record reader is created with
 * @return the first block of the first page, cast to RowBlock
 * @throws RuntimeException with message "File is empty" if no OrcReader could be created
 * @throws IOException if reading the file or closing the record reader fails
 */
private RowBlock read(TempFile tempFile, Type readerType) throws IOException {
    OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), READER_OPTIONS);
    OrcReader orcReader = OrcReader.createOrcReader(orcDataSource, READER_OPTIONS).orElseThrow(() -> new RuntimeException("File is empty"));
    // try-with-resources closes the record reader even when reading the page fails;
    // previously close() was skipped on any exception, leaking the reader
    try (OrcRecordReader recordReader = orcReader.createRecordReader(orcReader.getRootColumn().getNestedColumns(), ImmutableList.of(readerType), OrcPredicate.TRUE, UTC, newSimpleAggregatedMemoryContext(), OrcReader.INITIAL_BATCH_SIZE, RuntimeException::new)) {
        // The block is fully loaded before the reader is closed on exit from the try block
        return (RowBlock) recordReader.nextPage().getLoadedPage().getBlock(0);
    }
}
Also used : RowBlock(io.trino.spi.block.RowBlock)

Aggregations

RowBlock (io.trino.spi.block.RowBlock)14 Block (io.trino.spi.block.Block)8 Type (io.trino.spi.type.Type)8 ImmutableList (com.google.common.collect.ImmutableList)6 OrcType (io.trino.orc.metadata.OrcType)6 ArrayList (java.util.ArrayList)6 Test (org.testng.annotations.Test)6 List (java.util.List)5 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)4 Page (io.trino.spi.Page)3 ReaderUtils.verifyStreamType (io.trino.orc.reader.ReaderUtils.verifyStreamType)2 ColumnarRow (io.trino.spi.block.ColumnarRow)2 ColumnarRow.toColumnarRow (io.trino.spi.block.ColumnarRow.toColumnarRow)2 LazyBlock (io.trino.spi.block.LazyBlock)2 RowType (io.trino.spi.type.RowType)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 OrcCorruptionException (io.trino.orc.OrcCorruptionException)1 ColumnReaders.createColumnReader (io.trino.orc.reader.ColumnReaders.createColumnReader)1 Field (io.trino.parquet.Field)1