
Example 36 with Page

use of io.trino.spi.Page in project trino by trinodb.

the class HiveUpdatablePageSource method deleteRowsInternal.

private void deleteRowsInternal(ColumnarRow columnarRow) {
    int positionCount = columnarRow.getPositionCount();
    if (columnarRow.mayHaveNull()) {
        for (int position = 0; position < positionCount; position++) {
            checkArgument(!columnarRow.isNull(position), "In the delete rowIds, found null row at position %s", position);
        }
    }
    Block originalTransactionChannel = columnarRow.getField(ORIGINAL_TRANSACTION_CHANNEL);
    Block[] blockArray = {
            new RunLengthEncodedBlock(DELETE_OPERATION_BLOCK, positionCount),
            originalTransactionChannel,
            columnarRow.getField(BUCKET_CHANNEL),
            columnarRow.getField(ROW_ID_CHANNEL),
            RunLengthEncodedBlock.create(BIGINT, writeId, positionCount),
            new RunLengthEncodedBlock(hiveRowTypeNullsBlock, positionCount),
    };
    Page deletePage = new Page(blockArray);
    for (int index = 0; index < positionCount; index++) {
        maxWriteId = Math.max(maxWriteId, originalTransactionChannel.getLong(index, 0));
    }
    lazyInitializeDeleteFileWriter();
    deleteFileWriter.orElseThrow(() -> new IllegalArgumentException("deleteFileWriter not present")).appendRows(deletePage);
    rowCount += positionCount;
}
Also used : Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) Block(io.trino.spi.block.Block) LongArrayBlock(io.trino.spi.block.LongArrayBlock) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) Page(io.trino.spi.Page)
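
To see the Page/Block relationship outside the connector code, here is a minimal standalone sketch (the class name and values are invented for illustration): it builds one ordinary column with a BlockBuilder and one constant column with RunLengthEncodedBlock.create, then wraps both in a Page, mirroring how deleteRowsInternal mixes per-row fields with run-length-encoded constants.

import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.block.RunLengthEncodedBlock;
import static io.trino.spi.type.BigintType.BIGINT;

public class PageSketch {
    public static Page buildPage() {
        int positionCount = 3;
        // per-position column: one BIGINT value written per row
        BlockBuilder rowIds = BIGINT.createBlockBuilder(null, positionCount);
        for (long id = 0; id < positionCount; id++) {
            BIGINT.writeLong(rowIds, id);
        }
        // constant column: a single value repeated for every position,
        // analogous to the writeId column in deletePage above
        Block writeId = RunLengthEncodedBlock.create(BIGINT, 42L, positionCount);
        return new Page(rowIds.build(), writeId);
    }
}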

Example 37 with Page

use of io.trino.spi.Page in project trino by trinodb.

the class HivePageSink method writePage.

private void writePage(Page page) {
    int[] writerIndexes = getWriterIndexes(page);
    // position count for each writer
    int[] sizes = new int[writers.size()];
    for (int index : writerIndexes) {
        sizes[index]++;
    }
    // record which positions are used by which writer
    int[][] writerPositions = new int[writers.size()][];
    int[] counts = new int[writers.size()];
    for (int position = 0; position < page.getPositionCount(); position++) {
        int index = writerIndexes[position];
        int count = counts[index];
        if (count == 0) {
            writerPositions[index] = new int[sizes[index]];
        }
        writerPositions[index][count] = position;
        counts[index] = count + 1;
    }
    // invoke the writers
    Page dataPage = getDataPage(page);
    for (int index = 0; index < writerPositions.length; index++) {
        int[] positions = writerPositions[index];
        if (positions == null) {
            continue;
        }
        // If write is partitioned across multiple writers, filter page using dictionary blocks
        Page pageForWriter = dataPage;
        if (positions.length != dataPage.getPositionCount()) {
            verify(positions.length == counts[index]);
            pageForWriter = pageForWriter.getPositions(positions, 0, positions.length);
        }
        HiveWriter writer = writers.get(index);
        long currentWritten = writer.getWrittenBytes();
        long currentMemory = writer.getMemoryUsage();
        writer.append(pageForWriter);
        writtenBytes += (writer.getWrittenBytes() - currentWritten);
        memoryUsage += (writer.getMemoryUsage() - currentMemory);
    }
}
Also used : Page(io.trino.spi.Page)
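
The core mechanic writePage relies on is Page.getPositions, which returns a view of the page restricted to the given positions (backed by dictionary blocks, so no row data is copied). A minimal sketch of the same fan-out, using a hypothetical two-way split by row parity in place of a real partition function:

import io.trino.spi.Page;

public class PageSplitSketch {
    // split a page into even-position and odd-position views;
    // the parity rule is an invented stand-in for a real partition function
    public static Page[] splitByParity(Page page) {
        int positionCount = page.getPositionCount();
        int[] even = new int[(positionCount + 1) / 2];
        int[] odd = new int[positionCount / 2];
        for (int position = 0; position < positionCount; position++) {
            if (position % 2 == 0) {
                even[position / 2] = position;
            } else {
                odd[position / 2] = position;
            }
        }
        return new Page[] {
                page.getPositions(even, 0, even.length),
                page.getPositions(odd, 0, odd.length),
        };
    }
}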

Example 38 with Page

use of io.trino.spi.Page in project trino by trinodb.

the class HivePageSink method getWriterIndexes.

private int[] getWriterIndexes(Page page) {
    Page partitionColumns = extractColumns(page, partitionColumnsInputIndex);
    Block bucketBlock = buildBucketBlock(page);
    int[] writerIndexes = pagePartitioner.partitionPage(partitionColumns, bucketBlock);
    if (pagePartitioner.getMaxIndex() >= maxOpenWriters) {
        throw new TrinoException(HIVE_TOO_MANY_OPEN_PARTITIONS, format("Exceeded limit of %s open writers for partitions/buckets", maxOpenWriters));
    }
    // expand writers list to new size
    while (writers.size() <= pagePartitioner.getMaxIndex()) {
        writers.add(null);
    }
    // create missing writers
    for (int position = 0; position < page.getPositionCount(); position++) {
        int writerIndex = writerIndexes[position];
        HiveWriter writer = writers.get(writerIndex);
        if (writer != null) {
            // if the current file is not too big, continue with the current writer;
            // for transactional tables we do not want to split output files, because bucketing is explicit or implicit
            // and file names have no random component (e.g. bucket_00000)
            if (bucketFunction != null || isTransactional || writer.getWrittenBytes() <= targetMaxFileSize) {
                continue;
            }
            // close current writer
            closeWriter(writer);
        }
        OptionalInt bucketNumber = OptionalInt.empty();
        if (bucketBlock != null) {
            bucketNumber = OptionalInt.of(bucketBlock.getInt(position, 0));
        }
        writer = writerFactory.createWriter(partitionColumns, position, bucketNumber);
        writers.set(writerIndex, writer);
    }
    verify(writers.size() == pagePartitioner.getMaxIndex() + 1);
    verify(!writers.contains(null));
    return writerIndexes;
}
Also used : Block(io.trino.spi.block.Block) TrinoException(io.trino.spi.TrinoException) Page(io.trino.spi.Page) OptionalInt(java.util.OptionalInt)
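
The writer bookkeeping above follows a grow-and-fill pattern: pad the list with nulls up to the highest partition index seen, then create entries on demand. A plain-Java sketch of the same pattern, with a hypothetical per-index factory standing in for writerFactory.createWriter:

import java.util.List;
import java.util.function.IntFunction;

public class LazySlotsSketch {
    // grow `slots` so that maxIndex is addressable, then fill any
    // missing slot from the factory; mirrors the writers list handling
    public static <T> void ensureSlots(List<T> slots, int maxIndex, IntFunction<T> factory) {
        while (slots.size() <= maxIndex) {
            slots.add(null);
        }
        for (int index = 0; index <= maxIndex; index++) {
            if (slots.get(index) == null) {
                slots.set(index, factory.apply(index));
            }
        }
    }
}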

Example 39 with Page

use of io.trino.spi.Page in project trino by trinodb.

the class OrcPageSource method getNextPage.

@Override
public Page getNextPage() {
    Page page;
    try {
        if (outstandingPage.isPresent()) {
            page = outstandingPage.get();
            outstandingPage = Optional.empty();
            // Mark no bytes consumed by outstandingPage.
            // We can reset it again below if deletedRows loading yields again.
            // In such case the brief period when it is set to 0 will not be observed externally as
            // page source memory usage is only read by engine after call to getNextPage completes.
            localMemoryContext.setBytes(0);
        } else {
            page = recordReader.nextPage();
        }
    } catch (IOException | RuntimeException e) {
        closeAllSuppress(e, this);
        throw handleException(orcDataSource.getId(), e);
    }
    if (page == null) {
        close();
        return null;
    }
    completedPositions += page.getPositionCount();
    OptionalLong startRowId = originalFileRowId.isPresent() ? OptionalLong.of(originalFileRowId.get() + recordReader.getFilePosition()) : OptionalLong.empty();
    if (deletedRows.isPresent()) {
        boolean deletedRowsYielded = !deletedRows.get().loadOrYield();
        if (deletedRowsYielded) {
            outstandingPage = Optional.of(page);
            localMemoryContext.setBytes(page.getRetainedSizeInBytes());
            // return control to engine so it can update memory usage for query
            return null;
        }
    }
    MaskDeletedRowsFunction maskDeletedRowsFunction = deletedRows.map(deletedRows -> deletedRows.getMaskDeletedRowsFunction(page, startRowId)).orElseGet(() -> MaskDeletedRowsFunction.noMaskForPage(page));
    return getColumnAdaptationsPage(page, maskDeletedRowsFunction, recordReader.getFilePosition());
}
Also used : HiveUpdateProcessor(io.trino.plugin.hive.HiveUpdateProcessor) OrcFileWriter.computeBucketValue(io.trino.plugin.hive.orc.OrcFileWriter.computeBucketValue) Type(io.trino.spi.type.Type) ROW_ID_CHANNEL(io.trino.plugin.hive.HiveUpdatablePageSource.ROW_ID_CHANNEL) Page(io.trino.spi.Page) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) LazyBlock(io.trino.spi.block.LazyBlock) OptionalLong(java.util.OptionalLong) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ORIGINAL_TRANSACTION_CHANNEL(io.trino.plugin.hive.HiveUpdatablePageSource.ORIGINAL_TRANSACTION_CHANNEL) OrcRecordReader(io.trino.orc.OrcRecordReader) ImmutableList(com.google.common.collect.ImmutableList) Closer(com.google.common.io.Closer) Block(io.trino.spi.block.Block) Objects.requireNonNull(java.util.Objects.requireNonNull) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) OrcDataSource(io.trino.orc.OrcDataSource) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HIVE_CURSOR_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_CURSOR_ERROR) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) LongArrayBlock(io.trino.spi.block.LongArrayBlock) OrcType(io.trino.orc.metadata.OrcType) MaskDeletedRowsFunction(io.trino.plugin.hive.orc.OrcDeletedRows.MaskDeletedRowsFunction) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) HIVE_BAD_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) Closables.closeAllSuppress(io.trino.plugin.base.util.Closables.closeAllSuppress) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) RowBlock.fromFieldBlocks(io.trino.spi.block.RowBlock.fromFieldBlocks) BUCKET_CHANNEL(io.trino.plugin.hive.HiveUpdatablePageSource.BUCKET_CHANNEL) LazyBlockLoader(io.trino.spi.block.LazyBlockLoader) TrinoException(io.trino.spi.TrinoException) IOException(java.io.IOException) ColumnMetadata(io.trino.orc.metadata.ColumnMetadata) OrcCorruptionException(io.trino.orc.OrcCorruptionException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) UncheckedIOException(java.io.UncheckedIOException) OrcDataSourceId(io.trino.orc.OrcDataSourceId) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) LocalMemoryContext(io.trino.memory.context.LocalMemoryContext) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper)
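
The outstandingPage bookkeeping implements the cooperative-yield pattern a page source can use: getNextPage may return null while work (here, loading the delete files) is still pending, parking the in-flight page and reporting its retained size so the engine can account for the memory. A stripped-down sketch of the same pattern; producePage and tryFinishPendingWork are hypothetical stand-ins for recordReader.nextPage and the deletedRows loading:

import io.trino.spi.Page;
import java.util.Optional;

public class YieldingSourceSketch {
    private Optional<Page> outstandingPage = Optional.empty();

    // hypothetical collaborators, stand-ins for recordReader and deletedRows
    private Page producePage() { return null; }
    private boolean tryFinishPendingWork() { return true; }

    public Page getNextPage() {
        // resume with the parked page if the previous call yielded
        Page page = outstandingPage.orElseGet(this::producePage);
        outstandingPage = Optional.empty();
        if (page != null && !tryFinishPendingWork()) {
            // park the page and yield; the engine will call getNextPage again
            outstandingPage = Optional.of(page);
            return null;
        }
        return page;
    }
}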

Example 40 with Page

use of io.trino.spi.Page in project trino by trinodb.

the class ParquetPageSource method getNextPage.

@Override
public Page getNextPage() {
    try {
        batchId++;
        int batchSize = parquetReader.nextBatch();
        if (closed || batchSize <= 0) {
            close();
            return null;
        }
        completedPositions += batchSize;
        Block[] blocks = new Block[fields.size()];
        for (int column = 0; column < blocks.length; column++) {
            if (isIndexColumn(column)) {
                blocks[column] = getRowIndexColumn(parquetReader.lastBatchStartRow(), batchSize);
            } else {
                Type type = types.get(column);
                blocks[column] = fields.get(column).<Block>map(field -> new LazyBlock(batchSize, new ParquetBlockLoader(field))).orElseGet(() -> RunLengthEncodedBlock.create(type, null, batchSize));
            }
        }
        return new Page(batchSize, blocks);
    } catch (TrinoException e) {
        closeAllSuppress(e, this);
        throw e;
    } catch (RuntimeException e) {
        closeAllSuppress(e, this);
        throw new TrinoException(HIVE_CURSOR_ERROR, e);
    }
}
Also used : Type(io.trino.spi.type.Type) LazyBlock(io.trino.spi.block.LazyBlock) Block(io.trino.spi.block.Block) LongArrayBlock(io.trino.spi.block.LongArrayBlock) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) TrinoException(io.trino.spi.TrinoException) Page(io.trino.spi.Page)
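
LazyBlock is what keeps this reader cheap for columns a query may never touch: the loader runs only on first access to the block's data. A minimal sketch; the RLE block inside the loader is an invented placeholder for real Parquet column decoding:

import io.trino.spi.block.Block;
import io.trino.spi.block.LazyBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import static io.trino.spi.type.BigintType.BIGINT;

public class LazyBlockSketch {
    public static Block lazyColumn(int positionCount) {
        // the loader is invoked once, on first access to the block's data;
        // this RLE block stands in for actual column decoding work
        return new LazyBlock(positionCount, () -> RunLengthEncodedBlock.create(BIGINT, 1L, positionCount));
    }
}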

Aggregations

Page (io.trino.spi.Page): 579
Test (org.testng.annotations.Test): 334
Block (io.trino.spi.block.Block): 153
Type (io.trino.spi.type.Type): 127
MaterializedResult (io.trino.testing.MaterializedResult): 109
PlanNodeId (io.trino.sql.planner.plan.PlanNodeId): 91
RowPagesBuilder (io.trino.RowPagesBuilder): 72
RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock): 68
ImmutableList (com.google.common.collect.ImmutableList): 65
ArrayList (java.util.ArrayList): 48
BlockBuilder (io.trino.spi.block.BlockBuilder): 46
Optional (java.util.Optional): 43
TaskContext (io.trino.operator.TaskContext): 42
TestingTaskContext (io.trino.testing.TestingTaskContext): 41
List (java.util.List): 41
DictionaryBlock (io.trino.spi.block.DictionaryBlock): 38
OperatorAssertion.toMaterializedResult (io.trino.operator.OperatorAssertion.toMaterializedResult): 37
Slice (io.airlift.slice.Slice): 36
OperatorFactory (io.trino.operator.OperatorFactory): 32
LazyBlock (io.trino.spi.block.LazyBlock): 32