Use of io.trino.spi.Page in project trino by trinodb.
The class HiveUpdatablePageSource, method deleteRowsInternal.
private void deleteRowsInternal(ColumnarRow columnarRow) {
    int positionCount = columnarRow.getPositionCount();
    if (columnarRow.mayHaveNull()) {
        for (int position = 0; position < positionCount; position++) {
            checkArgument(!columnarRow.isNull(position), "In the delete rowIds, found null row at position %s", position);
        }
    }

    // assemble the ACID delete page: operation, originalTransaction, bucket, rowId, currentWriteId, row
    Block originalTransactionChannel = columnarRow.getField(ORIGINAL_TRANSACTION_CHANNEL);
    Block[] blockArray = {
            new RunLengthEncodedBlock(DELETE_OPERATION_BLOCK, positionCount),
            originalTransactionChannel,
            columnarRow.getField(BUCKET_CHANNEL),
            columnarRow.getField(ROW_ID_CHANNEL),
            RunLengthEncodedBlock.create(BIGINT, writeId, positionCount),
            new RunLengthEncodedBlock(hiveRowTypeNullsBlock, positionCount),
    };
    Page deletePage = new Page(blockArray);

    // track the largest originalTransaction id seen among the deleted rows
    for (int index = 0; index < positionCount; index++) {
        maxWriteId = Math.max(maxWriteId, originalTransactionChannel.getLong(index, 0));
    }

    lazyInitializeDeleteFileWriter();
    deleteFileWriter.orElseThrow(() -> new IllegalArgumentException("deleteFileWriter not present")).appendRows(deletePage);
    rowCount += positionCount;
}
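The notable trick here is that every constant column (the operation code, the writeId, and the null row struct) is run-length encoded, so it costs a single value's worth of memory regardless of how many rows the page covers. A minimal sketch of the same pattern using only the public SPI; the column values and the class name are made up for illustration:

import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.RunLengthEncodedBlock;

import static io.trino.spi.type.BigintType.BIGINT;

public final class RlePageSketch {
    private RlePageSketch() {}

    // Build a page whose columns are all constants: each block stores one
    // value logically repeated positionCount times.
    public static Page constantPage(long operation, long writeId, int positionCount) {
        Block operationBlock = RunLengthEncodedBlock.create(BIGINT, operation, positionCount);
        Block writeIdBlock = RunLengthEncodedBlock.create(BIGINT, writeId, positionCount);
        return new Page(positionCount, operationBlock, writeIdBlock);
    }
}

Because each RLE block holds a single underlying value, the delete page above adds almost no memory per row beyond the three columns taken directly from the rowId struct.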
Use of io.trino.spi.Page in project trino by trinodb.
The class HivePageSink, method writePage.
private void writePage(Page page) {
    int[] writerIndexes = getWriterIndexes(page);

    // position count for each writer
    int[] sizes = new int[writers.size()];
    for (int index : writerIndexes) {
        sizes[index]++;
    }

    // record which positions are used by which writer
    int[][] writerPositions = new int[writers.size()][];
    int[] counts = new int[writers.size()];
    for (int position = 0; position < page.getPositionCount(); position++) {
        int index = writerIndexes[position];
        int count = counts[index];
        if (count == 0) {
            writerPositions[index] = new int[sizes[index]];
        }
        writerPositions[index][count] = position;
        counts[index] = count + 1;
    }

    // invoke the writers
    Page dataPage = getDataPage(page);
    for (int index = 0; index < writerPositions.length; index++) {
        int[] positions = writerPositions[index];
        if (positions == null) {
            continue;
        }

        // If write is partitioned across multiple writers, filter page using dictionary blocks
        Page pageForWriter = dataPage;
        if (positions.length != dataPage.getPositionCount()) {
            verify(positions.length == counts[index]);
            pageForWriter = pageForWriter.getPositions(positions, 0, positions.length);
        }

        HiveWriter writer = writers.get(index);
        long currentWritten = writer.getWrittenBytes();
        long currentMemory = writer.getMemoryUsage();
        writer.append(pageForWriter);
        writtenBytes += (writer.getWrittenBytes() - currentWritten);
        memoryUsage += (writer.getMemoryUsage() - currentMemory);
    }
}
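The two-pass layout (first count positions per writer, then fill exactly-sized per-writer arrays) avoids any resizable buffers, and Page.getPositions slices the page lazily by wrapping the source blocks in dictionary blocks rather than copying data. The same idea in isolation, as a sketch; splitByTarget is not a Trino API:

import io.trino.spi.Page;

public final class PageSplitSketch {
    private PageSplitSketch() {}

    // Split a page into one page per target index, given one target per position.
    public static Page[] splitByTarget(Page page, int[] targetIndexes, int targetCount) {
        // pass 1: count positions per target
        int[] sizes = new int[targetCount];
        for (int target : targetIndexes) {
            sizes[target]++;
        }

        // pass 2: fill exactly-sized position arrays
        int[][] positions = new int[targetCount][];
        int[] counts = new int[targetCount];
        for (int position = 0; position < page.getPositionCount(); position++) {
            int target = targetIndexes[position];
            if (counts[target] == 0) {
                positions[target] = new int[sizes[target]];
            }
            positions[target][counts[target]++] = position;
        }

        // slice the page once per non-empty target
        Page[] result = new Page[targetCount];
        for (int target = 0; target < targetCount; target++) {
            if (positions[target] != null) {
                result[target] = page.getPositions(positions[target], 0, positions[target].length);
            }
        }
        return result;
    }
}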
Use of io.trino.spi.Page in project trino by trinodb.
The class HivePageSink, method getWriterIndexes.
private int[] getWriterIndexes(Page page) {
    Page partitionColumns = extractColumns(page, partitionColumnsInputIndex);
    Block bucketBlock = buildBucketBlock(page);
    int[] writerIndexes = pagePartitioner.partitionPage(partitionColumns, bucketBlock);
    if (pagePartitioner.getMaxIndex() >= maxOpenWriters) {
        throw new TrinoException(HIVE_TOO_MANY_OPEN_PARTITIONS, format("Exceeded limit of %s open writers for partitions/buckets", maxOpenWriters));
    }

    // expand writers list to new size
    while (writers.size() <= pagePartitioner.getMaxIndex()) {
        writers.add(null);
    }

    // create missing writers
    for (int position = 0; position < page.getPositionCount(); position++) {
        int writerIndex = writerIndexes[position];
        HiveWriter writer = writers.get(writerIndex);
        if (writer != null) {
            // if the current file is not too big, continue with the current writer;
            // bucketed and transactional tables must never split output files,
            // because the file-to-bucket mapping is fixed
            // and file names have no random component (e.g. bucket_00000)
            if (bucketFunction != null || isTransactional || writer.getWrittenBytes() <= targetMaxFileSize) {
                continue;
            }
            // close current writer
            closeWriter(writer);
        }

        OptionalInt bucketNumber = OptionalInt.empty();
        if (bucketBlock != null) {
            bucketNumber = OptionalInt.of(bucketBlock.getInt(position, 0));
        }
        writer = writerFactory.createWriter(partitionColumns, position, bucketNumber);
        writers.set(writerIndex, writer);
    }

    verify(writers.size() == pagePartitioner.getMaxIndex() + 1);
    verify(!writers.contains(null));
    return writerIndexes;
}
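pagePartitioner.partitionPage maps each position to a stable writer index derived from the partition values (and the bucket, if any); in the real sink it is backed by a PageIndexer that hands out consecutive indexes as new partition values appear, which is why getMaxIndex() can bound the number of open writers. A rough, hypothetical stand-in, not Trino's partitioner, with the hash only there to keep the sketch self-contained:

import io.trino.spi.Page;
import io.trino.spi.block.Block;

import static io.trino.spi.type.BigintType.BIGINT;

public final class PartitionSketch {
    private PartitionSketch() {}

    // Derive one writer index per position by hashing a single BIGINT
    // partition column into a fixed number of buckets.
    public static int[] partitionByColumn(Page partitionColumns, int bucketCount) {
        Block key = partitionColumns.getBlock(0);
        int[] indexes = new int[partitionColumns.getPositionCount()];
        for (int position = 0; position < indexes.length; position++) {
            long value = BIGINT.getLong(key, position);
            indexes[position] = Math.floorMod(Long.hashCode(value), bucketCount);
        }
        return indexes;
    }
}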
Use of io.trino.spi.Page in project trino by trinodb.
The class OrcPageSource, method getNextPage.
@Override
public Page getNextPage() {
    Page page;
    try {
        if (outstandingPage.isPresent()) {
            page = outstandingPage.get();
            outstandingPage = Optional.empty();
            // Mark no bytes consumed by outstandingPage.
            // We can reset it again below if deletedRows loading yields again.
            // In such case the brief period when it is set to 0 will not be observed externally as
            // page source memory usage is only read by engine after call to getNextPage completes.
            localMemoryContext.setBytes(0);
        } else {
            page = recordReader.nextPage();
        }
    } catch (IOException | RuntimeException e) {
        closeAllSuppress(e, this);
        throw handleException(orcDataSource.getId(), e);
    }

    if (page == null) {
        close();
        return null;
    }

    completedPositions += page.getPositionCount();

    OptionalLong startRowId = originalFileRowId.isPresent()
            ? OptionalLong.of(originalFileRowId.get() + recordReader.getFilePosition())
            : OptionalLong.empty();

    if (deletedRows.isPresent()) {
        boolean deletedRowsYielded = !deletedRows.get().loadOrYield();
        if (deletedRowsYielded) {
            outstandingPage = Optional.of(page);
            localMemoryContext.setBytes(page.getRetainedSizeInBytes());
            // return control to engine so it can update memory usage for query
            return null;
        }
    }

    MaskDeletedRowsFunction maskDeletedRowsFunction = deletedRows
            .map(deletedRows -> deletedRows.getMaskDeletedRowsFunction(page, startRowId))
            .orElseGet(() -> MaskDeletedRowsFunction.noMaskForPage(page));
    return getColumnAdaptationsPage(page, maskDeletedRowsFunction, recordReader.getFilePosition());
}
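The outstandingPage dance implements cooperative yielding: if loading the delete filter cannot finish within its time slice, the current page is stashed, its retained size is charged to the operator's memory context, and null is returned so the engine can account for the memory and reschedule the driver. A stripped-down sketch of that protocol, with hypothetical Reader, Loader, and MemoryContext interfaces standing in for Trino's internals:

import java.util.Optional;
import io.trino.spi.Page;

final class YieldingSourceSketch {
    // hypothetical stand-ins for Trino's internal interfaces
    interface Reader { Page nextPage(); }
    interface Loader { boolean loadOrYield(); }             // true once fully loaded
    interface MemoryContext { void setBytes(long bytes); }

    private final Reader reader;
    private final Loader deleteFilter;
    private final MemoryContext memoryContext;
    private Optional<Page> outstandingPage = Optional.empty();

    YieldingSourceSketch(Reader reader, Loader deleteFilter, MemoryContext memoryContext) {
        this.reader = reader;
        this.deleteFilter = deleteFilter;
        this.memoryContext = memoryContext;
    }

    Page nextPage() {
        // resume with the stashed page if the previous call yielded
        Page page = outstandingPage.orElseGet(reader::nextPage);
        outstandingPage = Optional.empty();
        memoryContext.setBytes(0);
        if (page == null) {
            return null;
        }
        if (!deleteFilter.loadOrYield()) {
            // not done yet: stash the page, charge its memory, give control back
            outstandingPage = Optional.of(page);
            memoryContext.setBytes(page.getRetainedSizeInBytes());
            return null;
        }
        return page; // filter is loaded; the caller can now mask deleted rows
    }
}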
Use of io.trino.spi.Page in project trino by trinodb.
The class ParquetPageSource, method getNextPage.
@Override
public Page getNextPage() {
    try {
        batchId++;
        int batchSize = parquetReader.nextBatch();
        if (closed || batchSize <= 0) {
            close();
            return null;
        }

        completedPositions += batchSize;

        Block[] blocks = new Block[fields.size()];
        for (int column = 0; column < blocks.length; column++) {
            if (isIndexColumn(column)) {
                blocks[column] = getRowIndexColumn(parquetReader.lastBatchStartRow(), batchSize);
            } else {
                Type type = types.get(column);
                blocks[column] = fields.get(column)
                        .<Block>map(field -> new LazyBlock(batchSize, new ParquetBlockLoader(field)))
                        .orElseGet(() -> RunLengthEncodedBlock.create(type, null, batchSize));
            }
        }
        return new Page(batchSize, blocks);
    } catch (TrinoException e) {
        closeAllSuppress(e, this);
        throw e;
    } catch (RuntimeException e) {
        closeAllSuppress(e, this);
        throw new TrinoException(HIVE_CURSOR_ERROR, e);
    }
}
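The LazyBlock wrapper is what keeps this source cheap for columns the query never touches: the ParquetBlockLoader runs only when the block is first accessed, and at most once. A minimal sketch of the same mechanism; the loader body here is just a placeholder constant:

import io.trino.spi.block.Block;
import io.trino.spi.block.LazyBlock;
import io.trino.spi.block.RunLengthEncodedBlock;

import static io.trino.spi.type.BigintType.BIGINT;

public final class LazyBlockSketch {
    private LazyBlockSketch() {}

    public static Block lazyConstant(int positionCount) {
        // The lambda is a LazyBlockLoader; it is invoked on first access
        // (e.g. via getLoadedBlock()) and never again.
        return new LazyBlock(positionCount, () -> RunLengthEncodedBlock.create(BIGINT, 1L, positionCount));
    }
}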