Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.
In class TestScanFilterAndProjectOperator, the method testRecordCursorYield:
@Test
public void testRecordCursorYield() {
    // create a generic long function that yields for projection on every row
    // verify that we yield #rows times in total
    // create a table with 15 rows
    int length = 15;
    Page input = SequencePageBuilder.createSequencePage(ImmutableList.of(BIGINT), length, 0);
    DriverContext driverContext = newDriverContext();

    // set up generic long function with a callback to force yield
    functionAssertions.addFunctions(new InternalFunctionBundle(new GenericLongFunction("record_cursor", value -> {
        driverContext.getYieldSignal().forceYieldForTesting();
        return value;
    })));
    FunctionManager functionManager = functionAssertions.getFunctionManager();
    ExpressionCompiler expressionCompiler = new ExpressionCompiler(functionManager, new PageFunctionCompiler(functionManager, 0));
    List<RowExpression> projections = ImmutableList.of(call(
            functionAssertions.getMetadata().resolveFunction(session, QualifiedName.of("generic_long_record_cursor"), fromTypes(BIGINT)),
            field(0, BIGINT)));
    Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(Optional.empty(), projections, "key");
    Supplier<PageProcessor> pageProcessor = expressionCompiler.compilePageProcessor(Optional.empty(), projections);

    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory factory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0, new PlanNodeId("test"), new PlanNodeId("0"),
            (session, split, table, columns, dynamicFilter) -> new RecordPageSource(new PageRecordSet(ImmutableList.of(BIGINT), input)),
            cursorProcessor, pageProcessor, TEST_TABLE_HANDLE, ImmutableList.of(), DynamicFilter.EMPTY,
            ImmutableList.of(BIGINT), DataSize.ofBytes(0), 0);
    SourceOperator operator = factory.createOperator(driverContext);
    operator.addSplit(new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), Lifespan.taskWide()));
    operator.noMoreSplits();

    // start driver; get null value due to yield for the first 15 times
    for (int i = 0; i < length; i++) {
        driverContext.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), driverContext.getYieldExecutor());
        assertNull(operator.getOutput());
        driverContext.getYieldSignal().reset();
    }

    // the 16th yield is not going to prevent the operator from producing a page
    driverContext.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), driverContext.getYieldExecutor());
    Page output = operator.getOutput();
    driverContext.getYieldSignal().reset();
    assertNotNull(output);
    assertEquals(toValues(BIGINT, output.getBlock(0)), toValues(BIGINT, input.getBlock(0)));
}
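The test above feeds the operator a RecordPageSource built over an internal PageRecordSet and relies on the driver's yield signal. For a standalone view of the same adapter, here is a minimal sketch that builds a 15-row record set with the SPI's InMemoryRecordSet and drains the resulting RecordPageSource page by page; the class name and the use of InMemoryRecordSet in place of the test's PageRecordSet are illustrative assumptions, not part of the test.

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.Page;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;
import java.util.List;

public final class RecordPageSourceDrainSketch {
    public static void main(String[] args) throws Exception {
        // build a 15-row, single-BIGINT-column record set, mirroring the sequence page used in the test
        InMemoryRecordSet.Builder builder = InMemoryRecordSet.builder(List.of(BIGINT));
        for (long i = 0; i < 15; i++) {
            builder.addRow(i);
        }
        RecordSet recordSet = builder.build();

        // RecordPageSource adapts the cursor-based RecordSet to the page-based ConnectorPageSource interface
        long rows = 0;
        try (RecordPageSource pageSource = new RecordPageSource(recordSet)) {
            while (!pageSource.isFinished()) {
                Page page = pageSource.getNextPage();
                if (page != null) {
                    rows += page.getPositionCount();
                }
            }
        }
        System.out.println("rows read: " + rows); // expected: 15
    }
}

A null return from getNextPage() only means that no page is ready yet, which is exactly the behavior the yield assertions above depend on.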
Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.
In class AbstractFileFormat, the method createPageSource:
static ConnectorPageSource createPageSource(HiveRecordCursorProvider cursorProvider, ConnectorSession session, File targetFile, List<String> columnNames, List<Type> columnTypes, HiveStorageFormat format) {
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);
    Optional<ReaderRecordCursorWithProjections> recordCursorWithProjections = cursorProvider.createRecordCursor(
            conf, session, new Path(targetFile.getAbsolutePath()), 0, targetFile.length(), targetFile.length(),
            createSchema(format, columnNames, columnTypes), readColumns, TupleDomain.all(), TESTING_TYPE_MANAGER, false);
    checkState(recordCursorWithProjections.isPresent(), "readerPageSourceWithProjections is not present");
    checkState(recordCursorWithProjections.get().getProjectedReaderColumns().isEmpty(), "projection should not be required");
    return new RecordPageSource(columnTypes, recordCursorWithProjections.get().getRecordCursor());
}
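createPageSource uses the two-argument RecordPageSource constructor: the column types plus an already-opened RecordCursor. A minimal sketch of that constructor form, using only SPI classes (the InMemoryRecordSet and the class and method names here stand in for the Hive cursor machinery and are assumptions):

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.type.Type;
import java.util.List;

final class CursorBackedPageSourceSketch {
    static ConnectorPageSource wrap() {
        List<Type> columnTypes = List.of(BIGINT);
        InMemoryRecordSet recordSet = InMemoryRecordSet.builder(columnTypes).addRow(42L).build();
        // same constructor form as above: explicit column types plus an already-opened cursor
        return new RecordPageSource(columnTypes, recordSet.cursor());
    }
}

The single-argument, RecordSet-based constructor seen in the later examples is the convenience form; it takes the column types from the record set itself.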
Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.
In class HivePageSourceProvider, the method createHivePageSource:
public static Optional<ConnectorPageSource> createHivePageSource(
        Set<HivePageSourceFactory> pageSourceFactories, Set<HiveRecordCursorProvider> cursorProviders, Configuration configuration,
        ConnectorSession session, Path path, OptionalInt bucketNumber, long start, long length, long estimatedFileSize,
        Properties schema, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> columns, TypeManager typeManager,
        Optional<BucketConversion> bucketConversion, Optional<BucketValidation> bucketValidation, boolean s3SelectPushdownEnabled,
        Optional<AcidInfo> acidInfo, boolean originalFile, AcidTransaction transaction, List<ColumnMapping> columnMappings) {
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);

    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(
                configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns,
                effectivePredicate, acidInfo, bucketNumber, originalFile, transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();
            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }
            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }

    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(
                configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns,
                effectivePredicate, typeManager, s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();
            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }
            checkArgument(acidInfo.isEmpty(), "Acid is not supported");
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(
                        bucketAdaptation.get().getBucketColumnIndices(), bucketAdaptation.get().getBucketColumnHiveTypes(),
                        bucketAdaptation.get().getBucketingVersion(), bucketAdaptation.get().getTableBucketCount(),
                        bucketAdaptation.get().getPartitionBucketCount(), bucketAdaptation.get().getBucketToKeep(),
                        typeManager, delegate);
            }
            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatched columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            // bucket adaptation already validates that data is in the right bucket
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());
            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    return Optional.empty();
}
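Whichever branch succeeds, the caller gets back a ConnectorPageSource (a HivePageSource or a RecordPageSource) and consumes it the same way. As an illustration of that consumption, here is a sketch that drains a page source and sums a BIGINT column through the Type accessor; the method name, the throws clause, and the fixed channel index are assumptions for the example rather than Trino code.

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.connector.ConnectorPageSource;

final class PageSourceReaderSketch {
    // drain a page source such as the one returned above and sum one BIGINT column;
    // a real caller would derive the channel index and type from its HiveColumnHandles
    static long sumBigintColumn(ConnectorPageSource pageSource, int channel) throws Exception {
        long sum = 0;
        try (ConnectorPageSource source = pageSource) {
            while (!source.isFinished()) {
                Page page = source.getNextPage();
                if (page == null) {
                    continue; // no page ready yet (for example, after a yield)
                }
                Block block = page.getBlock(channel);
                for (int position = 0; position < block.getPositionCount(); position++) {
                    if (!block.isNull(position)) {
                        sum += BIGINT.getLong(block, position);
                    }
                }
            }
        }
        return sum;
    }
}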
Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.
In class ThriftIndexedTpchService, the method createLookupPageSource:
@Override
protected ConnectorPageSource createLookupPageSource(SplitInfo splitInfo, List<String> outputColumnNames) {
    IndexedTable indexedTable = indexedData.getIndexedTable(
            splitInfo.getTableName(), schemaNameToScaleFactor(splitInfo.getSchemaName()), ImmutableSet.copyOf(splitInfo.getLookupColumnNames()))
            .orElseThrow(() -> new IllegalArgumentException(format("No such index: %s%s", splitInfo.getTableName(), splitInfo.getLookupColumnNames())));
    List<Type> lookupColumnTypes = types(splitInfo.getTableName(), splitInfo.getLookupColumnNames());
    RecordSet keyRecordSet = new ListBasedRecordSet(splitInfo.getKeys(), lookupColumnTypes);
    RecordSet outputRecordSet = lookupIndexKeys(keyRecordSet, indexedTable, outputColumnNames, splitInfo.getLookupColumnNames());
    return new RecordPageSource(outputRecordSet);
}
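Here the lookup result is already a RecordSet, so the single-argument RecordPageSource constructor is all that is needed. A small sketch of the same pattern, with an InMemoryRecordSet standing in for the result of lookupIndexKeys (the class name and sample rows are illustrative only):

import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.VarcharType.VARCHAR;

import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;
import java.util.List;

final class LookupPageSourceSketch {
    static ConnectorPageSource toPageSource() {
        // stand-in for the index lookup output; the real code gets this from lookupIndexKeys(...)
        RecordSet outputRecordSet = InMemoryRecordSet.builder(List.of(BIGINT, VARCHAR))
                .addRow(1L, "ALGERIA")
                .addRow(2L, "ARGENTINA")
                .build();
        // the RecordSet-based constructor takes the column types from the record set itself
        return new RecordPageSource(outputRecordSet);
    }
}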
Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.
In class TpchConnectorIndex, the method lookup:
@Override
public ConnectorPageSource lookup(RecordSet rawInputRecordSet) {
    // convert the input record set from the column ordering in the query to
    // match the column ordering of the index
    RecordSet inputRecordSet = keyFormatter.apply(rawInputRecordSet);

    // lookup the values in the index
    RecordSet rawOutputRecordSet = indexedTable.lookupKeys(inputRecordSet);

    // convert the output record set of the index into the column ordering expected by the query
    return new RecordPageSource(outputFormatter.apply(rawOutputRecordSet));
}
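TpchConnectorIndex implements io.trino.spi.connector.ConnectorIndex, whose lookup method turns a RecordSet of keys into a ConnectorPageSource of matching rows. A stripped-down sketch of that contract, with the key formatting and table lookup reduced to a caller-supplied function (the class and constructor are hypothetical):

import io.trino.spi.connector.ConnectorIndex;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;
import java.util.function.UnaryOperator;

final class SimpleConnectorIndex implements ConnectorIndex {
    private final UnaryOperator<RecordSet> lookupFunction;

    SimpleConnectorIndex(UnaryOperator<RecordSet> lookupFunction) {
        this.lookupFunction = lookupFunction;
    }

    @Override
    public ConnectorPageSource lookup(RecordSet recordSet) {
        // resolve the keys to output rows, then adapt the cursor-based result to pages
        return new RecordPageSource(lookupFunction.apply(recordSet));
    }
}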