
Example 6 with RecordPageSource

Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.

From class TestScanFilterAndProjectOperator, method testRecordCursorYield.

@Test
public void testRecordCursorYield() {
    // create a generic long function whose projection yields on every row
    // and verify that we yield a total of #rows times
    // create a table with 15 rows
    int length = 15;
    Page input = SequencePageBuilder.createSequencePage(ImmutableList.of(BIGINT), length, 0);
    DriverContext driverContext = newDriverContext();
    // set up generic long function with a callback to force yield
    functionAssertions.addFunctions(new InternalFunctionBundle(new GenericLongFunction("record_cursor", value -> {
        driverContext.getYieldSignal().forceYieldForTesting();
        return value;
    })));
    FunctionManager functionManager = functionAssertions.getFunctionManager();
    ExpressionCompiler expressionCompiler = new ExpressionCompiler(functionManager, new PageFunctionCompiler(functionManager, 0));
    List<RowExpression> projections = ImmutableList.of(call(
            functionAssertions.getMetadata().resolveFunction(session, QualifiedName.of("generic_long_record_cursor"), fromTypes(BIGINT)),
            field(0, BIGINT)));
    Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(Optional.empty(), projections, "key");
    Supplier<PageProcessor> pageProcessor = expressionCompiler.compilePageProcessor(Optional.empty(), projections);
    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory factory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0,
            new PlanNodeId("test"),
            new PlanNodeId("0"),
            (session, split, table, columns, dynamicFilter) -> new RecordPageSource(new PageRecordSet(ImmutableList.of(BIGINT), input)),
            cursorProcessor,
            pageProcessor,
            TEST_TABLE_HANDLE,
            ImmutableList.of(),
            DynamicFilter.EMPTY,
            ImmutableList.of(BIGINT),
            DataSize.ofBytes(0),
            0);
    SourceOperator operator = factory.createOperator(driverContext);
    operator.addSplit(new Split(new CatalogName("test"), TestingSplit.createLocalSplit(), Lifespan.taskWide()));
    operator.noMoreSplits();
    // start the driver; the first 15 calls to getOutput() return null because of the forced yields
    for (int i = 0; i < length; i++) {
        driverContext.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), driverContext.getYieldExecutor());
        assertNull(operator.getOutput());
        driverContext.getYieldSignal().reset();
    }
    // the 16th yield is not going to prevent the operator from producing a page
    driverContext.getYieldSignal().setWithDelay(SECONDS.toNanos(1000), driverContext.getYieldExecutor());
    Page output = operator.getOutput();
    driverContext.getYieldSignal().reset();
    assertNotNull(output);
    assertEquals(toValues(BIGINT, output.getBlock(0)), toValues(BIGINT, input.getBlock(0)));
}
Also used: PageFunctionCompiler (io.trino.sql.gen.PageFunctionCompiler), CursorProcessor (io.trino.operator.project.CursorProcessor), RowExpression (io.trino.sql.relational.RowExpression), Page (io.trino.spi.Page), PageRecordSet (io.trino.operator.index.PageRecordSet), RecordPageSource (io.trino.spi.connector.RecordPageSource), PlanNodeId (io.trino.sql.planner.plan.PlanNodeId), PageProcessor (io.trino.operator.project.PageProcessor), InternalFunctionBundle (io.trino.metadata.InternalFunctionBundle), ExpressionCompiler (io.trino.sql.gen.ExpressionCompiler), CatalogName (io.trino.connector.CatalogName), Split (io.trino.metadata.Split), TestingSplit (io.trino.testing.TestingSplit), FunctionManager (io.trino.metadata.FunctionManager), Test (org.testng.annotations.Test)
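
For context, the page-source factory lambda in this test builds its ConnectorPageSource by wrapping a PageRecordSet in a RecordPageSource. The snippet below is a minimal, self-contained sketch of that same wiring, using the SPI's InMemoryRecordSet helper instead of a PageRecordSet; the class name, column layout, and values are invented for illustration. It shows how a RecordPageSource drives the underlying cursor and assembles pages lazily as getNextPage() is called, which is roughly what the operator above pulls from.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableList;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;

public class RecordPageSourceSketch {
    public static void main(String[] args) throws Exception {
        // a tiny two-column RecordSet with made-up values
        RecordSet recordSet = InMemoryRecordSet.builder(ImmutableList.of(BIGINT, BIGINT))
                .addRow(1L, 10L)
                .addRow(2L, 20L)
                .build();
        // RecordPageSource opens the RecordSet's cursor and turns its rows into pages on demand
        try (ConnectorPageSource pageSource = new RecordPageSource(recordSet)) {
            while (!pageSource.isFinished()) {
                Page page = pageSource.getNextPage();
                if (page != null) {
                    System.out.println("page with " + page.getPositionCount() + " rows");
                }
            }
        }
    }
}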

Example 7 with RecordPageSource

Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.

From class AbstractFileFormat, method createPageSource.

static ConnectorPageSource createPageSource(HiveRecordCursorProvider cursorProvider, ConnectorSession session, File targetFile, List<String> columnNames, List<Type> columnTypes, HiveStorageFormat format) {
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);
    Optional<ReaderRecordCursorWithProjections> recordCursorWithProjections = cursorProvider.createRecordCursor(
            conf,
            session,
            new Path(targetFile.getAbsolutePath()),
            0,
            targetFile.length(),
            targetFile.length(),
            createSchema(format, columnNames, columnTypes),
            readColumns,
            TupleDomain.all(),
            TESTING_TYPE_MANAGER,
            false);
    checkState(recordCursorWithProjections.isPresent(), "readerPageSourceWithProjections is not present");
    checkState(recordCursorWithProjections.get().getProjectedReaderColumns().isEmpty(), "projection should not be required");
    return new RecordPageSource(columnTypes, recordCursorWithProjections.get().getRecordCursor());
}
Also used: Path (org.apache.hadoop.fs.Path), ReaderRecordCursorWithProjections (io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections), HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle), RecordPageSource (io.trino.spi.connector.RecordPageSource)
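
Note that this helper uses RecordPageSource's other constructor: rather than handing over a whole RecordSet as in Example 6, it passes the column types together with the bare RecordCursor produced by the Hive cursor provider. Below is a minimal sketch contrasting the two constructors, reusing InMemoryRecordSet purely to obtain a cursor; the class and variable names are illustrative only.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;

public class RecordPageSourceConstructors {
    public static void main(String[] args) {
        RecordSet recordSet = InMemoryRecordSet.builder(ImmutableList.of(BIGINT))
                .addRow(42L)
                .build();
        // variant used in Examples 6, 9 and 10: wrap the RecordSet itself
        ConnectorPageSource fromRecordSet = new RecordPageSource(recordSet);
        // variant used here and in Example 8: supply the column types plus an already-opened RecordCursor
        ConnectorPageSource fromCursor = new RecordPageSource(recordSet.getColumnTypes(), recordSet.cursor());
    }
}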

Example 8 with RecordPageSource

Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.

From class HivePageSourceProvider, method createHivePageSource.

public static Optional<ConnectorPageSource> createHivePageSource(
        Set<HivePageSourceFactory> pageSourceFactories,
        Set<HiveRecordCursorProvider> cursorProviders,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        OptionalInt bucketNumber,
        long start,
        long length,
        long estimatedFileSize,
        Properties schema,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        List<HiveColumnHandle> columns,
        TypeManager typeManager,
        Optional<BucketConversion> bucketConversion,
        Optional<BucketValidation> bucketValidation,
        boolean s3SelectPushdownEnabled,
        Optional<AcidInfo> acidInfo,
        boolean originalFile,
        AcidTransaction transaction,
        List<ColumnMapping> columnMappings) {
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);
    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(
                configuration,
                session,
                path,
                start,
                length,
                estimatedFileSize,
                schema,
                desiredColumns,
                effectivePredicate,
                acidInfo,
                bucketNumber,
                originalFile,
                transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();
            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }
            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(
                configuration,
                session,
                path,
                start,
                length,
                estimatedFileSize,
                schema,
                desiredColumns,
                effectivePredicate,
                typeManager,
                s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();
            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }
            checkArgument(acidInfo.isEmpty(), "Acid is not supported");
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(
                        bucketAdaptation.get().getBucketColumnIndices(),
                        bucketAdaptation.get().getBucketColumnHiveTypes(),
                        bucketAdaptation.get().getBucketingVersion(),
                        bucketAdaptation.get().getTableBucketCount(),
                        bucketAdaptation.get().getPartitionBucketCount(),
                        bucketAdaptation.get().getBucketToKeep(),
                        typeManager,
                        delegate);
            }
            // RcText and RcBinary readers need to be wrapped in a cursor that performs the coercion for mismatched columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            // bucket adaptation already validates that data is in the right bucket
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());
            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    return Optional.empty();
}
Also used: BucketValidator (io.trino.plugin.hive.HivePageSource.BucketValidator), RecordCursor (io.trino.spi.connector.RecordCursor), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), RecordPageSource (io.trino.spi.connector.RecordPageSource), EmptyPageSource (io.trino.spi.connector.EmptyPageSource), Type (io.trino.spi.type.Type), OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType (io.trino.plugin.hive.orc.OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType), OrcType (io.trino.orc.metadata.OrcType), ReaderRecordCursorWithProjections (io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections)
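
The cursor branch above layers several decorators over the provider's RecordCursor (projection adaptation, bucket adaptation, coercion, bucket validation) before the final RecordPageSource wrap. Those wrappers all follow the same forwarding shape; the class below is a hypothetical sketch of that pattern (it is not a class from Trino) written against the io.trino.spi.connector.RecordCursor interface, with every call simply delegated so a subclass overrides only what it needs to rewrite.

import io.airlift.slice.Slice;
import io.trino.spi.connector.RecordCursor;
import io.trino.spi.type.Type;

// hypothetical forwarding base class for RecordCursor decorators
public abstract class ForwardingRecordCursor implements RecordCursor {
    private final RecordCursor delegate;

    protected ForwardingRecordCursor(RecordCursor delegate) {
        this.delegate = delegate;
    }

    @Override
    public long getCompletedBytes() {
        return delegate.getCompletedBytes();
    }

    @Override
    public long getReadTimeNanos() {
        return delegate.getReadTimeNanos();
    }

    @Override
    public Type getType(int field) {
        return delegate.getType(field);
    }

    @Override
    public boolean advanceNextPosition() {
        return delegate.advanceNextPosition();
    }

    @Override
    public boolean getBoolean(int field) {
        return delegate.getBoolean(field);
    }

    @Override
    public long getLong(int field) {
        return delegate.getLong(field);
    }

    @Override
    public double getDouble(int field) {
        return delegate.getDouble(field);
    }

    @Override
    public Slice getSlice(int field) {
        return delegate.getSlice(field);
    }

    @Override
    public Object getObject(int field) {
        return delegate.getObject(field);
    }

    @Override
    public boolean isNull(int field) {
        return delegate.isNull(field);
    }

    @Override
    public void close() {
        delegate.close();
    }
}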

Example 9 with RecordPageSource

Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.

From class ThriftIndexedTpchService, method createLookupPageSource.

@Override
protected ConnectorPageSource createLookupPageSource(SplitInfo splitInfo, List<String> outputColumnNames) {
    IndexedTable indexedTable = indexedData.getIndexedTable(
                    splitInfo.getTableName(),
                    schemaNameToScaleFactor(splitInfo.getSchemaName()),
                    ImmutableSet.copyOf(splitInfo.getLookupColumnNames()))
            .orElseThrow(() -> new IllegalArgumentException(
                    format("No such index: %s%s", splitInfo.getTableName(), splitInfo.getLookupColumnNames())));
    List<Type> lookupColumnTypes = types(splitInfo.getTableName(), splitInfo.getLookupColumnNames());
    RecordSet keyRecordSet = new ListBasedRecordSet(splitInfo.getKeys(), lookupColumnTypes);
    RecordSet outputRecordSet = lookupIndexKeys(keyRecordSet, indexedTable, outputColumnNames, splitInfo.getLookupColumnNames());
    return new RecordPageSource(outputRecordSet);
}
Also used: Type (io.trino.spi.type.Type), RecordSet (io.trino.spi.connector.RecordSet), MappedRecordSet (io.trino.split.MappedRecordSet), IndexedTable (io.trino.testing.tpch.TpchIndexedData.IndexedTable), RecordPageSource (io.trino.spi.connector.RecordPageSource)

Example 10 with RecordPageSource

Use of io.trino.spi.connector.RecordPageSource in project trino by trinodb.

From class TpchConnectorIndex, method lookup.

@Override
public ConnectorPageSource lookup(RecordSet rawInputRecordSet) {
    // convert the input record set from the column ordering in the query
    // to the column ordering of the index
    RecordSet inputRecordSet = keyFormatter.apply(rawInputRecordSet);
    // lookup the values in the index
    RecordSet rawOutputRecordSet = indexedTable.lookupKeys(inputRecordSet);
    // convert the output record set back to the column ordering expected by the query
    return new RecordPageSource(outputFormatter.apply(rawOutputRecordSet));
}
Also used: RecordSet (io.trino.spi.connector.RecordSet), RecordPageSource (io.trino.spi.connector.RecordPageSource)

Aggregations

RecordPageSource (io.trino.spi.connector.RecordPageSource): 10
Page (io.trino.spi.Page): 3
ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource): 3
RecordCursor (io.trino.spi.connector.RecordCursor): 3
CatalogName (io.trino.connector.CatalogName): 2
Split (io.trino.metadata.Split): 2
PageRecordSet (io.trino.operator.index.PageRecordSet): 2
CursorProcessor (io.trino.operator.project.CursorProcessor): 2
PageProcessor (io.trino.operator.project.PageProcessor): 2
ReaderRecordCursorWithProjections (io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections): 2
EmptyPageSource (io.trino.spi.connector.EmptyPageSource): 2
RecordSet (io.trino.spi.connector.RecordSet): 2
Type (io.trino.spi.type.Type): 2
MappedRecordSet (io.trino.split.MappedRecordSet): 2
PlanNodeId (io.trino.sql.planner.plan.PlanNodeId): 2
RowExpression (io.trino.sql.relational.RowExpression): 2
TestingSplit (io.trino.testing.TestingSplit): 2
Test (org.testng.annotations.Test): 2
AbstractIterator (com.google.common.collect.AbstractIterator): 1
ImmutableList (com.google.common.collect.ImmutableList): 1