Example 46 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

Class TestOrcDeleteDeltaPageSource, method testReadingDeletedRows.

@Test
public void testReadingDeletedRows() throws Exception {
    File deleteDeltaFile = new File(Resources.getResource("fullacid_delete_delta_test/delete_delta_0000004_0000004_0000/bucket_00000").toURI());
    OrcDeleteDeltaPageSourceFactory pageSourceFactory = new OrcDeleteDeltaPageSourceFactory(new OrcReaderOptions(), ConnectorIdentity.ofUser("test"), new JobConf(new Configuration(false)), HDFS_ENVIRONMENT, new FileFormatDataSourceStats());
    ConnectorPageSource pageSource = pageSourceFactory.createPageSource(new Path(deleteDeltaFile.toURI()), deleteDeltaFile.length()).orElseThrow();
    MaterializedResult materializedRows = MaterializedResult.materializeSourceDataStream(SESSION, pageSource, ImmutableList.of(BIGINT, INTEGER, BIGINT));
    assertEquals(materializedRows.getRowCount(), 1);
    AcidOutputFormat.Options bucketOptions = new AcidOutputFormat.Options(new Configuration(false)).bucket(0);
    assertEquals(materializedRows.getMaterializedRows().get(0), new MaterializedRow(5, 2L, BucketCodec.V1.encode(bucketOptions), 0L));
}
Also used: Path(org.apache.hadoop.fs.Path) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Configuration(org.apache.hadoop.conf.Configuration) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) MaterializedResult(io.trino.testing.MaterializedResult) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) MaterializedRow(io.trino.testing.MaterializedRow) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) Test(org.testng.annotations.Test)
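The asserted row is the ACID delete delta row identity: original transaction id (BIGINT), encoded bucket value (INTEGER), and row id (BIGINT). As a small illustrative sketch (not part of the Trino sources, assuming Hive 3.x's BucketCodec API with encode, determineVersion, and decodeWriterId), the bucket value asserted above can be round-tripped like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.BucketCodec;

public class BucketCodecRoundTrip {
    public static void main(String[] args) {
        // Same options as in the test above: bucket 0
        AcidOutputFormat.Options options = new AcidOutputFormat.Options(new Configuration(false)).bucket(0);
        // V1 packs the codec version, writer (bucket) id, and statement id into a single int
        int encoded = BucketCodec.V1.encode(options);
        // Decoding should recover bucket 0
        int bucket = BucketCodec.determineVersion(encoded).decodeWriterId(encoded);
        System.out.println(encoded + " -> bucket " + bucket);
    }
}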

Example 47 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

Class HivePageSourceProvider, method createHivePageSource.

public static Optional<ConnectorPageSource> createHivePageSource(Set<HivePageSourceFactory> pageSourceFactories, Set<HiveRecordCursorProvider> cursorProviders, Configuration configuration, ConnectorSession session, Path path, OptionalInt bucketNumber, long start, long length, long estimatedFileSize, Properties schema, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> columns, TypeManager typeManager, Optional<BucketConversion> bucketConversion, Optional<BucketValidation> bucketValidation, boolean s3SelectPushdownEnabled, Optional<AcidInfo> acidInfo, boolean originalFile, AcidTransaction transaction, List<ColumnMapping> columnMappings) {
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);
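    // Try the page-oriented factories first; if none accepts the file, fall back to the
    // row-oriented record cursor providers below.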
    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns, effectivePredicate, acidInfo, bucketNumber, originalFile, transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();
            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }
            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns, effectivePredicate, typeManager, s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();
            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }
            checkArgument(acidInfo.isEmpty(), "Acid is not supported");
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(bucketAdaptation.get().getBucketColumnIndices(), bucketAdaptation.get().getBucketColumnHiveTypes(), bucketAdaptation.get().getBucketingVersion(), bucketAdaptation.get().getTableBucketCount(), bucketAdaptation.get().getPartitionBucketCount(), bucketAdaptation.get().getBucketToKeep(), typeManager, delegate);
            }
            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            // bucket adaptation already validates that data is in the right bucket
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());
            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    return Optional.empty();
}
Also used: BucketValidator(io.trino.plugin.hive.HivePageSource.BucketValidator) RecordCursor(io.trino.spi.connector.RecordCursor) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) RecordPageSource(io.trino.spi.connector.RecordPageSource) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) Type(io.trino.spi.type.Type) OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType(io.trino.plugin.hive.orc.OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType) OrcType(io.trino.orc.metadata.OrcType) ReaderRecordCursorWithProjections(io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections)
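The fallback path above adapts a row-oriented RecordCursor to the page-oriented ConnectorPageSource interface by wrapping it in a RecordPageSource. A minimal sketch of that adaptation, using the SPI's InMemoryRecordSet as a stand-in for the Hive record cursor providers (types and rows here are illustrative, not taken from the example):

import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.connector.RecordPageSource;
import io.trino.spi.connector.RecordSet;
import io.trino.spi.type.Type;

import java.util.List;

import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.VarcharType.VARCHAR;

public class RecordCursorAdapterSketch {
    public static ConnectorPageSource toPageSource() {
        List<Type> types = List.of(BIGINT, VARCHAR);
        RecordSet recordSet = InMemoryRecordSet.builder(types)
                .addRow(1L, "a")
                .addRow(2L, "b")
                .build();
        // RecordPageSource drives the cursor and assembles Pages on demand
        return new RecordPageSource(types, recordSet.cursor());
    }
}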

Example 48 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

Class TestRaptorStorageManager, method testReader.

@Test
public void testReader() throws Exception {
    RaptorStorageManager manager = createRaptorStorageManager();
    List<Long> columnIds = ImmutableList.of(2L, 4L, 6L, 7L, 8L, 9L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DATE, BOOLEAN, DOUBLE);
    byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
    byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);
    StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
    Object[][] doubles = { { 881L, "-inf", null, null, null, Double.NEGATIVE_INFINITY }, { 882L, "+inf", null, null, null, Double.POSITIVE_INFINITY }, { 883L, "nan", null, null, null, Double.NaN }, { 884L, "min", null, null, null, Double.MIN_VALUE }, { 885L, "max", null, null, null, Double.MAX_VALUE }, { 886L, "pzero", null, null, null, 0.0 }, { 887L, "nzero", null, null, null, -0.0 } };
    List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello", wrappedBuffer(bytes1), sqlDate(2001, 8, 22).getDays(), true, 123.45).row(null, null, null, null, null, null).row(456L, "bye", wrappedBuffer(bytes3), sqlDate(2005, 4, 22).getDays(), false, 987.65).rows(doubles).build();
    sink.appendPages(pages);
    List<ShardInfo> shards = getFutureValue(sink.commit());
    assertEquals(shards.size(), 1);
    UUID uuid = Iterables.getOnlyElement(shards).getShardUuid();
    MaterializedResult expected = resultBuilder(SESSION, columnTypes).row(123L, "hello", sqlBinary(bytes1), sqlDate(2001, 8, 22), true, 123.45).row(null, null, null, null, null, null).row(456L, "bye", sqlBinary(bytes3), sqlDate(2005, 4, 22), false, 987.65).rows(doubles).build();
    // no tuple domain (all)
    TupleDomain<RaptorColumnHandle> tupleDomain = TupleDomain.all();
    try (ConnectorPageSource pageSource = getPageSource(manager, columnIds, columnTypes, uuid, tupleDomain)) {
        MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
        assertEquals(result.getRowCount(), expected.getRowCount());
        assertEquals(result, expected);
    }
    // tuple domain within the column range
    tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.<RaptorColumnHandle, NullableValue>builder().put(new RaptorColumnHandle("c1", 2, BIGINT), NullableValue.of(BIGINT, 124L)).buildOrThrow());
    try (ConnectorPageSource pageSource = getPageSource(manager, columnIds, columnTypes, uuid, tupleDomain)) {
        MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
        assertEquals(result.getRowCount(), expected.getRowCount());
    }
    // tuple domain outside the column range
    tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.<RaptorColumnHandle, NullableValue>builder().put(new RaptorColumnHandle("c1", 2, BIGINT), NullableValue.of(BIGINT, 122L)).buildOrThrow());
    try (ConnectorPageSource pageSource = getPageSource(manager, columnIds, columnTypes, uuid, tupleDomain)) {
        MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
        assertEquals(result.getRowCount(), 0);
    }
}
Also used: RaptorColumnHandle(io.trino.plugin.raptor.legacy.RaptorColumnHandle) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) Type(io.trino.spi.type.Type) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) OptionalLong(java.util.OptionalLong) UUID(java.util.UUID) MaterializedResult(io.trino.testing.MaterializedResult) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo) Test(org.testng.annotations.Test)
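The fixed-value predicates above can also be expressed with TupleDomain.withColumnDomains and Domain.singleValue. A short sketch of the "within the column range" predicate, reusing the column handle constructed in the test:

import com.google.common.collect.ImmutableMap;
import io.trino.plugin.raptor.legacy.RaptorColumnHandle;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

import static io.trino.spi.type.BigintType.BIGINT;

public class TupleDomainSketch {
    public static TupleDomain<RaptorColumnHandle> singleValuePredicate() {
        RaptorColumnHandle column = new RaptorColumnHandle("c1", 2, BIGINT);
        // Equivalent to TupleDomain.fromFixedValues with NullableValue.of(BIGINT, 124L)
        return TupleDomain.withColumnDomains(ImmutableMap.of(column, Domain.singleValue(BIGINT, 124L)));
    }
}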

Example 49 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

Class TestShardCompactor, method getMaterializedRows.

private MaterializedResult getMaterializedRows(List<UUID> uuids, List<Long> columnIds, List<Type> columnTypes) throws IOException {
    MaterializedResult.Builder rows = MaterializedResult.resultBuilder(SESSION, columnTypes);
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = getPageSource(columnIds, columnTypes, uuid)) {
            MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
            rows.rows(result.getMaterializedRows());
        }
    }
    return rows.build();
}
Also used: MaterializedResult(io.trino.testing.MaterializedResult) UUID(java.util.UUID) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource)

Example 50 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

Class TestShardCompactor, method getPages.

private List<Page> getPages(Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes) throws IOException {
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = getPageSource(columnIds, columnTypes, uuid)) {
            while (!pageSource.isFinished()) {
                Page outputPage = pageSource.getNextPage();
                if (outputPage == null) {
                    break;
                }
                pages.add(outputPage.getLoadedPage());
            }
        }
    }
    return pages.build();
}
Also used: ImmutableList(com.google.common.collect.ImmutableList) Page(io.trino.spi.Page) UUID(java.util.UUID) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource)
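The drain loop above (isFinished(), getNextPage(), getLoadedPage()) is the standard way to consume any ConnectorPageSource. As a minimal producer-side counterpart, the SPI ships FixedPageSource, which serves a fixed list of pages; the page contents in this sketch are illustrative:

import io.trino.spi.Page;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.FixedPageSource;

import java.util.List;

import static io.trino.spi.type.BigintType.BIGINT;

public class FixedPageSourceSketch {
    public static ConnectorPageSource singlePageSource() {
        BlockBuilder builder = BIGINT.createBlockBuilder(null, 3);
        BIGINT.writeLong(builder, 1);
        BIGINT.writeLong(builder, 2);
        BIGINT.writeLong(builder, 3);
        // One page with a single BIGINT column holding the values 1, 2, 3
        return new FixedPageSource(List.of(new Page(builder.build())));
    }
}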

Aggregations

ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource) 50
ConnectorSession (io.trino.spi.connector.ConnectorSession) 23
Page (io.trino.spi.Page) 18
Type (io.trino.spi.type.Type) 18
Test (org.testng.annotations.Test) 17
ImmutableList (com.google.common.collect.ImmutableList) 16
MaterializedResult (io.trino.testing.MaterializedResult) 14
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 13
ColumnHandle (io.trino.spi.connector.ColumnHandle) 13
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle) 13
List (java.util.List) 12
Optional (java.util.Optional) 12
ConnectorSplit (io.trino.spi.connector.ConnectorSplit) 11
ImmutableMap (com.google.common.collect.ImmutableMap) 10
TestingConnectorSession (io.trino.testing.TestingConnectorSession) 10
File (java.io.File) 10
Path (org.apache.hadoop.fs.Path) 10
TupleDomain (io.trino.spi.predicate.TupleDomain) 9
IOException (java.io.IOException) 9
ArrayList (java.util.ArrayList) 9