Example 21 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

From class TestOrcPageSourceFactory, method readFile:

private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize) {
    TupleDomain<HiveColumnHandle> tupleDomain = TupleDomain.all();
    if (nationKeyPredicate.isPresent()) {
        tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())));
    }
    List<HiveColumnHandle> columnHandles = columns.entrySet().stream().map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue())).collect(toImmutableList());
    List<String> columnNames = columnHandles.stream().map(HiveColumnHandle::getName).collect(toImmutableList());
    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(new JobConf(new Configuration(false)), SESSION, new Path(filePath), 0, fileSize, fileSize, createSchema(), columnHandles, tupleDomain, acidInfo, OptionalInt.empty(), false, NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());
    checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
    ConnectorPageSource pageSource = pageSourceWithProjections.get().get();
    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");
    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page == null) {
            // getNextPage() may return null while the source is still working; keep polling
            continue;
        }
        // materialize any lazy blocks before reading positions
        page = page.getLoadedPage();
        for (int position = 0; position < page.getPositionCount(); position++) {
            long nationKey = -42;
            if (nationKeyColumn >= 0) {
                nationKey = BIGINT.getLong(page.getBlock(nationKeyColumn), position);
            }
            String name = "<not read>";
            if (nameColumn >= 0) {
                name = VARCHAR.getSlice(page.getBlock(nameColumn), position).toStringUtf8();
            }
            long regionKey = -42;
            if (regionKeyColumn >= 0) {
                regionKey = BIGINT.getLong(page.getBlock(regionKeyColumn), position);
            }
            String comment = "<not read>";
            if (commentColumn >= 0) {
                comment = VARCHAR.getSlice(page.getBlock(commentColumn), position).toStringUtf8();
            }
            rows.add(new Nation(position, nationKey, name, regionKey, comment));
        }
    }
    return rows.build();
}
Also used : URISyntaxException(java.net.URISyntaxException) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assertions(org.assertj.core.api.Assertions) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) LongPredicate(java.util.function.LongPredicate) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertFalse(org.testng.Assert.assertFalse) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) AcidUtils.deleteDeltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deleteDeltaSubdir) REGION_KEY(io.trino.tpch.NationColumn.REGION_KEY) Nation(io.trino.tpch.Nation) NationGenerator(io.trino.tpch.NationGenerator) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Resources.getResource(com.google.common.io.Resources.getResource) NATION_KEY(io.trino.tpch.NationColumn.NATION_KEY) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) NAME(io.trino.tpch.NationColumn.NAME) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) COMMENT(io.trino.tpch.NationColumn.COMMENT) NationColumn(io.trino.tpch.NationColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) TABLE_IS_TRANSACTIONAL(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) TupleDomain(io.trino.spi.predicate.TupleDomain) AcidInfo(io.trino.plugin.hive.AcidInfo) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)
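
For orientation, here is a hedged sketch of how such a helper might be invoked: read all four nation columns with no predicate. The file path and the column ordinals are illustrative assumptions, not values from the original test.

// Hypothetical invocation; the ORC file path and ordinals below are assumptions.
Map<NationColumn, Integer> allColumns = ImmutableMap.of(
        NATION_KEY, 0,
        NAME, 1,
        REGION_KEY, 2,
        COMMENT, 3);
File nationFile = new File("/tmp/nation.orc");
List<Nation> rows = readFile(allColumns, OptionalLong.empty(), Optional.empty(), nationFile.getPath(), nationFile.length());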

Example 22 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

From class TestOrcPredicates, method assertFilteredRows:

private void assertFilteredRows(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split, int expectedRows) {
    ConnectorPageSource pageSource = createPageSource(effectivePredicate, columnsToRead, session, split);
    int filteredRows = 0;
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page != null) {
            filteredRows += page.getPositionCount();
        }
    }
    assertEquals(filteredRows, expectedRows);
}
Also used : Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource)
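
The helper above is the canonical drain loop: poll getNextPage() until isFinished(), tolerating null pages. A minimal sketch of a call site, where testColumn, session, split, and the expected count are all illustrative assumptions:

// Hypothetical call site; testColumn, session, split, and 100 are assumptions.
TupleDomain<TestColumn> predicate = TupleDomain.withColumnDomains(
        ImmutableMap.of(testColumn, Domain.singleValue(BIGINT, 42L)));
assertFilteredRows(predicate, ImmutableList.of(testColumn), session, split, 100);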

Example 23 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

From class TestOrcPredicates, method createPageSource:

private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split) {
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());
    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn columnToRead : columnsToRead) {
        String name = columnToRead.getBaseName();
        if (!baseColumnNames.contains(name) && !columnToRead.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(columnToRead.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));
    List<HivePartitionKey> partitionKeys = columnsToRead.stream().filter(TestColumn::isPartitionKey).map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue())).collect(toList());
    String partitionName = String.join("/", partitionKeys.stream().map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue())).collect(toImmutableList()));
    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);
    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> {
        Optional<HiveColumnHandle> handle = columnHandles.stream().filter(column -> testColumn.getName().equals(column.getName())).findFirst();
        checkState(handle.isPresent(), "Predicate on invalid column");
        return handle.get();
    });
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(), split.getPath(), OptionalInt.empty(), split.getLength(), Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(readerFactory), ImmutableSet.of(), new Configuration(false), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), splitProperties, predicate, columnHandles, TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    assertTrue(pageSource.isPresent());
    return pageSource.get();
}
Also used : HivePageSourceProvider(io.trino.plugin.hive.HivePageSourceProvider) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) FileSplit(org.apache.hadoop.mapred.FileSplit) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) AbstractTestHiveFileFormats(io.trino.plugin.hive.AbstractTestHiveFileFormats) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HiveCompressionCodec(io.trino.plugin.hive.HiveCompressionCodec) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) TableToPartitionMapping(io.trino.plugin.hive.TableToPartitionMapping) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) NodeVersion(io.trino.plugin.hive.NodeVersion) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HivePartitionKey(io.trino.plugin.hive.HivePartitionKey) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) HiveConfig(io.trino.plugin.hive.HiveConfig)
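
Note how the "columns" and "columns.types" split properties are built: positionally aligned, comma-joined lists of base column names and Hive type names. A small hedged sketch of the result for a hypothetical two-column table (the table and its types are assumptions for illustration):

// For a hypothetical table (id bigint, name string), the joins above would yield:
Properties sketch = new Properties();
sketch.setProperty("columns", "id,name");
sketch.setProperty("columns.types", "bigint,string");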

Example 24 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

From class DeltaLakeUpdatablePageSource, method copyParquetPageSource:

private DataFileInfo copyParquetPageSource(DeltaLakeWriter fileWriter) throws IOException {
    ReaderPageSource readerPageSource = createParquetPageSource(TupleDomain.all(), allDataColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList()));
    ConnectorPageSource connectorPageSource = readerPageSource.get();
    boolean successfulWrite = true;
    try {
        int pageStart = 0;
        while (!connectorPageSource.isFinished()) {
            Page page = connectorPageSource.getNextPage();
            if (page == null) {
                continue;
            }
            int pagePositionCount = page.getPositionCount();
            int nextToDelete = rowsToDelete.nextSetBit(pageStart);
            if (nextToDelete == -1 || nextToDelete >= pageStart + pagePositionCount) {
                // page is wholly retained
            } else {
                int[] retainedPositions = new int[pagePositionCount];
                int retainedPositionsCount = 0;
                for (int position = 0; position < pagePositionCount; position++) {
                    if (!rowsToDelete.get(pageStart + position)) {
                        retainedPositions[retainedPositionsCount] = position;
                        retainedPositionsCount++;
                    }
                }
                page = page.getPositions(retainedPositions, 0, retainedPositionsCount);
            }
            fileWriter.appendRows(page);
            pageStart += pagePositionCount;
        }
    } catch (Exception e) {
        successfulWrite = false;
        try {
            fileWriter.rollback();
        } catch (Exception rollbackException) {
            if (e != rollbackException) {
                e.addSuppressed(rollbackException);
            }
        }
        throw e;
    } finally {
        if (successfulWrite) {
            fileWriter.commit();
        }
        connectorPageSource.close();
    }
    return fileWriter.getDataFileInfo();
}
Also used : ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) TrinoException(io.trino.spi.TrinoException) IOException(java.io.IOException)
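
The deletion filter relies on Page#getPositions, which remaps the page to only the selected positions, typically as a view over the original blocks rather than a copy. A minimal sketch, where the input page is assumed to have at least three positions:

// Keep positions 0 and 2, dropping position 1 (illustrative only).
int[] retained = {0, 2};
Page filtered = page.getPositions(retained, 0, retained.length);
// filtered.getPositionCount() == 2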

Example 25 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb.

From class TestTimestamp, method testReadingAs:

private void testReadingAs(Type type, ConnectorSession session, ParquetTester.TempFile tempFile, List<String> columnNames, List<?> expectedValues) throws IOException {
    Iterator<?> expected = expectedValues.iterator();
    try (ConnectorPageSource pageSource = StandardFileFormats.TRINO_PARQUET.createFileFormatReader(session, HDFS_ENVIRONMENT, tempFile.getFile(), columnNames, ImmutableList.of(type))) {
        // skip a page to exercise the decoder's skip() logic
        Page firstPage = pageSource.getNextPage();
        assertTrue(firstPage.getPositionCount() > 0, "Expected first page to have at least 1 row");
        for (int i = 0; i < firstPage.getPositionCount(); i++) {
            expected.next();
        }
        int pageCount = 1;
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page == null) {
                continue;
            }
            pageCount++;
            Block block = page.getBlock(0);
            for (int i = 0; i < block.getPositionCount(); i++) {
                assertThat(type.getObjectValue(session, block, i)).isEqualTo(expected.next());
            }
        }
        assertThat(pageCount).withFailMessage("Expected more than one page but processed %s", pageCount).isGreaterThan(1);
        assertFalse(expected.hasNext(), "Read fewer values than expected");
    }
}
Also used : Block(io.trino.spi.block.Block) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource)
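
ConnectorPageSource extends Closeable, so the try-with-resources form above is the safe default. Condensed to its essentials, the generic read pattern shared by all five examples looks like this (a sketch, not any specific Trino test):

// Generic drain loop over any ConnectorPageSource.
long rowCount = 0;
try (ConnectorPageSource source = pageSource) {
    while (!source.isFinished()) {
        Page page = source.getNextPage();
        if (page == null) {
            // not finished, but no page available yet
            continue;
        }
        rowCount += page.getPositionCount();
    }
}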

Aggregations

ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource): 50
ConnectorSession (io.trino.spi.connector.ConnectorSession): 23
Page (io.trino.spi.Page): 18
Type (io.trino.spi.type.Type): 18
Test (org.testng.annotations.Test): 17
ImmutableList (com.google.common.collect.ImmutableList): 16
MaterializedResult (io.trino.testing.MaterializedResult): 14
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 13
ColumnHandle (io.trino.spi.connector.ColumnHandle): 13
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 13
List (java.util.List): 12
Optional (java.util.Optional): 12
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 11
ImmutableMap (com.google.common.collect.ImmutableMap): 10
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 10
File (java.io.File): 10
Path (org.apache.hadoop.fs.Path): 10
TupleDomain (io.trino.spi.predicate.TupleDomain): 9
IOException (java.io.IOException): 9
ArrayList (java.util.ArrayList): 9