Search in sources:

Example 26 with TableMetadata

Use of io.trino.metadata.TableMetadata in the trino project by trinodb.

From the class BaseHiveConnectorTest, method testPathHiddenColumn.

/**
 * Verifies the hidden {@code $path} column: every row's {@code $path} must be non-empty,
 * and all rows that share a partition value must resolve to the same partition directory.
 */
private void testPathHiddenColumn(Session session, HiveStorageFormat storageFormat) {
    @Language("SQL") String createTable = "CREATE TABLE test_path " +
            "WITH (" +
            "format = '" + storageFormat + "'," +
            "partitioned_by = ARRAY['col1']" +
            ") AS " +
            "SELECT * FROM (VALUES " +
            "(0, 0), (3, 0), (6, 0), " +
            "(1, 1), (4, 1), (7, 1), " +
            "(2, 2), (5, 2) " +
            " ) t(col0, col1) ";
    assertUpdate(session, createTable, 8);
    assertTrue(getQueryRunner().tableExists(getSession(), "test_path"));

    TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_path");
    assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);

    // Expect the data columns followed by the hidden system columns, in this exact order.
    List<String> expectedColumns = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, FILE_SIZE_COLUMN_NAME, FILE_MODIFIED_TIME_COLUMN_NAME, PARTITION_COLUMN_NAME);
    List<ColumnMetadata> actualColumns = tableMetadata.getColumns();
    assertEquals(actualColumns.size(), expectedColumns.size());
    for (int index = 0; index < actualColumns.size(); index++) {
        ColumnMetadata column = actualColumns.get(index);
        assertEquals(column.getName(), expectedColumns.get(index));
        if (PATH_COLUMN_NAME.equals(column.getName())) {
            // $path should be hidden column
            assertTrue(column.isHidden());
        }
    }
    assertEquals(getPartitions("test_path").size(), 3);

    MaterializedResult results = computeActual(session, format("SELECT *, \"%s\" FROM test_path", PATH_COLUMN_NAME));
    Map<Integer, String> directoryByPartition = new HashMap<>();
    for (MaterializedRow row : results.getMaterializedRows()) {
        int col0 = (int) row.getField(0);
        int col1 = (int) row.getField(1);
        String pathName = (String) row.getField(2);
        String parentDirectory = new Path(pathName).getParent().toString();
        assertTrue(pathName.length() > 0);
        assertEquals(col0 % 3, col1);
        String knownDirectory = directoryByPartition.get(col1);
        if (knownDirectory == null) {
            directoryByPartition.put(col1, parentDirectory);
        }
        else {
            // the rows in the same partition should be in the same partition directory
            assertEquals(knownDirectory, parentDirectory);
        }
    }
    assertEquals(directoryByPartition.size(), 3);

    assertUpdate(session, "DROP TABLE test_path");
    assertFalse(getQueryRunner().tableExists(session, "test_path"));
}
Also used : TableMetadata(io.trino.metadata.TableMetadata) Path(org.apache.hadoop.fs.Path) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ColumnConstraint(io.trino.sql.planner.planprinter.IoPlanPrinter.ColumnConstraint) Constraint(io.trino.spi.connector.Constraint) Language(org.intellij.lang.annotations.Language) MaterializedResult(io.trino.testing.MaterializedResult) MaterializedRow(io.trino.testing.MaterializedRow)

Example 27 with TableMetadata

Use of io.trino.metadata.TableMetadata in the trino project by trinodb.

From the class BaseHiveConnectorTest, method verifyPartitionedBucketedTable.

/**
 * Checks a partitioned, bucketed table: its declared properties (format, partition
 * column, bucketing columns, bucket count), that each of the 3 partitions holds
 * bucket-count files, and that its contents match the source orders data.
 */
private void verifyPartitionedBucketedTable(HiveStorageFormat storageFormat, String tableName) {
    TableMetadata metadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
    Map<String, Object> properties = metadata.getMetadata().getProperties();
    assertEquals(properties.get(STORAGE_FORMAT_PROPERTY), storageFormat);
    assertEquals(properties.get(PARTITIONED_BY_PROPERTY), ImmutableList.of("orderstatus"));
    assertEquals(properties.get(BUCKETED_BY_PROPERTY), ImmutableList.of("custkey", "custkey2"));
    assertEquals(properties.get(BUCKET_COUNT_PROPERTY), 11);

    assertEquals(getPartitions(tableName).size(), 3);

    // verify that we create bucket_count files in each partition
    assertEqualsIgnoreOrder(
            computeActual(format("SELECT orderstatus, COUNT(DISTINCT \"$path\") FROM %s GROUP BY 1", tableName)),
            resultBuilder(getSession(), createVarcharType(1), BIGINT).row("F", 11L).row("O", 11L).row("P", 11L).build());

    assertQuery("SELECT * FROM " + tableName, "SELECT custkey, custkey, comment, orderstatus FROM orders");
    for (int custkey = 1; custkey <= 30; custkey++) {
        assertQuery(
                format("SELECT * FROM %s WHERE custkey = %d AND custkey2 = %d", tableName, custkey, custkey),
                format("SELECT custkey, custkey, comment, orderstatus FROM orders WHERE custkey = %d", custkey));
    }
}
Also used : TableMetadata(io.trino.metadata.TableMetadata) ColumnConstraint(io.trino.sql.planner.planprinter.IoPlanPrinter.ColumnConstraint) Constraint(io.trino.spi.connector.Constraint)

Example 28 with TableMetadata

Use of io.trino.metadata.TableMetadata in the trino project by trinodb.

From the class BaseHiveConnectorTest, method testPartitionPerScanLimit.

/**
 * Verifies the hive.max-partitions-per-scan limit: a table with 1200 partitions can be
 * listed in full via the $partitions table and queried through predicates touching up to
 * 1000 partitions, while scans that could touch more than 1000 partitions fail.
 */
@Test
public void testPartitionPerScanLimit() {
    String tableName = "test_partition_per_scan_limit";
    String partitionsTable = "\"" + tableName + "$partitions\"";

    @Language("SQL") String createTable = "" +
            "CREATE TABLE " + tableName + " " +
            "(" +
            "  foo VARCHAR," +
            "  part BIGINT" +
            ") " +
            "WITH (" +
            "partitioned_by = ARRAY[ 'part' ]" +
            ") ";
    assertUpdate(createTable);

    TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
    assertEquals(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), ImmutableList.of("part"));

    // insert 1200 partitions, in batches of 100
    for (int batch = 0; batch < 12; batch++) {
        int partStart = batch * 100;
        int partEnd = partStart + 99;
        @Language("SQL") String insertPartitions = "" +
                "INSERT INTO " + tableName + " " +
                "SELECT 'bar' foo, part " +
                "FROM UNNEST(SEQUENCE(" + partStart + ", " + partEnd + ")) AS TMP(part)";
        assertUpdate(insertPartitions, 100);
    }

    // we are not constrained by hive.max-partitions-per-scan when listing partitions
    assertQuery("SELECT * FROM " + partitionsTable + " WHERE part > 490 AND part <= 500", "VALUES 491, 492, 493, 494, 495, 496, 497, 498, 499, 500");
    assertQuery("SELECT * FROM " + partitionsTable + " WHERE part < 0", "SELECT null WHERE false");
    assertQuery("SELECT * FROM " + partitionsTable, "VALUES " + LongStream.range(0, 1200).mapToObj(String::valueOf).collect(joining(",")));

    // verify can query 1000 partitions
    assertQuery("SELECT count(foo) FROM " + tableName + " WHERE part < 1000", "SELECT 1000");
    // verify the remaining 200 partitions were successfully inserted
    assertQuery("SELECT count(foo) FROM " + tableName + " WHERE part >= 1000 AND part < 1200", "SELECT 200");

    // verify cannot query more than 1000 partitions
    assertQueryFails("SELECT * FROM " + tableName + " WHERE part < 1001", format("Query over table 'tpch.%s' can potentially read more than 1000 partitions", tableName));
    // verify cannot query all partitions
    assertQueryFails("SELECT * FROM " + tableName, format("Query over table 'tpch.%s' can potentially read more than 1000 partitions", tableName));

    assertUpdate("DROP TABLE " + tableName);
}
Also used : TableMetadata(io.trino.metadata.TableMetadata) Language(org.intellij.lang.annotations.Language) ColumnConstraint(io.trino.sql.planner.planprinter.IoPlanPrinter.ColumnConstraint) Constraint(io.trino.spi.connector.Constraint) Test(org.testng.annotations.Test) BaseConnectorTest(io.trino.testing.BaseConnectorTest)

Example 29 with TableMetadata

Use of io.trino.metadata.TableMetadata in the trino project by trinodb.

From the class BaseHiveConnectorTest, method testCreatePartitionedTable.

/**
 * Creates a table partitioned on every supported partition-key type, verifies the
 * reported metadata and column types, inserts a single row, and checks the row can be
 * read back both unconditionally and through equality predicates on every partition key.
 *
 * <p>For the AVRO format the {@code _smallint}/{@code _tinyint} data columns are widened
 * to INTEGER via the replace() calls below (the partition columns of those types are not
 * touched — the replaced pattern only matches the non-partition columns); presumably
 * because the Avro format cannot store SMALLINT/TINYINT — TODO confirm.
 */
private void testCreatePartitionedTable(Session session, HiveStorageFormat storageFormat) {
    @Language("SQL") String createTable = "" + "CREATE TABLE test_partitioned_table (" + "  _string VARCHAR" + ",  _varchar VARCHAR(65535)" + ", _char CHAR(10)" + ", _bigint BIGINT" + ", _integer INTEGER" + ", _smallint SMALLINT" + ", _tinyint TINYINT" + ", _real REAL" + ", _double DOUBLE" + ", _boolean BOOLEAN" + ", _decimal_short DECIMAL(3,2)" + ", _decimal_long DECIMAL(30,10)" + ", _partition_string VARCHAR" + ", _partition_varchar VARCHAR(65535)" + ", _partition_char CHAR(10)" + ", _partition_tinyint TINYINT" + ", _partition_smallint SMALLINT" + ", _partition_integer INTEGER" + ", _partition_bigint BIGINT" + ", _partition_boolean BOOLEAN" + ", _partition_decimal_short DECIMAL(3,2)" + ", _partition_decimal_long DECIMAL(30,10)" + ", _partition_date DATE" + ", _partition_timestamp TIMESTAMP" + ") " + "WITH (" + "format = '" + storageFormat + "', " + "partitioned_by = ARRAY[ '_partition_string', '_partition_varchar', '_partition_char', '_partition_tinyint', '_partition_smallint', '_partition_integer', '_partition_bigint', '_partition_boolean', '_partition_decimal_short', '_partition_decimal_long', '_partition_date', '_partition_timestamp']" + ") ";
    // AVRO variant: widen the non-partition SMALLINT/TINYINT columns to INTEGER.
    if (storageFormat == HiveStorageFormat.AVRO) {
        createTable = createTable.replace(" _smallint SMALLINT,", " _smallint INTEGER,");
        createTable = createTable.replace(" _tinyint TINYINT,", " _tinyint INTEGER,");
    }
    assertUpdate(session, createTable);
    TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_partitioned_table");
    assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
    List<String> partitionedBy = ImmutableList.of("_partition_string", "_partition_varchar", "_partition_char", "_partition_tinyint", "_partition_smallint", "_partition_integer", "_partition_bigint", "_partition_boolean", "_partition_decimal_short", "_partition_decimal_long", "_partition_date", "_partition_timestamp");
    assertEquals(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), partitionedBy);
    // Partition columns — and only partition columns — should carry the partition-key extra info.
    for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) {
        boolean partitionKey = partitionedBy.contains(columnMetadata.getName());
        assertEquals(columnMetadata.getExtraInfo(), columnExtraInfo(partitionKey));
    }
    // Spot-check the declared types of the character-typed columns.
    assertColumnType(tableMetadata, "_string", createUnboundedVarcharType());
    assertColumnType(tableMetadata, "_varchar", createVarcharType(65535));
    assertColumnType(tableMetadata, "_char", createCharType(10));
    assertColumnType(tableMetadata, "_partition_string", createUnboundedVarcharType());
    assertColumnType(tableMetadata, "_partition_varchar", createVarcharType(65535));
    // The freshly created table must be empty.
    MaterializedResult result = computeActual("SELECT * FROM test_partitioned_table");
    assertEquals(result.getRowCount(), 0);
    @Language("SQL") String select = "" + "SELECT" + " 'foo' _string" + ", 'bar' _varchar" + ", CAST('boo' AS CHAR(10)) _char" + ", CAST(1 AS BIGINT) _bigint" + ", 2 _integer" + ", CAST (3 AS SMALLINT) _smallint" + ", CAST (4 AS TINYINT) _tinyint" + ", CAST('123.45' AS REAL) _real" + ", CAST('3.14' AS DOUBLE) _double" + ", true _boolean" + ", CAST('3.14' AS DECIMAL(3,2)) _decimal_short" + ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _decimal_long" + ", 'foo' _partition_string" + ", 'bar' _partition_varchar" + ", CAST('boo' AS CHAR(10)) _partition_char" + ", CAST(1 AS TINYINT) _partition_tinyint" + ", CAST(1 AS SMALLINT) _partition_smallint" + ", 1 _partition_integer" + ", CAST (1 AS BIGINT) _partition_bigint" + ", true _partition_boolean" + ", CAST('3.14' AS DECIMAL(3,2)) _partition_decimal_short" + ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _partition_decimal_long" + ", CAST('2017-05-01' AS DATE) _partition_date" + ", CAST('2017-05-01 10:12:34' AS TIMESTAMP) _partition_timestamp";
    // Mirror the AVRO column widening in the inserted/expected values.
    if (storageFormat == HiveStorageFormat.AVRO) {
        select = select.replace(" CAST (3 AS SMALLINT) _smallint,", " 3 _smallint,");
        select = select.replace(" CAST (4 AS TINYINT) _tinyint,", " 4 _tinyint,");
    }
    assertUpdate(session, "INSERT INTO test_partitioned_table " + select, 1);
    // The inserted row must read back identically, with and without partition-key predicates.
    assertQuery(session, "SELECT * FROM test_partitioned_table", select);
    assertQuery(session, "SELECT * FROM test_partitioned_table WHERE" + " 'foo' = _partition_string" + " AND 'bar' = _partition_varchar" + " AND CAST('boo' AS CHAR(10)) = _partition_char" + " AND CAST(1 AS TINYINT) = _partition_tinyint" + " AND CAST(1 AS SMALLINT) = _partition_smallint" + " AND 1 = _partition_integer" + " AND CAST(1 AS BIGINT) = _partition_bigint" + " AND true = _partition_boolean" + " AND CAST('3.14' AS DECIMAL(3,2)) = _partition_decimal_short" + " AND CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) = _partition_decimal_long" + " AND CAST('2017-05-01' AS DATE) = _partition_date" + " AND CAST('2017-05-01 10:12:34' AS TIMESTAMP) = _partition_timestamp", select);
    assertUpdate(session, "DROP TABLE test_partitioned_table");
    assertFalse(getQueryRunner().tableExists(session, "test_partitioned_table"));
}
Also used : TableMetadata(io.trino.metadata.TableMetadata) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Language(org.intellij.lang.annotations.Language) MaterializedResult(io.trino.testing.MaterializedResult)

Example 30 with TableMetadata

Use of io.trino.metadata.TableMetadata in the trino project by trinodb.

From the class LogicalPlanner, method createTableExecutePlan.

/**
 * Plans an ALTER TABLE ... EXECUTE statement: builds a (possibly WHERE-filtered) scan of
 * the target table, feeds its visible columns into a TableExecuteNode, and wraps that in
 * a TableFinishNode whose "rows" symbol is the plan's output.
 */
private RelationPlan createTableExecutePlan(Analysis analysis, TableExecute statement) {
    Table table = statement.getTable();
    TableHandle tableHandle = analysis.getTableHandle(table);
    QualifiedObjectName tableName = createQualifiedObjectName(session, statement, table.getName());
    // The execute handle was resolved during analysis; its absence here is a bug.
    TableExecuteHandle executeHandle = analysis.getTableExecuteHandle().orElseThrow();
    RelationPlan tableScanPlan = createRelationPlan(analysis, table);
    PlanBuilder sourcePlanBuilder = newPlanBuilder(tableScanPlan, analysis, ImmutableMap.of(), ImmutableMap.of());
    // Apply the optional WHERE clause as a FilterNode, planning any subqueries it contains first.
    if (statement.getWhere().isPresent()) {
        SubqueryPlanner subqueryPlanner = new SubqueryPlanner(analysis, symbolAllocator, idAllocator, buildLambdaDeclarationToSymbolMap(analysis, symbolAllocator), plannerContext, typeCoercion, Optional.empty(), session, ImmutableMap.of());
        Expression whereExpression = statement.getWhere().get();
        sourcePlanBuilder = subqueryPlanner.handleSubqueries(sourcePlanBuilder, whereExpression, analysis.getSubqueries(statement));
        sourcePlanBuilder = sourcePlanBuilder.withNewRoot(new FilterNode(idAllocator.getNextId(), sourcePlanBuilder.getRoot(), sourcePlanBuilder.rewrite(whereExpression)));
    }
    PlanNode sourcePlanRoot = sourcePlanBuilder.getRoot();
    TableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
    // Names of the non-hidden columns; paired positionally with `symbols` below.
    List<String> columnNames = tableMetadata.getColumns().stream().filter(// todo this filter is redundant
    column -> !column.isHidden()).map(ColumnMetadata::getName).collect(toImmutableList());
    TableWriterNode.TableExecuteTarget tableExecuteTarget = new TableWriterNode.TableExecuteTarget(executeHandle, Optional.empty(), tableName.asSchemaTableName());
    // Ask the connector for a write layout for this execute operation, if it declares one.
    Optional<TableLayout> layout = metadata.getLayoutForTableExecute(session, executeHandle);
    List<Symbol> symbols = visibleFields(tableScanPlan);
    // todo extract common method to be used here and in createTableWriterPlan()
    Optional<PartitioningScheme> partitioningScheme = Optional.empty();
    Optional<PartitioningScheme> preferredPartitioningScheme = Optional.empty();
    if (layout.isPresent()) {
        // Map each partition column name to its source symbol via the shared column order.
        List<Symbol> partitionFunctionArguments = new ArrayList<>();
        layout.get().getPartitionColumns().stream().mapToInt(columnNames::indexOf).mapToObj(symbols::get).forEach(partitionFunctionArguments::add);
        List<Symbol> outputLayout = new ArrayList<>(symbols);
        Optional<PartitioningHandle> partitioningHandle = layout.get().getPartitioning();
        if (partitioningHandle.isPresent()) {
            // The connector supplied a concrete partitioning handle: use it directly.
            partitioningScheme = Optional.of(new PartitioningScheme(Partitioning.create(partitioningHandle.get(), partitionFunctionArguments), outputLayout));
        } else {
            // empty connector partitioning handle means evenly partitioning on partitioning columns
            preferredPartitioningScheme = Optional.of(new PartitioningScheme(Partitioning.create(FIXED_HASH_DISTRIBUTION, partitionFunctionArguments), outputLayout));
        }
    }
    // The positional pairing assumed above must hold.
    verify(columnNames.size() == symbols.size(), "columnNames.size() != symbols.size(): %s and %s", columnNames, symbols);
    TableFinishNode commitNode = new TableFinishNode(idAllocator.getNextId(), new TableExecuteNode(idAllocator.getNextId(), sourcePlanRoot, tableExecuteTarget, symbolAllocator.newSymbol("partialrows", BIGINT), symbolAllocator.newSymbol("fragment", VARBINARY), symbols, columnNames, partitioningScheme, preferredPartitioningScheme), tableExecuteTarget, symbolAllocator.newSymbol("rows", BIGINT), Optional.empty(), Optional.empty());
    return new RelationPlan(commitNode, analysis.getRootScope(), commitNode.getOutputSymbols(), Optional.empty());
}
Also used : TableExecuteNode(io.trino.sql.planner.plan.TableExecuteNode) FilterNode(io.trino.sql.planner.plan.FilterNode) ArrayList(java.util.ArrayList) TableExecuteHandle(io.trino.metadata.TableExecuteHandle) TableFinishNode(io.trino.sql.planner.plan.TableFinishNode) PlanBuilder.newPlanBuilder(io.trino.sql.planner.PlanBuilder.newPlanBuilder) PlanNode(io.trino.sql.planner.plan.PlanNode) TableWriterNode(io.trino.sql.planner.plan.TableWriterNode) TableLayout(io.trino.metadata.TableLayout) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) TableMetadata(io.trino.metadata.TableMetadata) Table(io.trino.sql.tree.Table) MetadataUtil.createQualifiedObjectName(io.trino.metadata.MetadataUtil.createQualifiedObjectName) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) CoalesceExpression(io.trino.sql.tree.CoalesceExpression) IfExpression(io.trino.sql.tree.IfExpression) Expression(io.trino.sql.tree.Expression) TableHandle(io.trino.metadata.TableHandle)

Aggregations

TableMetadata (io.trino.metadata.TableMetadata)30 Language (org.intellij.lang.annotations.Language)18 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)10 Constraint (io.trino.spi.connector.Constraint)10 ColumnConstraint (io.trino.sql.planner.planprinter.IoPlanPrinter.ColumnConstraint)10 TableHandle (io.trino.metadata.TableHandle)7 BaseConnectorTest (io.trino.testing.BaseConnectorTest)7 MaterializedResult (io.trino.testing.MaterializedResult)7 Test (org.testng.annotations.Test)7 Session (io.trino.Session)6 QualifiedObjectName (io.trino.metadata.QualifiedObjectName)5 MaterializedRow (io.trino.testing.MaterializedRow)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 HiveQueryRunner.createBucketedSession (io.trino.plugin.hive.HiveQueryRunner.createBucketedSession)4 ColumnHandle (io.trino.spi.connector.ColumnHandle)4 ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata)4 TableScanNode (io.trino.sql.planner.plan.TableScanNode)4 HashMap (java.util.HashMap)4 LinkedHashMap (java.util.LinkedHashMap)4 ImmutableList (com.google.common.collect.ImmutableList)3