Use of io.trino.metadata.TableMetadata in project trino by trinodb.
From class BaseHiveConnectorTest, method testPathHiddenColumn:
private void testPathHiddenColumn(Session session, HiveStorageFormat storageFormat) {
@Language("SQL") String createTable = "CREATE TABLE test_path " + "WITH (" + "format = '" + storageFormat + "'," + "partitioned_by = ARRAY['col1']" + ") AS " + "SELECT * FROM (VALUES " + "(0, 0), (3, 0), (6, 0), " + "(1, 1), (4, 1), (7, 1), " + "(2, 2), (5, 2) " + " ) t(col0, col1) ";
assertUpdate(session, createTable, 8);
assertTrue(getQueryRunner().tableExists(getSession(), "test_path"));
TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_path");
assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, FILE_SIZE_COLUMN_NAME, FILE_MODIFIED_TIME_COLUMN_NAME, PARTITION_COLUMN_NAME);
List<ColumnMetadata> columnMetadatas = tableMetadata.getColumns();
assertEquals(columnMetadatas.size(), columnNames.size());
for (int i = 0; i < columnMetadatas.size(); i++) {
ColumnMetadata columnMetadata = columnMetadatas.get(i);
assertEquals(columnMetadata.getName(), columnNames.get(i));
if (columnMetadata.getName().equals(PATH_COLUMN_NAME)) {
// $path should be a hidden column
assertTrue(columnMetadata.isHidden());
}
}
assertEquals(getPartitions("test_path").size(), 3);
MaterializedResult results = computeActual(session, format("SELECT *, \"%s\" FROM test_path", PATH_COLUMN_NAME));
Map<Integer, String> partitionPathMap = new HashMap<>();
for (int i = 0; i < results.getRowCount(); i++) {
MaterializedRow row = results.getMaterializedRows().get(i);
int col0 = (int) row.getField(0);
int col1 = (int) row.getField(1);
String pathName = (String) row.getField(2);
String parentDirectory = new Path(pathName).getParent().toString();
assertTrue(pathName.length() > 0);
assertEquals(col0 % 3, col1);
if (partitionPathMap.containsKey(col1)) {
// the rows in the same partition should be in the same partition directory
assertEquals(partitionPathMap.get(col1), parentDirectory);
} else {
partitionPathMap.put(col1, parentDirectory);
}
}
assertEquals(partitionPathMap.size(), 3);
assertUpdate(session, "DROP TABLE test_path");
assertFalse(getQueryRunner().tableExists(session, "test_path"));
}
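The test above keys off ColumnMetadata.isHidden() to recognize synthetic columns such as $path. As a minimal sketch, a hypothetical helper (not part of the Trino test suite) that collects the hidden column names from a TableMetadata could look like this, using only the accessors already exercised above and assuming the usual static import of Guava's toImmutableList:
private static List<String> hiddenColumnNames(TableMetadata tableMetadata)
{
    // keep only columns flagged as hidden ($path, $file_size, $file_modified_time, $partition)
    // and return their names in declaration order
    return tableMetadata.getColumns().stream()
            .filter(ColumnMetadata::isHidden)
            .map(ColumnMetadata::getName)
            .collect(toImmutableList());
}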
Use of io.trino.metadata.TableMetadata in project trino by trinodb.
From class BaseHiveConnectorTest, method verifyPartitionedBucketedTable:
private void verifyPartitionedBucketedTable(HiveStorageFormat storageFormat, String tableName) {
TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
assertEquals(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), ImmutableList.of("orderstatus"));
assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("custkey", "custkey2"));
assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);
List<?> partitions = getPartitions(tableName);
assertEquals(partitions.size(), 3);
// verify that we create bucket_count files in each partition
assertEqualsIgnoreOrder(
        computeActual(format("SELECT orderstatus, COUNT(DISTINCT \"$path\") FROM %s GROUP BY 1", tableName)),
        resultBuilder(getSession(), createVarcharType(1), BIGINT)
                .row("F", 11L)
                .row("O", 11L)
                .row("P", 11L)
                .build());
assertQuery("SELECT * FROM " + tableName, "SELECT custkey, custkey, comment, orderstatus FROM orders");
for (int i = 1; i <= 30; i++) {
assertQuery(format("SELECT * FROM %s WHERE custkey = %d AND custkey2 = %d", tableName, i, i), format("SELECT custkey, custkey, comment, orderstatus FROM orders WHERE custkey = %d", i));
}
}
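The four property assertions at the top of this method all follow one pattern; a hedged convenience helper (the name assertTableProperty is an assumption, built only from calls visible in the snippet) would compress them:
private static void assertTableProperty(TableMetadata tableMetadata, String propertyKey, Object expectedValue)
{
    // table properties (format, partitioned_by, bucketed_by, bucket_count, ...)
    // are exposed as a key-value map on the ConnectorTableMetadata
    assertEquals(tableMetadata.getMetadata().getProperties().get(propertyKey), expectedValue);
}
With it, the checks read as e.g. assertTableProperty(tableMetadata, BUCKET_COUNT_PROPERTY, 11).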
Use of io.trino.metadata.TableMetadata in project trino by trinodb.
From class BaseHiveConnectorTest, method testPartitionPerScanLimit:
@Test
public void testPartitionPerScanLimit() {
String tableName = "test_partition_per_scan_limit";
String partitionsTable = "\"" + tableName + "$partitions\"";
@Language("SQL") String createTable = "" + "CREATE TABLE " + tableName + " " + "(" + " foo VARCHAR," + " part BIGINT" + ") " + "WITH (" + "partitioned_by = ARRAY[ 'part' ]" + ") ";
assertUpdate(createTable);
TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
assertEquals(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), ImmutableList.of("part"));
// insert 1200 partitions
for (int i = 0; i < 12; i++) {
int partStart = i * 100;
int partEnd = (i + 1) * 100 - 1;
@Language("SQL") String insertPartitions = "" + "INSERT INTO " + tableName + " " + "SELECT 'bar' foo, part " + "FROM UNNEST(SEQUENCE(" + partStart + ", " + partEnd + ")) AS TMP(part)";
assertUpdate(insertPartitions, 100);
}
// we are not constrained by hive.max-partitions-per-scan when listing partitions
assertQuery("SELECT * FROM " + partitionsTable + " WHERE part > 490 AND part <= 500", "VALUES 491, 492, 493, 494, 495, 496, 497, 498, 499, 500");
assertQuery("SELECT * FROM " + partitionsTable + " WHERE part < 0", "SELECT null WHERE false");
assertQuery("SELECT * FROM " + partitionsTable, "VALUES " + LongStream.range(0, 1200).mapToObj(String::valueOf).collect(joining(",")));
// verify can query 1000 partitions
assertQuery("SELECT count(foo) FROM " + tableName + " WHERE part < 1000", "SELECT 1000");
// verify the remaining 200 partitions were inserted successfully
assertQuery("SELECT count(foo) FROM " + tableName + " WHERE part >= 1000 AND part < 1200", "SELECT 200");
// verify cannot query more than 1000 partitions
assertQueryFails("SELECT * FROM " + tableName + " WHERE part < 1001", format("Query over table 'tpch.%s' can potentially read more than 1000 partitions", tableName));
// verify cannot query all partitions
assertQueryFails("SELECT * FROM " + tableName, format("Query over table 'tpch.%s' can potentially read more than 1000 partitions", tableName));
assertUpdate("DROP TABLE " + tableName);
}
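Note that the $partitions queries above list all 1200 partitions even though scans of the base table are capped at 1000. A minimal sketch of a helper exploiting this (hypothetical name; it assumes MaterializedResult exposes getOnlyValue(), as used elsewhere in the Trino test framework):
private long countPartitions(String tableName)
{
    // the synthetic "<table>$partitions" relation returns one row per partition
    // and is not constrained by hive.max-partitions-per-scan
    return (long) computeActual(format("SELECT count(*) FROM \"%s$partitions\"", tableName))
            .getOnlyValue();
}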
Use of io.trino.metadata.TableMetadata in project trino by trinodb.
From class BaseHiveConnectorTest, method testCreatePartitionedTable:
private void testCreatePartitionedTable(Session session, HiveStorageFormat storageFormat) {
@Language("SQL") String createTable = "" + "CREATE TABLE test_partitioned_table (" + " _string VARCHAR" + ", _varchar VARCHAR(65535)" + ", _char CHAR(10)" + ", _bigint BIGINT" + ", _integer INTEGER" + ", _smallint SMALLINT" + ", _tinyint TINYINT" + ", _real REAL" + ", _double DOUBLE" + ", _boolean BOOLEAN" + ", _decimal_short DECIMAL(3,2)" + ", _decimal_long DECIMAL(30,10)" + ", _partition_string VARCHAR" + ", _partition_varchar VARCHAR(65535)" + ", _partition_char CHAR(10)" + ", _partition_tinyint TINYINT" + ", _partition_smallint SMALLINT" + ", _partition_integer INTEGER" + ", _partition_bigint BIGINT" + ", _partition_boolean BOOLEAN" + ", _partition_decimal_short DECIMAL(3,2)" + ", _partition_decimal_long DECIMAL(30,10)" + ", _partition_date DATE" + ", _partition_timestamp TIMESTAMP" + ") " + "WITH (" + "format = '" + storageFormat + "', " + "partitioned_by = ARRAY[ '_partition_string', '_partition_varchar', '_partition_char', '_partition_tinyint', '_partition_smallint', '_partition_integer', '_partition_bigint', '_partition_boolean', '_partition_decimal_short', '_partition_decimal_long', '_partition_date', '_partition_timestamp']" + ") ";
if (storageFormat == HiveStorageFormat.AVRO) {
createTable = createTable.replace(" _smallint SMALLINT,", " _smallint INTEGER,");
createTable = createTable.replace(" _tinyint TINYINT,", " _tinyint INTEGER,");
}
assertUpdate(session, createTable);
TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_partitioned_table");
assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
List<String> partitionedBy = ImmutableList.of("_partition_string", "_partition_varchar", "_partition_char", "_partition_tinyint", "_partition_smallint", "_partition_integer", "_partition_bigint", "_partition_boolean", "_partition_decimal_short", "_partition_decimal_long", "_partition_date", "_partition_timestamp");
assertEquals(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), partitionedBy);
for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) {
boolean partitionKey = partitionedBy.contains(columnMetadata.getName());
assertEquals(columnMetadata.getExtraInfo(), columnExtraInfo(partitionKey));
}
assertColumnType(tableMetadata, "_string", createUnboundedVarcharType());
assertColumnType(tableMetadata, "_varchar", createVarcharType(65535));
assertColumnType(tableMetadata, "_char", createCharType(10));
assertColumnType(tableMetadata, "_partition_string", createUnboundedVarcharType());
assertColumnType(tableMetadata, "_partition_varchar", createVarcharType(65535));
MaterializedResult result = computeActual("SELECT * FROM test_partitioned_table");
assertEquals(result.getRowCount(), 0);
@Language("SQL") String select = "" + "SELECT" + " 'foo' _string" + ", 'bar' _varchar" + ", CAST('boo' AS CHAR(10)) _char" + ", CAST(1 AS BIGINT) _bigint" + ", 2 _integer" + ", CAST (3 AS SMALLINT) _smallint" + ", CAST (4 AS TINYINT) _tinyint" + ", CAST('123.45' AS REAL) _real" + ", CAST('3.14' AS DOUBLE) _double" + ", true _boolean" + ", CAST('3.14' AS DECIMAL(3,2)) _decimal_short" + ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _decimal_long" + ", 'foo' _partition_string" + ", 'bar' _partition_varchar" + ", CAST('boo' AS CHAR(10)) _partition_char" + ", CAST(1 AS TINYINT) _partition_tinyint" + ", CAST(1 AS SMALLINT) _partition_smallint" + ", 1 _partition_integer" + ", CAST (1 AS BIGINT) _partition_bigint" + ", true _partition_boolean" + ", CAST('3.14' AS DECIMAL(3,2)) _partition_decimal_short" + ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _partition_decimal_long" + ", CAST('2017-05-01' AS DATE) _partition_date" + ", CAST('2017-05-01 10:12:34' AS TIMESTAMP) _partition_timestamp";
if (storageFormat == HiveStorageFormat.AVRO) {
select = select.replace(" CAST (3 AS SMALLINT) _smallint,", " 3 _smallint,");
select = select.replace(" CAST (4 AS TINYINT) _tinyint,", " 4 _tinyint,");
}
assertUpdate(session, "INSERT INTO test_partitioned_table " + select, 1);
assertQuery(session, "SELECT * FROM test_partitioned_table", select);
assertQuery(session, "SELECT * FROM test_partitioned_table WHERE" + " 'foo' = _partition_string" + " AND 'bar' = _partition_varchar" + " AND CAST('boo' AS CHAR(10)) = _partition_char" + " AND CAST(1 AS TINYINT) = _partition_tinyint" + " AND CAST(1 AS SMALLINT) = _partition_smallint" + " AND 1 = _partition_integer" + " AND CAST(1 AS BIGINT) = _partition_bigint" + " AND true = _partition_boolean" + " AND CAST('3.14' AS DECIMAL(3,2)) = _partition_decimal_short" + " AND CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) = _partition_decimal_long" + " AND CAST('2017-05-01' AS DATE) = _partition_date" + " AND CAST('2017-05-01 10:12:34' AS TIMESTAMP) = _partition_timestamp", select);
assertUpdate(session, "DROP TABLE test_partitioned_table");
assertFalse(getQueryRunner().tableExists(session, "test_partitioned_table"));
}
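assertColumnType is called above but defined elsewhere in the test base class. A minimal sketch of what such a helper might look like, assuming only the TableMetadata and ColumnMetadata accessors already shown:
private static void assertColumnType(TableMetadata tableMetadata, String columnName, Type expectedType)
{
    // locate the column by name and compare its declared type
    ColumnMetadata column = tableMetadata.getColumns().stream()
            .filter(candidate -> candidate.getName().equals(columnName))
            .findFirst()
            .orElseThrow(() -> new AssertionError("no column named " + columnName));
    assertEquals(column.getType(), expectedType);
}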
Use of io.trino.metadata.TableMetadata in project trino by trinodb.
From class LogicalPlanner, method createTableExecutePlan:
private RelationPlan createTableExecutePlan(Analysis analysis, TableExecute statement) {
Table table = statement.getTable();
TableHandle tableHandle = analysis.getTableHandle(table);
QualifiedObjectName tableName = createQualifiedObjectName(session, statement, table.getName());
TableExecuteHandle executeHandle = analysis.getTableExecuteHandle().orElseThrow();
RelationPlan tableScanPlan = createRelationPlan(analysis, table);
PlanBuilder sourcePlanBuilder = newPlanBuilder(tableScanPlan, analysis, ImmutableMap.of(), ImmutableMap.of());
if (statement.getWhere().isPresent()) {
SubqueryPlanner subqueryPlanner = new SubqueryPlanner(
        analysis,
        symbolAllocator,
        idAllocator,
        buildLambdaDeclarationToSymbolMap(analysis, symbolAllocator),
        plannerContext,
        typeCoercion,
        Optional.empty(),
        session,
        ImmutableMap.of());
Expression whereExpression = statement.getWhere().get();
sourcePlanBuilder = subqueryPlanner.handleSubqueries(sourcePlanBuilder, whereExpression, analysis.getSubqueries(statement));
sourcePlanBuilder = sourcePlanBuilder.withNewRoot(new FilterNode(idAllocator.getNextId(), sourcePlanBuilder.getRoot(), sourcePlanBuilder.rewrite(whereExpression)));
}
PlanNode sourcePlanRoot = sourcePlanBuilder.getRoot();
TableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
List<String> columnNames = tableMetadata.getColumns().stream()
        // TODO: this filter is redundant
        .filter(column -> !column.isHidden())
        .map(ColumnMetadata::getName)
        .collect(toImmutableList());
TableWriterNode.TableExecuteTarget tableExecuteTarget = new TableWriterNode.TableExecuteTarget(executeHandle, Optional.empty(), tableName.asSchemaTableName());
Optional<TableLayout> layout = metadata.getLayoutForTableExecute(session, executeHandle);
List<Symbol> symbols = visibleFields(tableScanPlan);
// TODO: extract a common method to be used here and in createTableWriterPlan(); see the sketch after this method
Optional<PartitioningScheme> partitioningScheme = Optional.empty();
Optional<PartitioningScheme> preferredPartitioningScheme = Optional.empty();
if (layout.isPresent()) {
List<Symbol> partitionFunctionArguments = new ArrayList<>();
layout.get().getPartitionColumns().stream()
        .mapToInt(columnNames::indexOf)
        .mapToObj(symbols::get)
        .forEach(partitionFunctionArguments::add);
List<Symbol> outputLayout = new ArrayList<>(symbols);
Optional<PartitioningHandle> partitioningHandle = layout.get().getPartitioning();
if (partitioningHandle.isPresent()) {
partitioningScheme = Optional.of(new PartitioningScheme(Partitioning.create(partitioningHandle.get(), partitionFunctionArguments), outputLayout));
} else {
// an empty connector partitioning handle means even distribution across the partitioning columns
preferredPartitioningScheme = Optional.of(new PartitioningScheme(Partitioning.create(FIXED_HASH_DISTRIBUTION, partitionFunctionArguments), outputLayout));
}
}
verify(columnNames.size() == symbols.size(), "columnNames.size() != symbols.size(): %s and %s", columnNames, symbols);
TableFinishNode commitNode = new TableFinishNode(
        idAllocator.getNextId(),
        new TableExecuteNode(
                idAllocator.getNextId(),
                sourcePlanRoot,
                tableExecuteTarget,
                symbolAllocator.newSymbol("partialrows", BIGINT),
                symbolAllocator.newSymbol("fragment", VARBINARY),
                symbols,
                columnNames,
                partitioningScheme,
                preferredPartitioningScheme),
        tableExecuteTarget,
        symbolAllocator.newSymbol("rows", BIGINT),
        Optional.empty(),
        Optional.empty());
return new RelationPlan(commitNode, analysis.getRootScope(), commitNode.getOutputSymbols(), Optional.empty());
}
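The TODO in this method asks for the layout-to-partitioning-scheme derivation to be shared with createTableWriterPlan(). A hedged sketch of one possible extraction (the helper name and shape are assumptions; it reuses only the calls visible above, and the caller still routes the result to partitioningScheme or preferredPartitioningScheme depending on whether the connector supplied a partitioning handle):
private static PartitioningScheme deriveScheme(TableLayout layout, List<String> columnNames, List<Symbol> symbols)
{
    // map each partition column name to the symbol at the same position in the output
    List<Symbol> partitionFunctionArguments = layout.getPartitionColumns().stream()
            .mapToInt(columnNames::indexOf)
            .mapToObj(symbols::get)
            .collect(toImmutableList());
    // an absent connector handle means even distribution across the partitioning columns
    PartitioningHandle handle = layout.getPartitioning().orElse(FIXED_HASH_DISTRIBUTION);
    return new PartitioningScheme(
            Partitioning.create(handle, partitionFunctionArguments),
            new ArrayList<>(symbols));
}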