Use of org.apache.drill.metastore.statistics.ColumnStatistics in the Apache Drill project.
Class TestMetastoreWithEasyFormatPlugin, method testNonEmptyTableWithEmptyFile.
/**
 * Collects Metastore metadata for a CSV table directory that holds both {@code empty.csv}
 * and {@code nations.csv}, then checks the stored table metadata, the number of file
 * metadata entries, that the plan reports {@code usedMetastore=true}, and the row count.
 */
@Test
public void testNonEmptyTableWithEmptyFile() throws Exception {
  String tableName = "csv_with_empty_file";

  // Place both resource files side by side in the same table directory.
  dirTestWatcher.copyResourceToTestTmp(
      Paths.get("store", "text", "directoryWithEmptyCSV", "empty.csv"),
      Paths.get(tableName, "empty.csv"));
  File nationsFile = dirTestWatcher.copyResourceToTestTmp(
      Paths.get("store", "text", "data", "nations.csv"),
      Paths.get(tableName, "nations.csv"));
  File tableDir = nationsFile.getParentFile();

  TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");

  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("n_nationkey", TypeProtos.MinorType.INT)
      .add("n_name", TypeProtos.MinorType.VARCHAR)
      .add("n_regionkey", TypeProtos.MinorType.INT)
      .add("n_comment", TypeProtos.MinorType.VARCHAR)
      .build();

  // Expected per-column min / max / row-count statistics.
  ImmutableMap.Builder<SchemaPath, ColumnStatistics<?>> statsBuilder = ImmutableMap.builder();
  statsBuilder.put(SchemaPath.getSimplePath("n_nationkey"),
      getColumnStatistics(0, 24, 25L, TypeProtos.MinorType.INT));
  statsBuilder.put(SchemaPath.getSimplePath("n_name"),
      getColumnStatistics("ALGERIA", "VIETNAM", 25L, TypeProtos.MinorType.VARCHAR));
  statsBuilder.put(SchemaPath.getSimplePath("n_regionkey"),
      getColumnStatistics(0, 4, 25L, TypeProtos.MinorType.INT));
  statsBuilder.put(SchemaPath.getSimplePath("n_comment"),
      getColumnStatistics(
          "alfoxespromiseslylyaccordingtotheregularaccounts.boldrequestsalon",
          "yfinalpackages.slowfoxescajolequickly.quicklysilentplateletsbreachironicaccounts.unusualpintobe",
          25L,
          TypeProtos.MinorType.VARCHAR));
  Map<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics = statsBuilder.build();

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(expectedSchema)
      .location(new Path(tableDir.toURI().getPath()))
      .columnsStatistics(expectedColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(25L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(tableDir))
      .build();

  try {
    String analyzeQuery = "ANALYZE TABLE table(dfs.tmp.`%s` "
        + "(schema=>'inline=("
        + "`n_nationkey` INT not null,"
        + "`n_name` VARCHAR not null,"
        + "`n_regionkey` INT not null,"
        + "`n_comment` VARCHAR not null)')) REFRESH METADATA";
    String expectedSummary =
        String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName);

    testBuilder()
        .sqlQuery(analyzeQuery, tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .metastoreTableInfo(tableInfo);
    assertTrue("table metadata wasn't found", storedTableInfo.isExists());

    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);

    List<FileMetadata> storedFiles = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(2, storedFiles.size());

    String selectAll = "select * from dfs.tmp.`%s`";

    queryBuilder()
        .sql(selectAll, tableName)
        .planMatcher()
        .include("usedMetastore=true")
        .match();

    long actualRowCount = queryBuilder().sql(selectAll, tableName).run().recordCount();
    assertEquals(25, actualRowCount);
  } finally {
    // Always drop the collected metadata, whether or not the assertions passed.
    run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
  }
}
Use of org.apache.drill.metastore.statistics.ColumnStatistics in the Apache Drill project.
Class TestMetastoreWithEasyFormatPlugin, method testAnalyzeOnTextTableWithHeader.
/**
 * Runs {@code analyze table ... refresh metadata} on the {@code cars.csvh} table without a
 * provided schema and compares the table metadata stored in the Metastore with the expected
 * all-VARCHAR schema and column statistics.
 */
@Test
public void testAnalyzeOnTextTableWithHeader() throws Exception {
  String tableName = "store/text/data/cars.csvh";
  File tableFile = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
  TableInfo tableInfo = getTableInfo(tableName, "default", "csvh");

  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("Year", TypeProtos.MinorType.VARCHAR)
      .add("Make", TypeProtos.MinorType.VARCHAR)
      .add("Model", TypeProtos.MinorType.VARCHAR)
      .add("Description", TypeProtos.MinorType.VARCHAR)
      .add("Price", TypeProtos.MinorType.VARCHAR)
      .build();

  // Expected per-column min / max / row-count statistics.
  ImmutableMap.Builder<SchemaPath, ColumnStatistics<?>> statsBuilder = ImmutableMap.builder();
  statsBuilder.put(SchemaPath.getSimplePath("Description"),
      getColumnStatistics("", "ac, abs, moon", 4L, TypeProtos.MinorType.VARCHAR));
  statsBuilder.put(SchemaPath.getSimplePath("Make"),
      getColumnStatistics("Chevy", "Jeep", 4L, TypeProtos.MinorType.VARCHAR));
  statsBuilder.put(SchemaPath.getSimplePath("Model"),
      getColumnStatistics("E350", "Venture \"Extended Edition, Very Large\"", 4L, TypeProtos.MinorType.VARCHAR));
  statsBuilder.put(SchemaPath.getSimplePath("Price"),
      getColumnStatistics("3000.00", "5000.00", 4L, TypeProtos.MinorType.VARCHAR));
  statsBuilder.put(SchemaPath.getSimplePath("Year"),
      getColumnStatistics("1996", "1999", 4L, TypeProtos.MinorType.VARCHAR));
  Map<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics = statsBuilder.build();

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(expectedSchema)
      .location(new Path(tableFile.toURI().getPath()))
      .columnsStatistics(expectedColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(4L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(tableFile))
      .build();

  try {
    String expectedSummary =
        String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName);

    testBuilder()
        .sqlQuery("analyze table dfs.`%s` refresh metadata", tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    BaseTableMetadata storedTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, storedTableMetadata);
  } finally {
    // Always drop the collected metadata, whether or not the assertions passed.
    run("analyze table dfs.`%s` drop metadata if exists", tableName);
  }
}
Use of org.apache.drill.metastore.statistics.ColumnStatistics in the Apache Drill project.
Class TestMetastoreWithEasyFormatPlugin, method testAnalyzeOnJsonTable.
/**
 * Runs {@code analyze table ... refresh metadata} on the {@code multilevel/json} table with a
 * provided schema and compares what is read back from the Metastore with expected values:
 * table metadata, the top-level ({@code dir0}) segment for 1994, the locations of all
 * top-level segments, the nested ({@code dir1}) segment for 1994/Q1, and the file metadata
 * for {@code 1994/Q1/orders_94_q1.json}.
 */
@Test
public void testAnalyzeOnJsonTable() throws Exception {
  String tableName = "multilevel/json";
  TableInfo tableInfo = getTableInfo(tableName, "default", "json");
  File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
  Path tablePath = new Path(table.toURI().getPath());

  TupleMetadata schema = new SchemaBuilder()
      .addNullable("dir0", TypeProtos.MinorType.VARCHAR)
      .addNullable("dir1", TypeProtos.MinorType.VARCHAR)
      .addNullable("o_orderkey", TypeProtos.MinorType.BIGINT)
      .addNullable("o_custkey", TypeProtos.MinorType.BIGINT)
      .addNullable("o_orderstatus", TypeProtos.MinorType.VARCHAR)
      .addNullable("o_totalprice", TypeProtos.MinorType.FLOAT8)
      .addNullable("o_orderdate", TypeProtos.MinorType.VARCHAR)
      .addNullable("o_orderpriority", TypeProtos.MinorType.VARCHAR)
      .addNullable("o_clerk", TypeProtos.MinorType.VARCHAR)
      .addNullable("o_shippriority", TypeProtos.MinorType.BIGINT)
      .addNullable("o_comment", TypeProtos.MinorType.VARCHAR)
      .build();

  // Table-level statistics: start from the shared constants and override four columns.
  Map<SchemaPath, ColumnStatistics<?>> tableColumnStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
  tableColumnStatistics.put(SchemaPath.getSimplePath("o_custkey"),
      getColumnStatistics(25L, 1498L, 120L, TypeProtos.MinorType.BIGINT));
  tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderdate"),
      getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1996-12-19T00:00:00.000-08:00", 120L, TypeProtos.MinorType.VARCHAR));
  tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderkey"),
      getColumnStatistics(1L, 1319L, 120L, TypeProtos.MinorType.BIGINT));
  tableColumnStatistics.put(SchemaPath.getSimplePath("o_shippriority"),
      getColumnStatistics(0L, 0L, 120L, TypeProtos.MinorType.BIGINT));

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(schema)
      .location(tablePath)
      .columnsStatistics(tableColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(table))
      .build();

  // Segment and file metadata are built with a TableInfo that carries no table type.
  TableInfo baseTableInfo = TableInfo.builder()
      .name(tableName)
      .storagePlugin("dfs")
      .workspace("default")
      .build();

  // Expected statistics for the top-level 1994 segment.
  Map<SchemaPath, ColumnStatistics<?>> dir0Stats = new HashMap<>(DIR0_1994_SEGMENT_COLUMN_STATISTICS);
  dir0Stats.put(SchemaPath.getSimplePath("o_custkey"),
      getColumnStatistics(25L, 1469L, 40L, TypeProtos.MinorType.BIGINT));
  dir0Stats.put(SchemaPath.getSimplePath("o_orderdate"),
      getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-12-23T00:00:00.000-08:00", 40L, TypeProtos.MinorType.VARCHAR));
  dir0Stats.put(SchemaPath.getSimplePath("o_orderkey"),
      getColumnStatistics(5L, 1031L, 40L, TypeProtos.MinorType.BIGINT));
  dir0Stats.put(SchemaPath.getSimplePath("o_shippriority"),
      getColumnStatistics(0L, 0L, 40L, TypeProtos.MinorType.BIGINT));

  SegmentMetadata dir0 = SegmentMetadata.builder()
      .tableInfo(baseTableInfo)
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.SEGMENT)
          .identifier("1994")
          .key("1994")
          .build())
      .path(new Path(tablePath, "1994"))
      .schema(schema)
      .lastModifiedTime(getMaxLastModified(new File(table, "1994")))
      .column(SchemaPath.getSimplePath("dir0"))
      .columnsStatistics(dir0Stats)
      .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
      .locations(ImmutableSet.of(
          new Path(tablePath, "1994/Q1/orders_94_q1.json"),
          new Path(tablePath, "1994/Q2/orders_94_q2.json"),
          new Path(tablePath, "1994/Q3/orders_94_q3.json"),
          new Path(tablePath, "1994/Q4/orders_94_q4.json")))
      .partitionValues(Collections.singletonList("1994"))
      .build();

  Set<Path> expectedTopLevelSegmentLocations = ImmutableSet.of(
      new Path(tablePath, "1994"),
      new Path(tablePath, "1995"),
      new Path(tablePath, "1996"));

  // One set of file locations per top-level segment (year).
  Set<Set<Path>> expectedSegmentFilesLocations = new HashSet<>();
  Set<Path> segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1994/Q2/orders_94_q2.json"),
      new Path(tablePath, "1994/Q4/orders_94_q4.json"),
      new Path(tablePath, "1994/Q1/orders_94_q1.json"),
      new Path(tablePath, "1994/Q3/orders_94_q3.json"));
  expectedSegmentFilesLocations.add(segmentFiles);
  segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1995/Q2/orders_95_q2.json"),
      new Path(tablePath, "1995/Q4/orders_95_q4.json"),
      new Path(tablePath, "1995/Q1/orders_95_q1.json"),
      new Path(tablePath, "1995/Q3/orders_95_q3.json"));
  expectedSegmentFilesLocations.add(segmentFiles);
  segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1996/Q3/orders_96_q3.json"),
      new Path(tablePath, "1996/Q2/orders_96_q2.json"),
      new Path(tablePath, "1996/Q4/orders_96_q4.json"),
      new Path(tablePath, "1996/Q1/orders_96_q1.json"));
  expectedSegmentFilesLocations.add(segmentFiles);

  // Expected statistics shared by the 1994/Q1 nested segment and its single file.
  Map<SchemaPath, ColumnStatistics<?>> dir0q1Stats = new HashMap<>(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS);
  dir0q1Stats.put(SchemaPath.getSimplePath("o_custkey"),
      getColumnStatistics(392L, 1411L, 10L, TypeProtos.MinorType.BIGINT));
  dir0q1Stats.put(SchemaPath.getSimplePath("o_orderdate"),
      getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-03-26T00:00:00.000-08:00", 10L, TypeProtos.MinorType.VARCHAR));
  dir0q1Stats.put(SchemaPath.getSimplePath("o_orderkey"),
      getColumnStatistics(66L, 833L, 10L, TypeProtos.MinorType.BIGINT));
  dir0q1Stats.put(SchemaPath.getSimplePath("o_shippriority"),
      getColumnStatistics(0L, 0L, 10L, TypeProtos.MinorType.BIGINT));

  long dir0q1lastModified =
      new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.json").lastModified();

  FileMetadata dir01994q1File = FileMetadata.builder()
      .tableInfo(baseTableInfo)
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.FILE)
          .identifier("1994/Q1/orders_94_q1.json")
          .key("1994")
          .build())
      .schema(schema)
      .lastModifiedTime(dir0q1lastModified)
      .columnsStatistics(dir0q1Stats)
      .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
      .path(new Path(tablePath, "1994/Q1/orders_94_q1.json"))
      .build();

  try {
    testBuilder()
        .sqlQuery("analyze table table(dfs.`%s`(schema=>%s)) refresh metadata", tableName, SCHEMA_STRING)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
        .go();

    // verify table metadata
    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);

    // verify the top-level segment for 1994
    List<SegmentMetadata> topSegmentMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .segmentsMetadataByColumn(tableInfo, null, "`dir0`");

    SegmentMetadata actualDir0Metadata = topSegmentMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994"))
        .findAny()
        .orElseThrow(() -> new AssertionError("Segment is absent"));
    // The stored segment is compared as-is, including its locations.
    assertEquals(dir0, actualDir0Metadata);

    // verify top segments locations
    Set<Path> topLevelSegmentLocations = topSegmentMetadata.stream()
        .map(SegmentMetadata::getLocation)
        .collect(Collectors.toSet());
    assertEquals(expectedTopLevelSegmentLocations, topLevelSegmentLocations);

    Set<Set<Path>> segmentFilesLocations = topSegmentMetadata.stream()
        .map(SegmentMetadata::getLocations)
        .collect(Collectors.toSet());
    assertEquals(expectedSegmentFilesLocations, segmentFilesLocations);

    // verify nested segments
    List<SegmentMetadata> nestedSegmentMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .segmentsMetadataByColumn(tableInfo, null, "`dir1`");
    assertEquals(12, nestedSegmentMetadata.size());

    SegmentMetadata dir01994q1Segment = SegmentMetadata.builder()
        .tableInfo(baseTableInfo)
        .metadataInfo(MetadataInfo.builder()
            .type(MetadataType.SEGMENT)
            .identifier("1994/Q1")
            .key("1994")
            .build())
        .path(new Path(new Path(tablePath, "1994"), "Q1"))
        .schema(schema)
        .lastModifiedTime(getMaxLastModified(new File(new File(table, "1994"), "Q1")))
        .column(SchemaPath.getSimplePath("dir1"))
        .columnsStatistics(dir0q1Stats)
        .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
        .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.json")))
        .partitionValues(Collections.singletonList("Q1"))
        .build();

    // verify the nested segment for 1994/Q1; fail with a clear message if it is missing
    SegmentMetadata actualDir01994q1Segment = nestedSegmentMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1"))
        .findAny()
        .orElseThrow(() -> new AssertionError("Nested segment 1994/Q1 is absent"));
    assertEquals(dir01994q1Segment, actualDir01994q1Segment);

    // verify files metadata
    List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(12, filesMetadata.size());

    // verify first file metadata; fail with a clear message if it is missing
    FileMetadata actualDir01994q1File = filesMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.json"))
        .findAny()
        .orElseThrow(() -> new AssertionError("File metadata for 1994/Q1/orders_94_q1.json is absent"));
    assertEquals(dir01994q1File, actualDir01994q1File);
  } finally {
    // Always drop the collected metadata, whether or not the assertions passed.
    run("analyze table dfs.`%s` drop metadata if exists", tableName);
  }
}
Use of org.apache.drill.metastore.statistics.ColumnStatistics in the Apache Drill project.
Class TestMetastoreWithEasyFormatPlugin, method testEmptyCSV.
/**
 * Collects Metastore metadata for the {@code empty.csv} table with an inline single-column
 * schema, then checks the stored table metadata (zero rows), that exactly one file metadata
 * entry exists, that the plan reports {@code usedMetastore=true}, and that selecting from
 * the table yields an empty result set.
 */
@Test
public void testEmptyCSV() throws Exception {
  String tableName = "store/text/directoryWithEmptyCSV/empty.csv";
  File tableFile = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
  TableInfo tableInfo = getTableInfo(tableName, "default", "csv");

  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("Description", TypeProtos.MinorType.VARCHAR)
      .build();

  // Expected statistics for the only column: null min / max values and a count of zero.
  ImmutableMap<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics =
      ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
          .put(SchemaPath.getSimplePath("Description"),
              getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
          .build();

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(expectedSchema)
      .location(new Path(tableFile.toURI().getPath()))
      .columnsStatistics(expectedColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(tableFile))
      .build();

  try {
    String analyzeQuery =
        "analyze table table(dfs.`%s` (schema=>'inline=(`Description` VARCHAR not null)')) refresh metadata";
    String expectedSummary =
        String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName);

    testBuilder()
        .sqlQuery(analyzeQuery, tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .metastoreTableInfo(tableInfo);
    assertTrue("table metadata wasn't found", storedTableInfo.isExists());

    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);

    List<FileMetadata> storedFiles = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(1, storedFiles.size());

    String selectAll = "select * from dfs.`%s`";

    queryBuilder()
        .sql(selectAll, tableName)
        .planMatcher()
        .include("usedMetastore=true")
        .match();

    testBuilder()
        .sqlQuery(selectAll, tableName)
        .unOrdered()
        .baselineColumns("Description")
        .expectsEmptyResultSet()
        .go();
  } finally {
    // Always drop the collected metadata, whether or not the assertions passed.
    run("analyze table dfs.`%s` drop metadata if exists", tableName);
  }
}
Use of org.apache.drill.metastore.statistics.ColumnStatistics in the Apache Drill project.
Class TestInfoSchemaWithMetastore, method testColumns.
/**
 * Writes metadata for two tables into the Metastore (one without a schema, one with a
 * schema containing primitive, decimal, interval, array and nested struct columns plus
 * column statistics) and checks the rows returned by {@code information_schema.`columns`}
 * for those tables.
 */
@Test
public void testColumns() throws Exception {
  BaseTableMetadata tableNoSchema = BaseTableMetadata.builder()
      .tableInfo(TableInfo.builder()
          .storagePlugin("dfs")
          .workspace("tmp")
          .name("table_no_schema")
          .type("PARQUET")
          .build())
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.TABLE)
          .key(MetadataInfo.GENERAL_INFO_KEY)
          .build())
      .location(new Path("/tmp", "table_no_schema"))
      .metadataStatistics(Collections.emptyList())
      .columnsStatistics(Collections.emptyMap())
      .partitionKeys(Collections.emptyMap())
      .build();

  TupleMetadata schema = new SchemaBuilder()
      .addNullable("bigint_col", TypeProtos.MinorType.BIGINT)
      .addDecimal("decimal_col", TypeProtos.MinorType.VARDECIMAL, TypeProtos.DataMode.OPTIONAL, 10, 2)
      .add("interval_col", TypeProtos.MinorType.INTERVALYEAR)
      .addArray("array_col", TypeProtos.MinorType.BIT)
      .addMap("struct_col")
        .addNullable("struct_bigint", TypeProtos.MinorType.BIGINT)
        .add("struct_varchar", TypeProtos.MinorType.VARCHAR)
        .addMap("nested_struct")
          .addNullable("nested_struct_boolean", TypeProtos.MinorType.BIT)
          .add("nested_struct_varchar", TypeProtos.MinorType.VARCHAR)
          .resumeMap()
        .resumeSchema()
      .buildSchema();

  // Two extra columns carrying a default value and a format, appended after the built schema.
  TypeProtos.MajorType requiredVarchar = TypeProtos.MajorType.newBuilder()
      .setMinorType(TypeProtos.MinorType.VARCHAR)
      .setMode(TypeProtos.DataMode.REQUIRED)
      .build();
  PrimitiveColumnMetadata varcharColumn = new PrimitiveColumnMetadata("varchar_col", requiredVarchar);
  varcharColumn.setDefaultValue("ABC");

  TypeProtos.MajorType requiredTimestamp = TypeProtos.MajorType.newBuilder()
      .setMinorType(TypeProtos.MinorType.TIMESTAMP)
      .setMode(TypeProtos.DataMode.REQUIRED)
      .build();
  PrimitiveColumnMetadata timestampColumn = new PrimitiveColumnMetadata("timestamp_col", requiredTimestamp);
  timestampColumn.setFormat("yyyy-MM-dd HH:mm:ss");

  schema.addColumn(varcharColumn);
  schema.addColumn(timestampColumn);

  Map<SchemaPath, ColumnStatistics<?>> columnsStatistics = new HashMap<>();
  columnsStatistics.put(
      SchemaPath.parseFromString("varchar_col"),
      new ColumnStatistics<>(Arrays.asList(
          new StatisticsHolder<>("aaa", ColumnStatisticsKind.MIN_VALUE),
          new StatisticsHolder<>("zzz", ColumnStatisticsKind.MAX_VALUE))));
  columnsStatistics.put(
      SchemaPath.parseFromString("struct_col.nested_struct.nested_struct_varchar"),
      new ColumnStatistics<>(Arrays.asList(
          new StatisticsHolder<>("bbb", ColumnStatisticsKind.MIN_VALUE),
          new StatisticsHolder<>("ccc", ColumnStatisticsKind.MAX_VALUE))));
  columnsStatistics.put(
      SchemaPath.parseFromString("bigint_col"),
      new ColumnStatistics<>(Arrays.asList(
          new StatisticsHolder<>(100L, ColumnStatisticsKind.NULLS_COUNT),
          new StatisticsHolder<>(10.5D, ColumnStatisticsKind.NDV))));
  columnsStatistics.put(
      SchemaPath.parseFromString("struct_col.struct_bigint"),
      new ColumnStatistics<>(Collections.singletonList(
          new StatisticsHolder<>(10.5D, ColumnStatisticsKind.NON_NULL_COUNT))));

  ZonedDateTime currentTime = currentUtcTime();

  String tableName = "table_with_schema";
  BaseTableMetadata tableWithSchema = BaseTableMetadata.builder()
      .tableInfo(TableInfo.builder()
          .storagePlugin("dfs")
          .workspace("tmp")
          .name(tableName)
          .type("PARQUET")
          .build())
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.TABLE)
          .key(MetadataInfo.GENERAL_INFO_KEY)
          .build())
      .location(new Path("/tmp", tableName))
      .schema(schema)
      .metadataStatistics(Collections.emptyList())
      .columnsStatistics(columnsStatistics)
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(currentTime.toInstant().toEpochMilli())
      .build();

  metastore.tables().modify()
      .overwrite(tableNoSchema.toMetadataUnit(), tableWithSchema.toMetadataUnit())
      .execute();

  // Selected columns; every baseline row below lists its values in this same order.
  List<String> columns = Arrays.asList(
      InfoSchemaConstants.SHRD_COL_TABLE_CATALOG,
      InfoSchemaConstants.SHRD_COL_TABLE_SCHEMA,
      InfoSchemaConstants.SHRD_COL_TABLE_NAME,
      InfoSchemaConstants.COLS_COL_COLUMN_NAME,
      InfoSchemaConstants.COLS_COL_ORDINAL_POSITION,
      InfoSchemaConstants.COLS_COL_COLUMN_DEFAULT,
      InfoSchemaConstants.COLS_COL_IS_NULLABLE,
      InfoSchemaConstants.COLS_COL_DATA_TYPE,
      InfoSchemaConstants.COLS_COL_CHARACTER_MAXIMUM_LENGTH,
      InfoSchemaConstants.COLS_COL_CHARACTER_OCTET_LENGTH,
      InfoSchemaConstants.COLS_COL_NUMERIC_PRECISION,
      InfoSchemaConstants.COLS_COL_NUMERIC_PRECISION_RADIX,
      InfoSchemaConstants.COLS_COL_NUMERIC_SCALE,
      InfoSchemaConstants.COLS_COL_DATETIME_PRECISION,
      InfoSchemaConstants.COLS_COL_INTERVAL_TYPE,
      InfoSchemaConstants.COLS_COL_INTERVAL_PRECISION,
      InfoSchemaConstants.COLS_COL_COLUMN_SIZE,
      InfoSchemaConstants.COLS_COL_COLUMN_FORMAT,
      InfoSchemaConstants.COLS_COL_NUM_NULLS,
      InfoSchemaConstants.COLS_COL_MIN_VAL,
      InfoSchemaConstants.COLS_COL_MAX_VAL,
      InfoSchemaConstants.COLS_COL_NDV,
      InfoSchemaConstants.COLS_COL_EST_NUM_NON_NULLS,
      InfoSchemaConstants.COLS_COL_IS_NESTED);

  String columnsQuery = "select %s from information_schema.`columns` where table_name "
      + "in ('%s', '%s')";

  client.testBuilder()
      .sqlQuery(columnsQuery, String.join(", ", columns), tableNoSchema.getTableInfo().name(), tableName)
      .unOrdered()
      .baselineColumns(columns.toArray(new String[0]))
      .baselineValues("DRILL", "dfs.tmp", tableName, "bigint_col", 1, null, "YES", "BIGINT",
          null, null, 0, 2, 0, null, null, null, 20, null, 100L, null, null, 10.5D, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "decimal_col", 2, null, "YES", "DECIMAL",
          null, null, 10, 10, 2, null, null, null, 12, null, null, null, null, null, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "interval_col", 3, null, "NO", "INTERVAL",
          null, null, null, null, null, null, "INTERVAL YEAR TO MONTH", 0, 9, null, null, null, null, null, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "array_col", 4, null, "NO", "ARRAY",
          null, null, null, null, null, null, null, null, 0, null, null, null, null, null, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col", 5, null, "NO", "STRUCT",
          null, null, null, null, null, null, null, null, 0, null, null, null, null, null, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col.struct_bigint", 5, null, "YES", "BIGINT",
          null, null, 0, 2, 0, null, null, null, 20, null, null, null, null, null, 10.5D, true)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col.struct_varchar", 5, null, "NO", "CHARACTER VARYING",
          65535, 65535, null, null, null, null, null, null, 65535, null, null, null, null, null, null, true)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col.nested_struct", 5, null, "NO", "STRUCT",
          null, null, null, null, null, null, null, null, 0, null, null, null, null, null, null, true)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col.nested_struct.nested_struct_boolean", 5, null, "YES", "BOOLEAN",
          null, null, null, null, null, null, null, null, 1, null, null, null, null, null, null, true)
      .baselineValues("DRILL", "dfs.tmp", tableName, "struct_col.nested_struct.nested_struct_varchar", 5, null, "NO", "CHARACTER VARYING",
          65535, 65535, null, null, null, null, null, null, 65535, null, null, "bbb", "ccc", null, null, true)
      .baselineValues("DRILL", "dfs.tmp", tableName, "varchar_col", 6, "ABC", "NO", "CHARACTER VARYING",
          65535, 65535, null, null, null, null, null, null, 65535, null, null, "aaa", "zzz", null, null, false)
      .baselineValues("DRILL", "dfs.tmp", tableName, "timestamp_col", 7, null, "NO", "TIMESTAMP",
          null, null, null, null, null, 19, null, null, 19, "yyyy-MM-dd HH:mm:ss", null, null, null, null, null, false)
      .go();
}
Aggregations