Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
Class TestMetastoreCommands, method testAnalyzeEmptyRequiredParquetTable.
/**
 * Creates an empty table through CTAS (the {@code where 1 = 2} filter rejects every row),
 * collects its metadata with {@code ANALYZE TABLE ... REFRESH METADATA} and verifies that:
 * <ul>
 *   <li>the Metastore reports the table as existing;</li>
 *   <li>the stored table metadata equals the expected one (zero row count, {@code date} and
 *       {@code name} columns added through {@code SchemaBuilder.add});</li>
 *   <li>exactly one file metadata unit and one row group metadata unit are stored;</li>
 *   <li>{@code INFORMATION_SCHEMA.COLUMNS} lists both columns of the table.</li>
 * </ul>
 * Metadata and the table itself are dropped in the {@code finally} block.
 */
@Test
public void testAnalyzeEmptyRequiredParquetTable() throws Exception {
  String tableName = "analyze_empty_simple_required";

  run("create table dfs.tmp.%s as select 1 as `date`, 'a' as name from (values(1)) where 1 = 2", tableName);

  File tableDir = new File(dirTestWatcher.getDfsTestTmpDir(), tableName);
  TableInfo tableInfo = getTableInfo(tableName, "tmp");

  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("date", TypeProtos.MinorType.INT)
      .add("name", TypeProtos.MinorType.VARCHAR)
      .build();

  // both columns are expected to have no min / max values and zero as the third statistics argument
  Map<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics =
      ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
          .put(SchemaPath.getSimplePath("name"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
          .put(SchemaPath.getSimplePath("date"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.INT))
          .build();

  Path tableLocation = new Path(tableDir.toURI().getPath());

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(expectedSchema)
      .location(tableLocation)
      .columnsStatistics(expectedColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(tableDir))
      .build();

  try {
    String expectedSummary = String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName);

    testBuilder()
        .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .metastoreTableInfo(tableInfo);
    assertTrue("table metadata wasn't found", storedTableInfo.isExists());

    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);

    List<FileMetadata> files = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(1, files.size());

    List<RowGroupMetadata> rowGroups = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .rowGroupsMetadata(tableInfo, (String) null, null);
    assertEquals(1, rowGroups.size());

    testBuilder()
        .sqlQuery("select COLUMN_NAME from INFORMATION_SCHEMA.`COLUMNS` where table_name='%s'", tableName)
        .unOrdered()
        .baselineColumns("COLUMN_NAME")
        .baselineValues("date")
        .baselineValues("name")
        .go();
  } finally {
    run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    run("drop table if exists dfs.tmp.`%s`", tableName);
  }
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
Class TestMetastoreCommands, method testIncrementalAnalyzeUpdatedFile.
/**
 * Checks that a repeated {@code ANALYZE TABLE ... REFRESH METADATA} picks up a data file whose
 * content and modification time were changed after the first analyze.
 * <p>
 * The test analyzes a copy of the {@code multilevel/parquet} resource, replaces
 * {@code 1994/Q4/orders_94_q4.parquet} with the content of {@code 1994/Q1/orders_94_q1.parquet},
 * moves the file's modification time one second forward, analyzes again and compares the stored
 * table metadata with the expected one (adjusted {@code o_clerk} minimum and
 * {@code o_totalprice} maximum, new last modified time). The number of segment, file and
 * row group metadata units is expected to stay 15 / 12 / 12 before and after the update.
 */
@Test
public void testIncrementalAnalyzeUpdatedFile() throws Exception {
  String tableName = "multilevel/parquetUpdatedFile";
  File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
  TableInfo tableInfo = getTableInfo(tableName, "tmp");

  try {
    String analyzeQuery = "ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA";
    String expectedSummary = String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName);

    // initial metadata collection
    testBuilder()
        .sqlQuery(analyzeQuery, tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    List<SegmentMetadata> segments = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .segmentsMetadataByMetadataKey(tableInfo, null, null);
    assertEquals(15, segments.size());

    List<FileMetadata> files = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(12, files.size());

    List<RowGroupMetadata> rowGroups = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .rowGroupsMetadata(tableInfo, null, (String) null);
    assertEquals(12, rowGroups.size());

    File quarterDir = new File(new File(table, "1994"), "Q4");
    File updatedFile = new File(quarterDir, "orders_94_q4.parquet");
    long originalTimestamp = updatedFile.lastModified();
    FileUtils.deleteQuietly(updatedFile);

    // replaces original file
    dirTestWatcher.copyResourceToTestTmp(
        Paths.get("multilevel", "parquet", "1994", "Q1", "orders_94_q1.parquet"),
        Paths.get(tableName, "1994", "Q4", "orders_94_q4.parquet"));

    long bumpedTimestamp = originalTimestamp + 1000;
    assertTrue(updatedFile.setLastModified(bumpedTimestamp));

    // second metadata collection, performed after the file was replaced
    testBuilder()
        .sqlQuery(analyzeQuery, tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);

    // expected statistics: the shared table statistics with two values overridden
    Map<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
    expectedColumnStatistics.computeIfPresent(SchemaPath.getSimplePath("o_clerk"),
        (path, stats) -> stats.cloneWith(new ColumnStatistics<>(Collections.singletonList(new StatisticsHolder<>("Clerk#000000006", ColumnStatisticsKind.MIN_VALUE)))));
    expectedColumnStatistics.computeIfPresent(SchemaPath.getSimplePath("o_totalprice"),
        (path, stats) -> stats.cloneWith(new ColumnStatistics<>(Collections.singletonList(new StatisticsHolder<>(328207.15, ColumnStatisticsKind.MAX_VALUE)))));

    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(SCHEMA)
        .location(new Path(table.toURI().getPath()))
        .columnsStatistics(expectedColumnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(bumpedTimestamp)
        .build();
    assertEquals(expectedTableMetadata, actualTableMetadata);

    // unit counts must be unchanged by the incremental analyze
    segments = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .segmentsMetadataByMetadataKey(tableInfo, null, null);
    assertEquals(15, segments.size());

    files = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(12, files.size());

    rowGroups = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .rowGroupsMetadata(tableInfo, null, (String) null);
    assertEquals(12, rowGroups.size());
  } finally {
    run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    FileUtils.deleteQuietly(table);
  }
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
Class TestMetastoreCommands, method testSelectWithDisabledMetastore.
/**
 * Checks that the same query returns the same number of rows and that its plan reports
 * {@code usedMetastore=true} while the Metastore is enabled and {@code usedMetastore=false}
 * after {@code ExecConstants.METASTORE_ENABLED} is switched off for the session.
 * <p>
 * The table is created from {@code cp.`tpch/region.parquet`} and analyzed with
 * {@code columns none}. In the {@code finally} block all metadata of the table is deleted
 * through the Metastore API, the table is dropped and the session option is reset.
 */
@Test
public void testSelectWithDisabledMetastore() throws Exception {
  String tableName = "region_parquet";
  TableInfo tableInfo = getTableInfo(tableName, "tmp");
  try {
    run("create table dfs.tmp.`%s` as\n" + "select * from cp.`tpch/region.parquet`", tableName);

    testBuilder()
        .sqlQuery("analyze table dfs.tmp.`%s` columns none REFRESH METADATA", tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
        .go();

    String query = "select mykey from dfs.tmp.`%s` where mykey is null";

    // Metastore enabled: the result is produced and the plan must be built with Metastore metadata
    long rowCountWithMetastore = queryBuilder().sql(query, tableName).run().recordCount();
    assertEquals("Row count does not match the expected value", 5, rowCountWithMetastore);

    String usedMetaPattern = "usedMetastore=true";
    queryBuilder().sql(query, tableName).planMatcher().include(usedMetaPattern).match();

    client.alterSession(ExecConstants.METASTORE_ENABLED, false);

    // Metastore disabled: capture the row count of THIS run, so the assertion really verifies
    // the query executed without the Metastore instead of re-checking the earlier result
    long rowCountWithoutMetastore = queryBuilder().sql(query, tableName).run().recordCount();
    assertEquals("Row count does not match the expected value", 5, rowCountWithoutMetastore);

    usedMetaPattern = "usedMetastore=false";
    queryBuilder().sql(query, tableName).planMatcher().include(usedMetaPattern).match();
  } finally {
    // metadata is removed through the Metastore API rather than through SQL
    cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .modify()
        .delete(Delete.builder()
            .metadataType(MetadataType.ALL)
            .filter(tableInfo.toFilter())
            .build())
        .execute();
    run("drop table if exists dfs.tmp.`%s`", tableName);
    client.resetSession(ExecConstants.METASTORE_ENABLED);
  }
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
Class TestMetastoreCommands, method testAnalyzeLowerLevelMetadata.
@Test
/**
 * Checks that metadata for levels below the one specified in the analyze statement is absent.
 * <p>
 * For each of the {@code FILE}, {@code SEGMENT} and {@code TABLE} levels the table is analyzed
 * with that level, then the Metastore is queried for all metadata types that are ordered after
 * the analyze level, after {@code SEGMENT} and before {@code ALL}; the request must return
 * nothing. Collected metadata is dropped after every iteration.
 */
public void testAnalyzeLowerLevelMetadata() throws Exception {
  // checks that metadata for levels below specified in analyze statement is absent
  String tableName = "multilevel/parquetLowerLevel";
  TableInfo tableInfo = getTableInfo(tableName, "tmp");

  dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));

  List<MetadataType> analyzeLevels = Arrays.asList(MetadataType.FILE, MetadataType.SEGMENT, MetadataType.TABLE);

  for (MetadataType analyzeLevel : analyzeLevels) {
    try {
      testBuilder()
          .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA '%s' level", tableName, analyzeLevel.name())
          .unOrdered()
          .baselineColumns("ok", "summary")
          .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
          .go();

      Set<MetadataType> emptyMetadataLevels = Arrays.stream(MetadataType.values())
          .filter(metadataType -> metadataType.compareTo(analyzeLevel) > 0
              // for the case when there are no segment metadata, default segment is present
              && metadataType.compareTo(MetadataType.SEGMENT) > 0
              && metadataType.compareTo(MetadataType.ALL) < 0)
          .collect(Collectors.toSet());

      BasicTablesRequests.RequestMetadata requestMetadata = BasicTablesRequests.RequestMetadata.builder()
          .tableInfo(tableInfo)
          .metadataTypes(emptyMetadataLevels)
          .build();

      List<TableMetadataUnit> metadataUnitList = cluster.drillbit().getContext()
          .getMetastoreRegistry()
          .get()
          .tables()
          .basicRequests()
          .request(requestMetadata);

      // The unit list is passed as a format ARGUMENT, not concatenated into the pattern:
      // a '%' inside its string form would otherwise be parsed as a format specifier and
      // could make building this message throw instead of reporting the real failure.
      assertTrue(
          String.format("Some metadata [%s] for [%s] analyze query level is present: %s",
              emptyMetadataLevels, analyzeLevel, metadataUnitList),
          metadataUnitList.isEmpty());
    } finally {
      run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
  }
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
Class TestMetastoreCommands, method testAnalyzeEmptyNullableParquetTable.
/**
 * Copies the {@code parquet/empty/simple/empty_simple.parquet} resource to the test root,
 * collects its metadata with {@code ANALYZE TABLE ... REFRESH METADATA} and verifies that:
 * <ul>
 *   <li>the Metastore reports the table as existing;</li>
 *   <li>the stored table metadata equals the expected one (zero row count, nullable
 *       {@code id} and {@code name} columns);</li>
 *   <li>exactly one file metadata unit and one row group metadata unit are stored.</li>
 * </ul>
 * Collected metadata is dropped in the {@code finally} block.
 */
@Test
public void testAnalyzeEmptyNullableParquetTable() throws Exception {
  File tableFile = dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "empty", "simple", "empty_simple.parquet"));

  String tableName = "parquet/empty/simple/empty_simple.parquet";
  TableInfo tableInfo = getTableInfo(tableName, "default");

  TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable("id", TypeProtos.MinorType.BIGINT)
      .addNullable("name", TypeProtos.MinorType.VARCHAR)
      .build();

  // both columns are expected to have no min / max values and zero as the third statistics argument
  Map<SchemaPath, ColumnStatistics<?>> expectedColumnStatistics =
      ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
          .put(SchemaPath.getSimplePath("name"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
          .put(SchemaPath.getSimplePath("id"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.BIGINT))
          .build();

  Path tableLocation = new Path(tableFile.toURI().getPath());

  BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(TABLE_META_INFO)
      .schema(expectedSchema)
      .location(tableLocation)
      .columnsStatistics(expectedColumnStatistics)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
      .partitionKeys(Collections.emptyMap())
      .lastModifiedTime(getMaxLastModified(tableFile))
      .build();

  try {
    String expectedSummary = String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName);

    testBuilder()
        .sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName)
        .unOrdered()
        .baselineColumns("ok", "summary")
        .baselineValues(true, expectedSummary)
        .go();

    MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .metastoreTableInfo(tableInfo);
    assertTrue("table metadata wasn't found", storedTableInfo.isExists());

    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);

    List<FileMetadata> files = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .filesMetadata(tableInfo, null, null);
    assertEquals(1, files.size());

    List<RowGroupMetadata> rowGroups = cluster.drillbit().getContext()
        .getMetastoreRegistry()
        .get()
        .tables()
        .basicRequests()
        .rowGroupsMetadata(tableInfo, (String) null, null);
    assertEquals(1, rowGroups.size());
  } finally {
    run("analyze table dfs.`%s` drop metadata if exists", tableName);
  }
}
Aggregations