
Example 11 with MetastoreTableInfo

Use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.

From the class TestMetastoreWithEasyFormatPlugin, the method testEmptyCSV:

@Test
public void testEmptyCSV() throws Exception {
    String tableName = "store/text/directoryWithEmptyCSV/empty.csv";
    File tablePath = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "default", "csv");
    TupleMetadata schema = new SchemaBuilder().add("Description", TypeProtos.MinorType.VARCHAR).build();
    ImmutableMap<SchemaPath, ColumnStatistics<?>> tableColumnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("Description"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
        .build();
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(schema)
        .location(new Path(tablePath.toURI().getPath()))
        .columnsStatistics(tableColumnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(tablePath))
        .build();
    try {
        testBuilder()
            .sqlQuery("analyze table table(dfs.`%s` (schema=>'inline=(`Description` VARCHAR not null)')) refresh metadata", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
            .go();
        MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
        BaseTableMetadata tableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, tableMetadata);
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(1, filesMetadata.size());
        String query = "select * from dfs.`%s`";
        queryBuilder().sql(query, tableName).planMatcher().include("usedMetastore=true").match();
        testBuilder().sqlQuery(query, tableName).unOrdered().baselineColumns("Description").expectsEmptyResultSet().go();
    } finally {
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used: ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics), TestMetastoreCommands.getColumnStatistics (org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics), Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo), FileMetadata (org.apache.drill.metastore.metadata.FileMetadata), BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata), TestMetastoreCommands.getBaseTableMetadata (org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), TableInfo (org.apache.drill.metastore.metadata.TableInfo), File (java.io.File), ClusterTest (org.apache.drill.test.ClusterTest), SlowTest (org.apache.drill.categories.SlowTest), MetastoreTest (org.apache.drill.categories.MetastoreTest), Test (org.junit.Test)
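The metastore is reached in this test through the chain cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests(), repeated once per assertion. A minimal sketch of the same checks with that chain factored into a local variable (assuming, as the method name suggests, that basicRequests() returns a BasicTablesRequests from org.apache.drill.metastore.components.tables):

// sketch only: factor the repeated registry traversal out of the assertions
BasicTablesRequests basicRequests = cluster.drillbit().getContext()
    .getMetastoreRegistry().get().tables().basicRequests();
MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
BaseTableMetadata tableMetadata = basicRequests.tableMetadata(tableInfo);
assertEquals(expectedTableMetadata, tableMetadata);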

Example 12 with MetastoreTableInfo

Use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.

From the class TestMetastoreCommands, the method testAnalyzeEmptyRequiredParquetTable:

@Test
public void testAnalyzeEmptyRequiredParquetTable() throws Exception {
    String tableName = "analyze_empty_simple_required";
    run("create table dfs.tmp.%s as select 1 as `date`, 'a' as name from (values(1)) where 1 = 2", tableName);
    File table = new File(dirTestWatcher.getDfsTestTmpDir(), tableName);
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    TupleMetadata schema = new SchemaBuilder().add("date", TypeProtos.MinorType.INT).add("name", TypeProtos.MinorType.VARCHAR).build();
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("name"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("date"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.INT))
        .build();
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(schema)
        .location(new Path(table.toURI().getPath()))
        .columnsStatistics(columnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(table))
        .build();
    try {
        testBuilder()
            .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
            .go();
        MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
        BaseTableMetadata tableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, tableMetadata);
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(1, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(1, rowGroupsMetadata.size());
        testBuilder().sqlQuery("select COLUMN_NAME from INFORMATION_SCHEMA.`COLUMNS` where table_name='%s'", tableName).unOrdered().baselineColumns("COLUMN_NAME").baselineValues("date").baselineValues("name").go();
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        run("drop table if exists dfs.tmp.`%s`", tableName);
    }
}
Also used: ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics), Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo), FileMetadata (org.apache.drill.metastore.metadata.FileMetadata), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata), BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), TableInfo (org.apache.drill.metastore.metadata.TableInfo), File (java.io.File), ClusterTest (org.apache.drill.test.ClusterTest), SlowTest (org.apache.drill.categories.SlowTest), MetastoreTest (org.apache.drill.categories.MetastoreTest), Test (org.junit.Test)
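The getTableInfo(tableName, "tmp") helper comes from TestMetastoreCommands and is not shown in this extract. A hypothetical sketch of what such a helper could build, assuming TableInfo.builder() exposes storagePlugin/workspace/name setters (the real helper may set additional fields such as owner or type):

// hypothetical reconstruction, not the original helper
TableInfo tableInfo = TableInfo.builder()
    .storagePlugin("dfs")
    .workspace("tmp")
    .name(tableName)
    .build();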

Example 13 with MetastoreTableInfo

Use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.

From the class TestMetastoreCommands, the method testAnalyzeEmptyNullableParquetTable:

@Test
public void testAnalyzeEmptyNullableParquetTable() throws Exception {
    File table = dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "empty", "simple", "empty_simple.parquet"));
    String tableName = "parquet/empty/simple/empty_simple.parquet";
    TableInfo tableInfo = getTableInfo(tableName, "default");
    TupleMetadata schema = new SchemaBuilder().addNullable("id", TypeProtos.MinorType.BIGINT).addNullable("name", TypeProtos.MinorType.VARCHAR).build();
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("name"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("id"), getColumnStatistics(null, null, 0L, TypeProtos.MinorType.BIGINT))
        .build();
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(schema)
        .location(new Path(table.toURI().getPath()))
        .columnsStatistics(columnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(table))
        .build();
    try {
        testBuilder()
            .sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
            .go();
        MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
        BaseTableMetadata tableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, tableMetadata);
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(1, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(1, rowGroupsMetadata.size());
    } finally {
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used: ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics), Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo), FileMetadata (org.apache.drill.metastore.metadata.FileMetadata), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata), BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), TableInfo (org.apache.drill.metastore.metadata.TableInfo), File (java.io.File), ClusterTest (org.apache.drill.test.ClusterTest), SlowTest (org.apache.drill.categories.SlowTest), MetastoreTest (org.apache.drill.categories.MetastoreTest), Test (org.junit.Test)
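MetastoreTableInfo carries enough state for a simple staleness check: besides isExists(), a later example below reads lastModifiedTime() from it. A small sketch (an assumption, not part of this test) comparing that stored timestamp with the data files on disk via the getMaxLastModified(...) helper already used in these tests:

// sketch: refresh metadata only if the files changed after the last ANALYZE
MetastoreTableInfo info = cluster.drillbit().getContext()
    .getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
if (info.isExists() && info.lastModifiedTime() < getMaxLastModified(table)) {
    run("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName);
}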

Example 14 with MetastoreTableInfo

Use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.

From the class TestMetastoreCommands, the method testAnalyzeNonEmptyTableWithEmptyFile:

@Test
public void testAnalyzeNonEmptyTableWithEmptyFile() throws Exception {
    String tableName = "parquet_with_empty_file";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("parquet", "empty", "simple"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    TupleMetadata schema = new SchemaBuilder().addNullable("id", TypeProtos.MinorType.BIGINT).addNullable("name", TypeProtos.MinorType.VARCHAR).build();
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("name"), getColumnStatistics("Tom", "Tom", 1L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("id"), getColumnStatistics(2L, 2L, 1L, TypeProtos.MinorType.BIGINT))
        .build();
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(schema)
        .location(new Path(table.toURI().getPath()))
        .columnsStatistics(columnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(1L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(table))
        .build();
    try {
        testBuilder()
            .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
            .go();
        MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
        BaseTableMetadata tableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, tableMetadata);
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(2, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(2, rowGroupsMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used: ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics), Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo), FileMetadata (org.apache.drill.metastore.metadata.FileMetadata), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata), BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), TableInfo (org.apache.drill.metastore.metadata.TableInfo), File (java.io.File), ClusterTest (org.apache.drill.test.ClusterTest), SlowTest (org.apache.drill.categories.SlowTest), MetastoreTest (org.apache.drill.categories.MetastoreTest), Test (org.junit.Test)
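This table is backed by two parquet files, one of them empty, so the expected statistics (row count 1, min/max "Tom" and 2) come entirely from the non-empty file while filesMetadata and rowGroupsMetadata still report both entries. A hedged sketch listing the files behind the table; getPath() is assumed to be the FileMetadata accessor for the file location and may differ in the actual API:

for (FileMetadata file : filesMetadata) {
    // expected: one empty file and one file contributing the single row
    System.out.println(file.getPath());  // getPath() assumed here
}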

Example 15 with MetastoreTableInfo

Use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.

From the class TestMetastoreCommands, the method testIncrementalAnalyzeUnchangedTable:

@Test
public void testIncrementalAnalyzeUnchangedTable() throws Exception {
    String tableName = "multilevel/parquetUnchanged";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    long lastModifiedTime = getMaxLastModified(table);
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(false, "Table metadata is up to date, analyze wasn't performed.").go();
        segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        long postAnalyzeLastModifiedTime = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo).lastModifiedTime();
        assertEquals(lastModifiedTime, postAnalyzeLastModifiedTime);
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used: SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata), TableInfo (org.apache.drill.metastore.metadata.TableInfo), MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), File (java.io.File), ClusterTest (org.apache.drill.test.ClusterTest), SlowTest (org.apache.drill.categories.SlowTest), MetastoreTest (org.apache.drill.categories.MetastoreTest), Test (org.junit.Test)
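The point of this example is the second ANALYZE: on an unchanged table it returns ok=false and leaves the stored lastModifiedTime untouched. A minimal sketch of the same behaviour expressed with the run(...) helper used elsewhere in these tests rather than testBuilder() baselines (an illustration, not the original assertions):

run("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName);  // first run collects metadata
// a second run on an unchanged table is expected to report:
//   ok=false, summary="Table metadata is up to date, analyze wasn't performed."
run("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName);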

Aggregations

MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo): 16 uses
Test (org.junit.Test): 13 uses
TableInfo (org.apache.drill.metastore.metadata.TableInfo): 11 uses
MetastoreTest (org.apache.drill.categories.MetastoreTest): 9 uses
SlowTest (org.apache.drill.categories.SlowTest): 9 uses
ClusterTest (org.apache.drill.test.ClusterTest): 9 uses
File (java.io.File): 7 uses
SchemaPath (org.apache.drill.common.expression.SchemaPath): 6 uses
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 6 uses
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 5 uses
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 5 uses
BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata): 5 uses
FileMetadata (org.apache.drill.metastore.metadata.FileMetadata): 5 uses
ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics): 5 uses
Path (org.apache.hadoop.fs.Path): 5 uses
RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata): 3 uses
MetadataControllerContext (org.apache.drill.exec.metastore.analyze.MetadataControllerContext): 2 uses
TestMetastoreCommands.getBaseTableMetadata (org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata): 2 uses
TestMetastoreCommands.getColumnStatistics (org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics): 2 uses
AbstractBasicTablesRequestsTest (org.apache.drill.metastore.components.tables.AbstractBasicTablesRequestsTest): 2 uses