Search in sources :

Example 46 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreCommands method testAnalyzeWithMapColumns.

@Test
public void testAnalyzeWithMapColumns() throws Exception {
    String tableName = "complex";
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("store/parquet/complex/complex.parquet"), Paths.get(tableName));
    TupleMetadata schema = new SchemaBuilder().addNullable("trans_id", TypeProtos.MinorType.BIGINT).addNullable("date", TypeProtos.MinorType.VARCHAR).addNullable("time", TypeProtos.MinorType.VARCHAR).addNullable("amount", TypeProtos.MinorType.FLOAT8).addMap("user_info").addNullable("cust_id", TypeProtos.MinorType.BIGINT).addNullable("device", TypeProtos.MinorType.VARCHAR).addNullable("state", TypeProtos.MinorType.VARCHAR).resumeSchema().addMap("marketing_info").addNullable("camp_id", TypeProtos.MinorType.BIGINT).addArray("keywords", TypeProtos.MinorType.VARCHAR).resumeSchema().addMap("trans_info").addArray("prod_id", TypeProtos.MinorType.BIGINT).addNullable("purch_flag", TypeProtos.MinorType.VARCHAR).resumeSchema().build();
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder().put(SchemaPath.getCompoundPath("user_info", "state"), getColumnStatistics("ct", "nj", 5L, TypeProtos.MinorType.VARCHAR)).put(SchemaPath.getSimplePath("date"), getColumnStatistics("2013-05-16", "2013-07-26", 5L, TypeProtos.MinorType.VARCHAR)).put(SchemaPath.getSimplePath("time"), getColumnStatistics("04:56:59", "15:31:45", 5L, TypeProtos.MinorType.VARCHAR)).put(SchemaPath.getCompoundPath("user_info", "cust_id"), getColumnStatistics(11L, 86623L, 5L, TypeProtos.MinorType.BIGINT)).put(SchemaPath.getSimplePath("amount"), getColumnStatistics(20.25, 500.75, 5L, TypeProtos.MinorType.FLOAT8)).put(SchemaPath.getCompoundPath("user_info", "device"), getColumnStatistics("AOS4.2", "IOS7", 5L, TypeProtos.MinorType.VARCHAR)).put(SchemaPath.getCompoundPath("marketing_info", "camp_id"), getColumnStatistics(4L, 17L, 5L, TypeProtos.MinorType.BIGINT)).put(SchemaPath.getSimplePath("trans_id"), getColumnStatistics(0L, 4L, 5L, TypeProtos.MinorType.BIGINT)).put(SchemaPath.getCompoundPath("trans_info", "purch_flag"), getColumnStatistics("false", "true", 5L, TypeProtos.MinorType.VARCHAR)).build();
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(schema).location(new Path(table.toURI().getPath())).columnsStatistics(columnStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(5L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
    try {
        testBuilder().sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 47 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeWithFewerColumns.

@Test
public void testIncrementalAnalyzeWithFewerColumns() throws Exception {
    String tableName = "multilevel/parquetFewerColumns";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    Map<SchemaPath, ColumnStatistics<?>> updatedTableColumnStatistics = new HashMap<>();
    SchemaPath orderStatusPath = SchemaPath.getSimplePath("o_orderstatus");
    SchemaPath orderDatePath = SchemaPath.getSimplePath("o_orderdate");
    SchemaPath dir0Path = SchemaPath.getSimplePath("dir0");
    SchemaPath dir1Path = SchemaPath.getSimplePath("dir1");
    updatedTableColumnStatistics.put(orderStatusPath, TABLE_COLUMN_STATISTICS.get(orderStatusPath));
    updatedTableColumnStatistics.put(orderDatePath, TABLE_COLUMN_STATISTICS.get(orderDatePath));
    updatedTableColumnStatistics.put(dir0Path, TABLE_COLUMN_STATISTICS.get(dir0Path));
    updatedTableColumnStatistics.put(dir1Path, TABLE_COLUMN_STATISTICS.get(dir1Path));
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(updatedTableColumnStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ROW_GROUP, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).interestingColumns(Arrays.asList(orderStatusPath, orderDatePath)).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` columns(o_orderstatus, o_orderdate) REFRESH METADATA 'row_group' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        // checks that analyze wasn't produced though interesting columns list differs, but it is a sublist of previously analyzed table
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` columns(o_orderstatus) REFRESH METADATA 'row_group' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(false, "Table metadata is up to date, analyze wasn't performed.").go();
        actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) HashMap(java.util.HashMap) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 48 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeRemovedFile.

@Test
public void testIncrementalAnalyzeRemovedFile() throws Exception {
    String tableName = "multilevel/parquetRemovedFile";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    BaseTableMetadata expectedTableMetadata = getBaseTableMetadata(tableInfo, table);
    try {
        dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel", "parquet", "1994", "Q4", "orders_94_q4.parquet"), Paths.get(tableName, "1994", "Q4", "orders_94_q4_1.parquet"));
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(13, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(13, rowGroupsMetadata.size());
        FileUtils.deleteQuietly(new File(new File(new File(table, "1994"), "Q4"), "orders_94_q4_1.parquet"));
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(12, rowGroupsMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used : SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 49 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeRemovedParentSegment.

@Test
public void testIncrementalAnalyzeRemovedParentSegment() throws Exception {
    String tableName = "multilevel/parquetRemovedParent";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    BaseTableMetadata expectedTableMetadata = getBaseTableMetadata(tableInfo, table);
    try {
        dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet", "1994"), Paths.get(tableName, "1993"));
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(20, segmentMetadata.size());
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(16, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(16, rowGroupsMetadata.size());
        FileUtils.deleteQuietly(new File(table, "1993"));
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(12, rowGroupsMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used : SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 50 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeWithDifferentMetadataLevel.

@Test
public void testIncrementalAnalyzeWithDifferentMetadataLevel() throws Exception {
    String tableName = "multilevel/parquetDifferentMetadataLevel";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(TABLE_COLUMN_STATISTICS).metadataStatistics(Arrays.asList(new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.FILE, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA 'file' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        List<RowGroupMetadata> rowGroupMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        assertTrue(rowGroupMetadata.isEmpty());
        // checks that analyze was produced since metadata level more specific
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA 'row_group' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(TABLE_COLUMN_STATISTICS).metadataStatistics(Arrays.asList(new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ROW_GROUP, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
        assertEquals(expectedTableMetadata, actualTableMetadata);
        rowGroupMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(12, rowGroupMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo)58 MetastoreTest (org.apache.drill.categories.MetastoreTest)50 Test (org.junit.Test)50 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)39 SchemaPath (org.apache.drill.common.expression.SchemaPath)37 BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)37 Path (org.apache.hadoop.fs.Path)36 ClusterTest (org.apache.drill.test.ClusterTest)33 SlowTest (org.apache.drill.categories.SlowTest)32 File (java.io.File)29 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)29 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)25 HashMap (java.util.HashMap)24 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)23 SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)23 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)23 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)20 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)18 BaseTest (org.apache.drill.test.BaseTest)17 MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)16