Search in sources :

Example 6 with BaseTableMetadata

use of org.apache.drill.metastore.metadata.BaseTableMetadata in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeWithEmptyColumns.

@Test
public void testIncrementalAnalyzeWithEmptyColumns() throws Exception {
    String tableName = "multilevel/parquetEmptyColumns";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    Map<SchemaPath, ColumnStatistics<?>> updatedTableColumnStatistics = new HashMap<>();
    SchemaPath orderStatusPath = SchemaPath.getSimplePath("o_orderstatus");
    SchemaPath orderDatePath = SchemaPath.getSimplePath("o_orderdate");
    SchemaPath dir0Path = SchemaPath.getSimplePath("dir0");
    SchemaPath dir1Path = SchemaPath.getSimplePath("dir1");
    updatedTableColumnStatistics.put(orderStatusPath, TABLE_COLUMN_STATISTICS.get(orderStatusPath));
    updatedTableColumnStatistics.put(orderDatePath, TABLE_COLUMN_STATISTICS.get(orderDatePath));
    updatedTableColumnStatistics.put(dir0Path, TABLE_COLUMN_STATISTICS.get(dir0Path));
    updatedTableColumnStatistics.put(dir1Path, TABLE_COLUMN_STATISTICS.get(dir1Path));
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(updatedTableColumnStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ROW_GROUP, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).interestingColumns(Arrays.asList(orderStatusPath, orderDatePath)).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` columns(o_orderstatus, o_orderdate) REFRESH METADATA 'row_group' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        // checks that analyze wasn't produced though interesting columns list differs, but it is a sublist of previously analyzed table
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` columns NONE REFRESH METADATA 'row_group' LEVEL", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(false, "Table metadata is up to date, analyze wasn't performed.").go();
        actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) HashMap(java.util.HashMap) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 7 with BaseTableMetadata

use of org.apache.drill.metastore.metadata.BaseTableMetadata in project drill by apache.

the class TestMetastoreCommands method testIncrementalAnalyzeNewFile.

@Test
public void testIncrementalAnalyzeNewFile() throws Exception {
    String tableName = "multilevel/parquetNewFile";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    // updates statistics values due to new segment
    Map<SchemaPath, ColumnStatistics<?>> updatedStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
    updatedStatistics.replaceAll((logicalExpressions, columnStatistics) -> columnStatistics.cloneWith(new ColumnStatistics<>(Arrays.asList(new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(130L, ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(updatedStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentsMetadata.size());
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(12, rowGroupsMetadata.size());
        dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel", "parquet", "1994", "Q4", "orders_94_q4.parquet"), Paths.get(tableName, "1994", "Q4", "orders_94_q4_1.parquet"));
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        segmentsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        // verifies that segments count left unchanged
        assertEquals(15, segmentsMetadata.size());
        filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(13, filesMetadata.size());
        rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(13, rowGroupsMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 8 with BaseTableMetadata

use of org.apache.drill.metastore.metadata.BaseTableMetadata in project drill by apache.

the class TestMetastoreCommands method testDropMetadata.

@Test
public void testDropMetadata() throws Exception {
    String tableName = "tableDropMetadata";
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    try {
        run("create table dfs.tmp.`%s` as\n" + "select * from cp.`tpch/region.parquet`", tableName);
        testBuilder().sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertTrue(metastoreTableInfo.isExists());
        BaseTableMetadata baseTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertNotNull(baseTableMetadata);
        List<RowGroupMetadata> rowGroupMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(1, rowGroupMetadata.size());
        testBuilder().sqlQuery("analyze table dfs.tmp.`%s` drop metadata", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Metadata for table [%s] dropped.", tableName)).go();
        metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
        assertFalse(metastoreTableInfo.isExists());
        baseTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertNull(baseTableMetadata);
        rowGroupMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, (String) null, null);
        assertEquals(0, rowGroupMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        run("drop table if exists dfs.tmp.`%s`", tableName);
        client.resetSession(ExecConstants.METASTORE_ENABLED);
    }
}
Also used : MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 9 with BaseTableMetadata

use of org.apache.drill.metastore.metadata.BaseTableMetadata in project drill by apache.

the class TestMetastoreCommands method testDefaultSegment.

@Test
public void testDefaultSegment() throws Exception {
    String tableName = "multilevel/parquet/1994/Q1";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get(tableName), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    Map<SchemaPath, ColumnStatistics<?>> tableColumnStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
    tableColumnStatistics.remove(SchemaPath.getSimplePath("dir0"));
    tableColumnStatistics.remove(SchemaPath.getSimplePath("dir1"));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderstatus"), getColumnStatistics("F", "F", 120L, TypeProtos.MinorType.VARCHAR));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderkey"), getColumnStatistics(66, 833, 833L, TypeProtos.MinorType.INT));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_clerk"), getColumnStatistics("Clerk#000000062", "Clerk#000000973", 120L, TypeProtos.MinorType.VARCHAR));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_totalprice"), getColumnStatistics(3266.69, 132531.73, 120L, TypeProtos.MinorType.FLOAT8));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_comment"), getColumnStatistics(" special pinto beans use quickly furiously even depende", "y pending requests integrate", 120L, TypeProtos.MinorType.VARCHAR));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_custkey"), getColumnStatistics(392, 1411, 120L, TypeProtos.MinorType.INT));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderdate"), getColumnStatistics(757382400000L, 764640000000L, 120L, TypeProtos.MinorType.DATE));
    tableColumnStatistics.replaceAll((logicalExpressions, columnStatistics) -> columnStatistics.cloneWith(new ColumnStatistics<>(Arrays.asList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(10L, ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(new SchemaBuilder().add("o_orderkey", TypeProtos.MinorType.INT).add("o_custkey", TypeProtos.MinorType.INT).add("o_orderstatus", TypeProtos.MinorType.VARCHAR).add("o_totalprice", TypeProtos.MinorType.FLOAT8).add("o_orderdate", TypeProtos.MinorType.DATE).add("o_orderpriority", TypeProtos.MinorType.VARCHAR).add("o_clerk", TypeProtos.MinorType.VARCHAR).add("o_shippriority", TypeProtos.MinorType.INT).add("o_comment", TypeProtos.MinorType.VARCHAR).build()).location(tablePath).columnsStatistics(tableColumnStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
    SegmentMetadata defaultSegment = SegmentMetadata.builder().tableInfo(TableInfo.builder().name(tableName).storagePlugin("dfs").workspace("tmp").build()).metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).key(MetadataInfo.DEFAULT_SEGMENT_KEY).build()).lastModifiedTime(new File(table, "orders_94_q1.parquet").lastModified()).columnsStatistics(Collections.emptyMap()).metadataStatistics(Collections.emptyList()).path(tablePath).locations(ImmutableSet.of(new Path(tablePath, "orders_94_q1.parquet"))).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(1, segmentMetadata.size());
        assertEquals(defaultSegment, segmentMetadata.get(0));
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 10 with BaseTableMetadata

use of org.apache.drill.metastore.metadata.BaseTableMetadata in project drill by apache.

the class TestMetastoreCommands method testTableMetadataWithLevels.

@Test
public void testTableMetadataWithLevels() throws Exception {
    List<MetadataType> analyzeLevels = Arrays.asList(MetadataType.ROW_GROUP, MetadataType.FILE, MetadataType.SEGMENT, MetadataType.TABLE);
    String tableName = "multilevel/parquetLevels";
    File tablePath = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    for (MetadataType analyzeLevel : analyzeLevels) {
        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(new Path(tablePath.toURI().getPath())).columnsStatistics(TABLE_COLUMN_STATISTICS).metadataStatistics(Arrays.asList(new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(analyzeLevel, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(tablePath)).build();
        try {
            testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA '%s' level", tableName, analyzeLevel.name()).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
            BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
            assertEquals(String.format("Table metadata mismatch for [%s] metadata level", analyzeLevel), expectedTableMetadata, actualTableMetadata);
        } finally {
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Aggregations

BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)35 MetastoreTest (org.apache.drill.categories.MetastoreTest)33 Test (org.junit.Test)33 ClusterTest (org.apache.drill.test.ClusterTest)30 TableInfo (org.apache.drill.metastore.metadata.TableInfo)29 SchemaPath (org.apache.drill.common.expression.SchemaPath)28 SlowTest (org.apache.drill.categories.SlowTest)27 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)27 Path (org.apache.hadoop.fs.Path)27 File (java.io.File)26 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)23 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)21 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)17 HashMap (java.util.HashMap)16 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)15 SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)13 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)12 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)12 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)11 TestMetastoreCommands.getBaseTableMetadata (org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata)6