Search in sources:

Example 36 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestTableMetadataUnitConversion method testSegmentMetadata.

/**
 * Checks conversion of {@link SegmentMetadata} into {@link TableMetadataUnit}:
 * once with only the required fields populated and once with all fields populated.
 * In both cases the produced unit must equal the hand-built expected unit, and
 * a segment must be constructible back from the produced unit.
 */
@Test
public void testSegmentMetadata() {
    TableInfo tableInfo = data.basicTableInfo;
    MetadataInfo segmentInfo = MetadataInfo.builder()
        .type(MetadataType.SEGMENT)
        .key("part_int=3")
        .identifier("part_int=3/part_varchar=g")
        .build();

    Path segmentPath = new Path("/tmp/nation");
    // string form of the segment path, used for both 'path' and 'location' of the unit
    String unitPath = segmentPath.toUri().getPath();

    Set<Path> segmentLocations = new HashSet<>();
    segmentLocations.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
    segmentLocations.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));
    List<String> unitLocations = segmentLocations.stream()
        .map(segmentLocation -> segmentLocation.toUri().getPath())
        .collect(Collectors.toList());

    // --- required fields only ---
    SegmentMetadata requiredOnlySegment = SegmentMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(segmentInfo)
        .columnsStatistics(data.columnsStatistics)
        .metadataStatistics(data.metadataStatistics)
        .path(segmentPath)
        .locations(segmentLocations)
        .build();

    TableMetadataUnit requiredOnlyExpected = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin())
        .workspace(tableInfo.workspace())
        .tableName(tableInfo.name())
        .metadataType(segmentInfo.type().name())
        .metadataKey(segmentInfo.key())
        .metadataIdentifier(segmentInfo.identifier())
        .columnsStatistics(data.unitColumnsStatistics)
        .metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(BaseMetadata.UNDEFINED_TIME)
        .path(unitPath)
        .location(unitPath)
        .locations(unitLocations)
        .build();

    TableMetadataUnit requiredOnlyActual = requiredOnlySegment.toMetadataUnit();
    assertEquals(requiredOnlyExpected, requiredOnlyActual);
    assertNotNull(SegmentMetadata.builder().metadataUnit(requiredOnlyActual).build());

    // --- all fields ---
    SchemaPath partitionColumn = SchemaPath.getSimplePath("dir1");
    List<String> partitionValues = Collections.singletonList("part_varchar=g");

    SegmentMetadata completeSegment = SegmentMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(segmentInfo)
        .schema(data.schema)
        .columnsStatistics(data.columnsStatistics)
        .metadataStatistics(data.metadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .path(segmentPath)
        .locations(segmentLocations)
        .column(partitionColumn)
        .partitionValues(partitionValues)
        .build();

    TableMetadataUnit completeExpected = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin())
        .workspace(tableInfo.workspace())
        .tableName(tableInfo.name())
        .metadataType(segmentInfo.type().name())
        .metadataKey(segmentInfo.key())
        .metadataIdentifier(segmentInfo.identifier())
        .schema(data.unitSchema)
        .columnsStatistics(data.unitColumnsStatistics)
        .metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .path(unitPath)
        .location(unitPath)
        .locations(unitLocations)
        .column(partitionColumn.toString())
        .partitionValues(partitionValues)
        .build();

    TableMetadataUnit completeActual = completeSegment.toMetadataUnit();
    assertEquals(completeExpected, completeActual);
    assertNotNull(SegmentMetadata.builder().metadataUnit(completeActual).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) BaseTest(org.apache.drill.test.BaseTest) HashSet(java.util.HashSet) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Assert.assertNotNull(org.junit.Assert.assertNotNull) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) Set(java.util.Set) Test(org.junit.Test) Category(org.junit.experimental.categories.Category) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) MetastoreTest(org.apache.drill.categories.MetastoreTest) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaPath(org.apache.drill.common.expression.SchemaPath) 
TableInfo(org.apache.drill.metastore.metadata.TableInfo) HashSet(java.util.HashSet) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Example 37 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testNonEmptyTableWithEmptyFile.

/**
 * Runs ANALYZE ... REFRESH METADATA on a CSV table directory that holds one empty
 * file ({@code empty.csv}) and one populated file ({@code nations.csv}), then checks
 * the stored table metadata, the number of file metadata entries, that the query
 * plan reports Metastore usage, and the row count returned by a full scan.
 */
@Test
public void testNonEmptyTableWithEmptyFile() throws Exception {
    String tableName = "csv_with_empty_file";

    // lay out the table directory: the empty file first, then the populated one
    dirTestWatcher.copyResourceToTestTmp(
        Paths.get("store", "text", "directoryWithEmptyCSV", "empty.csv"),
        Paths.get(tableName, "empty.csv"));
    File tableDir = dirTestWatcher.copyResourceToTestTmp(
            Paths.get("store", "text", "data", "nations.csv"),
            Paths.get(tableName, "nations.csv"))
        .getParentFile();

    TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");

    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("n_nationkey", TypeProtos.MinorType.INT)
        .add("n_name", TypeProtos.MinorType.VARCHAR)
        .add("n_regionkey", TypeProtos.MinorType.INT)
        .add("n_comment", TypeProtos.MinorType.VARCHAR)
        .build();

    Map<SchemaPath, ColumnStatistics<?>> expectedColumnStats = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("n_nationkey"),
            getColumnStatistics(0, 24, 25L, TypeProtos.MinorType.INT))
        .put(SchemaPath.getSimplePath("n_name"),
            getColumnStatistics("ALGERIA", "VIETNAM", 25L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("n_regionkey"),
            getColumnStatistics(0, 4, 25L, TypeProtos.MinorType.INT))
        .put(SchemaPath.getSimplePath("n_comment"),
            getColumnStatistics(
                "alfoxespromiseslylyaccordingtotheregularaccounts.boldrequestsalon",
                "yfinalpackages.slowfoxescajolequickly.quicklysilentplateletsbreachironicaccounts.unusualpintobe",
                25L,
                TypeProtos.MinorType.VARCHAR))
        .build();

    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(expectedSchema)
        .location(new Path(tableDir.toURI().getPath()))
        .columnsStatistics(expectedColumnStats)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(25L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(tableDir))
        .build();

    try {
        testBuilder()
            .sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` "
                + "(schema=>'inline=("
                + "`n_nationkey` INT not null,"
                + "`n_name` VARCHAR not null,"
                + "`n_regionkey` INT not null,"
                + "`n_comment` VARCHAR not null)')) REFRESH METADATA", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
            .go();

        // the table must now be registered in the Metastore
        MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", storedTableInfo.isExists());

        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);

        // both files, including the empty one, are expected to have file-level metadata
        List<FileMetadata> storedFiles = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .filesMetadata(tableInfo, null, null);
        assertEquals(2, storedFiles.size());

        String selectAll = "select * from dfs.tmp.`%s`";
        queryBuilder()
            .sql(selectAll, tableName)
            .planMatcher()
            .include("usedMetastore=true")
            .match();

        long actualRowCount = queryBuilder().sql(selectAll, tableName).run().recordCount();
        assertEquals(25, actualRowCount);
    } finally {
        // always drop the collected metadata so later tests start clean
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
    }
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 38 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testAnalyzeOnTextTableWithHeader.

/**
 * Runs ANALYZE ... REFRESH METADATA on a CSV-with-header file ({@code cars.csvh})
 * and compares the table metadata stored in the Metastore against a hand-built
 * expectation (schema, per-column statistics, row count and metadata level).
 */
@Test
public void testAnalyzeOnTextTableWithHeader() throws Exception {
    String tableName = "store/text/data/cars.csvh";
    File tableFile = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "default", "csvh");

    // every column is declared as VARCHAR in the expected schema
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("Year", TypeProtos.MinorType.VARCHAR)
        .add("Make", TypeProtos.MinorType.VARCHAR)
        .add("Model", TypeProtos.MinorType.VARCHAR)
        .add("Description", TypeProtos.MinorType.VARCHAR)
        .add("Price", TypeProtos.MinorType.VARCHAR)
        .build();

    ImmutableMap<SchemaPath, ColumnStatistics<?>> expectedColumnStats = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("Description"),
            getColumnStatistics("", "ac, abs, moon", 4L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("Make"),
            getColumnStatistics("Chevy", "Jeep", 4L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("Model"),
            getColumnStatistics("E350", "Venture \"Extended Edition, Very Large\"", 4L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("Price"),
            getColumnStatistics("3000.00", "5000.00", 4L, TypeProtos.MinorType.VARCHAR))
        .put(SchemaPath.getSimplePath("Year"),
            getColumnStatistics("1996", "1999", 4L, TypeProtos.MinorType.VARCHAR))
        .build();

    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(expectedSchema)
        .location(new Path(tableFile.toURI().getPath()))
        .columnsStatistics(expectedColumnStats)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(4L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(tableFile))
        .build();

    try {
        testBuilder()
            .sqlQuery("analyze table dfs.`%s` refresh metadata", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
            .go();

        BaseTableMetadata storedTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, storedTableMetadata);
    } finally {
        // always drop the collected metadata so later tests start clean
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 39 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testAnalyzeOnJsonTable.

/**
 * Runs ANALYZE ... REFRESH METADATA with a provided schema on the partitioned
 * {@code multilevel/json} table and verifies what was stored in the Metastore:
 * <ul>
 *   <li>table-level metadata;</li>
 *   <li>the top-level ({@code dir0}) segment for 1994, plus locations of all top-level segments
 *       and of the files they contain;</li>
 *   <li>the number of nested ({@code dir1}) segments and the 1994/Q1 segment itself;</li>
 *   <li>the number of file metadata entries and the 1994/Q1 file entry itself.</li>
 * </ul>
 * Collected metadata is dropped in the {@code finally} block regardless of the outcome.
 */
@Test
public void testAnalyzeOnJsonTable() throws Exception {
    String tableName = "multilevel/json";
    TableInfo tableInfo = getTableInfo(tableName, "default", "json");
    File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());

    TupleMetadata schema = new SchemaBuilder()
        .addNullable("dir0", TypeProtos.MinorType.VARCHAR)
        .addNullable("dir1", TypeProtos.MinorType.VARCHAR)
        .addNullable("o_orderkey", TypeProtos.MinorType.BIGINT)
        .addNullable("o_custkey", TypeProtos.MinorType.BIGINT)
        .addNullable("o_orderstatus", TypeProtos.MinorType.VARCHAR)
        .addNullable("o_totalprice", TypeProtos.MinorType.FLOAT8)
        .addNullable("o_orderdate", TypeProtos.MinorType.VARCHAR)
        .addNullable("o_orderpriority", TypeProtos.MinorType.VARCHAR)
        .addNullable("o_clerk", TypeProtos.MinorType.VARCHAR)
        .addNullable("o_shippriority", TypeProtos.MinorType.BIGINT)
        .addNullable("o_comment", TypeProtos.MinorType.VARCHAR)
        .build();

    // start from the shared table-level statistics and override four columns for this table
    Map<SchemaPath, ColumnStatistics<?>> tableColumnStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_custkey"), getColumnStatistics(25L, 1498L, 120L, TypeProtos.MinorType.BIGINT));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderdate"), getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1996-12-19T00:00:00.000-08:00", 120L, TypeProtos.MinorType.VARCHAR));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_orderkey"), getColumnStatistics(1L, 1319L, 120L, TypeProtos.MinorType.BIGINT));
    tableColumnStatistics.put(SchemaPath.getSimplePath("o_shippriority"), getColumnStatistics(0L, 0L, 120L, TypeProtos.MinorType.BIGINT));

    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(schema)
        // reuse the already computed table path instead of rebuilding an equal Path
        .location(tablePath)
        .columnsStatistics(tableColumnStatistics)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(table))
        .build();

    TableInfo baseTableInfo = TableInfo.builder()
        .name(tableName)
        .storagePlugin("dfs")
        .workspace("default")
        .build();

    // expected statistics of the top-level 1994 segment
    Map<SchemaPath, ColumnStatistics<?>> dir0CSVStats = new HashMap<>(DIR0_1994_SEGMENT_COLUMN_STATISTICS);
    dir0CSVStats.put(SchemaPath.getSimplePath("o_custkey"), getColumnStatistics(25L, 1469L, 40L, TypeProtos.MinorType.BIGINT));
    dir0CSVStats.put(SchemaPath.getSimplePath("o_orderdate"), getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-12-23T00:00:00.000-08:00", 40L, TypeProtos.MinorType.VARCHAR));
    dir0CSVStats.put(SchemaPath.getSimplePath("o_orderkey"), getColumnStatistics(5L, 1031L, 40L, TypeProtos.MinorType.BIGINT));
    dir0CSVStats.put(SchemaPath.getSimplePath("o_shippriority"), getColumnStatistics(0L, 0L, 40L, TypeProtos.MinorType.BIGINT));

    SegmentMetadata dir0 = SegmentMetadata.builder()
        .tableInfo(baseTableInfo)
        .metadataInfo(MetadataInfo.builder()
            .type(MetadataType.SEGMENT)
            .identifier("1994")
            .key("1994")
            .build())
        .path(new Path(tablePath, "1994"))
        .schema(schema)
        .lastModifiedTime(getMaxLastModified(new File(table, "1994")))
        .column(SchemaPath.getSimplePath("dir0"))
        .columnsStatistics(dir0CSVStats)
        .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
        .locations(ImmutableSet.of(
            new Path(tablePath, "1994/Q1/orders_94_q1.json"),
            new Path(tablePath, "1994/Q2/orders_94_q2.json"),
            new Path(tablePath, "1994/Q3/orders_94_q3.json"),
            new Path(tablePath, "1994/Q4/orders_94_q4.json")))
        .partitionValues(Collections.singletonList("1994"))
        .build();

    Set<Path> expectedTopLevelSegmentLocations = ImmutableSet.of(
        new Path(tablePath, "1994"),
        new Path(tablePath, "1995"),
        new Path(tablePath, "1996"));

    // one set of file locations per top-level segment (1994, 1995, 1996)
    Set<Set<Path>> expectedSegmentFilesLocations = new HashSet<>();
    Set<Path> segmentFiles = ImmutableSet.of(
        new Path(tablePath, "1994/Q2/orders_94_q2.json"),
        new Path(tablePath, "1994/Q4/orders_94_q4.json"),
        new Path(tablePath, "1994/Q1/orders_94_q1.json"),
        new Path(tablePath, "1994/Q3/orders_94_q3.json"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(
        new Path(tablePath, "1995/Q2/orders_95_q2.json"),
        new Path(tablePath, "1995/Q4/orders_95_q4.json"),
        new Path(tablePath, "1995/Q1/orders_95_q1.json"),
        new Path(tablePath, "1995/Q3/orders_95_q3.json"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(
        new Path(tablePath, "1996/Q3/orders_96_q3.json"),
        new Path(tablePath, "1996/Q2/orders_96_q2.json"),
        new Path(tablePath, "1996/Q4/orders_96_q4.json"),
        new Path(tablePath, "1996/Q1/orders_96_q1.json"));
    expectedSegmentFilesLocations.add(segmentFiles);

    // expected statistics shared by the 1994/Q1 nested segment and its single file
    Map<SchemaPath, ColumnStatistics<?>> dir0q1Stats = new HashMap<>(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS);
    dir0q1Stats.put(SchemaPath.getSimplePath("o_custkey"), getColumnStatistics(392L, 1411L, 10L, TypeProtos.MinorType.BIGINT));
    dir0q1Stats.put(SchemaPath.getSimplePath("o_orderdate"), getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-03-26T00:00:00.000-08:00", 10L, TypeProtos.MinorType.VARCHAR));
    dir0q1Stats.put(SchemaPath.getSimplePath("o_orderkey"), getColumnStatistics(66L, 833L, 10L, TypeProtos.MinorType.BIGINT));
    dir0q1Stats.put(SchemaPath.getSimplePath("o_shippriority"), getColumnStatistics(0L, 0L, 10L, TypeProtos.MinorType.BIGINT));

    long dir0q1lastModified = new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.json").lastModified();
    FileMetadata dir01994q1File = FileMetadata.builder()
        .tableInfo(baseTableInfo)
        .metadataInfo(MetadataInfo.builder()
            .type(MetadataType.FILE)
            .identifier("1994/Q1/orders_94_q1.json")
            .key("1994")
            .build())
        .schema(schema)
        .lastModifiedTime(dir0q1lastModified)
        .columnsStatistics(dir0q1Stats)
        .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
        .path(new Path(tablePath, "1994/Q1/orders_94_q1.json"))
        .build();

    try {
        testBuilder()
            .sqlQuery("analyze table table(dfs.`%s`(schema=>%s)) refresh metadata", tableName, SCHEMA_STRING)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
            .go();

        // verify table metadata
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);

        // verify the top-level segment for 1994
        List<SegmentMetadata> topSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir0`");
        SegmentMetadata actualDir0Metadata = topSegmentMetadata.stream()
            .filter(unit -> unit.getMetadataInfo().identifier().equals("1994"))
            .findAny()
            .orElseThrow(() -> new AssertionError("Segment is absent"));
        // NOTE: the former 'actualDir0Metadata.toBuilder().locations(locations)' statement was removed:
        // its builder result was discarded, so it had no effect on the object under comparison.
        assertEquals(dir0, actualDir0Metadata);

        // verify top segments locations
        Set<Path> topLevelSegmentLocations = topSegmentMetadata.stream()
            .map(SegmentMetadata::getLocation)
            .collect(Collectors.toSet());
        assertEquals(expectedTopLevelSegmentLocations, topLevelSegmentLocations);

        Set<Set<Path>> segmentFilesLocations = topSegmentMetadata.stream()
            .map(SegmentMetadata::getLocations)
            .collect(Collectors.toSet());
        assertEquals(expectedSegmentFilesLocations, segmentFilesLocations);

        // verify nested segments
        List<SegmentMetadata> nestedSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir1`");
        assertEquals(12, nestedSegmentMetadata.size());

        SegmentMetadata dir01994q1Segment = SegmentMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder()
                .type(MetadataType.SEGMENT)
                .identifier("1994/Q1")
                .key("1994")
                .build())
            .path(new Path(new Path(tablePath, "1994"), "Q1"))
            .schema(schema)
            .lastModifiedTime(getMaxLastModified(new File(new File(table, "1994"), "Q1")))
            .column(SchemaPath.getSimplePath("dir1"))
            .columnsStatistics(dir0q1Stats)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
            .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.json")))
            .partitionValues(Collections.singletonList("Q1"))
            .build();

        // verify segment for 1994
        assertEquals(dir01994q1Segment,
            nestedSegmentMetadata.stream()
                .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1"))
                .findAny()
                .orElse(null));

        // verify files metadata
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());

        // verify first file metadata
        assertEquals(dir01994q1File,
            filesMetadata.stream()
                .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.json"))
                .findAny()
                .orElse(null));
    } finally {
        // always drop the collected metadata so later tests start clean
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) ClusterTest(org.apache.drill.test.ClusterTest) TABLE_META_INFO(org.apache.drill.exec.sql.TestMetastoreCommands.TABLE_META_INFO) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TABLE_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.TABLE_COLUMN_STATISTICS) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SlowTest(org.apache.drill.categories.SlowTest) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Category(org.junit.experimental.categories.Category) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) TestMetastoreCommands.getMaxLastModified(org.apache.drill.exec.sql.TestMetastoreCommands.getMaxLastModified) MetastoreTest(org.apache.drill.categories.MetastoreTest) ExecConstants(org.apache.drill.exec.ExecConstants) ClusterFixtureBuilder(org.apache.drill.test.ClusterFixtureBuilder) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) HashMap(java.util.HashMap) HashSet(java.util.HashSet) 
TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) ClusterFixture(org.apache.drill.test.ClusterFixture) Assert.assertTrue(org.junit.Assert.assertTrue) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) File(java.io.File) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) Rule(org.junit.Rule) DIR0_1994_SEGMENT_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.DIR0_1994_SEGMENT_COLUMN_STATISTICS) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Paths(java.nio.file.Paths) DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Set(java.util.Set) HashSet(java.util.HashSet) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) HashMap(java.util.HashMap) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) 
SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) HashSet(java.util.HashSet) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 40 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testEmptyCSV.

/**
 * Runs ANALYZE ... REFRESH METADATA with an inline schema on an empty CSV file and
 * checks that table metadata with a zero row count is stored, that exactly one file
 * metadata entry exists, that the query plan reports Metastore usage, and that a
 * full scan yields an empty result set.
 */
@Test
public void testEmptyCSV() throws Exception {
    String tableName = "store/text/directoryWithEmptyCSV/empty.csv";
    File tableFile = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "default", "csv");

    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("Description", TypeProtos.MinorType.VARCHAR)
        .build();

    // expected statistics for the single column: null min/max and a count of 0
    ImmutableMap<SchemaPath, ColumnStatistics<?>> expectedColumnStats = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
        .put(SchemaPath.getSimplePath("Description"),
            getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
        .build();

    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(TABLE_META_INFO)
        .schema(expectedSchema)
        .location(new Path(tableFile.toURI().getPath()))
        .columnsStatistics(expectedColumnStats)
        .metadataStatistics(Arrays.asList(
            new StatisticsHolder<>(0L, TableStatisticsKind.ROW_COUNT),
            new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
        .partitionKeys(Collections.emptyMap())
        .lastModifiedTime(getMaxLastModified(tableFile))
        .build();

    try {
        testBuilder()
            .sqlQuery("analyze table table(dfs.`%s` (schema=>'inline=(`Description` VARCHAR not null)')) refresh metadata", tableName)
            .unOrdered()
            .baselineColumns("ok", "summary")
            .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
            .go();

        // the table must now be registered in the Metastore
        MetastoreTableInfo storedTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .metastoreTableInfo(tableInfo);
        assertTrue("table metadata wasn't found", storedTableInfo.isExists());

        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);

        List<FileMetadata> storedFiles = cluster.drillbit().getContext().getMetastoreRegistry().get()
            .tables()
            .basicRequests()
            .filesMetadata(tableInfo, null, null);
        assertEquals(1, storedFiles.size());

        String selectAll = "select * from dfs.`%s`";
        queryBuilder()
            .sql(selectAll, tableName)
            .planMatcher()
            .include("usedMetastore=true")
            .match();

        testBuilder()
            .sqlQuery(selectAll, tableName)
            .unOrdered()
            .baselineColumns("Description")
            .expectsEmptyResultSet()
            .go();
    } finally {
        // always drop the collected metadata so later tests start clean
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo): 58, MetastoreTest (org.apache.drill.categories.MetastoreTest): 50, Test (org.junit.Test): 50, MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo): 39, SchemaPath (org.apache.drill.common.expression.SchemaPath): 37, BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata): 37, Path (org.apache.hadoop.fs.Path): 36, ClusterTest (org.apache.drill.test.ClusterTest): 33, SlowTest (org.apache.drill.categories.SlowTest): 32, File (java.io.File): 29, ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics): 29, CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 25, HashMap (java.util.HashMap): 24, FileMetadata (org.apache.drill.metastore.metadata.FileMetadata): 23, SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata): 23, StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder): 23, RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata): 20, TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 18, BaseTest (org.apache.drill.test.BaseTest): 17, MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo): 16