
Example 11 with TableInfo

Use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

From the class AbstractTablesMetastoreTest, method testOverwriteSeveralUnits.

@Test
public void testOverwriteSeveralUnits() {
    TableInfo tableInfo = TableInfo.builder()
        .storagePlugin("dfs").workspace("tmp").name("nation")
        .build();
    TableMetadataUnit topLevelSegment = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .metadataKey("1994").metadataIdentifier("1994").metadataType(MetadataType.SEGMENT.name())
        .location("/tmp/nation/1994").lastModifiedTime(System.currentTimeMillis())
        .build();
    TableMetadataUnit firstNestedSegment = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .metadataKey("1994").metadataIdentifier("1994/Q1").metadataType(MetadataType.SEGMENT.name())
        .location("/tmp/nation/1994/Q1").lastModifiedTime(System.currentTimeMillis())
        .build();
    TableMetadataUnit secondNestedSegment = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .metadataKey("1994").metadataIdentifier("1994/Q2").metadataType(MetadataType.SEGMENT.name())
        .location("/tmp/nation/1994/Q2").lastModifiedTime(System.currentTimeMillis())
        .build();
    tables.modify().overwrite(topLevelSegment, firstNestedSegment, secondNestedSegment).execute();
    List<TableMetadataUnit> units = tables.read().metadataType(MetadataType.SEGMENT).filter(tableInfo.toFilter()).execute();
    assertEquals(3, units.size());
    tables.modify().overwrite(topLevelSegment, firstNestedSegment).execute();
    List<TableMetadataUnit> updatedUnits = tables.read().metadataType(MetadataType.SEGMENT).filter(tableInfo.toFilter()).execute();
    assertEquals(2, updatedUnits.size());
    Set<String> metadataIdentifiers = updatedUnits.stream().map(TableMetadataUnit::metadataIdentifier).collect(Collectors.toSet());
    assertEquals(Sets.newHashSet("1994", "1994/Q1"), metadataIdentifiers);
}
Also used : TableInfo(org.apache.drill.metastore.metadata.TableInfo) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)
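
The assertions above rely on overwrite semantics: the second overwrite call stores only two units under metadata key "1994", so the previously written "1994/Q2" unit no longer comes back on the next read. The read side of that pattern is sketched below using only calls that appear in the test; that the `tables` handle comes from the abstract test's Metastore setup is an assumption here.

// Sketch: scope a Metastore read to one table and one metadata type.
// `tables` is assumed to be the Metastore tables component wired up by AbstractTablesMetastoreTest.
TableInfo tableInfo = TableInfo.builder()
    .storagePlugin("dfs").workspace("tmp").name("nation")
    .build();

List<TableMetadataUnit> segmentUnits = tables.read()
    .metadataType(MetadataType.SEGMENT)   // only segment-level units
    .filter(tableInfo.toFilter())         // only units belonging to dfs.tmp.`nation`
    .execute();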

Example 12 with TableInfo

Use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

From the class TestTableMetadataUnitConversion, method testBaseTableMetadata.

@Test
public void testBaseTableMetadata() {
    TableInfo tableInfo = data.fullTableInfo;
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.TABLE).key(MetadataInfo.GENERAL_INFO_KEY).build();
    Map<String, String> partitionKeys = new HashMap<>();
    partitionKeys.put("dir0", "2018");
    partitionKeys.put("dir1", "2019");
    // check required fields
    BaseTableMetadata requiredFieldsMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo).metadataInfo(metadataInfo)
        .columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics)
        .partitionKeys(partitionKeys)
        .build();
    TableMetadataUnit requiredFieldsExpectedUnit = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .owner(tableInfo.owner()).tableType(tableInfo.type())
        .metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier())
        .columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(BaseMetadata.UNDEFINED_TIME)
        .partitionKeys(partitionKeys)
        .build();
    TableMetadataUnit requiredFieldsUnit = requiredFieldsMetadata.toMetadataUnit();
    assertEquals(requiredFieldsExpectedUnit, requiredFieldsUnit);
    assertNotNull(BaseTableMetadata.builder().metadataUnit(requiredFieldsUnit).build());
    Path location = new Path("/tmp/nation");
    List<SchemaPath> interestingColumns = Arrays.asList(SchemaPath.getSimplePath("a"), SchemaPath.getSimplePath("b"));
    // check all fields
    BaseTableMetadata allFieldsMetadata = BaseTableMetadata.builder()
        .tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema)
        .columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics)
        .lastModifiedTime(data.lastModifiedTime).location(location)
        .partitionKeys(partitionKeys).interestingColumns(interestingColumns)
        .build();
    TableMetadataUnit allFieldsExpectedUnit = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .owner(tableInfo.owner()).tableType(tableInfo.type())
        .metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier())
        .schema(data.unitSchema)
        .columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .location(location.toUri().getPath())
        .partitionKeys(partitionKeys)
        .interestingColumns(interestingColumns.stream().map(SchemaPath::toString).collect(Collectors.toList()))
        .build();
    TableMetadataUnit allFieldsUnit = allFieldsMetadata.toMetadataUnit();
    assertEquals(allFieldsExpectedUnit, allFieldsUnit);
    assertNotNull(BaseTableMetadata.builder().metadataUnit(allFieldsUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) HashMap(java.util.HashMap) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)
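
Both checks follow the same round trip: build typed BaseTableMetadata, flatten it with toMetadataUnit(), and rebuild it from the unit via metadataUnit(). In the required-fields case the unset last-modified time surfaces as BaseMetadata.UNDEFINED_TIME in the expected unit. A condensed sketch of that round trip, reusing the objects constructed in the test above:

// Typed metadata -> flat TableMetadataUnit -> typed metadata again.
BaseTableMetadata original = BaseTableMetadata.builder()
    .tableInfo(tableInfo).metadataInfo(metadataInfo)
    .columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics)
    .partitionKeys(partitionKeys)
    .build();

TableMetadataUnit unit = original.toMetadataUnit();   // flat form, as stored in the Metastore
BaseTableMetadata restored = BaseTableMetadata.builder()
    .metadataUnit(unit)                               // rebuilt from the flat form
    .build();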

Example 13 with TableInfo

Use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

From the class TestTableMetadataUnitConversion, method testRowGroupMetadata.

@Test
public void testRowGroupMetadata() {
    TableInfo tableInfo = data.basicTableInfo;
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.ROW_GROUP).key("part_int=3").identifier("part_int=3/part_varchar=g/0_0_0.parquet").build();
    Path path = new Path("/tmp/nation/part_int=3/part_varchar=g/0_0_0.parquet");
    int rowGroupIndex = 1;
    Map<String, Float> hostAffinity = new HashMap<>();
    hostAffinity.put("host1", 1F);
    hostAffinity.put("host2", 2F);
    RowGroupMetadata metadata = RowGroupMetadata.builder()
        .tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema)
        .columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .path(path).rowGroupIndex(rowGroupIndex).hostAffinity(hostAffinity)
        .build();
    TableMetadataUnit expectedUnit = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier())
        .schema(data.unitSchema)
        .columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .path(path.toUri().getPath()).location(path.getParent().toUri().getPath())
        .rowGroupIndex(rowGroupIndex).hostAffinity(hostAffinity)
        .build();
    TableMetadataUnit actualUnit = metadata.toMetadataUnit();
    assertEquals(expectedUnit, actualUnit);
    assertNotNull(RowGroupMetadata.builder().metadataUnit(actualUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) HashMap(java.util.HashMap) TableInfo(org.apache.drill.metastore.metadata.TableInfo) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)
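
The only derived fields in this conversion are the unit's path and location strings, both taken from the row group's Hadoop Path. That step is isolated below; the values in the comments follow from the path used in the test.

// How the flattened unit's path and location are derived from the row group's Path.
Path path = new Path("/tmp/nation/part_int=3/part_varchar=g/0_0_0.parquet");
String unitPath = path.toUri().getPath();                 // "/tmp/nation/part_int=3/part_varchar=g/0_0_0.parquet"
String unitLocation = path.getParent().toUri().getPath(); // "/tmp/nation/part_int=3/part_varchar=g"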

Example 14 with TableInfo

Use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

From the class TestTableMetadataUnitConversion, method testPartitionMetadata.

@Test
public void testPartitionMetadata() {
    TableInfo tableInfo = data.basicTableInfo;
    SchemaPath column = SchemaPath.getSimplePath("part_varchar");
    List<String> partitionValues = Collections.singletonList("g");
    Set<Path> locations = new HashSet<>();
    locations.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
    locations.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.PARTITION).key("part_int=3").identifier("part_int=3/part_varchar=g").build();
    PartitionMetadata metadata = PartitionMetadata.builder()
        .tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema)
        .columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .column(column).partitionValues(partitionValues).locations(locations)
        .build();
    TableMetadataUnit expectedUnit = TableMetadataUnit.builder()
        .storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name())
        .metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier())
        .schema(data.unitSchema)
        .columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .column(column.toString()).partitionValues(partitionValues)
        .locations(locations.stream().map(location -> location.toUri().getPath()).collect(Collectors.toList()))
        .build();
    TableMetadataUnit actualUnit = metadata.toMetadataUnit();
    assertEquals(expectedUnit, actualUnit);
    assertNotNull(PartitionMetadata.builder().metadataUnit(actualUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) HashSet(java.util.HashSet) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)
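
The typed PartitionMetadata keeps its locations as a Set of Hadoop Paths, while the flattened unit stores them as a List of plain strings. That conversion, lifted directly from the expectedUnit builder above:

// Set<Path> in the typed metadata becomes List<String> in the flattened unit.
Set<Path> locations = new HashSet<>();
locations.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
locations.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));

List<String> unitLocations = locations.stream()
    .map(location -> location.toUri().getPath())
    .collect(Collectors.toList());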

Example 15 with TableInfo

Use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

From the class TestInfoSchemaWithMetastore, method testPartitions.

@Test
public void testPartitions() throws Exception {
    String tableName = "table_with_partitions";
    ZonedDateTime currentTime = currentUtcTime();
    TableInfo tableInfo = TableInfo.builder().storagePlugin("dfs").workspace("tmp").name(tableName).type("PARQUET").build();
    SegmentMetadata defaultSegment = SegmentMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).key(MetadataInfo.DEFAULT_SEGMENT_KEY).build())
        .path(new Path("/tmp", tableName))
        .locations(Collections.emptySet())
        .metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
        .lastModifiedTime(currentTime.toInstant().toEpochMilli())
        .build();
    SegmentMetadata segment = SegmentMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).key("part_int=3").identifier("part_int=3").build())
        .column(SchemaPath.parseFromString("dir0"))
        .partitionValues(Collections.singletonList("part_int=3"))
        .path(new Path(String.format("/tmp/%s/part_int=3", tableName)))
        .locations(Collections.emptySet())
        .metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
        .lastModifiedTime(currentTime.toInstant().toEpochMilli())
        .build();
    PartitionMetadata partition = PartitionMetadata.builder()
        .tableInfo(tableInfo)
        .metadataInfo(MetadataInfo.builder().type(MetadataType.PARTITION).key("part_int=3").identifier("part_int=3/part_varchar=g").build())
        .column(SchemaPath.parseFromString("part_varchar"))
        .partitionValues(Collections.singletonList("g"))
        .locations(Collections.emptySet())
        .metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
        .lastModifiedTime(currentTime.toInstant().toEpochMilli())
        .build();
    metastore.tables().modify().overwrite(defaultSegment.toMetadataUnit(), segment.toMetadataUnit(), partition.toMetadataUnit()).execute();
    List<String> columns = Arrays.asList(
        InfoSchemaConstants.SHRD_COL_TABLE_CATALOG, InfoSchemaConstants.SHRD_COL_TABLE_SCHEMA,
        InfoSchemaConstants.SHRD_COL_TABLE_NAME, InfoSchemaConstants.PARTITIONS_COL_METADATA_KEY,
        InfoSchemaConstants.PARTITIONS_COL_METADATA_TYPE, InfoSchemaConstants.PARTITIONS_COL_METADATA_IDENTIFIER,
        InfoSchemaConstants.PARTITIONS_COL_PARTITION_COLUMN, InfoSchemaConstants.PARTITIONS_COL_PARTITION_VALUE,
        InfoSchemaConstants.PARTITIONS_COL_LOCATION, InfoSchemaConstants.PARTITIONS_COL_LAST_MODIFIED_TIME);
    client.testBuilder()
        .sqlQuery("select %s from information_schema.`partitions` where table_name = '%s'",
            String.join(", ", columns), tableName)
        .unOrdered()
        .baselineColumns(columns.toArray(new String[0]))
        .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.SEGMENT.name(),
            "part_int=3", "`dir0`", "part_int=3", "/tmp/table_with_partitions/part_int=3", currentTime.toLocalDateTime())
        .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.PARTITION.name(),
            "part_int=3/part_varchar=g", "`part_varchar`", "g", null, currentTime.toLocalDateTime())
        .go();
}
Also used : SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ZonedDateTime(java.time.ZonedDateTime) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) ClusterTest(org.apache.drill.test.ClusterTest) Test(org.junit.Test) UnlikelyTest(org.apache.drill.categories.UnlikelyTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) SqlTest(org.apache.drill.categories.SqlTest)
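
Note how timestamps travel here: the metadata units carry the last-modified time as epoch milliseconds, and the information_schema baseline compares it back as a LocalDateTime. A small sketch of just that conversion; currentUtcTime() is the test class's own helper, so the stand-in below is an assumption.

// Timestamp handling around the Metastore and information_schema (java.time imports assumed).
ZonedDateTime currentTime = ZonedDateTime.now(ZoneOffset.UTC); // stand-in for the test's currentUtcTime() helper (assumption)
long storedMillis = currentTime.toInstant().toEpochMilli();    // value written into the metadata units
LocalDateTime expected = currentTime.toLocalDateTime();        // value expected back from information_schema.`partitions`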

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo): 58 usages
MetastoreTest (org.apache.drill.categories.MetastoreTest): 50 usages
Test (org.junit.Test): 50 usages
MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo): 39 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 37 usages
BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata): 37 usages
Path (org.apache.hadoop.fs.Path): 36 usages
ClusterTest (org.apache.drill.test.ClusterTest): 33 usages
SlowTest (org.apache.drill.categories.SlowTest): 32 usages
File (java.io.File): 29 usages
ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics): 29 usages
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 25 usages
HashMap (java.util.HashMap): 24 usages
FileMetadata (org.apache.drill.metastore.metadata.FileMetadata): 23 usages
SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata): 23 usages
StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder): 23 usages
RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata): 20 usages
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 18 usages
BaseTest (org.apache.drill.test.BaseTest): 17 usages
MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo): 16 usages