Search in sources :

Example 1 with MetadataType

Use of org.apache.drill.metastore.metadata.MetadataType in the Apache Drill project.

Class BasicTablesTransformer, method all.

/**
 * Splits the given metadata units by their metadata type and converts each unit
 * into the matching metadata object.
 * <p>
 * Units whose type string resolves to {@code null}, or to a type other than
 * TABLE, SEGMENT, FILE, ROW_GROUP or PARTITION, are skipped.
 *
 * @param units metadata units to convert
 * @return holder with the tables, segments, files, row groups and partitions built from the units
 */
public static MetadataHolder all(List<TableMetadataUnit> units) {
    List<BaseTableMetadata> tableList = new ArrayList<>();
    List<SegmentMetadata> segmentList = new ArrayList<>();
    List<FileMetadata> fileList = new ArrayList<>();
    List<RowGroupMetadata> rowGroupList = new ArrayList<>();
    List<PartitionMetadata> partitionList = new ArrayList<>();

    for (TableMetadataUnit current : units) {
        MetadataType type = MetadataType.fromValue(current.metadataType());
        if (type == MetadataType.TABLE) {
            tableList.add(BaseTableMetadata.builder().metadataUnit(current).build());
        } else if (type == MetadataType.SEGMENT) {
            segmentList.add(SegmentMetadata.builder().metadataUnit(current).build());
        } else if (type == MetadataType.FILE) {
            fileList.add(FileMetadata.builder().metadataUnit(current).build());
        } else if (type == MetadataType.ROW_GROUP) {
            rowGroupList.add(RowGroupMetadata.builder().metadataUnit(current).build());
        } else if (type == MetadataType.PARTITION) {
            partitionList.add(PartitionMetadata.builder().metadataUnit(current).build());
        }
        // A null or otherwise unsupported type matches no branch and is ignored.
    }

    return new MetadataHolder(tableList, segmentList, fileList, rowGroupList, partitionList);
}
Also used : ArrayList(java.util.ArrayList) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) MetadataType(org.apache.drill.metastore.metadata.MetadataType) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata)

Example 2 with MetadataType

Use of org.apache.drill.metastore.metadata.MetadataType in the Apache Drill project.

Class MetadataControllerBatch, method getIncomingLocations.

/**
 * Collects the locations described by the row exposed through the given reader.
 * <ul>
 *   <li>SEGMENT / PARTITION: locations are read from the "locations" column when it is present
 *   and is an array; otherwise they are gathered recursively from every tuple of the
 *   underlying-metadata column, if that column exists.</li>
 *   <li>FILE: the single value of the location column is used.</li>
 *   <li>Any other metadata type contributes no locations.</li>
 * </ul>
 *
 * @param reader tuple reader for the row to inspect
 * @return set of locations found for the row; empty when none were found
 */
private Set<Path> getIncomingLocations(TupleReader reader) {
    Set<Path> childLocations = new HashSet<>();
    ObjectReader metadataColumnReader = reader.column(MetastoreAnalyzeConstants.METADATA_TYPE);
    Preconditions.checkNotNull(metadataColumnReader, "metadataType column wasn't found");
    MetadataType metadataType = MetadataType.valueOf(metadataColumnReader.scalar().getString());
    switch(metadataType) {
        case SEGMENT:
        case PARTITION:
            {
                ObjectReader locationsReader = reader.column(MetastoreAnalyzeConstants.LOCATIONS_FIELD);
                // populate list of file locations from "locations" field if it is present in the schema
                if (locationsReader != null && locationsReader.type() == ObjectType.ARRAY) {
                    ArrayReader array = locationsReader.array();
                    while (array.next()) {
                        childLocations.add(new Path(array.scalar().getString()));
                    }
                    // leaves the switch: the underlying metadata is not consulted in this case
                    break;
                }
                // in the opposite case, populate list of file locations using underlying metadata
                ObjectReader underlyingMetadataReader = reader.column(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD);
                if (underlyingMetadataReader != null) {
                    // current row contains information about underlying metadata
                    ArrayReader array = underlyingMetadataReader.array();
                    array.rewind();
                    while (array.next()) {
                        childLocations.addAll(getIncomingLocations(array.tuple()));
                    }
                }
                break;
            }
        case FILE:
            {
                ObjectReader locationReader = reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD);
                // fail with a descriptive message, like the metadataType check above,
                // instead of an anonymous null dereference
                Preconditions.checkNotNull(locationReader, "location column wasn't found");
                childLocations.add(new Path(locationReader.scalar().getString()));
                // explicit break: do not rely on falling through into the default branch
                break;
            }
        default:
            break;
    }
    return childLocations;
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) MetadataType(org.apache.drill.metastore.metadata.MetadataType) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) HashSet(java.util.HashSet)

Example 3 with MetadataType

Use of org.apache.drill.metastore.metadata.MetadataType in the Apache Drill project.

Class TestTableMetadataUnitConversion, method testPartitionMetadata.

/**
 * Checks that a partition metadata instance converts to the expected metadata unit
 * and that a partition metadata instance can be built back from that unit.
 */
@Test
public void testPartitionMetadata() {
    TableInfo info = data.basicTableInfo;
    SchemaPath partitionColumn = SchemaPath.getSimplePath("part_varchar");
    List<String> values = Collections.singletonList("g");

    Set<Path> paths = new HashSet<>();
    paths.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
    paths.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));

    MetadataInfo partitionInfo = MetadataInfo.builder()
        .type(MetadataType.PARTITION)
        .key("part_int=3")
        .identifier("part_int=3/part_varchar=g")
        .build();

    PartitionMetadata partition = PartitionMetadata.builder()
        .tableInfo(info)
        .metadataInfo(partitionInfo)
        .schema(data.schema)
        .columnsStatistics(data.columnsStatistics)
        .metadataStatistics(data.metadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .column(partitionColumn)
        .partitionValues(values)
        .locations(paths)
        .build();

    // The unit stores locations as plain path strings.
    List<String> expectedLocations = paths.stream()
        .map(path -> path.toUri().getPath())
        .collect(Collectors.toList());

    TableMetadataUnit expected = TableMetadataUnit.builder()
        .storagePlugin(info.storagePlugin())
        .workspace(info.workspace())
        .tableName(info.name())
        .metadataType(partitionInfo.type().name())
        .metadataKey(partitionInfo.key())
        .metadataIdentifier(partitionInfo.identifier())
        .schema(data.unitSchema)
        .columnsStatistics(data.unitColumnsStatistics)
        .metadataStatistics(data.unitMetadataStatistics)
        .lastModifiedTime(data.lastModifiedTime)
        .column(partitionColumn.toString())
        .partitionValues(values)
        .locations(expectedLocations)
        .build();

    TableMetadataUnit actual = partition.toMetadataUnit();
    assertEquals(expected, actual);

    // Reverse direction: the unit must be accepted by the partition metadata builder.
    assertNotNull(PartitionMetadata.builder().metadataUnit(actual).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) BaseTest(org.apache.drill.test.BaseTest) HashSet(java.util.HashSet) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Assert.assertNotNull(org.junit.Assert.assertNotNull) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) Set(java.util.Set) Test(org.junit.Test) Category(org.junit.experimental.categories.Category) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) MetastoreTest(org.apache.drill.categories.MetastoreTest) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaPath(org.apache.drill.common.expression.SchemaPath) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) 
TableInfo(org.apache.drill.metastore.metadata.TableInfo) HashSet(java.util.HashSet) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Example 4 with MetadataType

Use of org.apache.drill.metastore.metadata.MetadataType in the Apache Drill project.

Class AbstractTransformer, method toOverwrite.

/**
 * Builds an overwrite operation for the given units of a single metadata type.
 * The operation carries the mapper's table, the delete conditions the mapper derives
 * from the units, and the records the units are mapped to.
 *
 * @param metadataTypeString string form of the metadata type the units belong to
 * @param units units to write
 * @return overwrite operation for the units
 * @throws RdbmsMetastoreException if the metadata type string resolves to {@code null}
 */
protected RdbmsOperation.Overwrite toOverwrite(String metadataTypeString, List<T> units) {
    MetadataType type = MetadataType.fromValue(metadataTypeString);
    if (type == null) {
        throw new RdbmsMetastoreException("Metadata type must be specified during insert / update");
    }

    MetadataMapper<T, ? extends Record> unitMapper = toMapper(type);
    List<Condition> conditions = unitMapper.toDeleteConditions(units);
    List<? extends Record> rows = units.stream()
        .map(unitMapper::toRecord)
        .collect(Collectors.toList());
    return new RdbmsOperation.Overwrite(unitMapper.table(), conditions, rows);
}
Also used : Condition(org.jooq.Condition) RdbmsMetastoreException(org.apache.drill.metastore.rdbms.exception.RdbmsMetastoreException) MetadataType(org.apache.drill.metastore.metadata.MetadataType)

Example 5 with MetadataType

Use of org.apache.drill.metastore.metadata.MetadataType in the Apache Drill project.

Class TestMetastoreCommands, method testTableMetadataWithLevels.

/**
 * Runs ANALYZE TABLE ... REFRESH METADATA at each of several metadata levels and checks
 * that the table metadata read back from the metastore matches the expected metadata,
 * including the analyze level itself. Metadata is dropped after every level.
 */
@Test
public void testTableMetadataWithLevels() throws Exception {
    String tableName = "multilevel/parquetLevels";
    List<MetadataType> levels = Arrays.asList(
        MetadataType.ROW_GROUP,
        MetadataType.FILE,
        MetadataType.SEGMENT,
        MetadataType.TABLE);

    File tableDir = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    TableInfo info = getTableInfo(tableName, "tmp");

    for (MetadataType level : levels) {
        Path tableLocation = new Path(tableDir.toURI().getPath());
        BaseTableMetadata expected = BaseTableMetadata.builder()
            .tableInfo(info)
            .metadataInfo(TABLE_META_INFO)
            .schema(SCHEMA)
            .location(tableLocation)
            .columnsStatistics(TABLE_COLUMN_STATISTICS)
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder<>(level, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(getMaxLastModified(tableDir))
            .build();

        try {
            String expectedSummary = String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName);
            testBuilder()
                .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA '%s' level", tableName, level.name())
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, expectedSummary)
                .go();

            BaseTableMetadata actual = cluster.drillbit()
                .getContext()
                .getMetastoreRegistry()
                .get()
                .tables()
                .basicRequests()
                .tableMetadata(info);

            String mismatchMessage = String.format("Table metadata mismatch for [%s] metadata level", level);
            assertEquals(mismatchMessage, expected, actual);
        } finally {
            // Always drop the collected metadata so the next level starts clean.
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Aggregations

MetadataType (org.apache.drill.metastore.metadata.MetadataType): 11; SchemaPath (org.apache.drill.common.expression.SchemaPath): 9; Path (org.apache.hadoop.fs.Path): 7; Arrays (java.util.Arrays): 6; Collections (java.util.Collections): 6; List (java.util.List): 6; Collectors (java.util.stream.Collectors): 6; FileMetadata (org.apache.drill.metastore.metadata.FileMetadata): 6; MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo): 6; RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata): 6; SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata): 6; ArrayList (java.util.ArrayList): 5; Map (java.util.Map): 5; HashSet (java.util.HashSet): 4; MetastoreTest (org.apache.drill.categories.MetastoreTest): 4; SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 4; BaseMetadata (org.apache.drill.metastore.metadata.BaseMetadata): 4; HashMap (java.util.HashMap): 3; Set (java.util.Set): 3; TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 3