Search in sources :

Example 31 with TableMetadataUnit

Use of org.apache.drill.metastore.components.tables.TableMetadataUnit in the Apache Drill project.

Class TestMetastoreCommands, method testAnalyzeLowerLevelMetadata.

/**
 * Checks that {@code ANALYZE TABLE ... REFRESH METADATA '<level>' level} leaves no metadata in the
 * Metastore for the levels below the requested one.
 *
 * <p>For each of the FILE, SEGMENT and TABLE analyze levels the test runs the analyze statement,
 * requests from the Metastore every metadata type that is ordered after both the analyze level and
 * {@code SEGMENT} and before {@code ALL}, and asserts that nothing is returned. Collected metadata
 * is dropped after every iteration, regardless of the outcome.
 *
 * @throws Exception if a query or a Metastore request fails
 */
@Test
public void testAnalyzeLowerLevelMetadata() throws Exception {
    // checks that metadata for levels below specified in analyze statement is absent
    String tableName = "multilevel/parquetLowerLevel";
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    List<MetadataType> analyzeLevels = Arrays.asList(MetadataType.FILE, MetadataType.SEGMENT, MetadataType.TABLE);
    for (MetadataType analyzeLevel : analyzeLevels) {
        try {
            testBuilder()
                .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA '%s' level", tableName, analyzeLevel.name())
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            Set<MetadataType> emptyMetadataLevels = Arrays.stream(MetadataType.values())
                .filter(metadataType -> metadataType.compareTo(analyzeLevel) > 0
                    // for the case when there are no segment metadata, default segment is present
                    && metadataType.compareTo(MetadataType.SEGMENT) > 0
                    && metadataType.compareTo(MetadataType.ALL) < 0)
                .collect(Collectors.toSet());

            BasicTablesRequests.RequestMetadata requestMetadata = BasicTablesRequests.RequestMetadata.builder()
                .tableInfo(tableInfo)
                .metadataTypes(emptyMetadataLevels)
                .build();

            List<TableMetadataUnit> metadataUnitList = cluster.drillbit().getContext()
                .getMetastoreRegistry()
                .get()
                .tables()
                .basicRequests()
                .request(requestMetadata);

            // The unit list is passed as a format argument rather than concatenated into the format
            // string, so a '%' inside its textual form cannot be misread as a format specifier.
            String failureMessage = String.format(
                "Some metadata [%s] for [%s] analyze query level is present%s",
                emptyMetadataLevels, analyzeLevel, metadataUnitList);
            assertTrue(failureMessage, metadataUnitList.isEmpty());
        } finally {
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) Arrays(java.util.Arrays) ClusterTest(org.apache.drill.test.ClusterTest) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SlowTest(org.apache.drill.categories.SlowTest) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assert.fail(org.junit.Assert.fail) AnalyzeParquetInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeParquetInfoProvider) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Category(org.junit.experimental.categories.Category) Instant(java.time.Instant) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) MetastoreTest(org.apache.drill.categories.MetastoreTest) ExecConstants(org.apache.drill.exec.ExecConstants) ClusterFixtureBuilder(org.apache.drill.test.ClusterFixtureBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) HashSet(java.util.HashSet) 
MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) ClusterFixture(org.apache.drill.test.ClusterFixture) Assert.assertNotNull(org.junit.Assert.assertNotNull) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) UserRemoteException(org.apache.drill.common.exceptions.UserRemoteException) Assert.assertTrue(org.junit.Assert.assertTrue) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) File(java.io.File) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) Rule(org.junit.Rule) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Paths(java.nio.file.Paths) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 32 with TableMetadataUnit

Use of org.apache.drill.metastore.components.tables.TableMetadataUnit in the Apache Drill project.

Class MetadataControllerBatch, method getMetadataUnits.

/**
 * Builds metadata units for the row the given reader is positioned on, including units for any
 * underlying metadata nested in the row's {@code collected_map} column.
 *
 * <p>Nested rows are processed first (with {@code nestingLevel + 1}), so their units precede the
 * unit of the current row in the returned list.
 *
 * @param reader       reader positioned on the row to convert
 * @param nestingLevel nesting depth of the current row; passed on to the segment, partition, file
 *                     and row group metadata factories
 * @return mutable list with units for the nested rows followed by the unit for the current row
 * @throws IllegalStateException         if the {@code collected_map} column is present but is not an array
 * @throws UnsupportedOperationException if the row's metadata type is not one of TABLE, SEGMENT,
 *                                       PARTITION, FILE or ROW_GROUP
 */
private List<TableMetadataUnit> getMetadataUnits(TupleReader reader, int nestingLevel) {
    List<TableMetadataUnit> collectedUnits = new ArrayList<>();
    TupleMetadata rowSchema = reader.tupleSchema();

    ObjectReader typeColumn = reader.column(MetastoreAnalyzeConstants.METADATA_TYPE);
    Preconditions.checkNotNull(typeColumn, "metadataType column wasn't found");

    ObjectReader nestedColumn = reader.column(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD);
    if (nestedColumn != null) {
        if (!nestedColumn.schema().isArray()) {
            throw new IllegalStateException("Incoming vector with name `collected_map` should be repeated map");
        }
        // the current row carries underlying metadata: descend into every nested entry
        ArrayReader nestedRows = nestedColumn.array();
        while (nestedRows.next()) {
            List<TableMetadataUnit> nestedUnits = getMetadataUnits(nestedRows.tuple(), nestingLevel + 1);
            collectedUnits.addAll(nestedUnits);
        }
    }

    List<StatisticsHolder<?>> statistics = getMetadataStatistics(reader, rowSchema);

    // row count statistics value, or null when the row has no such statistics
    Object rowCountValue = statistics.stream()
        .filter(holder -> holder.getStatisticsKind() == TableStatisticsKind.ROW_COUNT)
        .findAny()
        .map(StatisticsHolder::getStatisticsValue)
        .orElse(null);
    Long rowCount = (Long) rowCountValue;

    Map<SchemaPath, ColumnStatistics<?>> columnsStatistics = getColumnStatistics(reader, rowSchema, rowCount);

    String typeName = typeColumn.scalar().getString();
    MetadataType type = MetadataType.valueOf(typeName);

    BaseMetadata rowMetadata;
    switch (type) {
        case TABLE:
            rowMetadata = getTableMetadata(reader, statistics, columnsStatistics);
            break;
        case SEGMENT:
            rowMetadata = getSegmentMetadata(reader, statistics, columnsStatistics, nestingLevel);
            break;
        case PARTITION:
            rowMetadata = getPartitionMetadata(reader, statistics, columnsStatistics, nestingLevel);
            break;
        case FILE:
            rowMetadata = getFileMetadata(reader, statistics, columnsStatistics, nestingLevel);
            break;
        case ROW_GROUP:
            rowMetadata = getRowGroupMetadata(reader, statistics, columnsStatistics, nestingLevel);
            break;
        default:
            throw new UnsupportedOperationException("Unsupported metadata type: " + type);
    }

    collectedUnits.add(rowMetadata.toMetadataUnit());
    return collectedUnits;
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) ArrayList(java.util.ArrayList) MetadataType(org.apache.drill.metastore.metadata.MetadataType) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader)

Example 33 with TableMetadataUnit

Use of org.apache.drill.metastore.components.tables.TableMetadataUnit in the Apache Drill project.

Class MetadataControllerBatch, method getMetadataUnits.

/**
 * Converts every row of the given container into metadata units and assembles the resulting list.
 *
 * <p>When {@code metadataToHandle} is set, the collected units are reduced to table metadata plus
 * units whose key matches one of the entries to handle. Entries already covered by the collected
 * units are removed from {@code metadataToHandle}, and the remaining ones are requested from the
 * Metastore and appended. Finally, if the list holds more than one unit and none of them is a
 * segment, a default segment unit is added.
 *
 * @param container container with the incoming rows
 * @return mutable list of metadata units
 */
private List<TableMetadataUnit> getMetadataUnits(VectorContainer container) {
    List<TableMetadataUnit> metadataUnits = new ArrayList<>();
    RowSetReader reader = DirectRowSet.fromContainer(container).reader();
    while (reader.next()) {
        metadataUnits.addAll(getMetadataUnits(reader, 0));
    }
    if (metadataToHandle != null) {
        // leaves only table metadata and metadata which belongs to segments to be overridden
        metadataUnits = metadataUnits.stream()
            .filter(tableMetadataUnit ->
                metadataToHandle.values().stream()
                    .map(MetadataInfo::key)
                    .anyMatch(s -> s.equals(tableMetadataUnit.metadataKey()))
                || MetadataType.TABLE.name().equals(tableMetadataUnit.metadataType()))
            // the list is appended to below, so collect into an explicit ArrayList:
            // Collectors.toList() gives no guarantee that its result is mutable
            .collect(Collectors.toCollection(ArrayList::new));
        // leaves only metadata which should be fetched from the Metastore
        metadataUnits.stream()
            .map(TableMetadataUnit::metadataIdentifier)
            .forEach(metadataToHandle::remove);
        List<TableMetadataUnit> metadata = metadataToHandle.isEmpty()
            ? Collections.emptyList()
            : tables.basicRequests().metadata(popConfig.getContext().tableInfo(), metadataToHandle.values());
        metadataUnits.addAll(metadata);
    }
    // checks whether metadataUnits contains not only table metadata before adding default segment
    // to avoid case when only table metadata should be updated and / or root segments removed
    String segmentTypeName = MetadataType.SEGMENT.name();
    boolean insertDefaultSegment = metadataUnits.size() > 1
        // constant-first comparison, matching the TABLE check above, tolerates a null metadata type
        && metadataUnits.stream().noneMatch(metadataUnit -> segmentTypeName.equals(metadataUnit.metadataType()));
    if (insertDefaultSegment) {
        TableMetadataUnit defaultSegmentMetadata = getDefaultSegment(metadataUnits);
        metadataUnits.add(defaultSegmentMetadata);
    }
    return metadataUnits;
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) 
FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) 
FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) ArrayList(java.util.ArrayList) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader)

Aggregations

TableMetadataUnit (org.apache.drill.metastore.components.tables.TableMetadataUnit)33 Test (org.junit.Test)26 RdbmsBaseTest (org.apache.drill.metastore.rdbms.RdbmsBaseTest)9 HashMap (java.util.HashMap)8 IcebergBaseTest (org.apache.drill.metastore.iceberg.IcebergBaseTest)8 Document (org.bson.Document)6 Map (java.util.Map)5 GenericRecord (org.apache.iceberg.data.GenericRecord)5 Record (org.apache.iceberg.data.Record)5 ArrayList (java.util.ArrayList)4 SchemaPath (org.apache.drill.common.expression.SchemaPath)4 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)4 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)3 InputDataTransformer (org.apache.drill.metastore.iceberg.transform.InputDataTransformer)3 MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)3 MetadataType (org.apache.drill.metastore.metadata.MetadataType)3 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)3 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)3 Condition (org.jooq.Condition)3 MethodHandle (java.lang.invoke.MethodHandle)2