Search in sources :

Example 6 with MetadataInfo

use of org.apache.drill.metastore.metadata.MetadataInfo in project drill by apache.

the class MetadataControllerBatch method getPartitionMetadata.

private PartitionMetadata getPartitionMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    List<String> partitionValues = segmentColumns.stream().limit(nestingLevel).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    String metadataIdentifier = MetadataIdentifierUtils.getMetadataIdentifierKey(partitionValues);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.PARTITION).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    return PartitionMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).locations(getIncomingLocations(reader)).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 7 with MetadataInfo

use of org.apache.drill.metastore.metadata.MetadataInfo in project drill by apache.

the class MetadataControllerBatch method writeToMetastore.

private void writeToMetastore() {
    MetadataControllerContext mdContext = popConfig.getContext();
    FilterExpression deleteFilter = mdContext.tableInfo().toFilter();
    for (MetadataInfo metadataInfo : mdContext.metadataToRemove()) {
        deleteFilter = FilterExpression.and(deleteFilter, FilterExpression.equal(MetastoreColumn.METADATA_KEY, metadataInfo.key()));
    }
    Modify<TableMetadataUnit> modify = tables.modify();
    if (!popConfig.getContext().metadataToRemove().isEmpty()) {
        modify.delete(Delete.builder().metadataType(MetadataType.SEGMENT, MetadataType.FILE, MetadataType.ROW_GROUP, MetadataType.PARTITION).filter(deleteFilter).build());
    }
    MetastoreTableInfo metastoreTableInfo = mdContext.metastoreTableInfo();
    if (tables.basicRequests().hasMetastoreTableInfoChanged(metastoreTableInfo)) {
        throw UserException.executionError(null).message("Metadata for table [%s] was changed before analyze is finished", tableInfo.name()).build(logger);
    }
    modify.overwrite(metadataUnits).execute();
}
Also used : MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression)

Example 8 with MetadataInfo

use of org.apache.drill.metastore.metadata.MetadataInfo in project drill by apache.

the class MetadataControllerBatch method getFileMetadata.

private FileMetadata getFileMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    List<String> partitionValues = segmentColumns.stream().limit(nestingLevel - 1).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    Path path = new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString());
    String metadataIdentifier = MetadataIdentifierUtils.getFileMetadataIdentifier(partitionValues, path);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.FILE).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    return FileMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).path(path).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 9 with MetadataInfo

use of org.apache.drill.metastore.metadata.MetadataInfo in project drill by apache.

the class TestTableMetadataUnitConversion method testBaseTableMetadata.

@Test
public void testBaseTableMetadata() {
    TableInfo tableInfo = data.fullTableInfo;
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.TABLE).key(MetadataInfo.GENERAL_INFO_KEY).build();
    Map<String, String> partitionKeys = new HashMap<>();
    partitionKeys.put("dir0", "2018");
    partitionKeys.put("dir1", "2019");
    // check required fields
    BaseTableMetadata requiredFieldsMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics).partitionKeys(partitionKeys).build();
    TableMetadataUnit requiredFieldsExpectedUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).owner(tableInfo.owner()).tableType(tableInfo.type()).metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier()).columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics).lastModifiedTime(BaseMetadata.UNDEFINED_TIME).partitionKeys(partitionKeys).build();
    TableMetadataUnit requiredFieldsUnit = requiredFieldsMetadata.toMetadataUnit();
    assertEquals(requiredFieldsExpectedUnit, requiredFieldsUnit);
    assertNotNull(BaseTableMetadata.builder().metadataUnit(requiredFieldsUnit).build());
    Path location = new Path("/tmp/nation");
    List<SchemaPath> interestingColumns = Arrays.asList(SchemaPath.getSimplePath("a"), SchemaPath.getSimplePath("b"));
    // check all fields
    BaseTableMetadata allFieldsMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema).columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics).lastModifiedTime(data.lastModifiedTime).location(location).partitionKeys(partitionKeys).interestingColumns(interestingColumns).build();
    TableMetadataUnit allFieldsExpectedUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).owner(tableInfo.owner()).tableType(tableInfo.type()).metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier()).schema(data.unitSchema).columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics).lastModifiedTime(data.lastModifiedTime).location(location.toUri().getPath()).partitionKeys(partitionKeys).interestingColumns(interestingColumns.stream().map(SchemaPath::toString).collect(Collectors.toList())).build();
    TableMetadataUnit allFieldsUnit = allFieldsMetadata.toMetadataUnit();
    assertEquals(allFieldsExpectedUnit, allFieldsUnit);
    assertNotNull(BaseTableMetadata.builder().metadataUnit(allFieldsUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) HashMap(java.util.HashMap) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Example 10 with MetadataInfo

use of org.apache.drill.metastore.metadata.MetadataInfo in project drill by apache.

the class TestTableMetadataUnitConversion method testRowGroupMetadata.

@Test
public void testRowGroupMetadata() {
    TableInfo tableInfo = data.basicTableInfo;
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.ROW_GROUP).key("part_int=3").identifier("part_int=3/part_varchar=g/0_0_0.parquet").build();
    Path path = new Path("/tmp/nation/part_int=3/part_varchar=g/0_0_0.parquet");
    int rowGroupIndex = 1;
    Map<String, Float> hostAffinity = new HashMap<>();
    hostAffinity.put("host1", 1F);
    hostAffinity.put("host2", 2F);
    RowGroupMetadata metadata = RowGroupMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema).columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics).lastModifiedTime(data.lastModifiedTime).path(path).rowGroupIndex(rowGroupIndex).hostAffinity(hostAffinity).build();
    TableMetadataUnit expectedUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier()).schema(data.unitSchema).columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics).lastModifiedTime(data.lastModifiedTime).path(path.toUri().getPath()).location(path.getParent().toUri().getPath()).rowGroupIndex(rowGroupIndex).hostAffinity(hostAffinity).build();
    TableMetadataUnit actualUnit = metadata.toMetadataUnit();
    assertEquals(expectedUnit, actualUnit);
    assertNotNull(RowGroupMetadata.builder().metadataUnit(actualUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) HashMap(java.util.HashMap) TableInfo(org.apache.drill.metastore.metadata.TableInfo) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Aggregations

MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)23 SchemaPath (org.apache.drill.common.expression.SchemaPath)21 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)16 Path (org.apache.hadoop.fs.Path)16 TableInfo (org.apache.drill.metastore.metadata.TableInfo)15 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)14 HashMap (java.util.HashMap)13 HashSet (java.util.HashSet)13 List (java.util.List)13 BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)13 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)13 ArrayList (java.util.ArrayList)12 Collections (java.util.Collections)12 Collectors (java.util.stream.Collectors)12 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)12 MetadataType (org.apache.drill.metastore.metadata.MetadataType)12 Map (java.util.Map)11 Set (java.util.Set)10 TypeProtos (org.apache.drill.common.types.TypeProtos)10 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)10