Example 56 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetadataControllerBatch method getMetadataUnits.

private List<TableMetadataUnit> getMetadataUnits(VectorContainer container) {
    List<TableMetadataUnit> metadataUnits = new ArrayList<>();
    RowSetReader reader = DirectRowSet.fromContainer(container).reader();
    while (reader.next()) {
        metadataUnits.addAll(getMetadataUnits(reader, 0));
    }
    if (metadataToHandle != null) {
        // keep only table metadata plus metadata belonging to segments that will be overridden
        metadataUnits = metadataUnits.stream()
            .filter(tableMetadataUnit -> metadataToHandle.values().stream()
                .map(MetadataInfo::key)
                .anyMatch(s -> s.equals(tableMetadataUnit.metadataKey()))
                || MetadataType.TABLE.name().equals(tableMetadataUnit.metadataType()))
            .collect(Collectors.toList());
        // drop identifiers which are already present, leaving in metadataToHandle
        // only the metadata which still has to be fetched from the Metastore
        metadataUnits.stream().map(TableMetadataUnit::metadataIdentifier).forEach(metadataToHandle::remove);
        List<TableMetadataUnit> metadata = metadataToHandle.isEmpty()
            ? Collections.emptyList()
            : tables.basicRequests().metadata(popConfig.getContext().tableInfo(), metadataToHandle.values());
        metadataUnits.addAll(metadata);
    }
    // before adding the default segment, check that metadataUnits contains more than just table metadata,
    // to avoid the case when only table metadata should be updated and / or root segments removed
    boolean insertDefaultSegment = metadataUnits.size() > 1
        && metadataUnits.stream()
            .noneMatch(metadataUnit -> metadataUnit.metadataType().equals(MetadataType.SEGMENT.name()));
    if (insertDefaultSegment) {
        TableMetadataUnit defaultSegmentMetadata = getDefaultSegment(metadataUnits);
        metadataUnits.add(defaultSegmentMetadata);
    }
    return metadataUnits;
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType)
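
The filter in this example is the subtle part: a unit survives either because its metadata key is among the keys still to be handled, or because it is table-level metadata, which is always rewritten. Below is a minimal stand-alone sketch of that predicate; the Unit record, the keysToHandle set and the literal key / type strings are simplified stand-ins for Drill's TableMetadataUnit, metadataToHandle and MetadataType (assumptions, not the real classes):

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class FilterSketch {

    // Simplified stand-in for TableMetadataUnit (hypothetical).
    record Unit(String metadataKey, String metadataType) {}

    public static void main(String[] args) {
        // Keys of the metadata entries still to be handled (stand-in for metadataToHandle keys).
        Set<String> keysToHandle = Set.of("dir0", "dir1");
        List<Unit> units = List.of(
            new Unit("GENERAL_INFO_KEY", "TABLE"),   // table metadata: always kept
            new Unit("dir0", "SEGMENT"),             // key matches -> kept
            new Unit("dir2", "SEGMENT"));            // no match -> filtered out

        List<Unit> kept = units.stream()
            .filter(u -> keysToHandle.contains(u.metadataKey())
                || "TABLE".equals(u.metadataType()))
            .collect(Collectors.toList());

        kept.forEach(u -> System.out.println(u.metadataKey() + " / " + u.metadataType()));
    }
}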

Example 57 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetastoreDropTableMetadataHandler method getPlan.

@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ForemanSetupException {
    if (!context.getOptions().getOption(ExecConstants.METASTORE_ENABLED_VALIDATOR)) {
        throw UserException.validationError().message("Running ANALYZE TABLE DROP command when Metastore is disabled (`metastore.enabled` is set to false)").build(logger);
    }
    SqlDropTableMetadata dropTableMetadata = unwrap(sqlNode, SqlDropTableMetadata.class);
    AbstractSchema drillSchema = SchemaUtilites.resolveToDrillSchema(config.getConverter().getDefaultSchema(), dropTableMetadata.getSchemaPath());
    List<String> schemaPath = drillSchema.getSchemaPath();
    String pluginName = schemaPath.get(0);
    String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
    TableInfo tableInfo = TableInfo.builder()
        .name(dropTableMetadata.getName())
        .storagePlugin(pluginName)
        .workspace(workspaceName)
        .build();
    try {
        Tables tables = context.getMetastoreRegistry().get().tables();
        MetastoreTableInfo metastoreTableInfo = tables.basicRequests().metastoreTableInfo(tableInfo);
        if (!metastoreTableInfo.isExists()) {
            if (dropTableMetadata.checkMetadataExistence()) {
                throw UserException.validationError().message("Metadata for table [%s] not found.", dropTableMetadata.getName()).build(logger);
            }
            return DirectPlan.createDirectPlan(context, false, String.format("Metadata for table [%s] does not exist.", dropTableMetadata.getName()));
        }
        tables.modify()
            .delete(Delete.builder()
                .metadataType(MetadataType.ALL)
                .filter(tableInfo.toFilter())
                .build())
            .execute();
    } catch (MetastoreException e) {
        logger.error("Error when dropping metadata for table {}", dropTableMetadata.getName(), e);
        return DirectPlan.createDirectPlan(context, false, e.getMessage());
    }
    return DirectPlan.createDirectPlan(context, true, String.format("Metadata for table [%s] dropped.", dropTableMetadata.getName()));
}
Also used : MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) SqlDropTableMetadata(org.apache.drill.exec.planner.sql.parser.SqlDropTableMetadata) AbstractSchema(org.apache.drill.exec.store.AbstractSchema) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) Tables(org.apache.drill.metastore.components.tables.Tables) TableInfo(org.apache.drill.metastore.metadata.TableInfo)
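
The TableInfo built in this example is keyed by storage plugin and workspace, both derived from the resolved schema path: the first element names the plugin and the remaining elements, joined with the schema separator, form the workspace. A small self-contained sketch of that split; String.join with a hard-coded "." stands in for Strings.join and AbstractSchema.SCHEMA_SEPARATOR (assumed here to be the dot):

import java.util.List;

public class SchemaPathSplitSketch {
    public static void main(String[] args) {
        // A resolved Drill schema path such as dfs.tmp.nested (invented example).
        List<String> schemaPath = List.of("dfs", "tmp", "nested");
        String pluginName = schemaPath.get(0);  // storage plugin: "dfs"
        // Everything after the plugin name forms the workspace: "tmp.nested"
        String workspaceName = String.join(".", schemaPath.subList(1, schemaPath.size()));
        System.out.println(pluginName + " / " + workspaceName);
    }
}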

Example 58 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class FileMetadataInfoCollector method init.

private void init(FormatSelection selection, PlannerSettings settings, Supplier<TableScan> tableScanSupplier, List<SchemaPath> interestingColumns, int segmentColumnsCount) throws IOException {
    List<SchemaPath> metastoreInterestingColumns =
        Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns())
            .map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream()
                .map(SchemaPath::parseFromString)
                .collect(Collectors.toList()))
            .orElse(null);
    Map<String, Long> filesNamesLastModifiedTime = basicRequests.filesLastModifiedTime(tableInfo, null, null);
    List<String> newFiles = new ArrayList<>();
    List<String> updatedFiles = new ArrayList<>();
    List<String> removedFiles = new ArrayList<>(filesNamesLastModifiedTime.keySet());
    List<String> allFiles = new ArrayList<>();
    for (FileStatus fileStatus : getFileStatuses(selection)) {
        String path = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath();
        Long lastModificationTime = filesNamesLastModifiedTime.get(path);
        if (lastModificationTime == null) {
            newFiles.add(path);
        } else if (lastModificationTime < fileStatus.getModificationTime()) {
            updatedFiles.add(path);
        }
        removedFiles.remove(path);
        allFiles.add(path);
    }
    String selectionRoot = selection.getSelection().getSelectionRoot().toUri().getPath();
    if ((!Objects.equals(metastoreInterestingColumns, interestingColumns)
            && metastoreInterestingColumns != null
            && (interestingColumns == null || !metastoreInterestingColumns.containsAll(interestingColumns)))
        || TableStatisticsKind.ANALYZE_METADATA_LEVEL.getValue(basicRequests.tableMetadata(tableInfo)).compareTo(metadataLevel) != 0) {
        // interesting columns or the analyze metadata level changed: all metadata must be recalculated,
        // so keep the full table scan and do not update the lists of segments / files / row groups
        tableScan = tableScanSupplier.get();
        metadataToRemove.addAll(getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0));
        return;
    }
    // checks whether there are any new, updated or removed files
    if (!newFiles.isEmpty() || !updatedFiles.isEmpty() || !removedFiles.isEmpty()) {
        List<String> scanFiles = new ArrayList<>(newFiles);
        scanFiles.addAll(updatedFiles);
        // updates scan to read updated / new files
        tableScan = getTableScan(settings, tableScanSupplier.get(), scanFiles);
        // iterates from the end;
        // takes deepest updated segments;
        // finds their parents:
        // - fetches all segments for parent level;
        // - filters segments to leave parents only;
        // obtains all child segments;
        // filters child segments for filtered parent segments
        int lastSegmentIndex = segmentColumnsCount - 1;
        List<String> scanAndRemovedFiles = new ArrayList<>(scanFiles);
        scanAndRemovedFiles.addAll(removedFiles);
        // 1. Obtain files info for files from the same folder without removed files
        // 2. Get segments for obtained files + segments for removed files
        // 3. Get parent segments
        // 4. Get other segments for the same parent segment
        // 5. Remove segments which contain only removed files (they match removedFilesMetadata and do not match filesInfo)
        // 6. Do the same for parent segments
        List<MetadataInfo> allFilesInfo = getMetadataInfoList(selectionRoot, allFiles, MetadataType.FILE, 0);
        // first pass: collect updated segments even without files, they will be removed later
        List<MetadataInfo> leafSegments = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, lastSegmentIndex);
        List<MetadataInfo> removedFilesMetadata = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.FILE, 0);
        List<MetadataInfo> scanFilesInfo = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.FILE, 0);
        // files from scan + files from the same folder without removed files
        filesInfo = leafSegments.stream()
            .filter(parent -> scanFilesInfo.stream()
                .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
            .flatMap(parent -> allFilesInfo.stream()
                .filter(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
            .collect(Collectors.toList());
        Multimap<Integer, MetadataInfo> allSegments = populateSegments(removedFiles, allFiles, selectionRoot, lastSegmentIndex, leafSegments, removedFilesMetadata);
        List<MetadataInfo> allRowGroupsInfo = getAllRowGroupsMetadataInfos(allFiles);
        rowGroupsInfo = allRowGroupsInfo.stream()
            .filter(child -> filesInfo.stream()
                .map(MetadataInfo::identifier)
                .anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent, child.identifier())))
            .collect(Collectors.toList());
        List<MetadataInfo> segmentsToUpdate = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, 0);
        allMetaToHandle = Streams.concat(allSegments.values().stream(), allFilesInfo.stream(), allRowGroupsInfo.stream())
            .filter(child -> segmentsToUpdate.stream()
                .anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
            .filter(parent -> removedFilesMetadata.stream()
                .noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))
                || filesInfo.stream()
                    .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
            .collect(Collectors.toList());
        // removed top-level segments are handled separately since their metadata is not overridden when writing to the Metastore
        List<MetadataInfo> removedTopSegments = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0).stream()
            .filter(parent -> removedFilesMetadata.stream()
                .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))
                && allFilesInfo.stream()
                    .noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
            .collect(Collectors.toList());
        metadataToRemove.addAll(removedTopSegments);
        segmentsToUpdate.stream()
            .filter(segment -> !removedTopSegments.contains(segment))
            .forEach(allMetaToHandle::add);
    } else {
        // the table metadata may still be up to date
        outdated = false;
    }
}
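
The for-loop at the top of init() is effectively a three-way diff of the file system against the last-modified times recorded in the Metastore: unknown paths are new, an older recorded timestamp means updated, and whatever is never visited stays in removedFiles. A minimal stand-alone sketch of that diff, with plain maps standing in for basicRequests.filesLastModifiedTime() and the FileStatus listing (paths and timestamps are invented):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FileDiffSketch {
    public static void main(String[] args) {
        // Last-modified times previously recorded in the Metastore (path -> millis).
        Map<String, Long> metastoreTimes = new HashMap<>();
        metastoreTimes.put("/data/t/a.parquet", 100L);
        metastoreTimes.put("/data/t/b.parquet", 100L);
        // Current state of the file system (path -> millis).
        Map<String, Long> fsTimes = Map.of(
            "/data/t/a.parquet", 100L,   // unchanged
            "/data/t/b.parquet", 200L,   // modified after it was recorded -> updated
            "/data/t/c.parquet", 150L);  // unknown to the Metastore -> new

        List<String> newFiles = new ArrayList<>();
        List<String> updatedFiles = new ArrayList<>();
        // Every recorded file not seen on the file system is treated as removed.
        List<String> removedFiles = new ArrayList<>(metastoreTimes.keySet());

        fsTimes.forEach((path, modTime) -> {
            Long recorded = metastoreTimes.get(path);
            if (recorded == null) {
                newFiles.add(path);
            } else if (recorded < modTime) {
                updatedFiles.add(path);
            }
            removedFiles.remove(path);
        });

        System.out.println("new=" + newFiles + " updated=" + updatedFiles + " removed=" + removedFiles);
    }
}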
Also used : SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableScan(org.apache.calcite.rel.core.TableScan) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) FileSystem(org.apache.hadoop.fs.FileSystem) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) Streams(org.apache.drill.shaded.guava.com.google.common.collect.Streams) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) FileStatus(org.apache.hadoop.fs.FileStatus) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) ImpersonationUtil(org.apache.drill.exec.util.ImpersonationUtil) Path(org.apache.hadoop.fs.Path) FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Optional(java.util.Optional) Collections(java.util.Collections)
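
All of the filter chains in this example pivot on MetadataIdentifierUtils.isMetadataKeyParent, which relates segment, file and row-group metadata through their identifiers. The sketch below illustrates the idea with a simplified isParent; treating identifiers as '/'-separated paths is an assumption about the real utility's format, not a statement of it:

import java.util.List;
import java.util.stream.Collectors;

public class IdentifierSketch {

    // Simplified stand-in for MetadataIdentifierUtils.isMetadataKeyParent (assumption):
    // a parent identifier is a proper '/'-separated prefix of the child identifier.
    static boolean isParent(String parent, String child) {
        return child.startsWith(parent + "/");
    }

    public static void main(String[] args) {
        List<String> segments = List.of("2022", "2023", "2023/01");
        List<String> files = List.of("2023/01/a.parquet");

        // Keep only segments that still have at least one child file,
        // mirroring the filter chains in init() above.
        List<String> liveSegments = segments.stream()
            .filter(parent -> files.stream().anyMatch(child -> isParent(parent, child)))
            .collect(Collectors.toList());

        System.out.println(liveSegments);  // [2023, 2023/01]
    }
}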

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo): 58
MetastoreTest (org.apache.drill.categories.MetastoreTest): 50
Test (org.junit.Test): 50
MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo): 39
SchemaPath (org.apache.drill.common.expression.SchemaPath): 37
BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata): 37
Path (org.apache.hadoop.fs.Path): 36
ClusterTest (org.apache.drill.test.ClusterTest): 33
SlowTest (org.apache.drill.categories.SlowTest): 32
File (java.io.File): 29
ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics): 29
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 25
HashMap (java.util.HashMap): 24
FileMetadata (org.apache.drill.metastore.metadata.FileMetadata): 23
SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata): 23
StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder): 23
RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata): 20
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 18
BaseTest (org.apache.drill.test.BaseTest): 17
MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo): 16