use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.
the class MetadataControllerBatch method getMetadataUnits.
/**
 * Reads all rows of the specified container and converts them into metadata units
 * which should be written to the Metastore.
 *
 * <p>When {@code metadataToHandle} is set, only table metadata and units whose metadata key
 * matches a key of one of the {@code metadataToHandle} values are kept. Entries of
 * {@code metadataToHandle} whose identifiers were produced from the container are removed
 * from it (side effect), and metadata for the remaining entries is fetched from the
 * Metastore and appended to the result.
 *
 * <p>A default segment is appended when the result holds more than one unit and none of
 * them is a segment.
 *
 * @param container container with the rows to convert
 * @return mutable list of metadata units
 */
private List<TableMetadataUnit> getMetadataUnits(VectorContainer container) {
  List<TableMetadataUnit> metadataUnits = new ArrayList<>();
  RowSetReader reader = DirectRowSet.fromContainer(container).reader();
  while (reader.next()) {
    metadataUnits.addAll(getMetadataUnits(reader, 0));
  }

  if (metadataToHandle != null) {
    // keys are collected once, before metadataToHandle is modified below,
    // instead of re-streaming its values for every metadata unit
    java.util.Set<String> keysToHandle = metadataToHandle.values().stream()
        .map(MetadataInfo::key)
        .collect(Collectors.toSet());

    // leaves only table metadata and metadata which belongs to segments to be overridden;
    // collected into an explicit ArrayList since the list is modified afterwards and
    // Collectors.toList() gives no guarantee on the mutability of the returned list
    metadataUnits = metadataUnits.stream()
        .filter(unit -> keysToHandle.contains(unit.metadataKey())
            || MetadataType.TABLE.name().equals(unit.metadataType()))
        .collect(Collectors.toCollection(ArrayList::new));

    // leaves only metadata which should be fetched from the Metastore
    metadataUnits.stream()
        .map(TableMetadataUnit::metadataIdentifier)
        .forEach(metadataToHandle::remove);

    if (!metadataToHandle.isEmpty()) {
      metadataUnits.addAll(
          tables.basicRequests().metadata(popConfig.getContext().tableInfo(), metadataToHandle.values()));
    }
  }

  // checks whether metadataUnits contains not only table metadata before adding default segment
  // to avoid case when only table metadata should be updated and / or root segments removed
  boolean insertDefaultSegment = metadataUnits.size() > 1
      && metadataUnits.stream()
          // constant-first comparison tolerates units without metadata type
          .noneMatch(unit -> MetadataType.SEGMENT.name().equals(unit.metadataType()));

  if (insertDefaultSegment) {
    metadataUnits.add(getDefaultSegment(metadataUnits));
  }

  return metadataUnits;
}
use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.
the class MetastoreDropTableMetadataHandler method getPlan.
/**
 * Creates a direct plan which drops all Metastore metadata of the table referenced by the
 * specified ANALYZE TABLE DROP call.
 *
 * <p>The returned plan reports failure when the table has no metadata in the Metastore or
 * when a {@link MetastoreException} occurs; it reports success after the metadata is deleted.
 *
 * @param sqlNode parsed call, unwrapped to {@link SqlDropTableMetadata}
 * @return direct plan with the outcome flag and summary message
 * @throws ForemanSetupException declared by the handler contract
 */
@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ForemanSetupException {
  boolean metastoreEnabled = context.getOptions().getOption(ExecConstants.METASTORE_ENABLED_VALIDATOR);
  if (!metastoreEnabled) {
    throw UserException.validationError()
        .message("Running ANALYZE TABLE DROP command when Metastore is disabled (`metastore.enabled` is set to false)")
        .build(logger);
  }

  SqlDropTableMetadata dropCall = unwrap(sqlNode, SqlDropTableMetadata.class);
  AbstractSchema resolvedSchema = SchemaUtilites.resolveToDrillSchema(
      config.getConverter().getDefaultSchema(), dropCall.getSchemaPath());

  // first path element is the storage plugin, the rest forms the workspace name
  List<String> pathParts = resolvedSchema.getSchemaPath();
  String storagePlugin = pathParts.get(0);
  String workspace = Strings.join(pathParts.subList(1, pathParts.size()), AbstractSchema.SCHEMA_SEPARATOR);

  TableInfo tableInfo = TableInfo.builder()
      .name(dropCall.getName())
      .storagePlugin(storagePlugin)
      .workspace(workspace)
      .build();

  try {
    Tables metastoreTables = context.getMetastoreRegistry().get().tables();
    boolean metadataExists = metastoreTables.basicRequests().metastoreTableInfo(tableInfo).isExists();

    if (metadataExists) {
      metastoreTables.modify()
          .delete(Delete.builder()
              .metadataType(MetadataType.ALL)
              .filter(tableInfo.toFilter())
              .build())
          .execute();
    } else if (dropCall.checkMetadataExistence()) {
      // absence of metadata is an error only when the call asks to check for its existence
      throw UserException.validationError()
          .message("Metadata for table [%s] not found.", dropCall.getName())
          .build(logger);
    } else {
      return DirectPlan.createDirectPlan(context, false,
          String.format("Metadata for table [%s] does not exist.", dropCall.getName()));
    }
  } catch (MetastoreException e) {
    logger.error("Error when dropping metadata for table {}", dropCall.getName(), e);
    return DirectPlan.createDirectPlan(context, false, e.getMessage());
  }

  return DirectPlan.createDirectPlan(context, true,
      String.format("Metadata for table [%s] dropped.", dropCall.getName()));
}
use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.
the class FileMetadataInfoCollector method init.
/**
 * Compares the files of the specified selection with the file modification times stored in
 * the Metastore and fills the collector state ({@code tableScan}, {@code metadataToRemove},
 * {@code filesInfo}, {@code rowGroupsInfo}, {@code allMetaToHandle}, {@code outdated}).
 *
 * <p>Three outcomes are possible:
 * <ul>
 *   <li>interesting columns or the analyze metadata level require recalculation: the table
 *       scan from the supplier is used as is and only segments of removed files are
 *       scheduled for removal;</li>
 *   <li>some files are new, updated or removed: the table scan is narrowed to new and
 *       updated files and the metadata infos to handle / remove are computed;</li>
 *   <li>nothing changed: {@code outdated} is set to {@code false}.</li>
 * </ul>
 *
 * @param selection            selection whose files are compared with the Metastore
 * @param settings             planner settings passed to {@code getTableScan}
 * @param tableScanSupplier    supplies the base table scan
 * @param interestingColumns   requested interesting columns, may be {@code null}
 * @param segmentColumnsCount  number of segment columns; {@code segmentColumnsCount - 1} is
 *                             used as the index of the deepest segment level
 * @throws IOException declared by the method; presumably raised while listing file statuses
 *                     (TODO confirm against getFileStatuses)
 */
private void init(FormatSelection selection, PlannerSettings settings, Supplier<TableScan> tableScanSupplier, List<SchemaPath> interestingColumns, int segmentColumnsCount) throws IOException {
// interesting columns stored in the Metastore parsed to schema paths; null when the Metastore returns none
List<SchemaPath> metastoreInterestingColumns = Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns()).map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream().map(SchemaPath::parseFromString).collect(Collectors.toList())).orElse(null);
// file path -> last modified time as stored in the Metastore
Map<String, Long> filesNamesLastModifiedTime = basicRequests.filesLastModifiedTime(tableInfo, null, null);
List<String> newFiles = new ArrayList<>();
List<String> updatedFiles = new ArrayList<>();
// starts with every file known to the Metastore; files found in the selection are removed
// in the loop below, so only files which no longer exist remain
List<String> removedFiles = new ArrayList<>(filesNamesLastModifiedTime.keySet());
List<String> allFiles = new ArrayList<>();
for (FileStatus fileStatus : getFileStatuses(selection)) {
String path = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath();
Long lastModificationTime = filesNamesLastModifiedTime.get(path);
if (lastModificationTime == null) {
// file is absent in the Metastore
newFiles.add(path);
} else if (lastModificationTime < fileStatus.getModificationTime()) {
// file was modified after its metadata was stored
updatedFiles.add(path);
}
removedFiles.remove(path);
allFiles.add(path);
}
String selectionRoot = selection.getSelection().getSelectionRoot().toUri().getPath();
// full recalculation is required when either:
// - interesting columns stored in the Metastore differ from the requested ones, are not null
//   and do not contain all of the requested ones (or the requested ones are null);
// - analyze metadata level stored for the table differs from metadataLevel
if (!Objects.equals(metastoreInterestingColumns, interestingColumns) && metastoreInterestingColumns != null && (interestingColumns == null || !metastoreInterestingColumns.containsAll(interestingColumns)) || TableStatisticsKind.ANALYZE_METADATA_LEVEL.getValue(basicRequests.tableMetadata(tableInfo)).compareTo(metadataLevel) != 0) {
// do not update table scan and lists of segments / files / row groups,
// metadata should be recalculated
tableScan = tableScanSupplier.get();
// segment infos (segment index 0) built for removed files are scheduled for removal
metadataToRemove.addAll(getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0));
return;
}
// checks whether there are any new, updated or removed files
if (!newFiles.isEmpty() || !updatedFiles.isEmpty() || !removedFiles.isEmpty()) {
List<String> scanFiles = new ArrayList<>(newFiles);
scanFiles.addAll(updatedFiles);
// updates scan to read updated / new files
tableScan = getTableScan(settings, tableScanSupplier.get(), scanFiles);
// iterates from the end;
// takes deepest updated segments;
// finds their parents:
// - fetches all segments for parent level;
// - filters segments to leave parents only;
// obtains all child segments;
// filters child segments for filtered parent segments
int lastSegmentIndex = segmentColumnsCount - 1;
List<String> scanAndRemovedFiles = new ArrayList<>(scanFiles);
scanAndRemovedFiles.addAll(removedFiles);
// 1. Obtain files info for files from the same folder without removed files
// 2. Get segments for obtained files + segments for removed files
// 3. Get parent segments
// 4. Get other segments for the same parent segment
// 5. Remove segments which have only removed files (matched for removedFileInfo and don't match to filesInfo)
// 6. Do the same for parent segments
List<MetadataInfo> allFilesInfo = getMetadataInfoList(selectionRoot, allFiles, MetadataType.FILE, 0);
// first pass: collect updated segments even without files, they will be removed later
List<MetadataInfo> leafSegments = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, lastSegmentIndex);
List<MetadataInfo> removedFilesMetadata = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.FILE, 0);
List<MetadataInfo> scanFilesInfo = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.FILE, 0);
// files from scan + files from the same folder without removed files
filesInfo = leafSegments.stream().filter(parent -> scanFilesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).flatMap(parent -> allFilesInfo.stream().filter(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
Multimap<Integer, MetadataInfo> allSegments = populateSegments(removedFiles, allFiles, selectionRoot, lastSegmentIndex, leafSegments, removedFilesMetadata);
List<MetadataInfo> allRowGroupsInfo = getAllRowGroupsMetadataInfos(allFiles);
// row groups whose parent is one of the files collected in filesInfo
rowGroupsInfo = allRowGroupsInfo.stream().filter(child -> filesInfo.stream().map(MetadataInfo::identifier).anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent, child.identifier()))).collect(Collectors.toList());
List<MetadataInfo> segmentsToUpdate = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, 0);
// segments, files and row groups which are children of segments to update;
// entries which are parents of removed files are kept only if they are also parents of files from filesInfo
allMetaToHandle = Streams.concat(allSegments.values().stream(), allFilesInfo.stream(), allRowGroupsInfo.stream()).filter(child -> segmentsToUpdate.stream().anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).filter(parent -> removedFilesMetadata.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) || filesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
// removed top-level segments are handled separately since their metadata is not overridden when writing to the Metastore;
// a top-level segment counts as removed when it is a parent of a removed file and of none of the existing files
List<MetadataInfo> removedTopSegments = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0).stream().filter(parent -> removedFilesMetadata.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) && allFilesInfo.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
metadataToRemove.addAll(removedTopSegments);
// segments to update which were not removed are handled along with their children
segmentsToUpdate.stream().filter(segment -> !removedTopSegments.contains(segment)).forEach(allMetaToHandle::add);
} else {
// table metadata may still be actual
outdated = false;
}
}
Aggregations