Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class FileMetadataInfoCollector, method init().
private void init(FormatSelection selection, PlannerSettings settings, Supplier<TableScan> tableScanSupplier, List<SchemaPath> interestingColumns, int segmentColumnsCount) throws IOException {
List<SchemaPath> metastoreInterestingColumns = Optional.ofNullable(
        basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns())
    .map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream()
        .map(SchemaPath::parseFromString)
        .collect(Collectors.toList()))
    .orElse(null);
Map<String, Long> filesNamesLastModifiedTime = basicRequests.filesLastModifiedTime(tableInfo, null, null);
List<String> newFiles = new ArrayList<>();
List<String> updatedFiles = new ArrayList<>();
List<String> removedFiles = new ArrayList<>(filesNamesLastModifiedTime.keySet());
List<String> allFiles = new ArrayList<>();
for (FileStatus fileStatus : getFileStatuses(selection)) {
String path = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath();
Long lastModificationTime = filesNamesLastModifiedTime.get(path);
if (lastModificationTime == null) {
newFiles.add(path);
} else if (lastModificationTime < fileStatus.getModificationTime()) {
updatedFiles.add(path);
}
removedFiles.remove(path);
allFiles.add(path);
}
String selectionRoot = selection.getSelection().getSelectionRoot().toUri().getPath();
if (!Objects.equals(metastoreInterestingColumns, interestingColumns)
    && metastoreInterestingColumns != null
    && (interestingColumns == null || !metastoreInterestingColumns.containsAll(interestingColumns))
    || TableStatisticsKind.ANALYZE_METADATA_LEVEL.getValue(basicRequests.tableMetadata(tableInfo)).compareTo(metadataLevel) != 0) {
// do not update table scan and lists of segments / files / row groups,
// metadata should be recalculated
tableScan = tableScanSupplier.get();
metadataToRemove.addAll(getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0));
return;
}
// checks whether there are new, updated, or removed files
if (!newFiles.isEmpty() || !updatedFiles.isEmpty() || !removedFiles.isEmpty()) {
List<String> scanFiles = new ArrayList<>(newFiles);
scanFiles.addAll(updatedFiles);
// updates scan to read updated / new files
tableScan = getTableScan(settings, tableScanSupplier.get(), scanFiles);
// iterates from the end;
// takes deepest updated segments;
// finds their parents:
// - fetches all segments for parent level;
// - filters segments to leave parents only;
// obtains all child segments;
// filters child segments for filtered parent segments
int lastSegmentIndex = segmentColumnsCount - 1;
List<String> scanAndRemovedFiles = new ArrayList<>(scanFiles);
scanAndRemovedFiles.addAll(removedFiles);
// 1. Obtain files info for files from the same folder without removed files
// 2. Get segments for obtained files + segments for removed files
// 3. Get parent segments
// 4. Get other segments for the same parent segment
// 5. Remove segments which have only removed files (matched for removedFileInfo and don't match to filesInfo)
// 6. Do the same for parent segments
List<MetadataInfo> allFilesInfo = getMetadataInfoList(selectionRoot, allFiles, MetadataType.FILE, 0);
// first pass: collect updated segments even without files, they will be removed later
List<MetadataInfo> leafSegments = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, lastSegmentIndex);
List<MetadataInfo> removedFilesMetadata = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.FILE, 0);
List<MetadataInfo> scanFilesInfo = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.FILE, 0);
// files from scan + files from the same folder without removed files
filesInfo = leafSegments.stream()
    .filter(parent -> scanFilesInfo.stream()
        .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
    .flatMap(parent -> allFilesInfo.stream()
        .filter(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
    .collect(Collectors.toList());
Multimap<Integer, MetadataInfo> allSegments = populateSegments(removedFiles, allFiles, selectionRoot, lastSegmentIndex, leafSegments, removedFilesMetadata);
List<MetadataInfo> allRowGroupsInfo = getAllRowGroupsMetadataInfos(allFiles);
rowGroupsInfo = allRowGroupsInfo.stream()
    .filter(child -> filesInfo.stream()
        .map(MetadataInfo::identifier)
        .anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent, child.identifier())))
    .collect(Collectors.toList());
List<MetadataInfo> segmentsToUpdate = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, 0);
allMetaToHandle = Streams.concat(allSegments.values().stream(), allFilesInfo.stream(), allRowGroupsInfo.stream())
    .filter(child -> segmentsToUpdate.stream()
        .anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
    .filter(parent -> removedFilesMetadata.stream()
        .noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))
        || filesInfo.stream()
            .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
    .collect(Collectors.toList());
// removed top-level segments are handled separately since their metadata is not overridden when writing to the Metastore
List<MetadataInfo> removedTopSegments = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0).stream()
    .filter(parent -> removedFilesMetadata.stream()
        .anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))
        && allFilesInfo.stream()
            .noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())))
    .collect(Collectors.toList());
metadataToRemove.addAll(removedTopSegments);
segmentsToUpdate.stream().filter(segment -> !removedTopSegments.contains(segment)).forEach(allMetaToHandle::add);
} else {
// table metadata may still be up to date
outdated = false;
}
}
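The core of init() is the three-way classification of files against the last-modified times previously stored in the Metastore: a path absent from the Metastore map is new, a path whose recorded timestamp is older than the file's current modification time is updated, and any Metastore path that no longer appears in the listing is removed. The following is a minimal, self-contained sketch of that classification using plain strings and epoch millis; the FileClassification holder and classify() method are hypothetical names used only for illustration, not Drill APIs.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FileClassificationSketch {

  // Hypothetical holder for the three result lists.
  static class FileClassification {
    final List<String> newFiles = new ArrayList<>();
    final List<String> updatedFiles = new ArrayList<>();
    final List<String> removedFiles = new ArrayList<>();
  }

  // Classifies currently listed files (path -> modification time) against the
  // last-modified times previously recorded in the Metastore.
  static FileClassification classify(Map<String, Long> metastoreTimes, Map<String, Long> currentFiles) {
    FileClassification result = new FileClassification();
    // start by assuming every Metastore file was removed, then un-mark the ones still present
    result.removedFiles.addAll(metastoreTimes.keySet());
    for (Map.Entry<String, Long> file : currentFiles.entrySet()) {
      Long recorded = metastoreTimes.get(file.getKey());
      if (recorded == null) {
        result.newFiles.add(file.getKey());       // never seen by the Metastore
      } else if (recorded < file.getValue()) {
        result.updatedFiles.add(file.getKey());   // modified since the last ANALYZE
      }
      result.removedFiles.remove(file.getKey());  // still present on disk
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, Long> metastore = new HashMap<>();
    metastore.put("/tbl/a.parquet", 100L);
    metastore.put("/tbl/b.parquet", 100L);
    Map<String, Long> current = new HashMap<>();
    current.put("/tbl/a.parquet", 150L);  // updated
    current.put("/tbl/c.parquet", 120L);  // new; b.parquet is removed
    FileClassification c = classify(metastore, current);
    System.out.println(c.newFiles + " " + c.updatedFiles + " " + c.removedFiles);
  }
}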
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class FileMetadataInfoCollector, method getTableScan().
private TableScan getTableScan(PlannerSettings settings, TableScan scanRel, List<String> scanFiles) {
FileSystemPartitionDescriptor descriptor = new FileSystemPartitionDescriptor(settings, scanRel);
List<PartitionLocation> newPartitions = Lists.newArrayList(descriptor.iterator()).stream()
    .flatMap(Collection::stream)
    .flatMap(p -> p.getPartitionLocationRecursive().stream())
    .filter(p -> scanFiles.contains(p.getEntirePartitionLocation().toUri().getPath()))
    .collect(Collectors.toList());
try {
if (!newPartitions.isEmpty()) {
return descriptor.createTableScan(newPartitions, false);
} else {
DrillTable drillTable = descriptor.getTable();
SchemalessScan scan = new SchemalessScan(drillTable.getUserName(),
    ((FormatSelection) descriptor.getTable().getSelection()).getSelection().getSelectionRoot());
return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
    scanRel.getTable(), scan, scanRel.getRowType(),
    DrillScanRel.getProjectedColumns(scanRel.getTable(), true), true);
}
} catch (Exception e) {
throw new RuntimeException("Error happened during recreation of pruned scan", e);
}
}
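Both init() and getTableScan() match files by their bare path string: init() strips the scheme and authority from each FileStatus path, while getTableScan() compares each partition location via toUri().getPath() against the scan-file list built the same way. A small sketch of that normalization, assuming only the Hadoop Path class, follows; the class name is hypothetical.

import org.apache.hadoop.fs.Path;

public class PathNormalizationSketch {
  public static void main(String[] args) {
    // The same file addressed with and without a scheme/authority
    Path qualified = new Path("hdfs://namenode:8020/data/tbl/0_0_0.parquet");
    Path plain = new Path("/data/tbl/0_0_0.parquet");

    // Stripping the scheme and authority reduces both to a plain path string,
    // which is what the matching against Metastore paths and scan files relies on.
    String normalizedQualified = Path.getPathWithoutSchemeAndAuthority(qualified).toUri().getPath();
    String normalizedPlain = Path.getPathWithoutSchemeAndAuthority(plain).toUri().getPath();

    System.out.println(normalizedQualified);                          // /data/tbl/0_0_0.parquet
    System.out.println(normalizedQualified.equals(normalizedPlain));  // true
  }
}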
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class PruneScanRule, method doOnMatch().
protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
final String pruningClassName = getClass().getName();
logger.debug("Beginning partition pruning, pruning class: {}", pruningClassName);
Stopwatch totalPruningTime = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
final BufferAllocator allocator = optimizerContext.getAllocator();
final Object selection = DrillRelOptUtil.getDrillTable(scanRel).getSelection();
MetadataContext metaContext = null;
if (selection instanceof FormatSelection) {
metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
}
RexNode condition;
if (projectRel == null) {
condition = filterRel.getCondition();
} else {
// get the filter as if it were below the projection.
condition = RelOptUtil.pushPastProject(filterRel.getCondition(), projectRel);
}
RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
condition = condition.accept(visitor);
Map<Integer, String> fieldNameMap = new HashMap<>();
List<String> fieldNames = scanRel.getRowType().getFieldNames();
BitSet columnBitset = new BitSet();
BitSet partitionColumnBitSet = new BitSet();
Map<Integer, Integer> partitionMap = new HashMap<>();
int relColIndex = 0;
for (String field : fieldNames) {
final Integer partitionIndex = descriptor.getIdIfValid(field);
if (partitionIndex != null) {
fieldNameMap.put(partitionIndex, field);
partitionColumnBitSet.set(partitionIndex);
columnBitset.set(relColIndex);
// mapping between the relColIndex and partitionIndex
partitionMap.put(relColIndex, partitionIndex);
}
relColIndex++;
}
if (partitionColumnBitSet.isEmpty()) {
if (totalPruningTime != null) {
logger.debug("No partition columns are projected from the scan..continue. Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
// stop watch to track how long we spend in different phases of pruning
// first track how long we spend building the filter tree
Stopwatch miscTimer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
c.analyze(condition);
RexNode pruneCondition = c.getFinalCondition();
BitSet referencedDirsBitSet = c.getReferencedDirs();
if (miscTimer != null) {
logger.debug("Total elapsed time to build and analyze filter tree: {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
miscTimer.reset();
}
if (pruneCondition == null) {
if (totalPruningTime != null) {
logger.debug("No conditions were found eligible for partition pruning. Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
// set up the partitions
List<PartitionLocation> newPartitions = new ArrayList<>();
// total number of partitions
long numTotal = 0;
int batchIndex = 0;
PartitionLocation firstLocation = null;
LogicalExpression materializedExpr = null;
String[] spInfo = null;
int maxIndex = -1;
BitSet matchBitSet = new BitSet();
// Outer loop: iterate over a list of batches of PartitionLocations
for (List<PartitionLocation> partitions : descriptor) {
numTotal += partitions.size();
logger.debug("Evaluating partition pruning for batch {}", batchIndex);
if (batchIndex == 0) {
// save the first location in case everything is pruned
firstLocation = partitions.get(0);
}
final NullableBitVector output = new NullableBitVector(MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
final VectorContainer container = new VectorContainer();
try {
final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
// ParquetPartitionDescriptor.populatePruningVector() expects nullable value vectors,
// so force nullability here to avoid class cast exceptions
MajorType type = descriptor.getVectorType(column, settings).toBuilder().setMode(TypeProtos.DataMode.OPTIONAL).build();
MaterializedField field = MaterializedField.create(column.getLastSegment().getNameSegment().getPath(), type);
ValueVector v = TypeHelper.getNewVector(field, allocator);
v.allocateNew();
vectors[partitionColumnIndex] = v;
container.add(v);
}
if (miscTimer != null) {
// track how long we spend populating partition column vectors
miscTimer.start();
}
// populate partition vectors.
descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);
if (miscTimer != null) {
logger.debug("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
miscTimer.reset();
}
// materialize the expression; only need to do this once
if (batchIndex == 0) {
materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
if (materializedExpr == null) {
// materializePruneExpr logs it already
if (totalPruningTime != null) {
logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
}
output.allocateNew(partitions.size());
if (miscTimer != null) {
// start the timer to evaluate how long we spend in the interpreter evaluation
miscTimer.start();
}
InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);
if (miscTimer != null) {
logger.debug("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
miscTimer.reset();
}
int recordCount = 0;
int qualifiedCount = 0;
// apply the single-partition check only for composite partitions
if (descriptor.supportsMetadataCachePruning() && partitions.get(0).isCompositePartition()) {
// Inner loop: within each batch iterate over the PartitionLocations
for (PartitionLocation part : partitions) {
assert part.isCompositePartition();
if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
newPartitions.add(part);
// Rather than using the PartitionLocation, get the array of partition values for the directories that are
// referenced by the filter since we are not interested in directory references in other parts of the query.
Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
String[] parts = p.getLeft();
int tmpIndex = p.getRight();
maxIndex = Math.max(maxIndex, tmpIndex);
if (spInfo == null) {
// initialization
spInfo = parts;
for (int j = 0; j <= tmpIndex; j++) {
if (parts[j] != null) {
matchBitSet.set(j);
}
}
} else {
// compare the new partition with existing partition
for (int j = 0; j <= tmpIndex; j++) {
if (parts[j] == null || spInfo[j] == null) {
// nulls don't match
matchBitSet.clear(j);
} else {
if (!parts[j].equals(spInfo[j])) {
matchBitSet.clear(j);
}
}
}
}
qualifiedCount++;
}
recordCount++;
}
} else {
// Inner loop: within each batch iterate over the PartitionLocations
for (PartitionLocation part : partitions) {
if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
newPartitions.add(part);
qualifiedCount++;
}
recordCount++;
}
}
logger.debug("Within batch {}: total records: {}, qualified records: {}", batchIndex, recordCount, qualifiedCount);
batchIndex++;
} catch (Exception e) {
logger.warn("Exception while trying to prune partition.", e);
if (totalPruningTime != null) {
logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
// continue without partition pruning
return;
} finally {
container.clear();
if (output != null) {
output.clear();
}
}
}
try {
if (newPartitions.size() == numTotal) {
logger.debug("No partitions were eligible for pruning");
return;
}
// handle the case where all partitions are filtered out.
boolean canDropFilter = true;
boolean wasAllPartitionsPruned = false;
Path cacheFileRoot = null;
if (newPartitions.isEmpty()) {
assert firstLocation != null;
// Add the first non-composite partition location, since execution requires a schema.
// In that case, we should not drop the filter.
newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
canDropFilter = false;
// NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
// directories first and the non-composite partition location will still return
// directories, not files. So, additional processing is done depending on this flag
wasAllPartitionsPruned = true;
logger.debug("All {} partitions were pruned; added back a single partition to allow creating a schema", numTotal);
// set the cacheFileRoot appropriately
if (firstLocation.isCompositePartition()) {
cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(), firstLocation.getCompositePartitionPath());
}
}
logger.debug("Pruned {} partitions down to {}", numTotal, newPartitions.size());
List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
conjuncts.removeAll(pruneConjuncts);
RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);
RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true, filterRel.getCluster().getRexBuilder());
condition = condition.accept(reverseVisitor);
pruneCondition = pruneCondition.accept(reverseVisitor);
if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
// if metadata cache file could potentially be used, then assign a proper cacheFileRoot
int index = -1;
if (!matchBitSet.isEmpty()) {
StringBuilder path = new StringBuilder();
index = matchBitSet.length() - 1;
for (int j = 0; j < matchBitSet.length(); j++) {
if (!matchBitSet.get(j)) {
// stop at the first index with no match and use the immediate
// previous index
index = j - 1;
break;
}
}
for (int j = 0; j <= index; j++) {
path.append("/").append(spInfo[j]);
}
cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(), DrillFileSystemUtil.createPathSafe(path.toString()));
}
if (index != maxIndex) {
// if multiple partitions are being selected, we should not drop the filter
// since we are reading the cache file at a parent/ancestor level
canDropFilter = false;
}
}
RelNode inputRel = descriptor.supportsMetadataCachePruning()
    ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext)
    : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);
if (projectRel != null) {
inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
}
if (newCondition.isAlwaysTrue() && canDropFilter) {
call.transformTo(inputRel);
} else {
final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
call.transformTo(newFilter);
}
setPruneStatus(metaContext, PruneStatus.PRUNED);
} catch (Exception e) {
logger.warn("Exception while using the pruned partitions.", e);
} finally {
if (totalPruningTime != null) {
logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
}
}
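The densest part of doOnMatch() is the spInfo/matchBitSet bookkeeping: the first qualifying composite partition seeds the array of partition directory values, every subsequent qualifying partition clears the bits where its values diverge (or are null), and the leading run of still-set bits then determines how deep a cacheFileRoot can be assigned. The sketch below reproduces just that common-prefix computation in isolation; commonPartitionPrefix() and the surrounding class are hypothetical names, and the real rule additionally tracks maxIndex to decide whether the filter may be dropped.

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class CacheFileRootSketch {

  // Computes the longest common leading run of partition directory values across
  // all qualifying partitions, mirroring the spInfo/matchBitSet bookkeeping in doOnMatch().
  static String commonPartitionPrefix(List<String[]> qualifyingPartitions) {
    String[] spInfo = null;
    BitSet matchBitSet = new BitSet();
    for (String[] parts : qualifyingPartitions) {
      if (spInfo == null) {
        spInfo = parts;                       // first qualifying partition: remember its values
        for (int j = 0; j < parts.length; j++) {
          if (parts[j] != null) {
            matchBitSet.set(j);
          }
        }
      } else {
        for (int j = 0; j < parts.length; j++) {
          if (parts[j] == null || spInfo[j] == null || !parts[j].equals(spInfo[j])) {
            matchBitSet.clear(j);             // values diverge (or are null) at this level
          }
        }
      }
    }
    // keep only the leading run of matching levels, as the index scan in doOnMatch() does
    StringBuilder path = new StringBuilder();
    for (int j = 0; j < matchBitSet.length(); j++) {
      if (!matchBitSet.get(j)) {
        break;
      }
      path.append("/").append(spInfo[j]);
    }
    return path.toString();
  }

  public static void main(String[] args) {
    List<String[]> partitions = Arrays.asList(
        new String[] {"1994", "Q1", "jan"},
        new String[] {"1994", "Q1", "feb"},
        new String[] {"1994", "Q2", null});
    // 1994 is the only level shared by every qualifying partition
    System.out.println(commonPartitionPrefix(partitions));  // /1994
  }
}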
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class AnalyzeTableHandler, method getPlan().
@Override
public PhysicalPlan getPlan(SqlNode sqlNode) throws ValidationException, RelConversionException, IOException, ForemanSetupException {
final SqlAnalyzeTable sqlAnalyzeTable = unwrap(sqlNode, SqlAnalyzeTable.class);
verifyNoUnsupportedFunctions(sqlAnalyzeTable);
SqlNode tableRef = sqlAnalyzeTable.getTableRef();
SqlSelect scanSql = new SqlSelect(SqlParserPos.ZERO, /* position */
SqlNodeList.EMPTY, /* keyword list */
getColumnList(sqlAnalyzeTable), /* select list */
tableRef, /* from */
null, /* where */
null, /* group by */
null, /* having */
null, /* windowDecls */
null, /* orderBy */
null, /* offset */
null);
ConvertedRelNode convertedRelNode = validateAndConvert(rewrite(scanSql));
RelDataType validatedRowType = convertedRelNode.getValidatedRowType();
RelNode relScan = convertedRelNode.getConvertedNode();
DrillTableInfo drillTableInfo = DrillTableInfo.getTableInfoHolder(sqlAnalyzeTable.getTableRef(), config);
String tableName = drillTableInfo.tableName();
AbstractSchema drillSchema = SchemaUtilites.resolveToDrillSchema(config.getConverter().getDefaultSchema(), drillTableInfo.schemaPath());
Table table = SqlHandlerUtil.getTableFromSchema(drillSchema, tableName);
if (table == null) {
throw UserException.validationError().message("No table with given name [%s] exists in schema [%s]", tableName, drillSchema.getFullSchemaName()).build(logger);
} else if (!(table instanceof DrillTable)) {
return DrillStatsTable.notSupported(context, tableName);
}
DrillTable drillTable = (DrillTable) table;
final Object selection = drillTable.getSelection();
if (!(selection instanceof FormatSelection)) {
return DrillStatsTable.notSupported(context, tableName);
}
// Do not support non-parquet tables
FormatSelection formatSelection = (FormatSelection) selection;
FormatPluginConfig formatConfig = formatSelection.getFormat();
if (!((formatConfig instanceof ParquetFormatConfig)
    || ((formatConfig instanceof NamedFormatPluginConfig)
        && ((NamedFormatPluginConfig) formatConfig).getName().equals("parquet")))) {
return DrillStatsTable.notSupported(context, tableName);
}
FileSystemPlugin plugin = (FileSystemPlugin) drillTable.getPlugin();
DrillFileSystem fs = new DrillFileSystem(plugin.getFormatPlugin(formatSelection.getFormat()).getFsConf());
Path selectionRoot = formatSelection.getSelection().getSelectionRoot();
if (!selectionRoot.toUri().getPath().endsWith(tableName) || !fs.getFileStatus(selectionRoot).isDirectory()) {
return DrillStatsTable.notSupported(context, tableName);
}
// Do not recompute statistics if they already exist and are not stale
Path statsFilePath = new Path(selectionRoot, DotDrillType.STATS.getEnding());
if (fs.exists(statsFilePath) && !isStatsStale(fs, statsFilePath)) {
return DrillStatsTable.notRequired(context, tableName);
}
// Convert the query to Drill Logical plan and insert a writer operator on top.
DrillRel drel = convertToDrel(relScan, drillSchema, tableName, sqlAnalyzeTable.getSamplePercent());
Prel prel = convertToPrel(drel, validatedRowType);
logAndSetTextPlan("Drill Physical", prel, logger);
PhysicalOperator pop = convertToPop(prel);
PhysicalPlan plan = convertToPlan(pop);
log("Drill Plan", plan, logger);
return plan;
}
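The format guard in getPlan() is easier to read when pulled out into a predicate: ANALYZE TABLE is only supported when the table's format is Parquet, either as a ParquetFormatConfig directly or as a named plugin config whose name is "parquet". A hedged sketch follows; the helper name is hypothetical and the import paths are assumed from the Drill codebase.

import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.exec.store.dfs.NamedFormatPluginConfig;
import org.apache.drill.exec.store.parquet.ParquetFormatConfig;

public class ParquetFormatCheckSketch {

  // Mirrors the guard in getPlan(): true only when the table's format resolves to Parquet,
  // either directly or through a named ("parquet") format plugin configuration.
  static boolean isParquetFormat(FormatPluginConfig formatConfig) {
    return formatConfig instanceof ParquetFormatConfig
        || (formatConfig instanceof NamedFormatPluginConfig
            && "parquet".equals(((NamedFormatPluginConfig) formatConfig).getName()));
  }
}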