Example 61 with Stopwatch

Use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

Class ImplCreator, method getExec.

/**
 * Create and return fragment RootExec for given FragmentRoot. RootExec has
 * one or more RecordBatches as children (which may contain child
 * RecordBatches and so on).
 *
 * @param context
 *          FragmentContext.
 * @param root
 *          FragmentRoot.
 * @return RootExec of fragment.
 * @throws ExecutionSetupException
 */
public static RootExec getExec(ExecutorFragmentContext context, FragmentRoot root) throws ExecutionSetupException {
    Preconditions.checkNotNull(root);
    Preconditions.checkNotNull(context);
    if (AssertionUtil.isAssertionsEnabled() || context.getOptions().getOption(ExecConstants.ENABLE_ITERATOR_VALIDATOR) || context.getConfig().getBoolean(ExecConstants.ENABLE_ITERATOR_VALIDATION)) {
        root = IteratorValidatorInjector.rewritePlanWithIteratorValidator(context, root);
    }
    final ImplCreator creator = new ImplCreator();
    Stopwatch watch = Stopwatch.createStarted();
    try {
        final RootExec rootExec = creator.getRootExec(root, context);
        // skip over this for SimpleRootExec (testing)
        if (rootExec instanceof BaseRootExec) {
            ((BaseRootExec) rootExec).setOperators(creator.getOperators());
        }
        logger.debug("Took {} ms to create RecordBatch tree", watch.elapsed(TimeUnit.MILLISECONDS));
        if (rootExec == null) {
            throw new ExecutionSetupException("The provided fragment did not have a root node that correctly created a RootExec value.");
        }
        return rootExec;
    } catch (Exception e) {
        AutoCloseables.close(e, creator.getOperators());
        context.getExecutorState().fail(e);
    }
    return null;
}
Also used : ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) IOException(java.io.IOException)
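
The timing idiom in getExec is the simplest Stopwatch pattern in these examples: create a started watch just before the work, then read the elapsed milliseconds for a debug log line. The stand-alone sketch below shows the same pattern outside of Drill, assuming plain Guava (com.google.common.base.Stopwatch) is on the classpath instead of the shaded copy; the class name and the Thread.sleep stand-in for building the RecordBatch tree are illustrative only.

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class StopwatchTimingSketch {
    public static void main(String[] args) throws InterruptedException {
        // Equivalent to the shaded org.apache.drill.shaded.guava...Stopwatch used in Drill.
        Stopwatch watch = Stopwatch.createStarted();
        // Stand-in for creator.getRootExec(root, context).
        Thread.sleep(25);
        System.out.printf("Took %d ms to create RecordBatch tree%n",
            watch.elapsed(TimeUnit.MILLISECONDS));
    }
}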

Example 62 with Stopwatch

Use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

Class PruneScanRule, method doOnMatch.

protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
    final String pruningClassName = getClass().getName();
    logger.debug("Beginning partition pruning, pruning class: {}", pruningClassName);
    Stopwatch totalPruningTime = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
    final BufferAllocator allocator = optimizerContext.getAllocator();
    final Object selection = DrillRelOptUtil.getDrillTable(scanRel).getSelection();
    MetadataContext metaContext = null;
    if (selection instanceof FormatSelection) {
        metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
    }
    RexNode condition;
    if (projectRel == null) {
        condition = filterRel.getCondition();
    } else {
        // get the filter as if it were below the projection.
        condition = RelOptUtil.pushPastProject(filterRel.getCondition(), projectRel);
    }
    RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
    condition = condition.accept(visitor);
    Map<Integer, String> fieldNameMap = new HashMap<>();
    List<String> fieldNames = scanRel.getRowType().getFieldNames();
    BitSet columnBitset = new BitSet();
    BitSet partitionColumnBitSet = new BitSet();
    Map<Integer, Integer> partitionMap = new HashMap<>();
    int relColIndex = 0;
    for (String field : fieldNames) {
        final Integer partitionIndex = descriptor.getIdIfValid(field);
        if (partitionIndex != null) {
            fieldNameMap.put(partitionIndex, field);
            partitionColumnBitSet.set(partitionIndex);
            columnBitset.set(relColIndex);
            // mapping between the relColIndex and partitionIndex
            partitionMap.put(relColIndex, partitionIndex);
        }
        relColIndex++;
    }
    if (partitionColumnBitSet.isEmpty()) {
        if (totalPruningTime != null) {
            logger.debug("No partition columns are projected from the scan..continue. Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        }
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }
    // stop watch to track how long we spend in different phases of pruning
    // first track how long we spend building the filter tree
    Stopwatch miscTimer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
    c.analyze(condition);
    RexNode pruneCondition = c.getFinalCondition();
    BitSet referencedDirsBitSet = c.getReferencedDirs();
    if (miscTimer != null) {
        logger.debug("Total elapsed time to build and analyze filter tree: {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
        miscTimer.reset();
    }
    if (pruneCondition == null) {
        if (totalPruningTime != null) {
            logger.debug("No conditions were found eligible for partition pruning. Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        }
        setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
        return;
    }
    // set up the partitions
    List<PartitionLocation> newPartitions = new ArrayList<>();
    // total number of partitions
    long numTotal = 0;
    int batchIndex = 0;
    PartitionLocation firstLocation = null;
    LogicalExpression materializedExpr = null;
    String[] spInfo = null;
    int maxIndex = -1;
    BitSet matchBitSet = new BitSet();
    // Outer loop: iterate over a list of batches of PartitionLocations
    for (List<PartitionLocation> partitions : descriptor) {
        numTotal += partitions.size();
        logger.debug("Evaluating partition pruning for batch {}", batchIndex);
        if (batchIndex == 0) {
            // save the first location in case everything is pruned
            firstLocation = partitions.get(0);
        }
        final NullableBitVector output = new NullableBitVector(MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
        final VectorContainer container = new VectorContainer();
        try {
            final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
            for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
                SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
                // ParquetPartitionDescriptor.populatePruningVector() expects nullable value vectors,
                // so force nullability here to avoid class cast exceptions
                MajorType type = descriptor.getVectorType(column, settings).toBuilder().setMode(TypeProtos.DataMode.OPTIONAL).build();
                MaterializedField field = MaterializedField.create(column.getLastSegment().getNameSegment().getPath(), type);
                ValueVector v = TypeHelper.getNewVector(field, allocator);
                v.allocateNew();
                vectors[partitionColumnIndex] = v;
                container.add(v);
            }
            if (miscTimer != null) {
                // track how long we spend populating partition column vectors
                miscTimer.start();
            }
            // populate partition vectors.
            descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);
            if (miscTimer != null) {
                logger.debug("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
                miscTimer.reset();
            }
            // materialize the expression; only need to do this once
            if (batchIndex == 0) {
                materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
                if (materializedExpr == null) {
                    // materializePruneExpr logs it already
                    if (totalPruningTime != null) {
                        logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
                    }
                    setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
                    return;
                }
            }
            output.allocateNew(partitions.size());
            if (miscTimer != null) {
                // start the timer to evaluate how long we spend in the interpreter evaluation
                miscTimer.start();
            }
            InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);
            if (miscTimer != null) {
                logger.debug("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
                miscTimer.reset();
            }
            int recordCount = 0;
            int qualifiedCount = 0;
            if (descriptor.supportsMetadataCachePruning() && partitions.get(0).isCompositePartition()) /* apply single partition check only for composite partitions */
            {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    assert part.isCompositePartition();
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        // Rather than using the PartitionLocation, get the array of partition values for the directories that are
                        // referenced by the filter since we are not interested in directory references in other parts of the query.
                        Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
                        String[] parts = p.getLeft();
                        int tmpIndex = p.getRight();
                        maxIndex = Math.max(maxIndex, tmpIndex);
                        if (spInfo == null) {
                            // initialization
                            spInfo = parts;
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] != null) {
                                    matchBitSet.set(j);
                                }
                            }
                        } else {
                            // compare the new partition with existing partition
                            for (int j = 0; j <= tmpIndex; j++) {
                                if (parts[j] == null || spInfo[j] == null) {
                                    // nulls don't match
                                    matchBitSet.clear(j);
                                } else {
                                    if (!parts[j].equals(spInfo[j])) {
                                        matchBitSet.clear(j);
                                    }
                                }
                            }
                        }
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            } else {
                // Inner loop: within each batch iterate over the PartitionLocations
                for (PartitionLocation part : partitions) {
                    if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
                        newPartitions.add(part);
                        qualifiedCount++;
                    }
                    recordCount++;
                }
            }
            logger.debug("Within batch {}: total records: {}, qualified records: {}", batchIndex, recordCount, qualifiedCount);
            batchIndex++;
        } catch (Exception e) {
            logger.warn("Exception while trying to prune partition.", e);
            if (totalPruningTime != null) {
                logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
            }
            setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
            // continue without partition pruning
            return;
        } finally {
            container.clear();
            if (output != null) {
                output.clear();
            }
        }
    }
    try {
        if (newPartitions.size() == numTotal) {
            logger.debug("No partitions were eligible for pruning");
            return;
        }
        // handle the case all partitions are filtered out.
        boolean canDropFilter = true;
        boolean wasAllPartitionsPruned = false;
        Path cacheFileRoot = null;
        if (newPartitions.isEmpty()) {
            assert firstLocation != null;
            // Add the first non-composite partition location, since execution requires schema.
            // In such case, we should not drop filter.
            newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
            canDropFilter = false;
            // NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
            // directories first and the non-composite partition location will still return
            // directories, not files.  So, additional processing is done depending on this flag
            wasAllPartitionsPruned = true;
            logger.debug("All {} partitions were pruned; added back a single partition to allow creating a schema", numTotal);
            // set the cacheFileRoot appropriately
            if (firstLocation.isCompositePartition()) {
                cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(), firstLocation.getCompositePartitionPath());
            }
        }
        logger.debug("Pruned {} partitions down to {}", numTotal, newPartitions.size());
        List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
        List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
        conjuncts.removeAll(pruneConjuncts);
        RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);
        RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true, filterRel.getCluster().getRexBuilder());
        condition = condition.accept(reverseVisitor);
        pruneCondition = pruneCondition.accept(reverseVisitor);
        if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
            // if metadata cache file could potentially be used, then assign a proper cacheFileRoot
            int index = -1;
            if (!matchBitSet.isEmpty()) {
                StringBuilder path = new StringBuilder();
                index = matchBitSet.length() - 1;
                for (int j = 0; j < matchBitSet.length(); j++) {
                    if (!matchBitSet.get(j)) {
                        // stop at the first index with no match and use the immediate
                        // previous index
                        index = j - 1;
                        break;
                    }
                }
                for (int j = 0; j <= index; j++) {
                    path.append("/").append(spInfo[j]);
                }
                cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(), DrillFileSystemUtil.createPathSafe(path.toString()));
            }
            if (index != maxIndex) {
                // if multiple partitions are being selected, we should not drop the filter
                // since we are reading the cache file at a parent/ancestor level
                canDropFilter = false;
            }
        }
        RelNode inputRel = descriptor.supportsMetadataCachePruning() ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext) : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);
        if (projectRel != null) {
            inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
        }
        if (newCondition.isAlwaysTrue() && canDropFilter) {
            call.transformTo(inputRel);
        } else {
            final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
            call.transformTo(newFilter);
        }
        setPruneStatus(metaContext, PruneStatus.PRUNED);
    } catch (Exception e) {
        logger.warn("Exception while using the pruned partitions.", e);
    } finally {
        if (totalPruningTime != null) {
            logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
        }
    }
}
Also used : PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) HashMap(java.util.HashMap) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) ArrayList(java.util.ArrayList) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) NullableBitVector(org.apache.drill.exec.vector.NullableBitVector) SchemaPath(org.apache.drill.common.expression.SchemaPath) PartitionDescriptor(org.apache.drill.exec.planner.PartitionDescriptor) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) BitSet(java.util.BitSet) MaterializedField(org.apache.drill.exec.record.MaterializedField) BufferAllocator(org.apache.drill.exec.memory.BufferAllocator) VectorContainer(org.apache.drill.exec.record.VectorContainer) ValueVector(org.apache.drill.exec.vector.ValueVector) RelNode(org.apache.calcite.rel.RelNode) MetadataContext(org.apache.drill.exec.store.dfs.MetadataContext) RexNode(org.apache.calcite.rex.RexNode)
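
Two Stopwatch details in doOnMatch are worth calling out: the watches are created only when debug logging is enabled, so production runs pay nothing for timing, and the same miscTimer instance is recycled across phases with reset() followed by start(). Below is a minimal sketch of that recycling pattern, assuming plain Guava and SLF4J on the classpath; PhaseTimingSketch and the two doPhase methods are hypothetical stand-ins, not Drill code.

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PhaseTimingSketch {
    private static final Logger logger = LoggerFactory.getLogger(PhaseTimingSketch.class);

    public static void main(String[] args) {
        // Create the watch only if its reading will actually be logged.
        Stopwatch miscTimer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
        doPhaseOne();
        if (miscTimer != null) {
            logger.debug("Phase one took {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
            miscTimer.reset();   // zeroes the elapsed time and leaves the watch stopped
        }
        if (miscTimer != null) {
            miscTimer.start();   // restart the same instance for the next phase
        }
        doPhaseTwo();
        if (miscTimer != null) {
            logger.debug("Phase two took {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
        }
    }

    private static void doPhaseOne() { /* stand-in for building and analyzing the filter tree */ }
    private static void doPhaseTwo() { /* stand-in for interpreter evaluation of a batch */ }
}

In the Drill code the start() call likewise sits right before each timed phase, so one watch measures each phase independently while totalPruningTime keeps running for the whole rule invocation.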

Example 63 with Stopwatch

Use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

Class DefaultSqlHandler, method convertToPrel.

/**
 * Applies physical rules and certain transformations to convert drill relational node into physical one.
 *
 * @param drel relational node
 * @param validatedRowType final output row type
 * @return physical relational node
 * @throws RelConversionException
 * @throws SqlUnsupportedException
 */
protected Prel convertToPrel(RelNode drel, RelDataType validatedRowType) throws RelConversionException, SqlUnsupportedException {
    Preconditions.checkArgument(drel.getConvention() == DrillRel.DRILL_LOGICAL);
    final RelTraitSet traits = drel.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON);
    Prel phyRelNode;
    try {
        final Stopwatch watch = Stopwatch.createStarted();
        final RelNode relNode = transform(PlannerType.VOLCANO, PlannerPhase.PHYSICAL, drel, traits, false);
        phyRelNode = (Prel) relNode.accept(new PrelFinalizer());
        // log externally as we need to finalize before traversing the tree.
        log(PlannerType.VOLCANO, PlannerPhase.PHYSICAL, phyRelNode, logger, watch);
    } catch (RelOptPlanner.CannotPlanException ex) {
        logger.error(ex.getMessage());
        if (JoinUtils.checkCartesianJoin(drel)) {
            throw JoinUtils.cartesianJoinPlanningException();
        } else {
            throw ex;
        }
    }
    OptionManager queryOptions = context.getOptions();
    if (context.getPlannerSettings().isMemoryEstimationEnabled() && !MemoryEstimationVisitor.enoughMemory(phyRelNode, queryOptions, context.getActiveEndpoints().size())) {
        log("Not enough memory for this plan", phyRelNode, logger, null);
        logger.debug("Re-planning without hash operations.");
        queryOptions.setLocalOption(PlannerSettings.HASHJOIN.getOptionName(), false);
        queryOptions.setLocalOption(PlannerSettings.HASHAGG.getOptionName(), false);
        try {
            final RelNode relNode = transform(PlannerType.VOLCANO, PlannerPhase.PHYSICAL, drel, traits);
            phyRelNode = (Prel) relNode.accept(new PrelFinalizer());
        } catch (RelOptPlanner.CannotPlanException ex) {
            logger.error(ex.getMessage());
            if (JoinUtils.checkCartesianJoin(drel)) {
                throw JoinUtils.cartesianJoinPlanningException();
            } else {
                throw ex;
            }
        }
    }
    // Handy way to visualize the plan while debugging
    // ExplainHandler.printPlan(phyRelNode, context);
    /* The order of the following transformations is important */
    /*
     * 0.)
     * Add top project before screen operator or writer to ensure that final output column names are preserved.
     */
    phyRelNode = TopProjectVisitor.insertTopProject(phyRelNode, validatedRowType);
    /*
     * 1.) For select * from join query, we need insert project on top of scan and a top project just
     * under screen operator. The project on top of scan will rename from * to T1*, while the top project
     * will rename T1* to *, before it output the final result. Only the top project will allow
     * duplicate columns, since user could "explicitly" ask for duplicate columns ( select *, col, *).
     * The rest of projects will remove the duplicate column when we generate POP in json format.
     */
    phyRelNode = StarColumnConverter.insertRenameProject(phyRelNode);
    log("Physical RelNode after Top and Rename Project inserting: ", phyRelNode, logger, null);
    /*
     * 2.)
     * Join might cause naming conflicts from its left and right child.
     * In such case, we have to insert Project to rename the conflicting names.
     * Unnest operator might need to adjust the correlated field after the physical planning.
     */
    phyRelNode = AdjustOperatorsSchemaVisitor.adjustSchema(phyRelNode);
    /*
     * 2.1) Swap left / right for INNER hash join, if left's row count is < (1 + margin) right's row count.
     * We want to have smaller dataset on the right side, since hash table builds on right side.
     */
    if (context.getPlannerSettings().isHashJoinSwapEnabled()) {
        phyRelNode = SwapHashJoinVisitor.swapHashJoin(phyRelNode, context.getPlannerSettings().getHashJoinSwapMarginFactor());
    }
    if (context.getPlannerSettings().isParquetRowGroupFilterPushdownPlanningEnabled()) {
        phyRelNode = (Prel) transform(PlannerType.HEP_BOTTOM_UP, PlannerPhase.PHYSICAL_PARTITION_PRUNING, phyRelNode);
    }
    /*
     * 2.2) Break up all expressions with complex outputs into their own project operations
     */
    phyRelNode = phyRelNode.accept(new SplitUpComplexExpressions(config.getConverter().getTypeFactory(), context.getPlannerSettings().functionImplementationRegistry, phyRelNode.getCluster().getRexBuilder()), null);
    /*
     * 2.3) Projections that contain reference to flatten are rewritten as Flatten operators followed by Project
     */
    phyRelNode = phyRelNode.accept(new RewriteProjectToFlatten(config.getConverter().getTypeFactory(), context.getDrillOperatorTable()), null);
    /*
     * 3.)
     * Since our operators work via names rather than indices, we have to reorder any
     * output before we return data to the user as we may have accidentally shuffled things.
     * This adds a trivial project to reorder columns prior to output.
     */
    phyRelNode = FinalColumnReorderer.addFinalColumnOrdering(phyRelNode);
    /*
     * 4.)
     * If two fragments are both estimated to be parallelization one, remove the exchange
     * separating them.
     */
    phyRelNode = ExcessiveExchangeIdentifier.removeExcessiveExchanges(phyRelNode, targetSliceSize);
    /* Insert the IMPLICIT_COLUMN in the lateral unnest pipeline */
    phyRelNode = LateralUnnestRowIDVisitor.insertRowID(phyRelNode);
    /* 6.)
     * if the client does not support complex types (Map, Repeated)
     * insert a project which would convert
     */
    if (!context.getSession().isSupportComplexTypes()) {
        logger.debug("Client does not support complex types, add ComplexToJson operator.");
        phyRelNode = ComplexToJsonPrelVisitor.addComplexToJsonPrel(phyRelNode);
    }
    /* 7.)
     * Insert LocalExchange (mux and/or demux) nodes
     */
    phyRelNode = InsertLocalExchangeVisitor.insertLocalExchanges(phyRelNode, queryOptions);
    /*
     * 8.)
     * Insert RuntimeFilter over Scan nodes
     */
    if (context.isRuntimeFilterEnabled()) {
        phyRelNode = RuntimeFilterVisitor.addRuntimeFilter(phyRelNode, context);
    }
    /* 9.)
     * Next, we add any required selection vector removers given the supported encodings of each
     * operator. This will ultimately move to a new trait but we're managing here for now to avoid
     * introducing new issues in planning before the next release
     */
    phyRelNode = SelectionVectorPrelVisitor.addSelectionRemoversWhereNecessary(phyRelNode);
    /*
     * 10.)
     * Insert project above the screen operator or writer to ensure that final output column names are preserved after all optimizations.
     */
    phyRelNode = TopProjectVisitor.insertTopProject(phyRelNode, validatedRowType);
    /* 11.)
     * Finally, Make sure that the no rels are repeats.
     * This could happen in the case of querying the same table twice as Optiq may canonicalize these.
     */
    phyRelNode = RelUniqifier.uniqifyGraph(phyRelNode);
    return phyRelNode;
}
Also used : RelNode(org.apache.calcite.rel.RelNode) RewriteProjectToFlatten(org.apache.drill.exec.planner.physical.visitor.RewriteProjectToFlatten) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) RelTraitSet(org.apache.calcite.plan.RelTraitSet) SplitUpComplexExpressions(org.apache.drill.exec.planner.physical.visitor.SplitUpComplexExpressions) RelOptPlanner(org.apache.calcite.plan.RelOptPlanner) OptionManager(org.apache.drill.exec.server.options.OptionManager) Prel(org.apache.drill.exec.planner.physical.Prel)
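
convertToPrel wraps the Volcano planning call in a Stopwatch and hands the watch to Drill's log(...) helper. A generic version of the same idea is sketched below: wrap any expensive computation and report its duration from a finally block, so the time is logged even when the call throws (much as totalPruningTime is logged in the previous example). The timeAndLog helper, the label, and the Fibonacci stand-in are hypothetical, not part of Drill; plain Guava and SLF4J are assumed.

import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TimedTransformSketch {
    private static final Logger logger = LoggerFactory.getLogger(TimedTransformSketch.class);

    // Hypothetical helper: run a computation and log how long it took, even on failure.
    static <T> T timeAndLog(String label, Supplier<T> work) {
        Stopwatch watch = Stopwatch.createStarted();
        try {
            return work.get();
        } finally {
            logger.debug("{} took {} ms", label, watch.elapsed(TimeUnit.MILLISECONDS));
        }
    }

    public static void main(String[] args) {
        // Stand-in for transform(PlannerType.VOLCANO, PlannerPhase.PHYSICAL, ...).
        long result = timeAndLog("physical planning", () -> slowFibonacci(35));
        System.out.println(result);
    }

    private static long slowFibonacci(int n) {
        return n < 2 ? n : slowFibonacci(n - 1) + slowFibonacci(n - 2);
    }
}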

Example 64 with Stopwatch

Use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

Class FileSelection, method create.

public static FileSelection create(DrillFileSystem fs, String parent, String path, boolean allowAccessOutsideWorkspace) throws IOException {
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    boolean hasWildcard = path.contains(WILD_CARD);
    String child = DrillStringUtils.removeLeadingSlash(path);
    Path combined = new Path(parent, child);
    // Unescape chars escaped with '\' for our root path to be consistent with what
    // fs.globStatus(...) below will do with them, c.f. DRILL-8064
    Path root = new Path(parent, DrillStringUtils.unescapeJava(child));
    if (!allowAccessOutsideWorkspace) {
        checkBackPaths(new Path(parent).toUri().getPath(), combined.toUri().getPath(), path);
    }
    // note: this will expand wildcards
    FileStatus[] statuses = fs.globStatus(combined);
    if (statuses == null) {
        return null;
    }
    FileSelection fileSel = create(Arrays.asList(statuses), null, root);
    if (timer != null) {
        logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
    }
    if (fileSel == null) {
        return null;
    }
    fileSel.setHadWildcard(hasWildcard);
    return fileSel;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch)
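
Unlike the earlier examples, FileSelection.create calls timer.stop() after logging. With Guava's Stopwatch, stop() freezes the accumulated duration so later elapsed(...) calls keep returning the measured value, while reset() zeroes it. The sketch below contrasts the two, again using plain Guava as a stand-in for the shaded class; the sleep is an illustrative placeholder for the filesystem work.

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class StopVersusResetSketch {
    public static void main(String[] args) throws InterruptedException {
        Stopwatch timer = Stopwatch.createStarted();
        Thread.sleep(10);   // stand-in for fs.globStatus(...) and building the selection
        timer.stop();       // freezes the measured duration
        long frozen = timer.elapsed(TimeUnit.MILLISECONDS);
        timer.reset();      // back to zero, still stopped
        System.out.printf("frozen=%d ms, after reset=%d ms%n",
            frozen, timer.elapsed(TimeUnit.MILLISECONDS));
    }
}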

Example 65 with Stopwatch

Use of org.apache.drill.shaded.guava.com.google.common.base.Stopwatch in project drill by apache.

Class FileSelection, method createFromDirectories.

public static FileSelection createFromDirectories(List<Path> dirPaths, FileSelection selection, Path cacheFileRoot) {
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    Path root = selection.getSelectionRoot();
    Objects.requireNonNull(root, "Selection root is null");
    if (dirPaths == null || dirPaths.isEmpty()) {
        throw new DrillRuntimeException("List of directories is null or empty");
    }
    // for wildcard the directory list should have already been expanded
    List<Path> dirs = selection.hadWildcard() ? selection.getFileStatuses().stream().map(FileStatus::getPath).collect(Collectors.toList()) : new ArrayList<>(dirPaths);
    Path rootPath = handleWildCard(root);
    URI uri = selection.getFileStatuses().get(0).getPath().toUri();
    Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath());
    FileSelection fileSel = new FileSelection(null, dirs, path, cacheFileRoot, false);
    fileSel.setHadWildcard(selection.hadWildcard());
    if (timer != null) {
        logger.debug("FileSelection.createFromDirectories() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
    }
    return fileSel;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Stopwatch(org.apache.drill.shaded.guava.com.google.common.base.Stopwatch) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) URI(java.net.URI)

Aggregations

Stopwatch (org.apache.drill.shaded.guava.com.google.common.base.Stopwatch): 68
IOException (java.io.IOException): 13
Path (org.apache.hadoop.fs.Path): 12
ArrayList (java.util.ArrayList): 8
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 8
FileStatus (org.apache.hadoop.fs.FileStatus): 8
DrillBuf (io.netty.buffer.DrillBuf): 7
ByteBuffer (java.nio.ByteBuffer): 7
SchemaPath (org.apache.drill.common.expression.SchemaPath): 7
HashMap (java.util.HashMap): 5
RelNode (org.apache.calcite.rel.RelNode): 5
SchemaChangeException (org.apache.drill.exec.exception.SchemaChangeException): 4
DrillbitEndpoint (org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint): 4
VectorContainer (org.apache.drill.exec.record.VectorContainer): 4
SelectionVector4 (org.apache.drill.exec.record.selection.SelectionVector4): 4
ValueVector (org.apache.drill.exec.vector.ValueVector): 4
CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName): 4
File (java.io.File): 3
ResultSet (java.sql.ResultSet): 3
ResultSetMetaData (java.sql.ResultSetMetaData): 3