
Example 91 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class HiveInputFormat method pushProjectionsAndFiltersAndAsOf.

protected void pushProjectionsAndFiltersAndAsOf(JobConf jobConf, Path splitPath) {
    Path splitPathWithNoSchema = Path.getPathWithoutSchemeAndAuthority(splitPath);
    if (this.mrwork == null) {
        init(job);
    }
    if (this.mrwork.getPathToAliases() == null) {
        return;
    }
    ArrayList<String> aliases = new ArrayList<String>();
    Iterator<Entry<Path, List<String>>> iterator = this.mrwork.getPathToAliases().entrySet().iterator();
    Set<Path> splitParentPaths = null;
    int pathsSize = this.mrwork.getPathToAliases().entrySet().size();
    while (iterator.hasNext()) {
        Entry<Path, List<String>> entry = iterator.next();
        Path key = entry.getKey();
        // Note for HIVE-1903: for non-native tables we might only see a table location provided as path in splitPath.
        // In this case the code part below should still work, as the "key" will be an exact match for splitPath.
        // Also: we should not anticipate table paths to be under other tables' locations.
        boolean match;
        if (pathsSize > 1) {
            // Multiple candidate paths: match against the pre-computed parent paths of the split
            if (splitParentPaths == null) {
                splitParentPaths = new HashSet<>();
                FileUtils.populateParentPaths(splitParentPaths, splitPath);
                FileUtils.populateParentPaths(splitParentPaths, splitPathWithNoSchema);
            }
            match = splitParentPaths.contains(key);
        } else {
            match = FileUtils.isPathWithinSubtree(splitPath, key) || FileUtils.isPathWithinSubtree(splitPathWithNoSchema, key);
        }
        if (match) {
            List<String> list = entry.getValue();
            for (String val : list) {
                aliases.add(val);
            }
        }
    }
    for (String alias : aliases) {
        Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(alias);
        if (op instanceof TableScanOperator) {
            TableScanOperator ts = (TableScanOperator) op;
            // push down projections.
            ColumnProjectionUtils.appendReadColumns(jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
            // push down filters and as of information
            pushFiltersAndAsOf(jobConf, ts, this.mrwork);
            AcidUtils.setAcidOperationalProperties(job, ts.getConf().isTranscationalTable(), ts.getConf().getAcidOperationalProperties());
            AcidUtils.setValidWriteIdList(job, ts.getConf());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) List(java.util.List) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList)
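In this method, the input split's path is matched against MapWork's pathToAliases map, and for every matched TableScanOperator the needed columns, filters, AS OF information and ACID properties are pushed into the JobConf. Below is a minimal, self-contained sketch of the parent-path pre-computation used when there are many candidate table locations: compute every ancestor of the split path once, then match each location with a single set lookup. The helpers populateParentPaths and matches are illustrative stand-ins, not Hive's FileUtils API.

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.Path;

public class ParentPathMatchSketch {

    // Collect the path itself and all of its ancestors into the given set.
    static void populateParentPaths(Set<Path> parents, Path path) {
        Path current = path;
        while (current != null) {
            parents.add(current);
            current = current.getParent();
        }
    }

    // True if the candidate table location is the split path or one of its ancestors.
    static boolean matches(Set<Path> splitParentPaths, Path tableLocation) {
        return splitParentPaths.contains(tableLocation);
    }

    public static void main(String[] args) {
        Set<Path> parents = new HashSet<>();
        populateParentPaths(parents, new Path("hdfs://nn:8020/warehouse/db/tbl/part=1/file0"));
        // prints true: the table location is an ancestor of the split path
        System.out.println(matches(parents, new Path("hdfs://nn:8020/warehouse/db/tbl")));
    }
}

With a single candidate path the excerpt instead calls FileUtils.isPathWithinSubtree directly, which avoids building the ancestor set for the common case.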

Example 92 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class OpProcFactory method createFilter.

protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) throws SemanticException {
    RowSchema inputRS = op.getSchema();
    // combine all predicates into a single expression
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
    while (iterator.hasNext()) {
        for (ExprNodeDesc pred : iterator.next()) {
            preds = ExprNodeDescUtils.split(pred, preds);
        }
    }
    if (preds.isEmpty()) {
        return null;
    }
    ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
    if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
        boolean pushFilterToStorage;
        HiveConf hiveConf = owi.getParseContext().getConf();
        pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
        if (pushFilterToStorage) {
            condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
            if (condn == null) {
                // we pushed the whole thing down
                return null;
            }
        }
    }
    // add new filter op
    List<Operator<? extends OperatorDesc>> originalChilren = op.getChildOperators();
    op.setChildOperators(null);
    Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
    output.setChildOperators(originalChilren);
    for (Operator<? extends OperatorDesc> ch : originalChilren) {
        List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
        int pos = parentOperators.indexOf(op);
        assert pos != -1;
        parentOperators.remove(pos);
        // put the new filter op in the old parent's position
        parentOperators.add(pos, output);
    }
    if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
        // remove the candidate filter ops
        removeCandidates(op, owi);
    }
    // push down current ppd context to newly added filter
    ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
    if (walkerInfo != null) {
        walkerInfo.getNonFinalCandidates().clear();
        owi.putPrunedPreds(output, walkerInfo);
    }
    return output;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) List(java.util.List) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
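createFilter merges all pushable predicates into one expression, optionally hands it to the storage handler when the operator is a TableScanOperator, and then splices a new FilterOperator between the operator and its original children. The sketch below shows just that splice step on a toy node class rather than Hive's Operator hierarchy: the new filter inherits the old children, and each child replaces the old parent with the filter at the same position in its parent list.

import java.util.ArrayList;
import java.util.List;

public class FilterSpliceSketch {

    // Toy stand-in for an operator with doubly linked parent/child lists.
    static class Node {
        final String name;
        List<Node> children = new ArrayList<>();
        List<Node> parents = new ArrayList<>();
        Node(String name) { this.name = name; }
    }

    // Insert 'filter' between 'op' and all of op's current children.
    static Node spliceBelow(Node op, Node filter) {
        List<Node> originalChildren = op.children;   // remember the old children
        op.children = new ArrayList<>();
        op.children.add(filter);                     // op now feeds the filter
        filter.parents.add(op);
        filter.children = originalChildren;          // the filter feeds the old children
        for (Node child : originalChildren) {
            int pos = child.parents.indexOf(op);
            child.parents.set(pos, filter);          // keep the child's parent order intact
        }
        return filter;
    }

    public static void main(String[] args) {
        Node scan = new Node("TS");
        Node select = new Node("SEL");
        scan.children.add(select);
        select.parents.add(scan);
        Node filter = spliceBelow(scan, new Node("FIL"));
        // prints TS -> FIL -> SEL
        System.out.println(scan.name + " -> " + filter.name + " -> " + filter.children.get(0).name);
    }
}

The excerpt is similarly careful to reinsert the filter at the same index in each child's parent list, since operators can be sensitive to the order of their parents.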

Example 93 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class AbstractCreateViewAnalyzer method addInputs.

private void addInputs(SemanticAnalyzer analyzer) {
    inputs.addAll(analyzer.getInputs());
    for (Map.Entry<String, TableScanOperator> entry : analyzer.getTopOps().entrySet()) {
        String alias = entry.getKey();
        TableScanOperator topOp = entry.getValue();
        ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(alias, analyzer.getViewAliasToInput());
        // Adds tables only for create view (PPD filter can be appended by outer query)
        Table table = topOp.getConf().getTableMetadata();
        PlanUtils.addInput(inputs, new ReadEntity(table, parentViewInfo));
    }
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) Map(java.util.Map)
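addInputs registers every table read by the view definition as a ReadEntity input, taking the tables from the analyzer's top TableScanOperators and preserving the parent-view relationship. A minimal sketch of the same bookkeeping, with plain strings standing in for Hive's Table and ReadEntity types, assuming only that inputs should be de-duplicated by fully qualified table name:

import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

public class ViewInputsSketch {

    // topOps maps each query alias to the fully qualified table its scan reads
    // (a stand-in for analyzer.getTopOps() plus TableScanDesc.getTableMetadata()).
    static Set<String> collectInputs(Map<String, String> topOps) {
        Set<String> inputs = new LinkedHashSet<>();
        for (Map.Entry<String, String> entry : topOps.entrySet()) {
            inputs.add(entry.getValue());   // record each table once as a read input
        }
        return inputs;
    }

    public static void main(String[] args) {
        // two aliases over the same table collapse to a single input: [db.sales]
        System.out.println(collectInputs(Map.of("t1", "db.sales", "t2", "db.sales")));
    }
}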

Example 94 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class ConvertJoinMapJoin method hasExternalTableAncestor.

private static boolean hasExternalTableAncestor(Operator op, StringBuilder sb) {
    boolean result = false;
    Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class);
    if (ancestor != null) {
        TableScanOperator ts = (TableScanOperator) ancestor;
        if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
            sb.append(ts.getConf().getTableMetadata().getFullyQualifiedName());
            return true;
        }
    }
    return result;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator)
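hasExternalTableAncestor walks upstream from the given operator to find a TableScanOperator and, if the scanned table is external, appends its fully qualified name to the StringBuilder and returns true. The sketch below approximates the upstream walk on a toy operator class; treating a unique match as the success condition mirrors what the name findSingleOperatorUpstream suggests, but the exact semantics of that Hive utility are an assumption here.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class UpstreamSearchSketch {

    // Toy stand-in for an operator that only knows its kind and its parents.
    static class Op {
        final String kind;
        final List<Op> parents = new ArrayList<>();
        Op(String kind) { this.kind = kind; }
    }

    // Walk parent links and return the single node of the requested kind, or null
    // when there are none or several matches.
    static Op findSingleUpstream(Op start, String kind) {
        Set<Op> matches = new HashSet<>();
        Set<Op> seen = new HashSet<>();
        Deque<Op> queue = new ArrayDeque<>(start.parents);
        while (!queue.isEmpty()) {
            Op current = queue.poll();
            if (!seen.add(current)) {
                continue;                   // skip shared ancestors we already visited
            }
            if (current.kind.equals(kind)) {
                matches.add(current);       // collect the match and do not walk past it
            } else {
                queue.addAll(current.parents);
            }
        }
        return matches.size() == 1 ? matches.iterator().next() : null;
    }
}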

Example 95 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class DynamicPartitionPruningOptimization method disableSemiJoinOptDueToExternalTable.

private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOperator ts, DynamicListContext ctx) {
    boolean disableSemiJoin = false;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS)) {
        // We already have the TableScan for one side of the join. Check this now.
        if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
            LOG.debug("Disabling semijoin optimzation on {} since it is an external table.", ts.getConf().getTableMetadata().getFullyQualifiedName());
            disableSemiJoin = true;
        } else {
            // Check the other side of the join, using the DynamicListContext
            ExprNodeDesc exprNodeDesc = ctx.getKeyCol();
            ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc);
            if (colExpr != null) {
                // fetch table alias
                ExprNodeDescUtils.ColumnOrigin columnOrigin = ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator);
                if (columnOrigin != null && columnOrigin.op instanceof TableScanOperator) {
                    // Join key origin has been traced to a table column. Check if the table is external.
                    TableScanOperator joinKeyTs = (TableScanOperator) columnOrigin.op;
                    if (MetaStoreUtils.isExternalTable(joinKeyTs.getConf().getTableMetadata().getTTable())) {
                        LOG.debug("Join key {} is from {} which is an external table. Disabling semijoin optimization.", columnOrigin.col, joinKeyTs.getConf().getTableMetadata().getFullyQualifiedName());
                        disableSemiJoin = true;
                    }
                }
            }
        }
    }
    return disableSemiJoin;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDescUtils(org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
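This check only fires when HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS is enabled; semijoin reduction is then disabled if either the scanned table itself or the table the join key column traces back to is external. The net decision reduces to the boolean sketch below, with plain flags standing in for the HiveConf lookup and the two metastore checks from the excerpt.

public class SemiJoinGuardSketch {

    // Returns true when the semijoin optimization should be disabled.
    static boolean disableSemiJoin(boolean unsafeExternalOpsDisabled,
                                   boolean scanSideIsExternal,
                                   boolean joinKeyOriginIsExternal) {
        if (!unsafeExternalOpsDisabled) {
            return false;                    // guard flag off: never disable
        }
        // Disable when either side of the semijoin touches an external table.
        return scanSideIsExternal || joinKeyOriginIsExternal;
    }
}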

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 133
Operator (org.apache.hadoop.hive.ql.exec.Operator) 52
ArrayList (java.util.ArrayList) 47
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 44
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 36
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 35
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 32
HashMap (java.util.HashMap) 30
Path (org.apache.hadoop.fs.Path) 30
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator) 29
Table (org.apache.hadoop.hive.ql.metadata.Table) 26
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 25
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) 24
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator) 24
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 23
LinkedHashMap (java.util.LinkedHashMap) 22
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 22
MapWork (org.apache.hadoop.hive.ql.plan.MapWork) 22
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc) 22
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 21