
Example 36 with ReduceSinkDesc

Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in the apache/hive project.

From the class DynamicPartitionPruningOptimization, the method createFinalRsForSemiJoinOp:

private void createFinalRsForSemiJoinOp(ParseContext parseContext, TableScanOperator ts, GroupByOperator gb, ExprNodeDesc key, String keyBaseAlias, ExprNodeDesc colExpr, boolean isHint) throws SemanticException {
    ArrayList<String> gbOutputNames = new ArrayList<>();
    // One each for min, max and bloom filter
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
    int colPos = 0;
    ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbOutputNames.size() - 1; i++) {
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
        rsValueCols.add(expr);
    }
    // Bloom Filter uses binary
    ExprNodeColumnDesc colBFExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos++), "", false);
    rsValueCols.add(colBFExpr);
    // Create the final Reduce Sink Operator
    ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID, NullOrdering.defaultNullOrder(parseContext.getConf()));
    ReduceSinkOperator rsOpFinal = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDescFinal, new RowSchema(gb.getSchema()), gb);
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
    rsOpFinal.setColumnExprMap(columnExprMap);
    LOG.debug("DynamicSemiJoinPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
    SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(ts, isHint);
    parseContext.getRsToSemiJoinBranchInfo().put(rsOpFinal, sjInfo);
    // Save the info that is required at query time to resolve dynamic/runtime values.
    RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
    TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
    List<String> dynamicValueIDs = new ArrayList<String>();
    dynamicValueIDs.add(keyBaseAlias + "_min");
    dynamicValueIDs.add(keyBaseAlias + "_max");
    dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
    runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
    runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
    runtimeValuesInfo.setColExprs(rsValueCols);
    runtimeValuesInfo.setTargetColumns(Collections.singletonList(colExpr));
    parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
    parseContext.getColExprToGBMap().put(key, gb);
}
Also used: RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SemiJoinHint(org.apache.hadoop.hive.ql.parse.SemiJoinHint) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
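
For orientation, here is a minimal standalone sketch (plain Java, not Hive code) of the naming convention this method relies on: one dynamic value ID each for the min, max and bloom-filter aggregates, derived from keyBaseAlias. The class name DynamicValueIdSketch and the alias "orders_o_key" are made up for illustration.

import java.util.Arrays;
import java.util.List;

public class DynamicValueIdSketch {
    // Mirrors the dynamicValueIDs list built in createFinalRsForSemiJoinOp above.
    static List<String> dynamicValueIds(String keyBaseAlias) {
        return Arrays.asList(keyBaseAlias + "_min",
                             keyBaseAlias + "_max",
                             keyBaseAlias + "_bloom_filter");
    }

    public static void main(String[] args) {
        // Hypothetical alias; prints [orders_o_key_min, orders_o_key_max, orders_o_key_bloom_filter]
        System.out.println(dynamicValueIds("orders_o_key"));
    }
}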

Example 37 with ReduceSinkDesc

Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in the apache/hive project.

From the class ColumnPrunerProcFactory, the method pruneReduceSinkOperator:

private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowSchema oldRS = reduce.getSchema();
    List<ColumnInfo> old_signature = oldRS.getSignature();
    List<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);
    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();
    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < retainFlags.length; i++) {
        String outputCol = valueColNames.get(i);
        ExprNodeDesc outputColExpr = valueExprs.get(i);
        if (!retainFlags[i]) {
            ColumnInfo colInfo = oldRS.getColumnInfo(outputCol);
            if (colInfo == null) {
                outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
                colInfo = oldRS.getColumnInfo(outputCol);
            }
            // do row resolve once more because the ColumnInfo in row resolver is already removed
            if (colInfo == null) {
                continue;
            }
            // i.e. this column does not appear in the keyExprs of the RS
            if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
                oldMap.remove(outputCol);
                signature.remove(colInfo);
            }
        } else {
            newValueColNames.add(outputCol);
            newValueExprs.add(outputColExpr);
        }
    }
    oldRS.setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
Also used: RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
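
A simplified, self-contained sketch of the retain-flag pattern used above, with plain strings standing in for Hive's column descriptors; retainFlags[i] decides whether value column i survives the prune. The class name RetainFlagPruneSketch is made up for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RetainFlagPruneSketch {
    // Analogous to rebuilding newValueColNames/newValueExprs in pruneReduceSinkOperator.
    static List<String> prune(List<String> valueColNames, boolean[] retainFlags) {
        List<String> retained = new ArrayList<>();
        for (int i = 0; i < retainFlags.length; i++) {
            if (retainFlags[i]) {
                retained.add(valueColNames.get(i));
            }
        }
        return retained;
    }

    public static void main(String[] args) {
        List<String> cols = Arrays.asList("_col0", "_col1", "_col2");
        // Keep _col0 and _col2, drop _col1.
        System.out.println(prune(cols, new boolean[] { true, false, true }));
    }
}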

Example 38 with ReduceSinkDesc

Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in the apache/hive project.

From the class GlobalLimitOptimizer, the method checkQbpForGlobalLimit:

/**
 * Check the limit in all sub-queries.
 *
 * @return the single LimitOperator if there is one and only one limit across all
 *         sub-queries; null otherwise (either no limit or more than one)
 */
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
    Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
    Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
    // existsOrdering AND existsPartitioning should be false.
    for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
        ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
        if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
            return null;
        }
    }
    // - There cannot exist any (distinct) aggregate.
    for (Operator<?> op : ops.get(GroupByOperator.class)) {
        GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
        if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
            return null;
        }
    }
    // - There cannot exist any sampling predicate.
    for (Operator<?> op : ops.get(FilterOperator.class)) {
        FilterDesc filterConf = ((FilterOperator) op).getConf();
        if (filterConf.getIsSamplingPred()) {
            return null;
        }
    }
    // Return the limit operator only if there is exactly one limit in the plan;
    // otherwise (zero or several limits) return null.
    Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
    if (limitOps.size() == 1) {
        return (LimitOperator) limitOps.iterator().next();
    } else if (limitOps.size() == 0) {
        return null;
    }
    return null;
}
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) ImmutableSet(com.google.common.collect.ImmutableSet) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
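
The decision above boils down to "classify operators by class, then require exactly one LimitOperator". A standalone sketch of that pattern using Guava's ArrayListMultimap (assumed available on the classpath; the operator names "RS_3" and "LIM_7" are placeholders, not Hive identifiers):

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import java.util.Collection;

public class SingleLimitCheckSketch {
    // Returns the single "LIMIT" entry if exactly one exists, otherwise null,
    // mirroring the final size check in checkQbpForGlobalLimit.
    static String findSingleLimit(Multimap<String, String> opsByKind) {
        Collection<String> limits = opsByKind.get("LIMIT");
        return limits.size() == 1 ? limits.iterator().next() : null;
    }

    public static void main(String[] args) {
        Multimap<String, String> ops = ArrayListMultimap.create();
        ops.put("RS", "RS_3");
        ops.put("LIMIT", "LIM_7");
        System.out.println(findSingleLimit(ops)); // LIM_7
    }
}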

Example 39 with ReduceSinkDesc

Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in the apache/hive project.

From the class AbstractSMBJoinProc, the method canConvertJoinToBucketMapJoin:

// Can the join operator be converted to a bucket map-merge join operator?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
    // This has already been inspected and rejected
    if (context.getRejectedJoinOps().contains(joinOp)) {
        return false;
    }
    if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
        return false;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // No big-table candidates of any type, so the join cannot be converted. Return false.
        return false;
    }
    int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
    if (bigTablePosition < 0) {
        // No valid big-table position was found (e.g. the join contains aliases from a sub-query)
        return false;
    }
    context.setBigTablePosition(bigTablePosition);
    String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
    joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
        Byte tag = (byte) rsconf.getTag();
        List<ExprNodeDesc> keys = rsconf.getKeyCols();
        keyExprMap.put(tag, keys);
    }
    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
    String[] joinSrcs = joinOp.getConf().getBaseSrc();
    String[] srcs = new String[joinSrcs.length];
    for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
        joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
        srcs[srcPos] = new String(joinSrcs[srcPos]);
    }
    // Check whether a bucket map join is feasible for the join alias chosen by the big table matcher.
    return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
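
As a side note on the keyExprMap built above: each parent ReduceSink contributes its key columns under its join tag. A plain-Java sketch of that shape, with strings standing in for ExprNodeDesc (the aliases a and b are hypothetical):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class JoinKeyTagSketch {
    public static void main(String[] args) {
        // Tag 0 and tag 1 correspond to the two join branches; each maps to its key columns.
        Map<Byte, List<String>> keyExprMap = new HashMap<>();
        keyExprMap.put((byte) 0, Arrays.asList("a.key"));
        keyExprMap.put((byte) 1, Arrays.asList("b.key"));
        System.out.println(keyExprMap);
    }
}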

Example 40 with ReduceSinkDesc

Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in the apache/hive project.

From the class SemanticAnalyzer, the method genCommonGroupByPlanReduceSinkOperator:

@SuppressWarnings("nls")
private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests, Operator inputOperatorInfo) throws SemanticException {
    RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver reduceSinkOutputRowResolver = new RowResolver();
    reduceSinkOutputRowResolver.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // The group by keys and distinct keys should be the same for all dests, so using the first
    // one to produce these will be the same as using any other.
    String dest = dests.get(0);
    // Pre-compute group-by keys and store in reduceKeys
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    List<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    int keyLength = reduceKeys.size();
    List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    List<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    // Iterate over all the destinations and collect the reduce values needed by each of them.
    for (String destination : dests) {
        getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
        // Need to pass all of the columns used in the where clauses as reduce values
        ASTNode whereClause = parseInfo.getWhrForClause(destination);
        if (whereClause != null) {
            assert whereClause.getChildCount() == 1;
            ASTNode predicates = (ASTNode) whereClause.getChild(0);
            Map<ASTNode, ExprNodeDesc> nodeOutputs = genAllExprNodeDesc(predicates, reduceSinkInputRowResolver);
            removeMappingForKeys(predicates, nodeOutputs, reduceKeys);
            // extract columns missing in current RS key/value
            for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) {
                ASTNode parameter = entry.getKey();
                ExprNodeDesc expression = entry.getValue();
                if (!(expression instanceof ExprNodeColumnDesc)) {
                    continue;
                }
                if (ExprNodeDescUtils.indexOf(expression, reduceValues) >= 0) {
                    continue;
                }
                String internalName = getColumnInternalName(reduceValues.size());
                String field = Utilities.ReduceField.VALUE.toString() + "." + internalName;
                reduceValues.add(expression);
                outputValueColumnNames.add(internalName);
                reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field, expression.getTypeInfo(), null, false));
                colExprMap.put(field, expression);
            }
        }
    }
    // Optimize the scenario when there are no grouping keys - only 1 reducer is needed
    int numReducers = -1;
    if (grpByExprs.isEmpty()) {
        numReducers = 1;
    }
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, keyLength, numReducers, AcidUtils.Operation.NOT_ACID, defaultNullOrder);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
Also used: RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap)
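
A small standalone sketch of the internal-name convention visible in this method: reduce values are named _col0, _col1, ... and addressed on the reduce side as VALUE._colN. The real code uses SemanticAnalyzer.getColumnInternalName and Utilities.ReduceField.VALUE; this just reproduces the resulting strings for illustration.

public class ReduceValueNamingSketch {
    static String reduceValueField(int position) {
        String internalName = "_col" + position;   // e.g. _col0 for the first value column
        return "VALUE." + internalName;            // how the value column is referenced downstream
    }

    public static void main(String[] args) {
        System.out.println(reduceValueField(0)); // VALUE._col0
        System.out.println(reduceValueField(1)); // VALUE._col1
    }
}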

Aggregations

ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 50
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 31
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 31
ArrayList (java.util.ArrayList): 29
Operator (org.apache.hadoop.hive.ql.exec.Operator): 21
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 20
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 19
HashMap (java.util.HashMap): 18
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 17
LinkedHashMap (java.util.LinkedHashMap): 16
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 16
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 16
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 14
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 14
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 13
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 12
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 11
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 11
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 11
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 11