Example 41 with RowSchema

Use of org.apache.hadoop.hive.ql.exec.RowSchema in project Hive by Apache.

Class ConstantPropagateProcFactory, method evaluateColumn.

/**
   * Evaluate a column expression, replacing deterministic columns with constants where possible.
   *
   * @param desc the column expression to evaluate
   * @param cppCtx the constant-propagation context, holding each operator's known constants
   * @param parent the parent operator whose row schema is used to resolve the column
   * @return the constant expression for the column, or null if none is known
   */
private static ExprNodeDesc evaluateColumn(ExprNodeColumnDesc desc, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> parent) {
    RowSchema rs = parent.getSchema();
    ColumnInfo ci = rs.getColumnInfo(desc.getColumn());
    if (ci == null) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Reverse look up of column " + desc + " error!");
        }
        ci = rs.getColumnInfo(desc.getTabAlias(), desc.getColumn());
    }
    if (ci == null) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn());
        }
        return null;
    }
    ExprNodeDesc constant = null;
    // Additional work for union operator, see union27.q
    if (ci.getAlias() == null) {
        for (Entry<ColumnInfo, ExprNodeDesc> e : cppCtx.getOpToConstantExprs().get(parent).entrySet()) {
            if (e.getKey().getInternalName().equals(ci.getInternalName())) {
                constant = e.getValue();
                break;
            }
        }
    } else {
        constant = cppCtx.getOpToConstantExprs().get(parent).get(ci);
    }
    if (constant != null) {
        if (constant instanceof ExprNodeConstantDesc && !constant.getTypeInfo().equals(desc.getTypeInfo())) {
            return typeCast(constant, desc.getTypeInfo());
        }
        return constant;
    } else {
        return null;
    }
}
Also used: RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
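The part worth reusing here is the two-step lookup: resolve by internal name first, then fall back to (table alias, column name). A minimal sketch of that pattern against a hand-built RowSchema, assuming hive-exec and hive-serde are on the classpath; the column names and types are made up for illustration:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaLookupSketch {
    public static void main(String[] args) {
        // build a two-column schema; internal names mimic Hive's _colN convention
        ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
        signature.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
        signature.add(new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, "t", false));
        RowSchema rs = new RowSchema(signature);
        // primary lookup by internal name, as evaluateColumn tries first
        ColumnInfo ci = rs.getColumnInfo("_col1");
        if (ci == null) {
            // fallback lookup by (table alias, column name), mirroring the code above
            ci = rs.getColumnInfo("t", "_col1");
        }
        System.out.println(ci.getInternalName() + " -> " + ci.getType().getTypeName());
    }
}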

Example 42 with RowSchema

Use of org.apache.hadoop.hive.ql.exec.RowSchema in project Hive by Apache.

Class MapJoinProcessor, method convertJoinOpMapJoinOp.

public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
    MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
    // the output row schema of the original join op is reused for the map join op
    RowSchema outputRS = op.getSchema();
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(op.getCompilationOpContext(), mapJoinDescriptor, new RowSchema(outputRS.getSignature()), op.getParentOperators());
    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    mapJoinOp.setColumnExprMap(colExprMap);
    List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(op, mapJoinOp);
    }
    mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
    mapJoinOp.setChildOperators(childOps);
    op.setChildOperators(null);
    op.setParentOperators(null);
    return mapJoinOp;
}
Also used: MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator), AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
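The last few lines are the generic operator-splice idiom: point every child at the replacement, hand over the child list, then orphan the old operator so it drops out of the plan. A hedged sketch of just that step; the class and helper name are mine, not Hive's, but the Operator methods are the ones used above:

import java.util.List;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public final class OperatorSplice {
    // hypothetical helper: replace oldOp with newOp in the operator DAG
    public static void spliceInReplacement(Operator<? extends OperatorDesc> oldOp,
            Operator<? extends OperatorDesc> newOp) {
        List<Operator<? extends OperatorDesc>> children = oldOp.getChildOperators();
        for (Operator<? extends OperatorDesc> child : children) {
            // each child swaps oldOp for newOp in its parent list
            child.replaceParent(oldOp, newOp);
        }
        newOp.setChildOperators(children);
        // orphan the old operator so nothing in the plan reaches it anymore
        oldOp.setChildOperators(null);
        oldOp.setParentOperators(null);
    }
}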

Example 43 with RowSchema

Use of org.apache.hadoop.hive.ql.exec.RowSchema in project Hive by Apache.

Class OpProcFactory, method createFilter.

protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) {
    RowSchema inputRS = op.getSchema();
    // combine all predicates into a single expression
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
    while (iterator.hasNext()) {
        for (ExprNodeDesc pred : iterator.next()) {
            preds = ExprNodeDescUtils.split(pred, preds);
        }
    }
    if (preds.isEmpty()) {
        return null;
    }
    ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
    if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
        boolean pushFilterToStorage;
        HiveConf hiveConf = owi.getParseContext().getConf();
        pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
        if (pushFilterToStorage) {
            condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
            if (condn == null) {
                // we pushed the whole thing down
                return null;
            }
        }
    }
    // add new filter op
    List<Operator<? extends OperatorDesc>> originalChildren = op.getChildOperators();
    op.setChildOperators(null);
    Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
    output.setChildOperators(originalChildren);
    for (Operator<? extends OperatorDesc> ch : originalChildren) {
        List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
        int pos = parentOperators.indexOf(op);
        assert pos != -1;
        parentOperators.remove(pos);
        // add the new filter op at the old op's position
        parentOperators.add(pos, output);
    }
    if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
        // remove the candidate filter ops
        removeCandidates(op, owi);
    }
    // push down current ppd context to newly added filter
    ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
    if (walkerInfo != null) {
        walkerInfo.getNonFinalCandidates().clear();
        owi.putPrunedPreds(output, walkerInfo);
    }
    return output;
}
Also used: LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ArrayList(java.util.ArrayList), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), List(java.util.List), HiveConf(org.apache.hadoop.hive.conf.HiveConf), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
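The predicate-combining loop at the top stands on its own: split flattens nested AND chains into a flat list, and mergePredicates folds that list back into a single conjunction. A minimal sketch of the same idiom, assuming hive-exec on the classpath; the wrapper class and method name are hypothetical:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;

public final class PredicateMerge {
    // fold several (possibly AND-nested) predicates into one conjunction, or null if empty
    public static ExprNodeDesc combine(List<ExprNodeDesc> inputs) {
        List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc pred : inputs) {
            // split() flattens each AND chain into the accumulator list
            preds = ExprNodeDescUtils.split(pred, preds);
        }
        return preds.isEmpty() ? null : ExprNodeDescUtils.mergePredicates(preds);
    }
}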

Example 44 with RowSchema

Use of org.apache.hadoop.hive.ql.exec.RowSchema in project Hive by Apache.

Class ColumnStatsAutoGatherContext, method replaceSelectOperatorProcess.

/**
   * @param operator the select operator in the analyze statement
   * @param input the operator right before FS in the insert overwrite statement
   * @throws HiveException
   */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input) throws HiveException {
    RowSchema selRS = operator.getSchema();
    ArrayList<ColumnInfo> signature = new ArrayList<>();
    OpParseContext inputCtx = sa.opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    // 1. deal with non-partition columns
    for (int i = 0; i < this.columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(i));
    }
    // if there is any partition column (in static partition or dynamic
    // partition or mixed case)
    int dynamicPartBegin = -1;
    for (int i = 0; i < partitionColumns.size(); i++) {
        ExprNodeDesc exprNodeDesc = null;
        String partColName = partitionColumns.get(i).getName();
        // 2. deal with static partition columns
        if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
            if (dynamicPartBegin > 0) {
                throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
            }
            exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
            TypeInfo srcType = exprNodeDesc.getTypeInfo();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            if (!srcType.equals(destType)) {
                // This may be possible when srcType is string but destType is integer
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
    } else {
        // 3. dynamic partition columns
            dynamicPartBegin++;
            ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
            TypeInfo srcType = col.getType();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            exprNodeDesc = new ExprNodeColumnDesc(col);
            if (!srcType.equals(destType)) {
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        }
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(this.columns.size() + i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(this.columns.size() + i));
    }
    operator.setConf(new SelectDesc(colList, columnNames));
    operator.setColumnExprMap(columnExprMap);
    selRS.setSignature(signature);
    operator.setSchema(selRS);
}
Also used: RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
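The static-partition branch shows a pattern that recurs throughout Hive's planners: wrap the partition-spec string in a constant, then cast only when the destination column type differs. A hedged sketch of that idiom; the helper name and the sample date value are mine, while ParseUtils.createConversionCast is the real call used above:

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public final class PartitionConstantSketch {
    // hypothetical helper: constant for a static partition value, cast if types disagree
    public static ExprNodeDesc partitionConstant(String specValue, TypeInfo destType)
            throws SemanticException {
        // partition-spec values arrive as strings, so the constant starts out as a string
        ExprNodeDesc expr = new ExprNodeConstantDesc(specValue);
        if (!expr.getTypeInfo().equals(destType)) {
            // e.g. the string "2018-01-01" cast to the column's date type
            expr = ParseUtils.createConversionCast(expr, (PrimitiveTypeInfo) destType);
        }
        return expr;
    }

    public static void main(String[] args) throws SemanticException {
        ExprNodeDesc e = partitionConstant("2018-01-01", TypeInfoFactory.dateTypeInfo);
        System.out.println(e.getExprString());
    }
}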

Example 45 with RowSchema

Use of org.apache.hadoop.hive.ql.exec.RowSchema in project Hive by Apache.

Class HiveGBOpConvUtil, method genReduceSideGB2.

private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        colOutputName = gbInfo.outputColNames.get(i);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2 Add GrpSet Col
    int groupingSetsPosition = -1;
    if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
        groupingSetsPosition = gbKeys.size();
        ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
        gbKeys.add(grpSetColExpr);
        colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
        colExprMap.put(colOutputName, grpSetColExpr);
    }
    // 2. Add UDAF
    UDAFAttrs udafAttr;
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
    int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
        colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
        outputColNames.add(colOutputName);
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    }
    Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
    rsGBOp2.setColumnExprMap(colExprMap);
    // TODO: Shouldn't we propagate vc? Is it the vc col from the table, or all vc?
    return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), HashMap(java.util.HashMap), GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo), Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
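The call that attaches the new group-by, OperatorFactory.getAndMakeChild(desc, new RowSchema(colInfoLst), rs), is the wiring idiom every example in this series relies on: it links the new operator under its parent and installs its output schema in one step. A hedged sketch of the shape of that call, with a trivial SelectDesc standing in for the group-by descriptor; the helper and column are mine:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public final class AttachChildSketch {
    // hypothetical helper: hang a one-column identity select under parentOp
    public static Operator<?> attachSelect(Operator<?> parentOp) {
        ColumnInfo ci = new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, "", false);
        ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
        colList.add(new ExprNodeColumnDesc(ci));
        ArrayList<String> columnNames = new ArrayList<String>();
        columnNames.add(ci.getInternalName());
        ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
        colInfoLst.add(ci);
        // getAndMakeChild wires parent/child links on both operators and
        // installs the RowSchema as the new operator's output schema
        return OperatorFactory.getAndMakeChild(
                new SelectDesc(colList, columnNames), new RowSchema(colInfoLst), parentOp);
    }
}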

Aggregations

RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 76
ArrayList (java.util.ArrayList): 59
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 57
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 56
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 42
HashMap (java.util.HashMap): 39
Operator (org.apache.hadoop.hive.ql.exec.Operator): 36
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 32
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 31
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 31
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 30
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 28
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 28
LinkedHashMap (java.util.LinkedHashMap): 26
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 25
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 23
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 23
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 22
List (java.util.List): 14
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 14