Example 96 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class ColumnStatsAutoGatherContext method replaceSelectOperatorProcess.

/**
   * @param operator : the select operator in the analyze statement
   * @param input : the operator right before FS in the insert overwrite statement
   * @throws HiveException 
   */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input) throws HiveException {
    RowSchema selRS = operator.getSchema();
    ArrayList<ColumnInfo> signature = new ArrayList<>();
    OpParseContext inputCtx = sa.opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    // 1. deal with non-partition columns
    for (int i = 0; i < this.columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(i));
    }
    // if there is any partition column (in static partition or dynamic
    // partition or mixed case)
    int dynamicPartBegin = -1;
    for (int i = 0; i < partitionColumns.size(); i++) {
        ExprNodeDesc exprNodeDesc = null;
        String partColName = partitionColumns.get(i).getName();
        // 2. deal with static partition columns
        if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
            // a dynamic partition column has already been seen, so a static
            // partition column may not follow it
            if (dynamicPartBegin >= 0) {
                throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
            }
            exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
            TypeInfo srcType = exprNodeDesc.getTypeInfo();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            if (!srcType.equals(destType)) {
                // This may be possible when srcType is string but destType is integer
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        } else {
            // 3. dynamic partition columns
            dynamicPartBegin++;
            ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
            TypeInfo srcType = col.getType();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            exprNodeDesc = new ExprNodeColumnDesc(col);
            if (!srcType.equals(destType)) {
                exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        }
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(this.columns.size() + i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(this.columns.size() + i));
    }
    operator.setConf(new SelectDesc(colList, columnNames));
    operator.setColumnExprMap(columnExprMap);
    selRS.setSignature(signature);
    operator.setSchema(selRS);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
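
The interesting part of this example is the static-partition branch: partition spec values always arrive as strings, so the method casts the constant whenever the destination column declares a different type. Below is a minimal standalone sketch of just that conversion step; the value '12' and the int destination type are made-up inputs, not taken from the Hive sources.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StaticPartitionCastSketch {
    public static void main(String[] args) throws SemanticException {
        // Partition spec values are strings, e.g. hr='12' in the INSERT statement.
        ExprNodeDesc constant = new ExprNodeConstantDesc("12");
        // Assume the target table declares the partition column as int.
        PrimitiveTypeInfo destType = TypeInfoFactory.intTypeInfo;
        if (!constant.getTypeInfo().equals(destType)) {
            // Same conversion as in the method above: wrap the constant in a cast UDF.
            constant = ParseUtils.createConversionCast(constant, destType);
        }
        System.out.println(constant.getExprString());
    }
}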

Example 97 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class IndexPredicateAnalyzer method getColumnExpr.

// Check if the ExprNodeColumnDesc is wrapped in an expression.
// If so, peel it off; otherwise return the expression itself.
private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
    if (expr instanceof ExprNodeColumnDesc) {
        return expr;
    }
    ExprNodeGenericFuncDesc funcDesc = null;
    if (expr instanceof ExprNodeGenericFuncDesc) {
        funcDesc = (ExprNodeGenericFuncDesc) expr;
    }
    if (null == funcDesc) {
        return expr;
    }
    GenericUDF udf = funcDesc.getGenericUDF();
    // Check if it's a simple cast expression.
    if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary
            || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToVarchar
            || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate
            || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp)
            && funcDesc.getChildren().size() == 1
            && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) {
        return expr.getChildren().get(0);
    }
    return expr;
}
Also used : GenericUDFToChar(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFToDecimal(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal) GenericUDFToUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToBinary(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary) GenericUDFToDate(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate) GenericUDFToUtcTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) GenericUDFToVarchar(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar)
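
The peeling test above can be exercised directly: ParseUtils.createConversionCast produces exactly the kind of one-argument bridge UDF that getColumnExpr unwraps. The following standalone sketch (the column name key and alias t are invented) builds a cast-wrapped column and peels it the same way.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PeelCastSketch {
    public static void main(String[] args) throws SemanticException {
        // An int column "key" from table alias "t".
        ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", "t", false);
        // Wrap it in an implicit cast, as CBO often does for comparisons.
        ExprNodeDesc cast = ParseUtils.createConversionCast(col, TypeInfoFactory.doubleTypeInfo);
        // The same test getColumnExpr performs: a one-argument bridge UDF over a column.
        if (cast instanceof ExprNodeGenericFuncDesc) {
            ExprNodeGenericFuncDesc f = (ExprNodeGenericFuncDesc) cast;
            if (f.getGenericUDF() instanceof GenericUDFBridge
                    && f.getChildren().size() == 1
                    && f.getChildren().get(0) instanceof ExprNodeColumnDesc) {
                ExprNodeDesc peeled = f.getChildren().get(0); // back to the bare column
                System.out.println(peeled.getExprString());
            }
        }
    }
}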

Example 98 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class IndexPredicateAnalyzer method analyzeExpr.

private ExprNodeDesc analyzeExpr(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions, Object... nodeOutputs) throws SemanticException {
    if (FunctionRegistry.isOpAnd(expr)) {
        assert (nodeOutputs.length >= 2);
        List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
        for (Object residual : nodeOutputs) {
            if (null != residual) {
                residuals.add((ExprNodeDesc) residual);
            }
        }
        if (residuals.size() == 0) {
            return null;
        } else if (residuals.size() == 1) {
            return residuals.get(0);
        } else if (residuals.size() > 1) {
            return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), residuals);
        }
    }
    GenericUDF genericUDF = expr.getGenericUDF();
    if (!(genericUDF instanceof GenericUDFBaseCompare)) {
        return expr;
    }
    ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
    ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
    // We may need to peel off the GenericUDFBridge that CBO or the user added
    if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
        expr1 = getColumnExpr(expr1);
        expr2 = getColumnExpr(expr2);
    }
    ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
    if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
        return expr;
    }
    ExprNodeColumnDesc columnDesc;
    ExprNodeConstantDesc constantDesc;
    if (extracted[0] instanceof ExprNodeConstantDesc) {
        genericUDF = genericUDF.flip();
        columnDesc = (ExprNodeColumnDesc) extracted[1];
        constantDesc = (ExprNodeConstantDesc) extracted[0];
    } else {
        columnDesc = (ExprNodeColumnDesc) extracted[0];
        constantDesc = (ExprNodeConstantDesc) extracted[1];
    }
    Set<String> allowed = columnToUDFs.get(columnDesc.getColumn());
    if (allowed == null) {
        return expr;
    }
    String udfName = genericUDF.getUdfName();
    if (!allowed.contains(udfName)) {
        return expr;
    }
    String[] fields = null;
    if (extracted.length > 2) {
        ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
        if (!isValidField(fieldDesc)) {
            return expr;
        }
        fields = ExprNodeDescUtils.extractFields(fieldDesc);
    }
    // We also need to update the expr so that the index query can be generated.
    // Note that Hive does not support UDFToDouble etc. in the query text.
    List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
    list.add(expr1);
    list.add(expr2);
    ExprNodeGenericFuncDesc indexExpr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
    searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, indexExpr, expr, fields));
    // Remove the expr from the residual predicate when it is fully handled;
    // keep it when struct fields are involved.
    return fields == null ? null : expr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
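
analyzeExpr is private, but storage handlers reach it through IndexPredicateAnalyzer's public entry point, analyzePredicate. A hedged standalone sketch of that flow follows; the column key, alias t, and the value 42 are made up for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PredicateSplitSketch {
    public static void main(String[] args) {
        // Build the predicate key = 42 with the constant on the right;
        // analyzeExpr flips the comparison when the constant is on the left.
        ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", "t", false);
        ExprNodeDesc val = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 42);
        ExprNodeGenericFuncDesc pred = new ExprNodeGenericFuncDesc(
            TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(),
            Arrays.asList(col, val));
        // Register what the (hypothetical) storage handler can push down.
        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
        analyzer.allowColumnName("key");
        // Split into pushed-down search conditions plus a residual predicate.
        List<IndexSearchCondition> conditions = new ArrayList<>();
        ExprNodeDesc residual = analyzer.analyzePredicate(pred, conditions);
        // conditions should now hold key = 42; residual should be null here.
        System.out.println(conditions + " / residual: " + residual);
    }
}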

Example 99 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB2.

private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        colOutputName = gbInfo.outputColNames.get(i);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2 Add GrpSet Col
    int groupingSetsPosition = -1;
    if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
        groupingSetsPosition = gbKeys.size();
        ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
        gbKeys.add(grpSetColExpr);
        colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
        colExprMap.put(colOutputName, grpSetColExpr);
    }
    // 2. Add UDAF
    UDAFAttrs udafAttr;
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
    int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
        colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
        outputColNames.add(colOutputName);
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    }
    Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
    rsGBOp2.setColumnExprMap(colExprMap);
    // TODO: Shouldn't we propagate vc? Is it the vc col from the tab, or all vc?
    return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
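
Step 2 above turns each partial aggregate in the reduce-side row into a UDAF parameter via the ColumnInfo-based ExprNodeColumnDesc constructor. A small standalone sketch of just that constructor follows; the internal name VALUE._col0 and the bigint type are assumed values, not taken from a real plan.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UdafParamSketch {
    public static void main(String[] args) {
        // The reduce-side row carries partial aggregates under internal names
        // such as VALUE._col0; the tab alias is empty, as in the code above.
        ColumnInfo partial = new ColumnInfo("VALUE._col0", TypeInfoFactory.longTypeInfo, "", false);
        // The ColumnInfo-based constructor copies name, type, alias and the
        // virtual-column flag into the column expression used as a UDAF argument.
        ExprNodeColumnDesc param = new ExprNodeColumnDesc(partial);
        System.out.println(param.getColumn() + " : " + param.getTypeInfo());
    }
}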

Example 100 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class HiveOpConverter method genInputSelectForUnion.

private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
    Iterator<ColumnInfo> oIter = origInputOp.getSchema().getSignature().iterator();
    Iterator<ColumnInfo> uIter = uColumnInfo.iterator();
    List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
    List<String> colName = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    boolean needSelectOp = false;
    while (oIter.hasNext()) {
        ColumnInfo oInfo = oIter.next();
        ColumnInfo uInfo = uIter.next();
        if (!oInfo.isSameColumnForRR(uInfo)) {
            needSelectOp = true;
        }
        ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
        if (!oInfo.getType().equals(uInfo.getType())) {
            column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
        }
        columns.add(column);
        colName.add(uInfo.getInternalName());
        columnExprMap.put(uInfo.getInternalName(), column);
    }
    if (needSelectOp) {
        return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp);
    } else {
        return origInputOp;
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
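
The per-column work in the loop reduces to two decisions: reference the branch's column by its internal name, and cast it when the union schema settled on a different type. The sketch below isolates that step with invented ColumnInfo values (an int branch column widened to bigint).

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionAlignSketch {
    public static void main(String[] args) throws SemanticException {
        // One branch produces an int column; the union schema settled on bigint.
        ColumnInfo oInfo = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "a", false);
        ColumnInfo uInfo = new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, null, false);
        // Reference the branch column, then cast it to the union's type if needed.
        ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(),
            oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol());
        if (!oInfo.getType().equals(uInfo.getType())) {
            column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
        }
        System.out.println(column.getExprString()); // the cast-wrapped column
    }
}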

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)161 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)145 ArrayList (java.util.ArrayList)93 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)88 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)78 Test (org.junit.Test)65 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)43 HashMap (java.util.HashMap)40 LinkedHashMap (java.util.LinkedHashMap)30 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)28 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)22 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)21 Operator (org.apache.hadoop.hive.ql.exec.Operator)19 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)19 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)19 List (java.util.List)17 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)17 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)17