
Example 51 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class ColumnStatsAutoGatherContext method replaceSelectOperatorProcess.

/**
 * @param operator : the select operator in the analyze statement
 * @param input : the operator right before the FileSinkOperator (FS) in the insert overwrite statement
 * @throws HiveException
 */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input) throws HiveException {
    RowSchema selRS = operator.getSchema();
    List<ColumnInfo> signature = new ArrayList<>();
    OpParseContext inputCtx = sa.opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    // 1. deal with non-partition columns
    for (int i = 0; i < this.columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(i));
    }
    // if there is any partition column (in static partition or dynamic
    // partition or mixed case)
    int dynamicPartBegin = -1;
    for (int i = 0; i < partitionColumns.size(); i++) {
        ExprNodeDesc exprNodeDesc = null;
        String partColName = partitionColumns.get(i).getName();
        // 2. deal with static partition columns
        if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
            if (dynamicPartBegin > 0) {
                throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
            }
            exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
            TypeInfo srcType = exprNodeDesc.getTypeInfo();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            if (!srcType.equals(destType)) {
                // This may be possible when srcType is string but destType is integer
                exprNodeDesc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        } else {
            // 3. dynamic partition columns
            dynamicPartBegin++;
            ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
            TypeInfo srcType = col.getType();
            TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
            exprNodeDesc = new ExprNodeColumnDesc(col);
            if (!srcType.equals(destType)) {
                exprNodeDesc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
            }
        }
        colList.add(exprNodeDesc);
        String internalName = selRS.getColumnNames().get(this.columns.size() + i);
        columnNames.add(internalName);
        columnExprMap.put(internalName, exprNodeDesc);
        signature.add(selRS.getSignature().get(this.columns.size() + i));
    }
    operator.setConf(new SelectDesc(colList, columnNames));
    operator.setColumnExprMap(columnExprMap);
    selRS.setSignature(signature);
    operator.setSchema(selRS);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
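
The core of replaceSelectOperatorProcess is a recurring pattern: build one ExprNodeDesc per output column, cast static partition constants to the destination column type, then swap a fresh SelectDesc into the operator. Below is a minimal sketch of that pattern under the assumption that the same internal Hive APIs shown above are available; the class name, the sample partition value "2024", the int destination type, and the "_col0" name are illustrative assumptions, not taken from a real plan.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectDescRewriteSketch {

    // Hypothetical helper: rewrite a select operator so its single output column
    // is a static partition constant cast to the destination column type.
    static void rewrite(SelectOperator operator) throws SemanticException {
        List<ExprNodeDesc> colList = new ArrayList<>();
        List<String> columnNames = new ArrayList<>();

        // The static partition value arrives as a string constant...
        ExprNodeDesc partValue = new ExprNodeConstantDesc("2024");
        // ...but the destination partition column is assumed to be an int,
        // so cast it, exactly as the static-partition branch above does.
        PrimitiveTypeInfo destType = TypeInfoFactory.getPrimitiveTypeInfo("int");
        if (!partValue.getTypeInfo().equals(destType)) {
            partValue = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
                .createConversionCast(partValue, destType);
        }
        colList.add(partValue);
        columnNames.add("_col0");

        // Replace the operator's descriptor, as replaceSelectOperatorProcess does.
        operator.setConf(new SelectDesc(colList, columnNames));
    }
}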

Example 52 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class SemanticAnalyzer method genInputSelectForUnion.

/**
 * Generates a select operator which can go between the original input operator and the union
 * operator. This select casts columns to match the type of the associated column in the union;
 * other columns pass through unchanged. The new operator's only parent is the original input
 * operator to the union, and its only child is the union. If the input does not need to be
 * cast, the original operator is returned and no new select operator is added.
 *
 * @param origInputOp
 *          The original input operator to the union.
 * @param origInputFieldMap
 *          A map from field name to ColumnInfo for the original input operator.
 * @param origInputAlias
 *          The alias associated with the original input operator.
 * @param unionoutRR
 *          The union's output row resolver.
 * @param unionalias
 *          The alias of the union.
 * @return the original input operator if no cast is needed, otherwise the new select operator
 * @throws SemanticException
 */
private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, Map<String, ColumnInfo> origInputFieldMap, String origInputAlias, RowResolver unionoutRR, String unionalias) throws SemanticException {
    Map<String, ColumnInfo> fieldMap = unionoutRR.getFieldMap(unionalias);
    Iterator<ColumnInfo> oIter = origInputFieldMap.values().iterator();
    Iterator<ColumnInfo> uIter = fieldMap.values().iterator();
    List<ExprNodeDesc> columns = new ArrayList<>();
    boolean needsCast = false;
    while (oIter.hasNext()) {
        ColumnInfo oInfo = oIter.next();
        ColumnInfo uInfo = uIter.next();
        ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
        if (!oInfo.getType().equals(uInfo.getType())) {
            needsCast = true;
            column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
        }
        columns.add(column);
    }
    // If none of the columns need to be cast there's no need for an additional select operator
    if (!needsCast) {
        return origInputOp;
    }
    RowResolver rowResolver = new RowResolver();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    List<String> colName = new ArrayList<String>();
    for (int i = 0; i < columns.size(); i++) {
        String name = getColumnInternalName(i);
        ColumnInfo col = new ColumnInfo(name, columns.get(i).getTypeInfo(), "", false);
        rowResolver.put(origInputAlias, name, col);
        colName.add(name);
        columnExprMap.put(name, columns.get(i));
    }
    Operator<SelectDesc> newInputOp = OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(rowResolver.getColumnInfos()), columnExprMap, origInputOp);
    return putOpInsertMap(newInputOp, rowResolver);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
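
The essential decision in genInputSelectForUnion is made per column: if a union branch's column type differs from the union output type, the column reference is wrapped in a conversion cast, otherwise it passes through. Here is a minimal sketch of that check, assuming the same Hive expression APIs used above; the ColumnInfo parameters stand in for one branch column and the matching union output column.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;

public class UnionCastSketch {

    // Hypothetical helper: return the branch column as-is when the types already
    // match, otherwise wrap it in a cast to the union output column's type
    // (e.g. an int branch column feeding a bigint union column gets a cast).
    static ExprNodeDesc castIfNeeded(ColumnInfo branchCol, ColumnInfo unionCol)
            throws SemanticException {
        ExprNodeDesc column = new ExprNodeColumnDesc(branchCol);
        if (!branchCol.getType().equals(unionCol.getType())) {
            column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
                .createConversionCast(column, (PrimitiveTypeInfo) unionCol.getType());
        }
        return column;
    }
}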

Example 53 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class SemanticAnalyzer method genConversionSelectOperator.

/**
 * Generate the conversion SelectOperator that converts the columns into the
 * types that are expected by the table_desc.
 */
private Operator genConversionSelectOperator(String dest, QB qb, Operator input, Deserializer deserializer, DynamicPartitionCtx dpCtx, List<FieldSchema> parts) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // Check column number
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
    List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    int inColumnCnt = rowFields.size();
    int outColumnCnt = tableFields.size();
    if (dynPart && dpCtx != null) {
        outColumnCnt += dpCtx.getNumDPCols();
    }
    // The number of input columns and output columns should match for a regular query
    if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
        String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), qb.getParseInfo().getDestForClause(dest), reason));
    }
    // Check column types
    boolean converted = false;
    int columnNumber = tableFields.size();
    List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    // MetadataTypedColumnsetSerDe does not need any conversion since it
    // does the conversion to String by itself.
    if (!(deserializer instanceof MetadataTypedColumnsetSerDe) && !deleting(dest)) {
        // offset by 1 so that we don't try to convert the ROW__ID
        if (updating(dest)) {
            expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
        }
        // This part only deals with non-partition columns; we deal with partition columns next
        for (int i = 0; i < columnNumber; i++) {
            int rowFieldsOffset = updating(dest) ? i + 1 : i;
            ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
            TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
            TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
            // Thus, we still keep the conversion.
            if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
                // need to do some conversions here
                converted = true;
                if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                    // cannot convert to complex types
                    column = null;
                } else {
                    column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
                }
                if (column == null) {
                    String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(), qb.getParseInfo().getDestForClause(dest), reason));
                }
            }
            expressions.add(column);
        }
        // deal with dynamic partition columns
        if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
            // rowFields contains non-partitioned columns (tableFields) followed by DP columns
            int rowFieldsOffset = tableFields.size() + (updating(dest) ? 1 : 0);
            for (int dpColIdx = 0; dpColIdx < rowFields.size() - rowFieldsOffset; ++dpColIdx) {
                // create ExprNodeDesc
                ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset);
                TypeInfo inputTypeInfo = inputColumn.getType();
                ExprNodeDesc column = new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true);
                // Cast input column to destination column type if necessary.
                if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) {
                    if (parts != null && !parts.isEmpty()) {
                        String destPartitionName = dpCtx.getDPColNames().get(dpColIdx);
                        FieldSchema destPartitionFieldSchema = parts.stream().filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)).findFirst().orElse(null);
                        if (destPartitionFieldSchema == null) {
                            throw new IllegalStateException("Partition schema for dynamic partition " + destPartitionName + " not found in DynamicPartitionCtx.");
                        }
                        String partitionType = destPartitionFieldSchema.getType();
                        if (partitionType == null) {
                            throw new IllegalStateException("Couldn't get FieldSchema for partition" + destPartitionFieldSchema.getName());
                        }
                        PrimitiveTypeInfo partitionTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partitionType);
                        if (!partitionTypeInfo.equals(inputTypeInfo)) {
                            column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, partitionTypeInfo);
                            converted = true;
                        }
                    } else {
                        LOG.warn("Partition schema for dynamic partition " + inputColumn.getAlias() + " (" + inputColumn.getInternalName() + ") not found in DynamicPartitionCtx. " + "This is expected with a CTAS.");
                    }
                }
                expressions.add(column);
            }
        }
    }
    if (converted) {
        // add the select operator
        RowResolver rowResolver = new RowResolver();
        List<String> colNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = getColumnInternalName(i);
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
        input.setColumnExprMap(colExprMap);
    }
    return input;
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) MetadataTypedColumnsetSerDe(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
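
The last step of genConversionSelectOperator, adding a conversion SELECT on top of the input operator, follows a pattern that recurs throughout SemanticAnalyzer: populate a RowResolver and a column/expression map, then call OperatorFactory.getAndMakeChild with a new SelectDesc. A reduced sketch of that step is shown below, assuming the expression list has already been built; the "_col" + i naming is a stand-in for getColumnInternalName(i).

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

public class ConversionSelectSketch {

    // Hypothetical helper: wrap the given expressions in a new SELECT operator
    // whose only parent is `input`, mirroring the `if (converted)` block above.
    static Operator<SelectDesc> addConversionSelect(Operator<? extends OperatorDesc> input,
            List<ExprNodeDesc> expressions) throws SemanticException {
        RowResolver rowResolver = new RowResolver();
        List<String> colNames = new ArrayList<>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = "_col" + i; // stands in for getColumnInternalName(i)
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        Operator<SelectDesc> select = OperatorFactory.getAndMakeChild(
            new SelectDesc(expressions, colNames),
            new RowSchema(rowResolver.getColumnInfos()), input);
        select.setColumnExprMap(colExprMap);
        return select;
    }
}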

Example 54 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class SemanticAnalyzer method genSelectPlan.

@SuppressWarnings("nls")
private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input, Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException {
    LOG.debug("tree: {}", selExprList.toStringTree());
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    RowResolver out_rwsch = new RowResolver();
    ASTNode trfm = null;
    Integer pos = 0;
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    RowResolver starRR = null;
    if (inputForSelectStar != null && inputForSelectStar != input) {
        starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
    }
    // SELECT * or SELECT TRANSFORM(*)
    boolean selectStar = false;
    int posn = 0;
    boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
    if (hintPresent) {
        posn++;
    }
    boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM);
    if (isInTransform) {
        queryProperties.setUsesScript(true);
        globalLimitCtx.setHasTransformOrUDTF(true);
        trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
    }
    // Detect queries of the form SELECT udtf(col) AS ...
    // by looking for a function as the first child, and then checking to see
    // if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
    // the lack of a special token.
    boolean isUDTF = false;
    String udtfTableAlias = null;
    List<String> udtfColAliases = new ArrayList<String>();
    ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
    GenericUDTF genericUDTF = null;
    int udtfExprType = udtfExpr.getType();
    if (udtfExprType == HiveParser.TOK_FUNCTION || udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
        String funcName = TypeCheckProcFactory.getFunctionText(udtfExpr, true);
        FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
        if (fi != null) {
            genericUDTF = fi.getGenericUDTF();
        }
        isUDTF = (genericUDTF != null);
        if (isUDTF) {
            globalLimitCtx.setHasTransformOrUDTF(true);
        }
        if (isUDTF && !fi.isNative()) {
            unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr.getChild(0));
        }
        if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
            genExprNodeDescRegex(".*", null, (ASTNode) udtfExpr.getChild(0), colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        }
    }
    if (isUDTF) {
        // Only support a single expression when it's a UDTF
        if (selExprList.getChildCount() > 1) {
            throw new SemanticException(generateErrorMessage((ASTNode) selExprList.getChild(1), ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
        }
        ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
        // Column names can also be inferred from the result of the UDTF
        for (int i = 1; i < selExpr.getChildCount(); i++) {
            ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
            switch(selExprChild.getType()) {
                case HiveParser.Identifier:
                    udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
                    unparseTranslator.addIdentifierTranslation(selExprChild);
                    break;
                case HiveParser.TOK_TABALIAS:
                    assert (selExprChild.getChildCount() == 1);
                    udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0).getText());
                    qb.addAlias(udtfTableAlias);
                    unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild.getChild(0));
                    break;
                default:
                    assert (false);
            }
        }
        LOG.debug("UDTF table alias is {}", udtfTableAlias);
        LOG.debug("UDTF col aliases are {}", udtfColAliases);
    }
    // The list of expressions after SELECT or SELECT TRANSFORM.
    ASTNode exprList;
    if (isInTransform) {
        exprList = (ASTNode) trfm.getChild(0);
    } else if (isUDTF) {
        exprList = udtfExpr;
    } else {
        exprList = selExprList;
    }
    LOG.debug("genSelectPlan: input = {} starRr = {}", inputRR, starRR);
    // For UDTF's, skip the function name to get the expressions
    int startPosn = isUDTF ? posn + 1 : posn;
    if (isInTransform) {
        startPosn = 0;
    }
    final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() || !qb.getParseInfo().getDestGroupingSets().isEmpty() || !qb.getParseInfo().getDestCubes().isEmpty());
    Set<String> colAliases = new HashSet<String>();
    int offset = 0;
    // Iterate over all expressions (either after SELECT, or in SELECT TRANSFORM)
    for (int i = startPosn; i < exprList.getChildCount(); ++i) {
        // child can be EXPR AS ALIAS, or EXPR.
        ASTNode child = (ASTNode) exprList.getChild(i);
        boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
        boolean isWindowSpec = child.getChildCount() == 3 && child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
        // A select expression with more than two children is only valid for window specs,
        // transforms, and UDTFs; their ASTs are slightly different.
        if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
            throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(2), ErrorMsg.INVALID_AS.getMsg()));
        }
        // The real expression
        ASTNode expr;
        String tabAlias;
        String colAlias;
        if (isInTransform || isUDTF) {
            tabAlias = null;
            colAlias = autogenColAliasPrfxLbl + i;
            expr = child;
        } else {
            // Get rid of TOK_SELEXPR
            expr = (ASTNode) child.getChild(0);
            String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, autogenColAliasPrfxIncludeFuncName, i + offset);
            tabAlias = colRef[0];
            colAlias = colRef[1];
            if (hasAsClause) {
                unparseTranslator.addIdentifierTranslation((ASTNode) child.getChild(1));
            }
        }
        colAliases.add(colAlias);
        // The real expression
        if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
            int initPos = pos;
            pos = genExprNodeDescRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
            if (unparseTranslator.isEnabled()) {
                offset += pos - initPos - 1;
            }
            selectStar = true;
        } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
            // In case the expression is a regex COL.
            // This can only happen without AS clause
            // We don't allow this for ExprResolver - the Group By case
            pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
            // In case the expression is TABLE.COL (col can be regex).
            // This can only happen without AS clause
            // We don't allow this for ExprResolver - the Group By case
            pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
        } else {
            // Case when this is an expression
            TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted());
            // We allow stateful functions in the SELECT list (but nowhere else)
            tcCtx.setAllowStatefulFunctions(true);
            tcCtx.setAllowDistinctFunctions(false);
            if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
                // If CBO did not optimize the query, we might need to replace grouping function
                // Special handling of grouping function
                expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr, !cubeRollupGrpSetPresent);
            }
            ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
            String recommended = recommendName(exp, colAlias);
            if (recommended != null && !colAliases.contains(recommended) && out_rwsch.get(null, recommended) == null) {
                colAlias = recommended;
            }
            colList.add(exp);
            ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), exp.getWritableObjectInspector(), tabAlias, false);
            colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) && ((ExprNodeColumnDesc) exp).isSkewedCol());
            out_rwsch.put(tabAlias, colAlias, colInfo);
            if (exp instanceof ExprNodeColumnDesc) {
                ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
                String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
                if (altMapping != null) {
                    out_rwsch.put(altMapping[0], altMapping[1], colInfo);
                }
            }
            pos++;
        }
    }
    selectStar = selectStar && exprList.getChildCount() == posn + 1;
    out_rwsch = handleInsertStatementSpec(colList, dest, out_rwsch, qb, selExprList);
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames, selectStar), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    output.setColumnExprMap(colExprMap);
    if (isInTransform) {
        output = genScriptPlan(trfm, qb, output);
    }
    if (isUDTF) {
        output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV);
    }
    LOG.debug("Created Select Plan row schema: {}", out_rwsch);
    return output;
}
Also used : TypeCheckCtx(org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) WindowFunctionInfo(org.apache.hadoop.hive.ql.exec.WindowFunctionInfo) FunctionInfo(org.apache.hadoop.hive.ql.exec.FunctionInfo) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) GenericUDTF(org.apache.hadoop.hive.ql.udf.generic.GenericUDTF) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet)
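
One non-obvious step in genSelectPlan is how UDTFs are detected: there is no dedicated token, so the first child of the select expression is resolved through the function registry and checked for a GenericUDTF. A small sketch of that lookup follows; "explode" is only an assumed example function name.

import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;

public class UdtfDetectionSketch {

    // Hypothetical check mirroring the UDTF detection above: resolve the function
    // name through the registry and see whether it is a table-generating function.
    static boolean isTableGeneratingFunction(String funcName) throws SemanticException {
        FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
        GenericUDTF genericUDTF = (fi == null) ? null : fi.getGenericUDTF();
        return genericUDTF != null;
    }

    public static void main(String[] args) throws SemanticException {
        // e.g. SELECT explode(arr) AS item FROM t takes the UDTF branch in genSelectPlan.
        System.out.println(isTableGeneratingFunction("explode"));
    }
}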

Example 55 with SelectDesc

use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.

the class Vectorizer method vectorizeSelectOperator.

public static Operator<? extends OperatorDesc> vectorizeSelectOperator(Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext, VectorSelectDesc vectorSelectDesc) throws HiveException {
    SelectDesc selectDesc = (SelectDesc) selectOp.getConf();
    List<ExprNodeDesc> colList = selectDesc.getColList();
    int index = 0;
    final int size = colList.size();
    // this will mark all actual computed columns
    vContext.markActualScratchColumns();
    VectorExpression[] vectorSelectExprs = new VectorExpression[size];
    int[] projectedOutputColumns = new int[size];
    for (int i = 0; i < size; i++) {
        ExprNodeDesc expr = colList.get(i);
        VectorExpression ve = vContext.getVectorExpression(expr);
        projectedOutputColumns[i] = ve.getOutputColumnNum();
        if (ve instanceof IdentityExpression) {
            // Suppress useless evaluation.
            continue;
        }
        vectorSelectExprs[index++] = ve;
    }
    if (index < size) {
        vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index);
    }
    // The following method introduces a cast if x or y is DECIMAL_64 and parent expression (x % y) is DECIMAL.
    try {
        fixDecimalDataTypePhysicalVariations(vContext, vectorSelectExprs);
    } finally {
        vContext.freeMarkedScratchColumns();
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns);
    return OperatorFactory.getVectorOperator(selectOp.getCompilationOpContext(), selectDesc, vContext, vectorSelectDesc);
}
Also used : VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression)
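
For context, here is a hedged sketch of how a caller might drive vectorizeSelectOperator: pass the original select operator, the current VectorizationContext, and an empty VectorSelectDesc that the method populates with the vectorized expressions and projected output columns. The wiring below is illustrative; the real call site sits inside the Vectorizer physical-optimization pass, and the no-arg VectorSelectDesc construction is an assumption for the sketch.

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;

public class VectorizeSelectSketch {

    // Hypothetical wrapper: vectorize a SELECT operator under an existing
    // vectorization context and return the resulting vectorized operator.
    static Operator<? extends OperatorDesc> vectorize(
            Operator<? extends OperatorDesc> selectOp,
            VectorizationContext vContext) throws HiveException {
        VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
        return Vectorizer.vectorizeSelectOperator(selectOp, vContext, vectorSelectDesc);
    }
}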

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)55 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)50 ArrayList (java.util.ArrayList)43 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)32 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)31 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)30 HashMap (java.util.HashMap)28 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)25 LinkedHashMap (java.util.LinkedHashMap)20 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)16 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)15 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)13 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)13 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)13 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)13 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)13 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)13 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)13 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)12