Example 46 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ConstantPropagateProcFactory method propagate.

/**
 * Propagate an assignment expression, adding an entry to the constant map.
 *
 * @param udf the expression's UDF; currently only two UDFs are supported: '=' and 'is null'.
 * @param newExprs child expressions (parameters).
 * @param rs row schema used to resolve the column reference.
 * @param constants map that receives the new column-to-constant entry.
 */
private static void propagate(GenericUDF udf, List<ExprNodeDesc> newExprs, RowSchema rs, Map<ColumnInfo, ExprNodeDesc> constants) {
    if (udf instanceof GenericUDFOPEqual) {
        ExprNodeDesc lOperand = newExprs.get(0);
        ExprNodeDesc rOperand = newExprs.get(1);
        ExprNodeConstantDesc v;
        if (lOperand instanceof ExprNodeConstantDesc) {
            v = (ExprNodeConstantDesc) lOperand;
        } else if (rOperand instanceof ExprNodeConstantDesc) {
            v = (ExprNodeConstantDesc) rOperand;
        } else {
            // we need a constant on one side.
            return;
        }
        // If both sides are constants, there is nothing to propagate
        ExprNodeColumnDesc c;
        if (lOperand instanceof ExprNodeColumnDesc) {
            c = (ExprNodeColumnDesc) lOperand;
        } else if (rOperand instanceof ExprNodeColumnDesc) {
            c = (ExprNodeColumnDesc) rOperand;
        } else {
            // we need a column reference on the other side.
            return;
        }
        ColumnInfo ci = resolveColumn(rs, c);
        if (ci != null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Filter {} is identified as a value assignment, propagate it.", udf.getDisplayString(new String[] { lOperand.getExprString(), rOperand.getExprString() }));
            }
            if (!v.getTypeInfo().equals(ci.getType())) {
                v = typeCast(v, ci.getType(), true);
            }
            if (v != null) {
                constants.put(ci, v);
            }
        }
    } else if (udf instanceof GenericUDFOPNull) {
        ExprNodeDesc operand = newExprs.get(0);
        if (operand instanceof ExprNodeColumnDesc) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Filter {} is identified as a value assignment, propagate it.", udf.getDisplayString(new String[] { operand.getExprString() }));
            }
            ExprNodeColumnDesc c = (ExprNodeColumnDesc) operand;
            ColumnInfo ci = resolveColumn(rs, c);
            if (ci != null) {
                constants.put(ci, new ExprNodeConstantDesc(ci.getType(), null));
            }
        }
    }
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
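
To make the effect concrete, here is a minimal sketch of the entry that propagate() ends up recording for a filter such as id = 42. The class name and the literal values are hypothetical, and it assumes only that hive-exec is on the classpath; the Hive calls are the same ones used in the method above.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PropagateSketch {
    public static void main(String[] args) {
        // The column side of a filter such as `WHERE id = 42`, as resolveColumn()
        // would return it from the operator's row schema.
        ColumnInfo ci = new ColumnInfo("id", TypeInfoFactory.intTypeInfo, "t", false);
        // The constant side of the equality.
        ExprNodeConstantDesc v = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 42);
        // propagate() records the binding so downstream operators can fold `id` to 42.
        Map<ColumnInfo, ExprNodeDesc> constants = new HashMap<ColumnInfo, ExprNodeDesc>();
        constants.put(ci, v);
        System.out.println(constants);
    }
}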

Example 47 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ConstantPropagateProcFactory method foldOperator.

/**
 * Update the operator's row schema, replacing a column's object inspector
 * with the constant's when a constant is known for that column.
 *
 * @param op operator whose schema and column expression map are updated.
 * @param cppCtx context holding the per-operator constant maps.
 * @throws SemanticException
 */
private static void foldOperator(Operator<? extends Serializable> op, ConstantPropagateProcCtx cppCtx) throws SemanticException {
    RowSchema schema = op.getSchema();
    Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getOpToConstantExprs().get(op);
    if (schema != null && schema.getSignature() != null) {
        for (ColumnInfo col : schema.getSignature()) {
            ExprNodeDesc constant = constants.get(col);
            if (constant != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Replacing column " + col + " with constant " + constant + " in " + op);
                }
                if (!col.getType().equals(constant.getTypeInfo())) {
                    constant = typeCast(constant, col.getType());
                }
                if (constant != null) {
                    col.setObjectinspector(constant.getWritableObjectInspector());
                }
            }
        }
    }
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    if (colExprMap != null) {
        for (Entry<ColumnInfo, ExprNodeDesc> e : constants.entrySet()) {
            String internalName = e.getKey().getInternalName();
            if (colExprMap.containsKey(internalName)) {
                colExprMap.put(internalName, e.getValue());
            }
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
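
As a rough illustration of the second half of foldOperator(), this sketch (hypothetical class name and values; hive-exec assumed on the classpath) shows the colExprMap rewrite: a column reference is overwritten with the constant known for it.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FoldSketch {
    public static void main(String[] args) {
        // Before folding: the operator's column expression map points at a column.
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        colExprMap.put("_col0",
            new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id", "t", false));
        // After folding: the entry is overwritten with the known constant, so the
        // column no longer needs to be computed upstream.
        colExprMap.put("_col0",
            new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 42));
        System.out.println(colExprMap.get("_col0").getExprString());
    }
}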

Example 48 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class MapJoinProcessor method genSelectPlan.

protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
    List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
    input.setChildOperators(null);
    // Create a dummy select; the walker needs this select later on to split
    // the map join.
    RowSchema inputRS = input.getSchema();
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    List<String> outputCols = input.getConf().getOutputColumnNames();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo valueInfo = inputRS.getColumnInfo(internalName);
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo.getInternalName(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputs.add(internalName);
        ColumnInfo newCol = new ColumnInfo(internalName, valueInfo.getType(), valueInfo.getTabAlias(), valueInfo.getIsVirtualCol(), valueInfo.isHiddenVirtualCol());
        newCol.setAlias(valueInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(internalName, colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputs, false);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), input);
    sel.setColumnExprMap(colExprMap);
    // Insert the select operator in between.
    sel.setChildOperators(childOps);
    for (Operator<? extends OperatorDesc> ch : childOps) {
        ch.replaceParent(input, sel);
    }
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
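
The core of genSelectPlan() is the identity projection it builds. Below is a condensed, self-contained sketch of that construction for a single column (hypothetical class name and column values; the Hive APIs are the same ones the method uses):

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class IdentitySelectSketch {
    public static void main(String[] args) {
        // One input column, described the way a map-join output schema would.
        ColumnInfo in = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false);
        // Identity projection: each output expression is just a reference to the
        // corresponding input column.
        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        exprs.add(new ExprNodeColumnDesc(in.getType(), in.getInternalName(),
            in.getTabAlias(), in.getIsVirtualCol()));
        ArrayList<String> outputs = new ArrayList<String>();
        outputs.add(in.getInternalName());
        ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
        outputRS.add(new ColumnInfo(in));
        // The dummy select the walker later uses to split the map join.
        SelectDesc select = new SelectDesc(exprs, outputs, false);
        RowSchema schema = new RowSchema(outputRS);
        System.out.println(select.getColList() + " as " + select.getOutputColumnNames()
            + " with schema " + schema);
    }
}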

Example 49 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class CalcitePlanner method createRowResolver.

private RowResolver createRowResolver(List<ColumnInfo> columnInfos) {
    RowResolver rowResolver = new RowResolver();
    int pos = 0;
    for (ColumnInfo columnInfo : columnInfos) {
        ColumnInfo newColumnInfo = new ColumnInfo(columnInfo);
        newColumnInfo.setInternalName(HiveConf.getColumnInternalName(pos++));
        rowResolver.put(newColumnInfo.getTabAlias(), newColumnInfo.getAlias(), newColumnInfo);
    }
    return rowResolver;
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint)
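
The renaming step leans on HiveConf.getColumnInternalName(), which generates positional names. A quick standalone check (hypothetical class name):

import org.apache.hadoop.hive.conf.HiveConf;

public class InternalNameSketch {
    public static void main(String[] args) {
        // createRowResolver() assigns these positional internal names in order:
        // prints _col0, _col1, _col2
        for (int pos = 0; pos < 3; pos++) {
            System.out.println(HiveConf.getColumnInternalName(pos));
        }
    }
}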

Example 50 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class CalcitePlanner method handleInsertStatement.

// This function serves as a wrapper around handleInsertStatementSpec in
// SemanticAnalyzer.
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb) throws SemanticException {
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo col = columns.get(i);
        colList.add(new ExprNodeColumnDesc(col));
    }
    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
    RowResolver rowResolver = createRowResolver(columns);
    rowResolver = handleInsertStatementSpec(colList, dest, rowResolver, qb, selExprList);
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < colList.size(); i++) {
        String outputCol = getColumnInternalName(i);
        colExprMap.put(outputCol, colList.get(i));
        columnNames.add(outputCol);
    }
    Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
    output.setColumnExprMap(colExprMap);
    return output;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
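
A minimal sketch of the output-name bookkeeping at the end of handleInsertStatement(): each expression gets a positional internal name, mapped back to it in colExprMap. The class name and sample columns are hypothetical; the Hive calls match those in the method above.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InsertWrapperSketch {
    public static void main(String[] args) {
        // Input columns as the row resolver would supply them.
        List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
        columns.add(new ColumnInfo("key", TypeInfoFactory.stringTypeInfo, "t", false));
        columns.add(new ColumnInfo("value", TypeInfoFactory.intTypeInfo, "t", false));
        // One column reference per input column, then a positional output name for each.
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        List<String> columnNames = new ArrayList<String>();
        for (int i = 0; i < columns.size(); i++) {
            String outputCol = HiveConf.getColumnInternalName(i);
            colExprMap.put(outputCol, new ExprNodeColumnDesc(columns.get(i)));
            columnNames.add(outputCol);
        }
        // e.g. {_col0=Column[key], _col1=Column[value]}
        System.out.println(colExprMap);
    }
}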

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 225
ArrayList (java.util.ArrayList): 140
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 138
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 100
HashMap (java.util.HashMap): 93
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 86
LinkedHashMap (java.util.LinkedHashMap): 71
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 59
Operator (org.apache.hadoop.hive.ql.exec.Operator): 48
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 47
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 47
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 45
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 45
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 45
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 45
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 45
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 45
Map (java.util.Map): 41
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 39
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 38