Search in sources :

Example 51 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class TypeConverter method getType.

public static RelDataType getType(RelOptCluster cluster, RowResolver rr, List<String> neededCols) throws CalciteSemanticException {
    RexBuilder rexBuilder = cluster.getRexBuilder();
    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
    RowSchema rs = rr.getRowSchema();
    List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
    List<String> fieldNames = new LinkedList<String>();
    for (ColumnInfo ci : rs.getSignature()) {
        if (neededCols == null || neededCols.contains(ci.getInternalName())) {
            fieldTypes.add(convert(ci.getType(), dtFactory));
            fieldNames.add(ci.getInternalName());
        }
    }
    return dtFactory.createStructType(fieldTypes, fieldNames);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) RexBuilder(org.apache.calcite.rex.RexBuilder) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelDataType(org.apache.calcite.rel.type.RelDataType) LinkedList(java.util.LinkedList)

Example 52 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class CorrelationUtilities method replaceReduceSinkWithSelectOperator.

// replace the cRS to SEL operator
protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS, ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
    RowSchema inputRS = childRS.getSchema();
    SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
    Operator<?> parent = getSingleParent(childRS);
    parent.getChildOperators().clear();
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(inputRS.getSignature()), parent);
    sel.setColumnExprMap(childRS.getColumnExprMap());
    sel.setChildOperators(childRS.getChildOperators());
    for (Operator<? extends Serializable> ch : childRS.getChildOperators()) {
        ch.replaceParent(childRS, sel);
    }
    childRS.setChildOperators(null);
    childRS.setParentOperators(null);
    procCtx.addRemovedOperator(childRS);
    return sel;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 53 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class ColumnPrunerProcFactory method pruneOperator.

private static void pruneOperator(NodeProcessorCtx ctx, Operator<? extends OperatorDesc> op, List<FieldNode> cols) throws SemanticException {
    // the pruning needs to preserve the order of columns in the input schema
    RowSchema inputSchema = op.getSchema();
    if (inputSchema != null) {
        ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
        RowSchema oldRS = op.getSchema();
        for (ColumnInfo i : oldRS.getSignature()) {
            if (lookupColumn(cols, i.getInternalName()) != null) {
                rs.add(i);
            }
        }
        op.getSchema().setSignature(rs);
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)

Example 54 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class ColumnPrunerProcFactory method pruneReduceSinkOperator.

private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowSchema oldRS = reduce.getSchema();
    ArrayList<ColumnInfo> old_signature = oldRS.getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);
    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();
    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < retainFlags.length; i++) {
        String outputCol = valueColNames.get(i);
        ExprNodeDesc outputColExpr = valueExprs.get(i);
        if (!retainFlags[i]) {
            ColumnInfo colInfo = oldRS.getColumnInfo(outputCol);
            if (colInfo == null) {
                outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
                colInfo = oldRS.getColumnInfo(outputCol);
            }
            // do row resolve once more because the ColumnInfo in row resolver is already removed
            if (colInfo == null) {
                continue;
            }
            // i.e. this column is not appearing in keyExprs of the RS
            if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
                oldMap.remove(outputCol);
                signature.remove(colInfo);
            }
        } else {
            newValueColNames.add(outputCol);
            newValueExprs.add(outputColExpr);
        }
    }
    oldRS.setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 55 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class MapJoinProcessor method convertJoinOpMapJoinOp.

public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos, boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
    MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
    // reduce sink row resolver used to generate map join op
    RowSchema outputRS = op.getSchema();
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(op.getCompilationOpContext(), mapJoinDescriptor, new RowSchema(outputRS.getSignature()), op.getParentOperators());
    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    mapJoinOp.setColumnExprMap(colExprMap);
    List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(op, mapJoinOp);
    }
    mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
    mapJoinOp.setChildOperators(childOps);
    op.setChildOperators(null);
    op.setParentOperators(null);
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)86 ArrayList (java.util.ArrayList)65 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)65 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)62 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)50 HashMap (java.util.HashMap)45 Operator (org.apache.hadoop.hive.ql.exec.Operator)42 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)39 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)38 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)37 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)35 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)34 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)34 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)31 LinkedHashMap (java.util.LinkedHashMap)30 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)28 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)28 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)27 LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)25 NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)22