
Example 46 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class HiveOpConverter method genPTF.

private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticException {
    Operator<?> input = inputOpAf.inputs.get(0);
    wSpec.validateAndMakeEffective();
    WindowingComponentizer groups = new WindowingComponentizer(wSpec);
    RowResolver rr = new RowResolver();
    for (ColumnInfo ci : input.getSchema().getSignature()) {
        rr.put(inputOpAf.tabAlias, ci.getInternalName(), ci);
    }
    while (groups.hasNext()) {
        wSpec = groups.next(hiveConf, semanticAnalyzer, unparseTranslator, rr);
        // 1. Create RS and backtrack Select operator on top
        ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) {
            ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr);
            if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) {
                keyCols.add(partExpr);
                partCols.add(partExpr);
                order.append('+');
                nullOrder.append('a');
            }
        }
        if (wSpec.getQueryOrderSpec() != null) {
            for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) {
                ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr);
                char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
                char nullOrderChar = orderCol.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
                int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols);
                if (index >= 0) {
                    order.setCharAt(index, orderChar);
                    nullOrder.setCharAt(index, nullOrderChar);
                    continue;
                }
                keyCols.add(orderExpr);
                order.append(orderChar);
                nullOrder.append(nullOrderChar);
            }
        }
        SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input, keyCols.toArray(new ExprNodeDesc[keyCols.size()]), 0, partCols, order.toString(), nullOrder.toString(), -1, Operation.NOT_ACID, hiveConf);
        // 2. Finally create PTF
        PTFTranslator translator = new PTFTranslator();
        PTFDesc ptfDesc = translator.translate(wSpec, semanticAnalyzer, hiveConf, rr, unparseTranslator);
        RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
        Operator<?> ptfOp = OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), selectOp);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated " + ptfOp + " with row schema: [" + ptfOp.getSchema() + "]");
        }
        // 3. Prepare for next iteration (if any)
        rr = ptfOpRR;
        input = ptfOp;
    }
    return inputOpAf.clone(input);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) OrderExpression(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression) ArrayList(java.util.ArrayList) PTFTranslator(org.apache.hadoop.hive.ql.parse.PTFTranslator) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) PartitionExpression(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowingComponentizer(org.apache.hadoop.hive.ql.parse.WindowingComponentizer)
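A note on the pattern above: genPTF first exposes its input's RowSchema through a RowResolver and later builds the PTF operator's RowSchema back from a resolver's ColumnInfos. The following minimal sketch shows that round trip in isolation; the single string column, its internal name "_col0", and the table alias "t" are placeholders for illustration, not values taken from the Hive code above.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaRoundTripSketch {

    public static void main(String[] args) {
        // Hypothetical single-column schema: internal name "_col0", string type, table alias "t".
        ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
        signature.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
        RowSchema inputSchema = new RowSchema(signature);

        // Expose the schema's columns through a RowResolver, as genPTF does for its input.
        RowResolver rr = new RowResolver();
        for (ColumnInfo ci : inputSchema.getSignature()) {
            rr.put("t", ci.getInternalName(), ci);
        }

        // Turn the resolver back into a RowSchema, as done when wiring the PTF operator.
        RowSchema outputSchema = new RowSchema(rr.getColumnInfos());
        System.out.println(outputSchema);
    }
}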

Example 47 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class HiveOpConverter method visit.

OpAttr visit(HiveUnion unionRel) throws SemanticException {
    // 1. Convert inputs
    List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
    OpAttr[] inputs = new OpAttr[inputsList.size()];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = dispatch(inputsList.get(i));
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + " with row type: [" + unionRel.getRowType() + "]");
    }
    // 2. Create a new union operator
    UnionDesc unionDesc = new UnionDesc();
    unionDesc.setNumInputs(inputs.length);
    String tableAlias = getHiveDerivedTableAlias();
    ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
    Operator<?>[] children = new Operator<?>[inputs.length];
    for (int i = 0; i < children.length; i++) {
        if (i == 0) {
            children[i] = inputs[i].inputs.get(0);
        } else {
            Operator<?> op = inputs[i].inputs.get(0);
            // We need to check whether the other union input branches follow the first branch;
            // we may need to cast the data types for specific columns.
            children[i] = genInputSelectForUnion(op, cinfoLst);
        }
    }
    Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
    }
    // 3. Return result
    return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
Also used : UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelNode(org.apache.calcite.rel.RelNode)
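The helpers createColInfos and genInputSelectForUnion are internal to HiveOpConverter and are not shown here. As a rough sketch of the first step only, the union's shared column list can be thought of as a copy of the first branch's schema under a fresh table alias; the helper below illustrates that idea and is not the actual Hive implementation.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;

public class UnionColInfoSketch {

    /**
     * Copies the first branch's columns under a new table alias so that every
     * union branch can be described by the same RowSchema. This mirrors the
     * intent of createColInfos above, not its exact implementation.
     */
    public static RowSchema unionSchema(Operator<?> firstBranch, String tableAlias) {
        ArrayList<ColumnInfo> cinfoLst = new ArrayList<ColumnInfo>();
        for (ColumnInfo ci : firstBranch.getSchema().getSignature()) {
            cinfoLst.add(new ColumnInfo(ci.getInternalName(), ci.getType(), tableAlias, ci.getIsVirtualCol()));
        }
        return new RowSchema(cinfoLst);
    }
}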

Example 48 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class HiveGBOpConvUtil method genMapSideGBRS.

private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
    int keyLength = reduceKeys.size();
    if (inclGrpSetInMapSide(gbInfo)) {
        addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
        keyLength++;
    }
    if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
        // NOTE: All distinct cols share a single output col name.
        reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys().size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
    } else if (!gbInfo.distColIndices.isEmpty()) {
        // This is the case where the distinct cols are part of the GB keys; in that
        // case we still need to add them to the output col names
        outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
    }
    ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(), outputValueColumnNames, colInfoLst, colExprMap, false, false);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 49 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class RewriteQueryUsingAggregateIndexCtx method replaceSelectOperatorProcess.

/**
   * This method replaces the original SelectOperator with a new SelectOperator
   * that carries an additional column, indexed_key_column.
   */
private void replaceSelectOperatorProcess(SelectOperator operator) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    // we need to set the colList, outputColumnNames, colExprMap,
    // rowSchema for only that SelectOperator which precedes the GroupByOperator
    // count(indexed_key_column) needs to be replaced by
    // sum(`_count_of_indexed_key_column`)
    List<ExprNodeDesc> selColList = operator.getConf().getColList();
    selColList.add(rewriteQueryCtx.getAggrExprNode());
    List<String> selOutputColNames = operator.getConf().getOutputColumnNames();
    selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
    operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(), rewriteQueryCtx.getAggrExprNode());
    RowSchema selRS = operator.getSchema();
    List<ColumnInfo> selRSSignature = selRS.getSignature();
    // Need to create a new type for Column[_count_of_indexed_key_column] node
    PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
    pti.setTypeName("bigint");
    ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false);
    selRSSignature.add(newCI);
    selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
    operator.setSchema(selRS);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
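For illustration, the schema manipulation above (fetch the signature, append a ColumnInfo, set it back on the operator) can be packaged as a small helper. This is a hedged sketch, not Hive code: the class name, method name, and placeholder column name are invented, and it assumes that the bigint type corresponds to TypeInfoFactory.longTypeInfo.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class AppendBigintColumnSketch {

    /**
     * Appends a bigint column to an operator's RowSchema, following the same
     * getSignature()/setSignature() pattern as replaceSelectOperatorProcess.
     * The column name is a caller-supplied placeholder, not a Hive convention.
     */
    public static void appendBigintColumn(Operator<?> op, String columnName) {
        RowSchema schema = op.getSchema();
        List<ColumnInfo> signature = schema.getSignature();
        signature.add(new ColumnInfo(columnName, TypeInfoFactory.longTypeInfo, "", false));
        schema.setSignature((ArrayList<ColumnInfo>) signature);
        op.setSchema(schema);
    }
}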

Example 50 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class RewriteQueryUsingAggregateIndexCtx method replaceTableScanProcess.

/**
   * This method replaces the original TableScanOperator with the new
   * TableScanOperator and metadata that scans over the index table rather than
   * scanning over the original table.
   *
   */
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    String alias = rewriteQueryCtx.getAlias();
    // Need to remove the original TableScanOperators from these data structures
    // and add new ones
    HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
    // remove original TableScanOperator
    topOps.remove(alias);
    String indexTableName = rewriteQueryCtx.getIndexName();
    Table indexTableHandle = null;
    try {
        indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
    } catch (HiveException e) {
        LOG.error("Error while getting the table handle for index table.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    // construct a new descriptor for the index table scan
    TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
    indexTableScanDesc.setGatherStats(false);
    String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
    indexTableScanDesc.setStatsAggPrefix(k);
    scanOperator.setConf(indexTableScanDesc);
    // Construct the new RowResolver for the new TableScanOperator
    ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
    try {
        StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
        StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
        sigRS.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), indexTableName, false));
    } catch (SerDeException e) {
        LOG.error("Error while creating the RowResolver for new TableScanOperator.");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    RowSchema rs = new RowSchema(sigRS);
    // Set row resolver for new table
    String newAlias = indexTableName;
    int index = alias.lastIndexOf(":");
    if (index >= 0) {
        newAlias = alias.substring(0, index) + ":" + indexTableName;
    }
    // Scan operator now points to other table
    scanOperator.getConf().setAlias(newAlias);
    scanOperator.setAlias(indexTableName);
    topOps.put(newAlias, scanOperator);
    rewriteQueryCtx.getParseContext().setTopOps(topOps);
    ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs, Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldNode(org.apache.hadoop.hive.ql.optimizer.FieldNode) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
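The StructField-to-ColumnInfo conversion above is limited to the single index key column. As a hedged generalization (the class and method names below are invented for illustration), the same conversion can be applied to every field of a table's row ObjectInspector to build a complete RowSchema:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class IndexTableRowSchemaSketch {

    /**
     * Builds a RowSchema covering every field of a row ObjectInspector.
     * replaceTableScanProcess above only keeps the index key column; this sketch
     * generalizes the same StructField-to-ColumnInfo conversion to all fields.
     */
    public static RowSchema fromObjectInspector(StructObjectInspector rowObjectInspector, String tableAlias) {
        ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
        for (StructField field : rowObjectInspector.getAllStructFieldRefs()) {
            sigRS.add(new ColumnInfo(field.getFieldName(),
                TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
                tableAlias, false));
        }
        return new RowSchema(sigRS);
    }
}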

Aggregations

RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 86
ArrayList (java.util.ArrayList): 65
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 65
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 62
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 50
HashMap (java.util.HashMap): 45
Operator (org.apache.hadoop.hive.ql.exec.Operator): 42
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 39
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 38
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 37
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 35
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 34
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 34
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 31
LinkedHashMap (java.util.LinkedHashMap): 30
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 28
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 28
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 27
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 25
NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint): 22