
Example 71 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genTablePlan.

@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
    String alias_id = getAliasId(alias, qb);
    Table tab = qb.getMetaData().getSrcForAlias(alias);
    RowResolver rwsch;
    // is the table already present
    TableScanOperator top = topOps.get(alias_id);
    // Obtain table props in query
    Map<String, String> properties = qb.getTabPropsForAlias(alias);
    if (top == null) {
        // Determine row schema for TSOP.
        // Include column names from SerDe, the partition and virtual columns.
        rwsch = new RowResolver();
        try {
            // Including parameters passed in the query
            if (properties != null) {
                for (Entry<String, String> prop : properties.entrySet()) {
                    if (tab.getSerdeParam(prop.getKey()) != null) {
                        LOG.warn("SerDe property in input query overrides stored SerDe property");
                    }
                    tab.setSerdeParam(prop.getKey(), prop.getValue());
                }
            }
            // Obtain inspector for schema
            StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer().getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                /**
                 * if the column is a skewed column, use ColumnInfo accordingly
                 */
                ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), alias, false);
                colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName()));
                rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
            }
        } catch (SerDeException e) {
            throw new RuntimeException(e);
        }
        // Finally add the partitioning columns
        for (FieldSchema part_col : tab.getPartCols()) {
            LOG.trace("Adding partition col: " + part_col);
            rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
        }
        // put all virtual columns in RowResolver.
        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
        // use a list for easy customization
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        // Create the root of the operator tree
        TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
        setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
        SplitSample sample = nameToSplitSample.get(alias_id);
        if (sample != null && sample.getRowCount() != null) {
            tsDesc.setRowLimit(sample.getRowCount());
            nameToSplitSample.remove(alias_id);
        }
        top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch);
        // Set insideView so that we can skip the column authorization for this.
        top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase()));
        // Add this to the list of top operators - we always start from a table
        // scan
        topOps.put(alias_id, top);
        // Add a mapping from the table scan operator to Table
        topToTable.put(top, tab);
        if (properties != null) {
            topToTableProps.put(top, properties);
            tsDesc.setOpProps(properties);
        }
    } else {
        rwsch = opParseCtx.get(top).getRowResolver();
        top.setChildOperators(null);
    }
    // check if this table is sampled and needs more than input pruning
    Operator<? extends OperatorDesc> op = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
        TableScanOperator tableScanOp = top;
        tableScanOp.getConf().setTableSample(ts);
        int num = ts.getNumerator();
        int den = ts.getDenominator();
        ArrayList<ASTNode> sampleExprs = ts.getExprs();
        // TODO: Do the type checking of the expressions
        List<String> tabBucketCols = tab.getBucketCols();
        int numBuckets = tab.getNumBuckets();
        // If there are no sample cols and no bucket cols then throw an error
        if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
            throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getTableName());
        }
        if (num > den) {
            throw new SemanticException(ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getTableName());
        }
        // check if a predicate is needed
        // predicate is needed if either input pruning is not enough
        // or if input pruning is not possible
        // check if the sample columns are the same as the table bucket columns
        boolean colsEqual = true;
        if ((sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0)) {
            colsEqual = false;
        }
        for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
            boolean colFound = false;
            for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
                if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
                    break;
                }
                if (((ASTNode) sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) {
                    colFound = true;
                }
            }
            colsEqual = (colsEqual && colFound);
        }
        // Check if input can be pruned
        ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));
        // check if input pruning is enough
        if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
            // input pruning is enough; add the filter for the optimizer to use it
            // later
            LOG.info("No need for sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true, new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true));
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        } else {
            // need to add filter
            // create tableOp to be filterDesc and set as child to 'top'
            LOG.info("Need sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        }
    } else {
        boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
        if (testMode) {
            String tabName = tab.getTableName();
            // has the user explicitly asked not to sample this table
            String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
            String[] unSampleTbls = unSampleTblList.split(",");
            boolean unsample = false;
            for (String unSampleTbl : unSampleTbls) {
                if (tabName.equalsIgnoreCase(unSampleTbl)) {
                    unsample = true;
                }
            }
            if (!unsample) {
                int numBuckets = tab.getNumBuckets();
                // If the input table is bucketed, choose the first bucket
                if (numBuckets > 0) {
                    TableSample tsSample = new TableSample(1, numBuckets);
                    tsSample.setInputPruning(true);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
                    FilterDesc filterDesc = new FilterDesc(samplePred, true, new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true));
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                    LOG.info("No need for sample filter");
                } else {
                    // The table is not bucketed, add a dummy filter :: rand()
                    int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
                    TableSample tsSample = new TableSample(1, freq);
                    tsSample.setInputPruning(false);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    LOG.info("Need sample filter");
                    ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415)));
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc);
                    FilterDesc filterDesc = new FilterDesc(samplePred, true);
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                }
            }
        }
    }
    Operator output = putOpInsertMap(op, rwsch);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created Table Plan for " + alias + " " + op.toString());
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
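
A recurring pattern worth calling out: every operator created here gets its RowSchema from the ColumnInfos accumulated in a RowResolver. The following is a minimal sketch of that relationship outside SemanticAnalyzer (the class name, alias and column names/types are invented for illustration; only Hive classes already used in the example above are assumed):

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaSketch {

    // Builds a RowSchema the same way genTablePlan does: register ColumnInfos
    // in a RowResolver, then hand the resolver's column list to RowSchema.
    public static RowSchema buildSchema(String alias) {
        RowResolver rwsch = new RowResolver();
        // regular columns, as registered from the SerDe's struct fields
        rwsch.put(alias, "id",
                new ColumnInfo("id", TypeInfoFactory.getPrimitiveTypeInfo("int"), alias, false));
        rwsch.put(alias, "name",
                new ColumnInfo("name", TypeInfoFactory.getPrimitiveTypeInfo("string"), alias, false));
        // a partition column looks the same, except isVirtualCol is set to true
        rwsch.put(alias, "ds",
                new ColumnInfo("ds", TypeInfoFactory.getPrimitiveTypeInfo("string"), alias, true));
        // the operator's RowSchema is simply the resolver's ColumnInfo list
        return new RowSchema(rwsch.getColumnInfos());
    }
}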

Example 72 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genPTFPlanForComponentQuery.

private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
    /*
     * 1. Create the PTFDesc from the Qspec attached to this QB.
     */
    RowResolver rr = opParseCtx.get(input).getRowResolver();
    PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
    /*
     * 2. build Map-side Op Graph. Graph template is either:
     * Input -> PTF_map -> ReduceSink
     * or
     * Input -> ReduceSink
     *
     * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
     */
    {
        PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
        /*
       * a. add Map-side PTF Operator if needed
       */
        if (tabDef.isTransformsRawInput()) {
            RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
            ptfDesc.setMapSide(true);
            input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
            rr = opParseCtx.get(input).getRowResolver();
        }
        /*
       * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
       */
        ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
        StringBuilder orderString = new StringBuilder();
        StringBuilder nullOrderString = new StringBuilder();
        /*
       * Use the input RR of TableScanOperator in case there is no map-side
       * reshape of input.
       * If the parent of ReduceSinkOperator is PTFOperator, use its
       * output RR.
       */
        buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
        input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID);
    }
    /*
     * 3. build Reduce-side Op Graph
     */
    {
        /*
       * c. Rebuild the QueryDef.
       * Why?
       * - so that the ExprNodeDescriptors in the QueryDef are based on the
       *   Select Operator's RowResolver
       */
        rr = opParseCtx.get(input).getRowResolver();
        ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
        /*
       * d. Construct PTF Operator.
       */
        RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
    }
    return input;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)

Example 73 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genUDTFPlan.

private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, ArrayList<String> colAliases, QB qb, Operator input, boolean outerLV) throws SemanticException {
    // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
    QBParseInfo qbp = qb.getParseInfo();
    if (!qbp.getDestToGroupBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
    }
    if (!qbp.getDestToDistributeBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
    }
    if (!qbp.getDestToSortBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
    }
    if (!qbp.getDestToClusterBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
    }
    if (!qbp.getAliasToLateralViews().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
    }
    // Use the RowResolver from the input operator to generate a input
    // ObjectInspector that can be used to initialize the UDTF. Then, the
    // resulting output object inspector can be used to make the RowResolver
    // for the UDTF operator
    RowResolver selectRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ColumnInfo> inputCols = selectRR.getColumnInfos();
    // Create the object inspector for the input columns and initialize the UDTF
    ArrayList<String> colNames = new ArrayList<String>();
    ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
    for (int i = 0; i < inputCols.size(); i++) {
        colNames.add(inputCols.get(i).getInternalName());
        colOIs[i] = inputCols.get(i).getObjectInspector();
    }
    StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
    StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
    int numUdtfCols = outputOI.getAllStructFieldRefs().size();
    if (colAliases.isEmpty()) {
        // user did not specify alias names, infer names from outputOI
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            colAliases.add(field.getFieldName());
        }
    }
    // Make sure that the number of column aliases in the AS clause matches
    // the number of columns output by the UDTF
    int numSuppliedAliases = colAliases.size();
    if (numUdtfCols != numSuppliedAliases) {
        throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numUdtfCols + " aliases " + "but got " + numSuppliedAliases));
    }
    // Generate the output column info's / row resolver using internal names.
    ArrayList<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
    Iterator<String> colAliasesIter = colAliases.iterator();
    for (StructField sf : outputOI.getAllStructFieldRefs()) {
        String colAlias = colAliasesIter.next();
        assert (colAlias != null);
        // Since the UDTF operator feeds into a LVJ operator that will rename
        // all the internal names, we can just use field name from the UDTF's OI
        // as the internal name
        ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
        udtfCols.add(col);
    }
    // Create the row resolver for this operator from the output columns
    RowResolver out_rwsch = new RowResolver();
    for (int i = 0; i < udtfCols.size(); i++) {
        out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
    }
    // Add the UDTFOperator to the operator DAG
    Operator<?> udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    return udtf;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) UDTFDesc(org.apache.hadoop.hive.ql.plan.UDTFDesc) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
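
The input-side ObjectInspector construction above is easy to miss, so here is a hedged sketch (not part of Hive's source; the class name is invented) of how the input operator's ColumnInfos are flattened into the standard struct ObjectInspector handed to GenericUDTF.initialize; the output StructObjectInspector returned by that call is then walked field by field to build the UDTF operator's own ColumnInfos:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;

public class UdtfInputOISketch {

    // Mirrors the first half of genUDTFPlan: internal column names and their
    // ObjectInspectors are collected into parallel lists, then combined into
    // a standard struct ObjectInspector that describes one input row.
    public static StandardStructObjectInspector toRowOI(List<ColumnInfo> inputCols) {
        List<String> colNames = new ArrayList<String>();
        List<ObjectInspector> colOIs = new ArrayList<ObjectInspector>();
        for (ColumnInfo ci : inputCols) {
            colNames.add(ci.getInternalName());
            colOIs.add(ci.getObjectInspector());
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs);
    }
}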

Example 74 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genReduceSinkPlan.

@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    ArrayList<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder newSortOrder = new StringBuilder();
    StringBuilder newNullOrder = new StringBuilder();
    ArrayList<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < sortCols.size(); i++) {
        ExprNodeDesc sortCol = sortCols.get(i);
        // keep the sort column unless we are pulling constants and it is a constant
        if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
            newSortCols.add(sortCol);
            newSortOrder.append(sortOrder.charAt(i));
            newNullOrder.append(nullOrder.charAt(i));
            sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
        }
    }
    // For the generation of the values expression just get the inputs
    // signature and generate field expressions for those
    RowResolver rsRR = new RowResolver();
    ArrayList<String> outputColumns = new ArrayList<String>();
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ColumnInfo> columnInfos = inputRR.getColumnInfos();
    int[] index = new int[columnInfos.size()];
    for (int i = 0; i < index.length; i++) {
        ColumnInfo colInfo = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
        if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
            // ignore, it will be generated by SEL op
            index[i] = Integer.MAX_VALUE;
            constantCols.add(valueBack);
            continue;
        }
        int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
        if (kindex >= 0) {
            index[i] = kindex;
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            rsRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            continue;
        }
        int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -valueCols.size() - 1;
        String outputColName = getColumnInternalName(valueCols.size());
        valueCols.add(value);
        valueColsBack.add(valueBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        rsRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), numReducers, acidOp);
    Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
    List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i));
    }
    List<String> valueColNames = rsdesc.getOutputValueColumnNames();
    for (int i = 0; i < valueColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
    }
    interim.setColumnExprMap(colExprMap);
    RowResolver selectRR = new RowResolver();
    ArrayList<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
    ArrayList<String> selOutputCols = new ArrayList<String>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    Iterator<ExprNodeDesc> constants = constantCols.iterator();
    for (int i = 0; i < index.length; i++) {
        ColumnInfo prev = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(prev.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
        ColumnInfo info = new ColumnInfo(prev);
        ExprNodeDesc desc;
        if (index[i] == Integer.MAX_VALUE) {
            desc = constants.next();
        } else {
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
            }
            desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
        }
        selCols.add(desc);
        String internalName = getColumnInternalName(i);
        info.setInternalName(internalName);
        selectRR.put(nm[0], nm[1], info);
        if (nm2 != null) {
            selectRR.addMappingOnly(nm2[0], nm2[1], info);
        }
        selOutputCols.add(internalName);
        selColExprMap.put(internalName, desc);
    }
    SelectDesc select = new SelectDesc(selCols, selOutputCols);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
    output.setColumnExprMap(selColExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
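
The internal-name convention that genReduceSinkPlan relies on is terse in the code above, so here is a hedged sketch of how key and value columns are addressed on the reduce side (the index 0 and the _col3 name are invented examples):

import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceSinkNamingSketch {
    public static void main(String[] args) {
        // Columns chosen as reduce-sink keys are renamed KEY.reducesinkkey<i>;
        // every other column travels in the value side and becomes VALUE.<internalName>.
        String keyName = Utilities.ReduceField.KEY + ".reducesinkkey" + 0;   // "KEY.reducesinkkey0"
        String valueName = Utilities.ReduceField.VALUE + "." + "_col3";      // "VALUE._col3"
        System.out.println(keyName + " / " + valueName);
        // The trailing SEL operator then maps these reduce-side names back to
        // plain _col<i> internal names, which is why genReduceSinkPlan builds
        // a second RowResolver (selectRR) and a separate colExprMap for it.
    }
}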

Example 75 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genConversionSelectOperator.

/**
 * Generate the conversion SelectOperator that converts the columns into the
 * types that are expected by the table_desc.
 */
Operator genConversionSelectOperator(String dest, QB qb, Operator input, TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException {
    StructObjectInspector oi = null;
    try {
        Deserializer deserializer = table_desc.getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(deserializer, conf, table_desc.getProperties(), null);
        oi = (StructObjectInspector) deserializer.getObjectInspector();
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    // Check column number
    List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
    boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING);
    ArrayList<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
    int inColumnCnt = rowFields.size();
    int outColumnCnt = tableFields.size();
    if (dynPart && dpCtx != null) {
        outColumnCnt += dpCtx.getNumDPCols();
    }
    // The number of input columns and output columns should match for a regular query
    if (!updating(dest) && !deleting(dest) && inColumnCnt != outColumnCnt) {
        String reason = "Table " + dest + " has " + outColumnCnt + " columns, but query has " + inColumnCnt + " columns.";
        throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
    }
    // Check column types
    boolean converted = false;
    int columnNumber = tableFields.size();
    ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
    // MetadataTypedColumnsetSerDe does not need type conversions because it
    // does the conversion to String by itself.
    boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(MetadataTypedColumnsetSerDe.class);
    boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals(LazySimpleSerDe.class);
    if (!isMetaDataSerDe && !deleting(dest)) {
        // offset by 1 so that we don't try to convert the ROW__ID
        if (updating(dest)) {
            expressions.add(new ExprNodeColumnDesc(rowFields.get(0).getType(), rowFields.get(0).getInternalName(), "", true));
        }
        // this loop only deals with non-partition columns; partition columns are handled next
        for (int i = 0; i < columnNumber; i++) {
            int rowFieldsOffset = updating(dest) ? i + 1 : i;
            ObjectInspector tableFieldOI = tableFields.get(i).getFieldObjectInspector();
            TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
            TypeInfo rowFieldTypeInfo = rowFields.get(rowFieldsOffset).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(rowFieldsOffset).getInternalName(), "", false, rowFields.get(rowFieldsOffset).isSkewedCol());
            // Thus, we still keep the conversion.
            if (!tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
                // need to do some conversions here
                converted = true;
                if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                    // cannot convert to complex types
                    column = null;
                } else {
                    column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
                }
                if (column == null) {
                    String reason = "Cannot convert column " + i + " from " + rowFieldTypeInfo + " to " + tableFieldTypeInfo + ".";
                    throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(qb.getParseInfo().getDestForClause(dest), reason));
                }
            }
            expressions.add(column);
        }
    }
    // deal with dynamic partition columns: convert ExprNodeDesc type to String??
    if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) {
        // DP columns starts with tableFields.size()
        for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) {
            TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
            ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
            expressions.add(column);
        }
    // converted = true; // [TODO]: should we check & convert type to String and set it to true?
    }
    if (converted) {
        // add the select operator
        RowResolver rowResolver = new RowResolver();
        ArrayList<String> colNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        for (int i = 0; i < expressions.size(); i++) {
            String name = getColumnInternalName(i);
            rowResolver.put("", name, new ColumnInfo(name, expressions.get(i).getTypeInfo(), "", false));
            colNames.add(name);
            colExprMap.put(name, expressions.get(i));
        }
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(expressions, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
        input.setColumnExprMap(colExprMap);
    }
    return input;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PatternSyntaxException(java.util.regex.PatternSyntaxException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(java.security.AccessControlException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
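
The core of this method is the per-column type check; the following is a hedged sketch of that decision in isolation (the class name, the _col0 column and the int-to-bigint pairing are invented; ParseUtils.createConversionCast is used as in the code above):

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConversionCastSketch {

    // Wraps a column expression in a conversion cast only when its type differs
    // from the target table field's type, as genConversionSelectOperator does.
    public static ExprNodeDesc castIfNeeded(ExprNodeDesc column, TypeInfo tableFieldType)
            throws SemanticException {
        if (tableFieldType.equals(column.getTypeInfo())) {
            return column;                  // types already match, no conversion needed
        }
        if (tableFieldType.getCategory() != Category.PRIMITIVE) {
            return null;                    // complex target types cannot be converted
                                            // (the original then raises TARGET_TABLE_COLUMN_MISMATCH)
        }
        return ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) tableFieldType);
    }

    public static void main(String[] args) throws SemanticException {
        ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "_col0", "", false);
        System.out.println(castIfNeeded(col, TypeInfoFactory.getPrimitiveTypeInfo("bigint")));
    }
}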

Aggregations

RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)86 ArrayList (java.util.ArrayList)65 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)65 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)62 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)50 HashMap (java.util.HashMap)45 Operator (org.apache.hadoop.hive.ql.exec.Operator)42 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)39 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)38 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)37 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)35 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)34 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)34 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)31 LinkedHashMap (java.util.LinkedHashMap)30 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)28 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)28 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)27 LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)25 NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint)22