Example 6 with VirtualColumn

Use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

The class FetchOperator, method initialize.

private void initialize() throws HiveException {
    if (isStatReader) {
        outputOI = work.getStatRowOI();
        return;
    }
    if (hasVC) {
        List<String> names = new ArrayList<String>(vcCols.size());
        List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcCols.size());
        for (VirtualColumn vc : vcCols) {
            inspectors.add(vc.getObjectInspector());
            names.add(vc.getName());
        }
        vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
        vcValues = new Object[vcCols.size()];
    }
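    // Size the row array handed downstream: one slot for the fetched row object,
    // plus one each for partition values and virtual-column values when present
    // (hence 1, 2, or 3 entries).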
    if (hasVC && isPartitioned) {
        row = new Object[3];
    } else if (hasVC || isPartitioned) {
        row = new Object[2];
    } else {
        row = new Object[1];
    }
    if (isPartitioned) {
        iterPath = work.getPartDir().iterator();
        iterPartDesc = work.getPartDesc().iterator();
    } else {
        iterPath = Arrays.asList(work.getTblDir()).iterator();
        iterPartDesc = Iterators.cycle(new PartitionDesc(work.getTblDesc(), null));
    }
    outputOI = setupOutputObjectInspector();
    context = setupExecContext(operator, work.getPathLists());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
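
The virtual-column handling above can be exercised in isolation. A minimal, self-contained sketch, assuming only the serde2 ObjectInspector APIs used in the example (VirtualColumn.FILENAME and VirtualColumn.BLOCKOFFSET are standard Hive virtual columns; the class and method names here are ours, not Hive's):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class VirtualColumnStructOI {

    // Build a struct ObjectInspector describing the virtual-column values that
    // accompany each fetched row, mirroring the loop in FetchOperator.initialize().
    static StructObjectInspector forColumns(List<VirtualColumn> vcCols) {
        List<String> names = new ArrayList<String>(vcCols.size());
        List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcCols.size());
        for (VirtualColumn vc : vcCols) {
            names.add(vc.getName());                 // e.g. INPUT__FILE__NAME
            inspectors.add(vc.getObjectInspector()); // the matching primitive OI
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
    }

    public static void main(String[] args) {
        StructObjectInspector oi =
                forColumns(Arrays.asList(VirtualColumn.FILENAME, VirtualColumn.BLOCKOFFSET));
        // Prints something like struct<input__file__name:string,...>
        System.out.println(oi.getTypeName());
    }
}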

Example 7 with VirtualColumn

Use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

The class SemanticAnalyzer, method genTablePlan.

@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
    String alias_id = getAliasId(alias, qb);
    Table tab = qb.getMetaData().getSrcForAlias(alias);
    RowResolver rwsch;
    // Is the table already present?
    TableScanOperator top = topOps.get(alias_id);
    // Obtain table props in query
    Map<String, String> properties = qb.getTabPropsForAlias(alias);
    if (top == null) {
        // Determine row schema for TSOP.
        // Include column names from the SerDe, plus partition and virtual columns.
        rwsch = new RowResolver();
        try {
            // Including parameters passed in the query
            if (properties != null) {
                for (Entry<String, String> prop : properties.entrySet()) {
                    if (tab.getSerdeParam(prop.getKey()) != null) {
                        LOG.warn("SerDe property in input query overrides stored SerDe property");
                    }
                    tab.setSerdeParam(prop.getKey(), prop.getValue());
                }
            }
            // Obtain inspector for schema
            StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer().getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                // If the column is a skewed column, flag its ColumnInfo accordingly.
                ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), alias, false);
                colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName()));
                rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
            }
        } catch (SerDeException e) {
            throw new RuntimeException(e);
        }
        // Finally add the partitioning columns
        for (FieldSchema part_col : tab.getPartCols()) {
            LOG.trace("Adding partition col: " + part_col);
            rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
        }
        // put all virtual columns in RowResolver.
        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
        // use a list for easy customization
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        // Create the root of the operator tree
        TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
        setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
        SplitSample sample = nameToSplitSample.get(alias_id);
        if (sample != null && sample.getRowCount() != null) {
            tsDesc.setRowLimit(sample.getRowCount());
            nameToSplitSample.remove(alias_id);
        }
        top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch);
        // Set insideView so that we can skip column authorization for this.
        top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase()));
        // Add this to the list of top operators - we always start from a table scan.
        topOps.put(alias_id, top);
        // Add a mapping from the table scan operator to Table
        topToTable.put(top, tab);
        if (properties != null) {
            topToTableProps.put(top, properties);
            tsDesc.setOpProps(properties);
        }
    } else {
        rwsch = opParseCtx.get(top).getRowResolver();
        top.setChildOperators(null);
    }
    // check if this table is sampled and needs more than input pruning
    Operator<? extends OperatorDesc> op = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
        TableScanOperator tableScanOp = top;
        tableScanOp.getConf().setTableSample(ts);
        int num = ts.getNumerator();
        int den = ts.getDenominator();
        ArrayList<ASTNode> sampleExprs = ts.getExprs();
        // TODO: Do the type checking of the expressions
        List<String> tabBucketCols = tab.getBucketCols();
        int numBuckets = tab.getNumBuckets();
        // If there are no sample cols and no bucket cols then throw an error
        if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
            throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getTableName());
        }
        if (num > den) {
            throw new SemanticException(ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getTableName());
        }
        // check if a predicate is needed
        // predicate is needed if either input pruning is not enough
        // or if input pruning is not possible
        // check if the sample columns are the same as the table bucket columns
        boolean colsEqual = true;
        if ((sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0)) {
            colsEqual = false;
        }
        for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
            boolean colFound = false;
            for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
                if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
                    break;
                }
                if (((ASTNode) sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) {
                    colFound = true;
                }
            }
            colsEqual = (colsEqual && colFound);
        }
        // Check if input can be pruned
        ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));
        // check if input pruning is enough
        if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
            // input pruning is enough; add the filter for the optimizer to use it
            // later
            LOG.info("No need for sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true, new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true));
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        } else {
            // need to add filter
            // create tableOp to be filterDesc and set as child to 'top'
            LOG.info("Need sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        }
    } else {
        boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
        if (testMode) {
            String tabName = tab.getTableName();
            // has the user explicitly asked not to sample this table
            String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
            String[] unSampleTbls = unSampleTblList.split(",");
            boolean unsample = false;
            for (String unSampleTbl : unSampleTbls) {
                if (tabName.equalsIgnoreCase(unSampleTbl)) {
                    unsample = true;
                }
            }
            if (!unsample) {
                int numBuckets = tab.getNumBuckets();
                // If the input table is bucketed, choose the first bucket
                if (numBuckets > 0) {
                    TableSample tsSample = new TableSample(1, numBuckets);
                    tsSample.setInputPruning(true);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
                    FilterDesc filterDesc = new FilterDesc(samplePred, true, new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true));
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                    LOG.info("No need for sample filter");
                } else {
                    // The table is not bucketed, add a dummy filter :: rand()
                    int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
                    TableSample tsSample = new TableSample(1, freq);
                    tsSample.setInputPruning(false);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    LOG.info("Need sample filter");
                    ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415)));
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc);
                    FilterDesc filterDesc = new FilterDesc(samplePred, true);
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                }
            }
        }
    }
    Operator output = putOpInsertMap(op, rwsch);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created Table Plan for " + alias + " " + op.toString());
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
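
The sampling branches are the densest part of this method, so here is the decision they encode, lifted into a standalone sketch (the helper name is ours, not Hive's): input pruning alone can answer TABLESAMPLE(BUCKET num OUT OF den) only when the sample columns are exactly the bucket columns (or there are none) and den divides the bucket count evenly, or is divided evenly by it.

// A minimal sketch of the pruning test in genTablePlan; helper name is hypothetical.
static boolean inputPruningIsEnough(boolean colsEqual, int num, int den, int numBuckets) {
    // colsEqual: the sample expressions are the table's bucket columns,
    // or there are no sample expressions at all.
    return colsEqual && (num == den || den % numBuckets == 0 || numBuckets % den == 0);
}

For a table with 32 buckets, BUCKET 3 OUT OF 8 on the bucket columns passes (8 divides 32), so only the optimizer-facing filter is added; BUCKET 1 OUT OF 5 fails (5 neither divides nor is divided by 32), so the else branch adds a real sample filter.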

Example 8 with VirtualColumn

Use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project hive by apache.

The class TableMask, method create.

public String create(HivePrivilegeObject privObject, MaskAndFilterInfo maskAndFilterInfo) throws SemanticException {
    boolean doColumnMasking = false;
    boolean doRowFiltering = false;
    StringBuilder sb = new StringBuilder();
    sb.append("(SELECT ");
    boolean firstOne = true;
    List<String> exprs = privObject.getCellValueTransformers();
    if (exprs != null) {
        if (exprs.size() != privObject.getColumns().size()) {
            throw new SemanticException("Expect " + privObject.getColumns().size() + " columns in " + privObject.getObjectName() + ", but only find " + exprs.size());
        }
        List<String> colTypes = maskAndFilterInfo.colTypes;
        for (int index = 0; index < exprs.size(); index++) {
            String expr = exprs.get(index);
            if (expr == null) {
                throw new SemanticException("Expect string type CellValueTransformer in " + privObject.getObjectName() + ", but only find null");
            }
            if (!firstOne) {
                sb.append(", ");
            } else {
                firstOne = false;
            }
            String colName = privObject.getColumns().get(index);
            if (!expr.equals(colName)) {
                // CAST(expr AS COLTYPE) AS COLNAME
                sb.append("CAST(" + expr + " AS " + colTypes.get(index) + ") AS " + HiveUtils.unparseIdentifier(colName, conf));
                doColumnMasking = true;
            } else {
                sb.append(HiveUtils.unparseIdentifier(colName, conf));
            }
        }
    }
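    // No cell-value transformer rewrote a column, so start over with SELECT *;
    // whether a rewrite is emitted then depends solely on row filtering below.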
    if (!doColumnMasking) {
        sb = new StringBuilder();
        sb.append("(SELECT *");
    }
    if (!maskAndFilterInfo.isView) {
        // Append all virtual columns to the SELECT list.
        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            sb.append(", " + vc.getName());
        }
    }
    sb.append(" FROM ");
    sb.append(HiveUtils.unparseIdentifier(privObject.getDbname(), conf));
    sb.append(".");
    sb.append(HiveUtils.unparseIdentifier(privObject.getObjectName(), conf));
    sb.append(" " + maskAndFilterInfo.additionalTabInfo);
    String filter = privObject.getRowFilterExpression();
    if (filter != null) {
        sb.append(" WHERE " + filter);
        doRowFiltering = true;
    }
    sb.append(")" + HiveUtils.unparseIdentifier(maskAndFilterInfo.alias, conf));
    if (!doColumnMasking && !doRowFiltering) {
        // nothing to do
        return null;
    } else {
        LOG.debug("TableMask creates `" + sb.toString() + "`");
        return sb.toString();
    }
}
Also used : VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
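
To make the rewrite concrete, suppose (hypothetically) that a policy masks column ssn on table hr.people with the transformer mask_hash(ssn) and applies the row filter dept = 'eng'. create(...) would then return a string shaped like:

(SELECT CAST(mask_hash(ssn) AS string) AS ssn, name, INPUT__FILE__NAME, ... FROM hr.people WHERE dept = 'eng')people

where the elided names are whatever virtual columns the session's registry reports, and identifiers may be back-quoted by unparseIdentifier depending on configuration. Note that the closing parenthesis and the alias are concatenated with no space, exactly as the final append builds them.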

Example 9 with VirtualColumn

Use of org.apache.hadoop.hive.ql.metadata.VirtualColumn in project SQLWindowing by hbutani.

The class HiveUtils, method getRowResolver.

public static RowResolver getRowResolver(String db, String table, String alias, HiveConf conf) throws WindowingException {
    LOG.info("HiveUtils::getRowResolver invoked on " + table);
    try {
        HiveMetaStoreClient client = getClient(conf);
        db = validateDB(client, db);
        org.apache.hadoop.hive.ql.metadata.Table t = Hive.get(conf).getTable(db, table);
        StructObjectInspector rowObjectInspector = (StructObjectInspector) t.getDeserializer().getObjectInspector();
        RowResolver rwsch = getRowResolver(alias, rowObjectInspector);
        for (FieldSchema part_col : t.getPartCols()) {
            LOG.trace("Adding partition col: " + part_col);
            rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
        }
        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
        // use a list for easy customization
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        return rwsch;
    } catch (WindowingException w) {
        throw w;
    } catch (Exception me) {
        throw new WindowingException(me);
    }
}
Also used : HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) WindowingException(com.sap.hadoop.windowing.WindowingException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WindowingException(com.sap.hadoop.windowing.WindowingException) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
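
A sketch of how a caller might use the resolver (database, table, alias, and column names here are illustrative only; RowResolver.put/get normalize identifier case in stock Hive, so the lower-case lookup should resolve the upper-case virtual-column name, but treat that as an assumption):

// Hypothetical caller of the helper above.
public static void printVirtualColumnType(HiveConf conf) throws Exception {
    RowResolver rr = HiveUtils.getRowResolver("default", "clicks", "c", conf);
    ColumnInfo serdeCol = rr.get("c", "page_url");          // a SerDe column
    ColumnInfo partCol  = rr.get("c", "dt");                // a partition column
    ColumnInfo vcCol    = rr.get("c", "input__file__name"); // a virtual column
    System.out.println(vcCol.getType());                    // e.g. string
}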

Aggregations

VirtualColumn (org.apache.hadoop.hive.ql.metadata.VirtualColumn)9 ArrayList (java.util.ArrayList)7 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)6 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)3 TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc)3 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)2 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)2 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)2 WindowingException (com.sap.hadoop.windowing.WindowingException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)1 HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient)1 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)1 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)1