
Example 26 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class SemanticAnalyzer, the method genTablePlan:

@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
    String alias_id = getAliasId(alias, qb);
    Table tab = qb.getMetaData().getSrcForAlias(alias);
    RowResolver rwsch;
    // is the table already present?
    TableScanOperator top = topOps.get(alias_id);
    // Obtain table props in query
    Map<String, String> properties = qb.getTabPropsForAlias(alias);
    if (top == null) {
        // Determine row schema for TSOP.
        // Include column names from the SerDe, plus partition and virtual columns.
        rwsch = new RowResolver();
        try {
            // Including parameters passed in the query
            if (properties != null) {
                for (Entry<String, String> prop : properties.entrySet()) {
                    if (tab.getSerdeParam(prop.getKey()) != null) {
                        LOG.warn("SerDe property in input query overrides stored SerDe property");
                    }
                    tab.setSerdeParam(prop.getKey(), prop.getValue());
                }
            }
            // Obtain inspector for schema
            StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer().getObjectInspector();
            List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
            for (int i = 0; i < fields.size(); i++) {
                /**
                 * if the column is a skewed column, use ColumnInfo accordingly
                 */
                ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), alias, false);
                colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName()));
                rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
            }
        } catch (SerDeException e) {
            throw new RuntimeException(e);
        }
        // Finally add the partitioning columns
        for (FieldSchema part_col : tab.getPartCols()) {
            LOG.trace("Adding partition col: " + part_col);
            rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
        }
        // put all virtual columns in RowResolver.
        Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
        // use a list for easy customization
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        if (!tab.isNonNative()) {
            // Virtual columns are only for native tables
            while (vcs.hasNext()) {
                VirtualColumn vc = vcs.next();
                rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
                vcList.add(vc);
            }
        }
        // Create the root of the operator tree
        TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
        setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
        Map<String, String> tblProperties = tab.getParameters();
        Map<String, String> tblPropertiesFromQuery = qb.getTabPropsForAlias(alias);
        AcidUtils.AcidOperationalProperties acidOperationalProperties = tsDesc.getAcidOperationalProperties();
        if (acidOperationalProperties != null) {
            tsDesc.getAcidOperationalProperties().setInsertOnlyFetchBucketId((tblProperties != null && Boolean.parseBoolean(tblProperties.get(Constants.INSERT_ONLY_FETCH_BUCKET_ID))) || (tblPropertiesFromQuery != null && Boolean.parseBoolean(tblPropertiesFromQuery.get(Constants.INSERT_ONLY_FETCH_BUCKET_ID))));
            tsDesc.getAcidOperationalProperties().setFetchDeletedRows((tblProperties != null && Boolean.parseBoolean(tblProperties.get(Constants.ACID_FETCH_DELETED_ROWS))) || (tblPropertiesFromQuery != null && Boolean.parseBoolean(tblPropertiesFromQuery.get(Constants.ACID_FETCH_DELETED_ROWS))));
        }
        SplitSample sample = nameToSplitSample.get(alias_id);
        if (sample != null && sample.getRowCount() != null) {
            tsDesc.setRowLimit(sample.getRowCount());
            nameToSplitSample.remove(alias_id);
        }
        top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch);
        // Set insideView so that we can skip column authorization for this.
        top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase()));
        // Add this to the list of top operators - we always start from a table
        // scan
        topOps.put(alias_id, top);
        if (properties != null) {
            tsDesc.setOpProps(properties);
        }
    } else {
        rwsch = opParseCtx.get(top).getRowResolver();
        top.setChildOperators(null);
    }
    // check if this table is sampled and needs more than input pruning
    Operator<? extends OperatorDesc> op = top;
    TableSample ts = qb.getParseInfo().getTabSample(alias);
    if (ts != null) {
        TableScanOperator tableScanOp = top;
        tableScanOp.getConf().setTableSample(ts);
        int num = ts.getNumerator();
        int den = ts.getDenominator();
        List<ASTNode> sampleExprs = ts.getExprs();
        // TODO: Do the type checking of the expressions
        List<String> tabBucketCols = tab.getBucketCols();
        int numBuckets = tab.getNumBuckets();
        // If there are no sample cols and no bucket cols then throw an error
        if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
            throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getTableName());
        }
        if (num > den) {
            throw new SemanticException(ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getTableName());
        }
        // check if a predicate is needed
        // predicate is needed if either input pruning is not enough
        // or if input pruning is not possible
        // check if the sample columns are the same as the table bucket columns
        boolean colsEqual = true;
        if ((sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0)) {
            colsEqual = false;
        }
        for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
            boolean colFound = false;
            for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
                if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
                    break;
                }
                if ((sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) {
                    colFound = true;
                }
            }
            colsEqual = colFound;
        }
        // Check if input can be pruned
        ts.setInputPruning((sampleExprs.size() == 0 || colsEqual));
        // check if input pruning is enough
        if ((sampleExprs.size() == 0 || colsEqual) && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
            // input pruning is enough; add the filter for the optimizer to use it
            // later
            LOG.info("No need for sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, null, tab.getBucketingVersion());
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true, new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true));
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        } else {
            // input pruning is not enough; a sample filter is needed
            // create a FilterDesc, wrap it in a FilterOperator and set it as a child of 'top'
            LOG.info("Need sample filter");
            ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, null, tab.getBucketingVersion());
            FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
            filterDesc.setGenerated(true);
            op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        }
    } else {
        boolean testMode = conf.getBoolVar(ConfVars.HIVETESTMODE);
        if (testMode) {
            String tabName = tab.getTableName();
            // has the user explicitly asked not to sample this table?
            String unSampleTblList = conf.getVar(ConfVars.HIVETESTMODENOSAMPLE);
            String[] unSampleTbls = unSampleTblList.split(",");
            boolean unsample = false;
            for (String unSampleTbl : unSampleTbls) {
                if (tabName.equalsIgnoreCase(unSampleTbl)) {
                    unsample = true;
                }
            }
            if (!unsample) {
                int numBuckets = tab.getNumBuckets();
                // If the input table is bucketed, choose the first bucket
                if (numBuckets > 0) {
                    TableSample tsSample = new TableSample(1, numBuckets);
                    tsSample.setInputPruning(true);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, null, tab.getBucketingVersion());
                    FilterDesc filterDesc = new FilterDesc(samplePred, true, new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true));
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                    LOG.info("No need for sample filter");
                } else {
                    // The table is not bucketed; add a dummy filter on rand()
                    int freq = conf.getIntVar(ConfVars.HIVETESTMODESAMPLEFREQ);
                    TableSample tsSample = new TableSample(1, freq);
                    tsSample.setInputPruning(false);
                    qb.getParseInfo().setTabSample(alias, tsSample);
                    LOG.info("Need sample filter");
                    ExprNodeDesc randFunc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415)));
                    ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, randFunc, tab.getBucketingVersion());
                    FilterDesc filterDesc = new FilterDesc(samplePred, true);
                    filterDesc.setGenerated(true);
                    op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
                }
            }
        }
    }
    Operator output = putOpInsertMap(op, rwsch);
    LOG.debug("Created Table Plan for {} {}", alias, op);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)
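For orientation, here is a minimal sketch (not from the Hive sources) of the non-sampled core of genTablePlan, restricted to calls that already appear above. The helper name buildSimpleScan, the row limit of 1000, and the assumption that a resolved Table, a CompilationOpContext and a populated RowResolver are already in hand are all illustrative; the real method additionally wires in stats, ACID properties, sampling filters and the topOps registration.

private TableScanOperator buildSimpleScan(CompilationOpContext ctx, Table tab, String alias, RowResolver rwsch) {
    // Same constructor genTablePlan uses: alias, virtual-column list, table metadata.
    TableScanDesc tsDesc = new TableScanDesc(alias, new ArrayList<VirtualColumn>(), tab);
    // A TABLESAMPLE(n ROWS) clause would be pushed down as a plain row limit on the scan.
    tsDesc.setRowLimit(1000);
    // The descriptor plus the resolved row schema become the root TableScanOperator of the plan.
    return (TableScanOperator) OperatorFactory.get(ctx, tsDesc, new RowSchema(rwsch.getColumnInfos()));
}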

Example 27 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class TestSharedWorkOptimizer, the method addFilter:

private TableScanOperator addFilter(TableScanOperator ts, int i) {
    TableScanDesc desc = ts.getConf();
    List<ExprNodeDesc> as = Lists.newArrayList(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(i)), new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false));
    GenericUDF udf = new GenericUDFConcat();
    ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, as);
    desc.setFilterExpr(f1);
    return ts;
}
Also used : GenericUDFConcat(org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
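As a hedged variation (not part of TestSharedWorkOptimizer), the same descriptor API can carry an ordinary equality predicate. The column c1, the alias aa and the constant 5 are assumptions mirroring addFilter above, and GenericUDFOPEqual (from org.apache.hadoop.hive.ql.udf.generic) stands in for the concat UDF:

private void attachEqualityFilter(TableScanOperator ts) {
    // Build the expression c1 = 5 from the same ExprNode classes used in addFilter.
    List<ExprNodeDesc> children = Lists.newArrayList(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false), new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(5)));
    ExprNodeGenericFuncDesc eq = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), children);
    // The filter lives on the TableScanDesc, where the shared-work optimizer can read it when comparing scans.
    ts.getConf().setFilterExpr(eq);
}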

Example 28 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class TestNullScanTaskDispatcher, the method createTableScanOperator:

private TableScanOperator createTableScanOperator(boolean isExternal) {
    TableScanOperator tso = new TableScanOperator(mock(CompilationOpContext.class));
    TableScanDesc tableScanDesc = mock(TableScanDesc.class);
    Table table = mock(Table.class);
    Map<String, String> parameterMap = new HashMap<>();
    if (isExternal) {
        parameterMap.put("EXTERNAL", "TRUE");
    }
    org.apache.hadoop.hive.metastore.api.Table ttable = mock(org.apache.hadoop.hive.metastore.api.Table.class);
    when(ttable.getParameters()).thenReturn(parameterMap);
    when(table.getTTable()).thenReturn(ttable);
    when(tableScanDesc.getTableMetadata()).thenReturn(table);
    tso.setConf(tableScanDesc);
    return tso;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc)
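A brief usage sketch (assumed, not taken from the test) of how the mocked operator can be interrogated: the EXTERNAL table parameter is reachable entirely through the TableScanDesc, without touching a metastore.

TableScanOperator externalScan = createTableScanOperator(true);
Table tableMeta = externalScan.getConf().getTableMetadata();
// The mocks above route getTableMetadata() -> table -> ttable -> parameterMap.
boolean isExternal = "TRUE".equalsIgnoreCase(tableMeta.getTTable().getParameters().get("EXTERNAL"));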

Aggregations

Usage counts of co-occurring classes across the TableScanDesc examples:

TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc): 28
ArrayList (java.util.ArrayList): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
Table (org.apache.hadoop.hive.ql.metadata.Table): 8
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 7
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 4
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 4
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 4
Serializable (java.io.Serializable): 3
List (java.util.List): 3
Map (java.util.Map): 3
Path (org.apache.hadoop.fs.Path): 3
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 3
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 3
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3