Example 21 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

The class HiveTableScanVisitor, method visit:

/**
 * TODO: 1. PPD needs to get pushed in to TS.
 */
@Override
OpAttr visit(HiveTableScan scanRel) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
    }
    RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
    // 1. Setup TableScan Desc
    // 1.1 Build col details used by scan
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
    List<Integer> neededColumnIDs = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    Set<Integer> vcolsInCalcite = new HashSet<Integer>();
    List<String> partColNames = new ArrayList<String>();
    Map<Integer, VirtualColumn> vColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
    Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
    Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
    List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
    List<String> scanColNames = scanRel.getRowType().getFieldNames();
    String tableAlias = scanRel.getConcatQbIDAlias();
    String colName;
    ColumnInfo colInfo;
    VirtualColumn vc;
    for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
        colName = scanColNames.get(index);
        if (vColsMap.containsKey(index)) {
            vc = vColsMap.get(index);
            virtualCols.add(vc);
            colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
            vcolsInCalcite.add(index);
        } else if (posToPartColInfo.containsKey(index)) {
            partColNames.add(colName);
            colInfo = posToPartColInfo.get(index);
            vcolsInCalcite.add(index);
        } else {
            colInfo = posToNonPartColInfo.get(index);
        }
        colInfos.add(colInfo);
        if (neededColIndxsFrmReloptHT.contains(index)) {
            neededColumnIDs.add(index);
            neededColumnNames.add(colName);
        }
    }
    // 1.2 Create TableScanDesc
    TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
    // 1.3. Set Partition cols in TSDesc
    tsd.setPartColumns(partColNames);
    // 1.4. Set needed cols in TSDesc
    tsd.setNeededColumnIDs(neededColumnIDs);
    tsd.setNeededColumns(neededColumnNames);
    // 2. Setup TableScan
    TableScanOperator ts = (TableScanOperator) OperatorFactory.get(hiveOpConverter.getSemanticAnalyzer().getOpContext(), tsd, new RowSchema(colInfos));
    // Now that Calcite processes subqueries, there may be more than one
    // table scan with the same alias; disambiguate with a unique counter.
    if (hiveOpConverter.getTopOps().get(tableAlias) != null) {
        tableAlias = tableAlias + hiveOpConverter.getUniqueCounter();
    }
    hiveOpConverter.getTopOps().put(tableAlias, ts);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
    }
    return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) HashSet(java.util.HashSet)
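The Calcite-specific plumbing aside, the TableScanDesc setup itself follows a small, repeatable pattern. Below is a minimal sketch of that pattern, assuming the metastore Table, the CompilationOpContext, and the resolved column metadata (colInfos, needed column IDs/names, partition column names) are already available; the helper name buildScan and its parameters are illustrative, not taken from the Hive source.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

public class TableScanSetupSketch {

    // Hypothetical helper mirroring steps 1.2 through 2 of the visitor above.
    static TableScanOperator buildScan(CompilationOpContext ctx, String alias, Table table,
            List<VirtualColumn> virtualCols, ArrayList<ColumnInfo> colInfos,
            List<Integer> neededIds, List<String> neededNames, List<String> partColNames) {
        // 1. Describe the scan: alias, virtual columns, and table metadata.
        TableScanDesc tsd = new TableScanDesc(alias, virtualCols, table);
        // 2. Record partition columns and the columns the reader actually needs.
        tsd.setPartColumns(partColNames);
        tsd.setNeededColumnIDs(neededIds);
        tsd.setNeededColumns(neededNames);
        // 3. Materialize the operator with the row schema derived from colInfos.
        return (TableScanOperator) OperatorFactory.get(ctx, tsd, new RowSchema(colInfos));
    }
}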

Example 22 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

The class HiveInputFormat, method pushFiltersAndAsOf:

public static void pushFiltersAndAsOf(JobConf jobConf, TableScanOperator tableScan, final MapWork mrwork) {
    // Push as of information
    pushAsOf(jobConf, tableScan);
    // ensure filters are not set from previous pushFilters
    jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
    jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
    Utilities.unsetSchemaEvolution(jobConf);
    TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
        return;
    }
    Utilities.addTableSchemaToConf(jobConf, tableScan);
    // construct column name list and types for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);
    Utilities.setColumnTypeList(jobConf, tableScan);
    // push down filters
    ExprNodeGenericFuncDesc filterExpr = scanDesc.getFilterExpr();
    String pruningFilter = jobConf.get(TableScanDesc.PARTITION_PRUNING_FILTER);
    // If we have a pruning filter then combine it with the original
    if (pruningFilter != null) {
        ExprNodeGenericFuncDesc pruningExpr = SerializationUtilities.deserializeExpression(pruningFilter);
        if (filterExpr != null) {
            // Combine the 2 filters with AND
            filterExpr = ExprNodeDescUtils.and(filterExpr, pruningExpr);
        } else {
            // Use the pruning filter if there was no filter before
            filterExpr = pruningExpr;
        }
        // Set the combined filter in the TableScanDesc and remove the pruning filter
        scanDesc.setFilterExpr(filterExpr);
        scanDesc.setSerializedFilterExpr(SerializationUtilities.serializeExpression(filterExpr));
        jobConf.unset(TableScanDesc.PARTITION_PRUNING_FILTER);
    }
    if (filterExpr == null) {
        return;
    }
    // Disable filter pushdown for MapReduce when the work has more than one table alias,
    // since we don't clone the jobConf per alias (storage handlers with predicate support are exempt).
    if (mrwork != null && mrwork.getAliases() != null && mrwork.getAliases().size() > 1 && jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname).equals("mr") && (scanDesc.getTableMetadata() == null || !(scanDesc.getTableMetadata().getStorageHandler() instanceof HiveStoragePredicateHandler))) {
        return;
    }
    String serializedFilterObj = scanDesc.getSerializedFilterObject();
    String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
    boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
    if (!hasObj) {
        Serializable filterObject = scanDesc.getFilterObject();
        if (filterObject != null) {
            serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
        }
    }
    if (serializedFilterObj != null) {
        jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
    }
    if (!hasExpr) {
        serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
    }
    String filterText = filterExpr.getExprString();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + filterExpr + ", serializedFilterExpr = " + serializedFilterExpr + " (" + (hasExpr ? "desc" : "new") + ")" + (serializedFilterObj == null ? "" : (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new") + ")")));
    }
    jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
    jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
}
Also used : HiveStoragePredicateHandler(org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler) Serializable(java.io.Serializable) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
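The counterpart on the read side is recovering the pushed filter from the same JobConf keys. A minimal sketch, assuming the configuration was populated by pushFiltersAndAsOf above; the class and method names here are illustrative.

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

public class FilterReadbackSketch {

    // Hypothetical helper: recover the pushed-down filter, or null if none was pushed.
    static ExprNodeGenericFuncDesc readPushedFilter(JobConf jobConf) {
        String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (serialized == null) {
            // pushFiltersAndAsOf either found no filter or bailed out (e.g. MR with multiple aliases).
            return null;
        }
        // Same serialize/deserialize round trip used when the filter was written into the conf.
        return SerializationUtilities.deserializeExpression(serialized);
    }
}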

Example 23 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

The class OpProcFactory, method pushFilterToStorageHandler:

/**
 * Attempts to push a predicate down into a storage handler.  For
 * native tables, this is a no-op.
 *
 * @param tableScanOp table scan against which predicate applies
 * @param originalPredicate predicate to be pushed down
 * @param owi object walk info
 * @param hiveConf Hive configuration
 * @return portion of predicate which needs to be evaluated
 * by Hive as a post-filter, or null if it was possible
 * to push down the entire predicate
 */
private static ExprNodeGenericFuncDesc pushFilterToStorageHandler(TableScanOperator tableScanOp, ExprNodeGenericFuncDesc originalPredicate, OpWalkerInfo owi, HiveConf hiveConf) throws SemanticException {
    TableScanDesc tableScanDesc = tableScanOp.getConf();
    Table tbl = tableScanDesc.getTableMetadata();
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
        // attach the original predicate to the table scan operator for index
        // optimizations that require the pushed predicate before pcr & later
        // optimizations are applied
        tableScanDesc.setFilterExpr(originalPredicate);
    }
    if (!tbl.isNonNative()) {
        return originalPredicate;
    }
    HiveStorageHandler storageHandler = tbl.getStorageHandler();
    if (!(storageHandler instanceof HiveStoragePredicateHandler)) {
        // The storage handler does not provide predicate decomposition
        // support, so we'll implement the entire filter in Hive.  However,
        // we still provide the full predicate to the storage handler in
        // case it wants to do any of its own prefiltering.
        tableScanDesc.setFilterExpr(originalPredicate);
        return originalPredicate;
    }
    HiveStoragePredicateHandler predicateHandler = (HiveStoragePredicateHandler) storageHandler;
    JobConf jobConf = new JobConf(owi.getParseContext().getConf());
    Utilities.setColumnNameList(jobConf, tableScanOp);
    Utilities.setColumnTypeList(jobConf, tableScanOp);
    try {
        Utilities.copyTableJobPropertiesToConf(Utilities.getTableDesc(tbl), jobConf);
    } catch (Exception e) {
        throw new SemanticException(e);
    }
    Deserializer deserializer = tbl.getDeserializer();
    HiveStoragePredicateHandler.DecomposedPredicate decomposed = predicateHandler.decomposePredicate(jobConf, deserializer, originalPredicate);
    if (decomposed == null) {
        // not able to push anything down
        if (LOG.isDebugEnabled()) {
            LOG.debug("No pushdown possible for predicate:  " + originalPredicate.getExprString());
        }
        return originalPredicate;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Original predicate:  " + originalPredicate.getExprString());
        if (decomposed.pushedPredicate != null) {
            LOG.debug("Pushed predicate:  " + decomposed.pushedPredicate.getExprString());
        }
        if (decomposed.residualPredicate != null) {
            LOG.debug("Residual predicate:  " + decomposed.residualPredicate.getExprString());
        }
    }
    tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
    tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
    return decomposed.residualPredicate;
}
Also used : HiveStoragePredicateHandler(org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) JobConf(org.apache.hadoop.mapred.JobConf) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
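The decomposePredicate contract this method relies on is easiest to see in a deliberately trivial handler. The sketch below claims the entire predicate and leaves no residual, so pushFilterToStorageHandler would return null and Hive would skip its post-filter. It is only an illustration of the interface as used above, under the assumption that the interface carries the single decomposePredicate method shown here; a realistic handler would accept only the parts of the expression its storage layer can evaluate.

import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.mapred.JobConf;

// Left abstract: a real handler would also implement HiveStorageHandler alongside this interface.
public abstract class PushEverythingPredicateHandler implements HiveStoragePredicateHandler {

    @Override
    public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
            ExprNodeDesc predicate) {
        // Claim the full predicate: it becomes filterExpr on the TableScanDesc, and the caller
        // in OpProcFactory receives no residual predicate to evaluate as a post-filter.
        DecomposedPredicate decomposed = new DecomposedPredicate();
        // The cast holds when invoked from pushFilterToStorageHandler, which passes an
        // ExprNodeGenericFuncDesc as the original predicate.
        decomposed.pushedPredicate = (ExprNodeGenericFuncDesc) predicate;
        decomposed.residualPredicate = null;
        return decomposed;
    }
}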

Example 24 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

The class ColumnPrunerProcFactory, method setupNeededColumns:

/**
 * Sets up needed columns for TSOP. Mainly, transfers column names from input
 * RowSchema as well as the needed virtual columns, into TableScanDesc.
 */
public static void setupNeededColumns(TableScanOperator scanOp, RowSchema inputRS, List<FieldNode> cols) throws SemanticException {
    List<Integer> neededColumnIds = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    List<String> neededNestedColumnPaths = new ArrayList<>();
    List<String> referencedColumnNames = new ArrayList<String>();
    TableScanDesc desc = scanOp.getConf();
    List<VirtualColumn> virtualCols = desc.getVirtualCols();
    List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
    // add virtual columns for ANALYZE TABLE
    if (scanOp.getConf().isGatherStats()) {
        cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
    }
    for (FieldNode fn : cols) {
        String column = fn.getFieldName();
        ColumnInfo colInfo = inputRS.getColumnInfo(column);
        if (colInfo == null) {
            continue;
        }
        referencedColumnNames.add(column);
        if (colInfo.getIsVirtualCol()) {
            // Partition columns are also virtual columns, but they should not be in this list.
            for (int j = 0; j < virtualCols.size(); j++) {
                VirtualColumn vc = virtualCols.get(j);
                if (vc.getName().equals(colInfo.getInternalName())) {
                    newVirtualCols.add(vc);
                }
            }
            // no need to pass virtual columns to reader.
            continue;
        }
        int position = inputRS.getPosition(column);
        if (position >= 0) {
            // get the needed columns by id and name
            neededColumnIds.add(position);
            neededColumnNames.add(column);
            neededNestedColumnPaths.addAll(fn.toPaths());
        }
    }
    desc.setVirtualCols(newVirtualCols);
    scanOp.setNeededColumnIDs(neededColumnIds);
    scanOp.setNeededColumns(neededColumnNames);
    scanOp.setNeededNestedColumnPaths(neededNestedColumnPaths);
    scanOp.setReferencedColumns(referencedColumnNames);
}
Also used : ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn)
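A typical call site passes the scan operator's own RowSchema together with the columns the parent operators actually reference. A minimal sketch, assuming an existing TableScanOperator tsOp and hypothetical column names "key" and "value"; the import locations follow the Hive codebase.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcFactory;
import org.apache.hadoop.hive.ql.optimizer.FieldNode;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class NeededColumnsSketch {

    static void pruneToKeyValue(TableScanOperator tsOp) throws SemanticException {
        // Mutable list: setupNeededColumns may append RAWDATASIZE for ANALYZE TABLE scans.
        List<FieldNode> referenced = new ArrayList<>(
                Arrays.asList(new FieldNode("key"), new FieldNode("value")));
        // Only "key" and "value" survive pruning; virtual columns not referenced here
        // are dropped from the TableScanDesc by setupNeededColumns.
        ColumnPrunerProcFactory.setupNeededColumns(tsOp, tsOp.getSchema(), referenced);
    }
}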

Example 25 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

The class GenMapRedUtils, method createTemporaryTableScanOperator:

public static TableScanOperator createTemporaryTableScanOperator(CompilationOpContext ctx, RowSchema rowSchema) {
    TableScanOperator tableScanOp = (TableScanOperator) OperatorFactory.get(ctx, new TableScanDesc(null), rowSchema);
    // Set needed columns for this dummy TableScanOperator
    List<Integer> neededColumnIds = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
    for (int i = 0; i < parentColumnInfos.size(); i++) {
        neededColumnIds.add(i);
        neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
    }
    tableScanOp.setNeededColumnIDs(neededColumnIds);
    tableScanOp.setNeededColumns(neededColumnNames);
    tableScanOp.setReferencedColumns(neededColumnNames);
    return tableScanOp;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
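Since the dummy scan is driven entirely by the RowSchema, a caller only needs a CompilationOpContext and the column layout of the intermediate data. A minimal sketch with a single hypothetical string column "_col0", assuming CompilationOpContext can be constructed directly as in Hive's tests:

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class TempScanSketch {

    static TableScanOperator dummyScan() {
        // One string column named "_col0"; the temporary scan marks every schema column
        // as needed and referenced, as shown in createTemporaryTableScanOperator above.
        ArrayList<ColumnInfo> cols = new ArrayList<ColumnInfo>();
        cols.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, null, false));
        return GenMapRedUtils.createTemporaryTableScanOperator(
                new CompilationOpContext(), new RowSchema(cols));
    }
}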

Aggregations

TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc): 28
ArrayList (java.util.ArrayList): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
Table (org.apache.hadoop.hive.ql.metadata.Table): 8
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 7
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 4
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 4
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 4
Serializable (java.io.Serializable): 3
List (java.util.List): 3
Map (java.util.Map): 3
Path (org.apache.hadoop.fs.Path): 3
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 3
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 3
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3