
Example 1 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class HiveInputFormat, the method pushFilters:

public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
    // ensure filters are not set from previous pushFilters
    jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
    jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
    Utilities.unsetSchemaEvolution(jobConf);
    TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
        return;
    }
    Utilities.addTableSchemaToConf(jobConf, tableScan);
    // construct column name list and types for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);
    Utilities.setColumnTypeList(jobConf, tableScan);
    // push down filters
    ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc) scanDesc.getFilterExpr();
    if (filterExpr == null) {
        return;
    }
    String serializedFilterObj = scanDesc.getSerializedFilterObject();
    String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
    boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
    if (!hasObj) {
        Serializable filterObject = scanDesc.getFilterObject();
        if (filterObject != null) {
            serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
        }
    }
    if (serializedFilterObj != null) {
        jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
    }
    if (!hasExpr) {
        serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
    }
    String filterText = filterExpr.getExprString();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + filterExpr + ", serializedFilterExpr = " + serializedFilterExpr + " (" + (hasExpr ? "desc" : "new") + ")" + (serializedFilterObj == null ? "" : (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new") + ")")));
    }
    jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
    jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
}
Also used: Serializable(java.io.Serializable) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
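
On the consumer side, an input format or record reader recovers the pushed filter from the same configuration key. The following is a minimal sketch, assuming SerializationUtilities.deserializeExpression as the inverse of the serialization above; the wrapper class and method names are hypothetical.

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

public class PushedFilterReader {

    /**
     * Sketch of the reader side of pushFilters: recover the serialized
     * filter expression, or return null when nothing was pushed.
     */
    public static ExprNodeGenericFuncDesc readPushedFilter(JobConf jobConf) {
        String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (serialized == null) {
            // pushFilters returned early: no TableScanDesc or no filter expression
            return null;
        }
        // Kryo-based inverse of SerializationUtilities.serializeExpression
        return SerializationUtilities.deserializeExpression(serialized);
    }
}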

Example 2 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class MapOperator, the method cloneConfsForNestedColPruning:

/**
 * For each source table, combine the nested column pruning information from all its
 * table scan descriptors and set it in a configuration copy. This is necessary since
 * the configuration property "READ_NESTED_COLUMN_PATH_CONF_STR" is set on a per-table
 * basis, so we can't just use a single configuration for all the tables.
 */
private Map<String, Configuration> cloneConfsForNestedColPruning(Configuration hconf) {
    Map<String, Configuration> tableNameToConf = new HashMap<>();
    for (Map.Entry<Path, ArrayList<String>> e : conf.getPathToAliases().entrySet()) {
        List<String> aliases = e.getValue();
        if (aliases == null || aliases.isEmpty()) {
            continue;
        }
        String tableName = conf.getPathToPartitionInfo().get(e.getKey()).getTableName();
        if (tableNameToConf.containsKey(tableName)) {
            continue;
        }
        for (String alias : aliases) {
            Operator<?> rootOp = conf.getAliasToWork().get(alias);
            if (!(rootOp instanceof TableScanOperator)) {
                continue;
            }
            TableScanDesc tableScanDesc = ((TableScanOperator) rootOp).getConf();
            List<String> nestedColumnPaths = tableScanDesc.getNeededNestedColumnPaths();
            if (nestedColumnPaths == null || nestedColumnPaths.isEmpty()) {
                continue;
            }
            if (!tableNameToConf.containsKey(tableName)) {
                Configuration clonedConf = new Configuration(hconf);
                clonedConf.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
                tableNameToConf.put(tableName, clonedConf);
            }
            Configuration newConf = tableNameToConf.get(tableName);
            ColumnProjectionUtils.appendNestedColumnPaths(newConf, nestedColumnPaths);
        }
    }
    // Assign tables without nested column pruning info to the default conf
    for (PartitionDesc pd : conf.getPathToPartitionInfo().values()) {
        if (!tableNameToConf.containsKey(pd.getTableName())) {
            tableNameToConf.put(pd.getTableName(), hconf);
        }
    }
    for (PartitionDesc pd : conf.getAliasToPartnInfo().values()) {
        if (!tableNameToConf.containsKey(pd.getTableName())) {
            tableNameToConf.put(pd.getTableName(), hconf);
        }
    }
    return tableNameToConf;
}
Also used: Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Map(java.util.Map)
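
A reader that honors this pruning looks the paths up again from the cloned configuration it is handed. A minimal sketch; it assumes the appended paths accumulate under READ_NESTED_COLUMN_PATH_CONF_STR as a comma-separated string, and the wrapper class is hypothetical.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class NestedColumnPaths {

    /**
     * Sketch: recover the nested column paths appended by
     * cloneConfsForNestedColPruning for one table's configuration.
     */
    public static List<String> nestedPaths(Configuration conf) {
        String paths = conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
        if (paths == null || paths.isEmpty()) {
            // table had no pruning info; the caller should read full columns
            return Collections.emptyList();
        }
        // assumption: appendNestedColumnPaths joins the paths with commas
        return Arrays.asList(paths.split(","));
    }
}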

Example 3 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class HiveOpConverter, the method visit:

/**
 * TODO: 1. PPD needs to get pushed into TS
 *
 * @param scanRel
 * @return
 */
OpAttr visit(HiveTableScan scanRel) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
    }
    RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
    // 1. Setup TableScan Desc
    // 1.1 Build col details used by scan
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
    List<Integer> neededColumnIDs = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    Set<Integer> vcolsInCalcite = new HashSet<Integer>();
    List<String> partColNames = new ArrayList<String>();
    Map<Integer, VirtualColumn> VColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
    Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
    Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
    List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
    List<String> scanColNames = scanRel.getRowType().getFieldNames();
    String tableAlias = scanRel.getConcatQbIDAlias();
    String colName;
    ColumnInfo colInfo;
    VirtualColumn vc;
    for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
        colName = scanColNames.get(index);
        if (VColsMap.containsKey(index)) {
            vc = VColsMap.get(index);
            virtualCols.add(vc);
            colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
            vcolsInCalcite.add(index);
        } else if (posToPartColInfo.containsKey(index)) {
            partColNames.add(colName);
            colInfo = posToPartColInfo.get(index);
            vcolsInCalcite.add(index);
        } else {
            colInfo = posToNonPartColInfo.get(index);
        }
        colInfos.add(colInfo);
        if (neededColIndxsFrmReloptHT.contains(index)) {
            neededColumnIDs.add(index);
            neededColumnNames.add(colName);
        }
    }
    // 1.2 Create TableScanDesc
    TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
    // 1.3. Set Partition cols in TSDesc
    tsd.setPartColumns(partColNames);
    // 1.4. Set needed cols in TSDesc
    tsd.setNeededColumnIDs(neededColumnIDs);
    tsd.setNeededColumns(neededColumnNames);
    // 2. Setup TableScan
    TableScanOperator ts = (TableScanOperator) OperatorFactory.get(semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
    // Calcite may produce more than one table scan with the same alias;
    // disambiguate the key with a unique counter before registering it.
    if (topOps.get(tableAlias) != null) {
        tableAlias = tableAlias + this.uniqueCounter;
    }
    topOps.put(tableAlias, ts);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
    }
    return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
Also used: TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) HashSet(java.util.HashSet)
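
The three-step setup in 1.2-1.4 (construct the desc, set partition columns, set needed columns) also works in isolation, for instance in a unit test. A minimal sketch using only the calls shown above; the alias and column values are illustrative.

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

public class TableScanDescSketch {

    /** Sketch: build a TableScanDesc the way HiveOpConverter.visit does. */
    public static TableScanDesc minimalScanDesc(Table table) {
        TableScanDesc tsd = new TableScanDesc("t", new ArrayList<VirtualColumn>(), table);
        // partition columns drive partition pruning rather than the data read
        tsd.setPartColumns(Arrays.asList("ds"));
        // the needed-column lists drive column pruning in the reader
        tsd.setNeededColumnIDs(Arrays.asList(0, 1));
        tsd.setNeededColumns(Arrays.asList("id", "name"));
        return tsd;
    }
}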

Example 4 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project parquet-mr by apache.

From the class Hive012Binding, the method pushFilters:

private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {
    final TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
        LOG.debug("Not pushing filters because TableScanDesc is null");
        return;
    }
    // construct column name list for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);
    // push down filters
    final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
    if (filterExpr == null) {
        LOG.debug("Not pushing filters because FilterExpr is null");
        return;
    }
    final String filterText = filterExpr.getExprString();
    final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
    jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
    jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExprSerialized);
}
Also used: TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
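
Note the API drift relative to Example 1: this 0.12-era binding serializes through Utilities.serializeExpression, while later Hive moved the call to SerializationUtilities. A binding targeting both could resolve the method at runtime; the shim below is a hypothetical sketch, with only the two class names taken from the examples.

import java.lang.reflect.Method;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class ExprSerializerShim {

    /**
     * Sketch: locate serializeExpression on whichever utility class the
     * classpath provides. Illustration only; the lookup order is an assumption.
     */
    public static String serialize(ExprNodeDesc expr) throws Exception {
        Class<?> cls;
        try {
            // newer Hive location
            cls = Class.forName("org.apache.hadoop.hive.ql.exec.SerializationUtilities");
        } catch (ClassNotFoundException e) {
            // Hive 0.12-era location, as used by Hive012Binding above
            cls = Class.forName("org.apache.hadoop.hive.ql.exec.Utilities");
        }
        for (Method m : cls.getMethods()) {
            // match by name and argument type, since the parameter narrowed
            // from ExprNodeDesc to ExprNodeGenericFuncDesc across versions
            if ("serializeExpression".equals(m.getName())
                    && m.getParameterTypes().length == 1
                    && m.getParameterTypes()[0].isInstance(expr)) {
                return (String) m.invoke(null, expr);
            }
        }
        throw new IllegalStateException("no serializeExpression accepts " + expr.getClass());
    }
}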

Example 5 with TableScanDesc

Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.

From the class DruidStorageHandler, the method getOperatorDescProperties:

@Override
public Map<String, String> getOperatorDescProperties(OperatorDesc operatorDesc, Map<String, String> initialProps) {
    if (operatorDesc instanceof TableScanDesc) {
        TableScanDesc tableScanDesc = (TableScanDesc) operatorDesc;
        ExprNodeGenericFuncDesc filterExpr = tableScanDesc.getFilterExpr();
        String druidQuery = initialProps.get(Constants.DRUID_QUERY_JSON);
        if (filterExpr != null && druidQuery != null) {
            try {
                Query query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, BaseQuery.class);
                Query queryWithDynamicFilters = DruidStorageHandlerUtils.addDynamicFilters(query, filterExpr, conf, false);
                Map<String, String> props = Maps.newHashMap(initialProps);
                props.put(Constants.DRUID_QUERY_JSON, DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(queryWithDynamicFilters));
                return props;
            } catch (IOException e) {
                LOG.error("Exception while deserializing druid query. Explain plan may not have final druid query", e);
            }
        }
    }
    // Case when we do not have any additional info to add.
    return initialProps;
}
Also used: BaseQuery(org.apache.druid.query.BaseQuery) Query(org.apache.druid.query.Query) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) IOException(java.io.IOException)
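
Because the hook only inspects the desc and the property map, it can be driven directly, for example from a handler test. A minimal sketch; the handler wiring and the query JSON value are illustrative, and the DruidStorageHandler package is assumed to be org.apache.hadoop.hive.druid.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

public class DescPropsSketch {

    /** Sketch: exercise getOperatorDescProperties outside of plan generation. */
    public static Map<String, String> withDynamicFilters(DruidStorageHandler handler,
            TableScanDesc tsDesc, String druidQueryJson) {
        Map<String, String> initial = new HashMap<>();
        initial.put(Constants.DRUID_QUERY_JSON, druidQueryJson);
        // With a filter on tsDesc and a query in the map, the handler returns
        // a copy whose query embeds the dynamic filters; otherwise it returns
        // the initial map unchanged.
        return handler.getOperatorDescProperties(tsDesc, initial);
    }
}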

Aggregations

TableScanDesc (org.apache.hadoop.hive.ql.plan.TableScanDesc) 28
ArrayList (java.util.ArrayList) 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 12
Table (org.apache.hadoop.hive.ql.metadata.Table) 8
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 7
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) 7
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 6
HashMap (java.util.HashMap) 5
LinkedHashMap (java.util.LinkedHashMap) 4
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 4
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 4
Serializable (java.io.Serializable) 3
List (java.util.List) 3
Map (java.util.Map) 3
Path (org.apache.hadoop.fs.Path) 3
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 3
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 3
Operator (org.apache.hadoop.hive.ql.exec.Operator) 3
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 3