Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
The class HiveInputFormat, method pushFilters.
public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
  // ensure filters are not set from previous pushFilters
  jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
  jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
  Utilities.unsetSchemaEvolution(jobConf);
  TableScanDesc scanDesc = tableScan.getConf();
  if (scanDesc == null) {
    return;
  }
  Utilities.addTableSchemaToConf(jobConf, tableScan);
  // construct column name list and types for reference by filter push down
  Utilities.setColumnNameList(jobConf, tableScan);
  Utilities.setColumnTypeList(jobConf, tableScan);
  // push down filters
  ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc) scanDesc.getFilterExpr();
  if (filterExpr == null) {
    return;
  }
  String serializedFilterObj = scanDesc.getSerializedFilterObject();
  String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
  boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
  if (!hasObj) {
    Serializable filterObject = scanDesc.getFilterObject();
    if (filterObject != null) {
      serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
    }
  }
  if (serializedFilterObj != null) {
    jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
  }
  if (!hasExpr) {
    serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
  }
  String filterText = filterExpr.getExprString();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + filterExpr
        + ", serializedFilterExpr = " + serializedFilterExpr + " (" + (hasExpr ? "desc" : "new") + ")"
        + (serializedFilterObj == null ? ""
            : (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new") + ")")));
  }
  jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
  jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
}
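The method above only writes the filter into the JobConf; something on the task side has to read it back. The following is a minimal sketch (an illustration, not code from the Hive source) of that read-back, assuming only the two configuration keys set above and the deserializeExpression counterpart of the serializeExpression call used in pushFilters.

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

public final class FilterReadBackSketch {

  private FilterReadBackSketch() {
  }

  /** Returns the pushed-down filter expression, or null if nothing was pushed. */
  public static ExprNodeGenericFuncDesc readPushedFilter(JobConf jobConf) {
    String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serialized == null) {
      // pushFilters returned early (no TableScanDesc or no filter expression).
      return null;
    }
    // Symmetric to SerializationUtilities.serializeExpression(...) in pushFilters above.
    return SerializationUtilities.deserializeExpression(serialized);
  }
}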
Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
The class MapOperator, method cloneConfsForNestedColPruning.
/**
 * For each source table, combine the nested column pruning information from all its
 * table scan descriptors and set it in a configuration copy. This is necessary since
 * the configuration property "READ_NESTED_COLUMN_PATH_CONF_STR" is set on a per-table
 * basis, so we can't just use a single configuration for all the tables.
 */
private Map<String, Configuration> cloneConfsForNestedColPruning(Configuration hconf) {
  Map<String, Configuration> tableNameToConf = new HashMap<>();
  for (Map.Entry<Path, ArrayList<String>> e : conf.getPathToAliases().entrySet()) {
    List<String> aliases = e.getValue();
    if (aliases == null || aliases.isEmpty()) {
      continue;
    }
    String tableName = conf.getPathToPartitionInfo().get(e.getKey()).getTableName();
    if (tableNameToConf.containsKey(tableName)) {
      continue;
    }
    for (String alias : aliases) {
      Operator<?> rootOp = conf.getAliasToWork().get(alias);
      if (!(rootOp instanceof TableScanOperator)) {
        continue;
      }
      TableScanDesc tableScanDesc = ((TableScanOperator) rootOp).getConf();
      List<String> nestedColumnPaths = tableScanDesc.getNeededNestedColumnPaths();
      if (nestedColumnPaths == null || nestedColumnPaths.isEmpty()) {
        continue;
      }
      if (!tableNameToConf.containsKey(tableName)) {
        Configuration clonedConf = new Configuration(hconf);
        clonedConf.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
        tableNameToConf.put(tableName, clonedConf);
      }
      Configuration newConf = tableNameToConf.get(tableName);
      ColumnProjectionUtils.appendNestedColumnPaths(newConf, nestedColumnPaths);
    }
  }
  // Assign tables without nested column pruning info to the default conf
  for (PartitionDesc pd : conf.getPathToPartitionInfo().values()) {
    if (!tableNameToConf.containsKey(pd.getTableName())) {
      tableNameToConf.put(pd.getTableName(), hconf);
    }
  }
  for (PartitionDesc pd : conf.getAliasToPartnInfo().values()) {
    if (!tableNameToConf.containsKey(pd.getTableName())) {
      tableNameToConf.put(pd.getTableName(), hconf);
    }
  }
  return tableNameToConf;
}
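The method returns one Configuration per table name; tables that had no nested column pruning information share the original conf. The sketch below (an assumption for illustration, not MapOperator code; the helper names are hypothetical) shows how that map could be consumed: pick the clone for a table, then inspect the nested-column-path property that appendNestedColumnPaths populated on it.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

final class NestedPruningConfLookup {

  /**
   * Mirrors the fallback loops at the end of cloneConfsForNestedColPruning:
   * tables without pruning info use the shared default conf.
   */
  static Configuration confForTable(Map<String, Configuration> tableNameToConf,
                                    String tableName,
                                    Configuration defaultConf) {
    return tableNameToConf.getOrDefault(tableName, defaultConf);
  }

  /** Raw value of the per-table nested column path property, or null if none was appended. */
  static String nestedColumnPathsFor(Configuration tableConf) {
    return tableConf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR);
  }
}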
Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
The class HiveOpConverter, method visit.
/**
 * TODO: 1. PPD needs to get pushed in to TS
 *
 * @param scanRel
 * @return
 */
OpAttr visit(HiveTableScan scanRel) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName()
        + " with row type: [" + scanRel.getRowType() + "]");
  }
  RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
  // 1. Setup TableScan Desc
  // 1.1 Build col details used by scan
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
  List<Integer> neededColumnIDs = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  Set<Integer> vcolsInCalcite = new HashSet<Integer>();
  List<String> partColNames = new ArrayList<String>();
  Map<Integer, VirtualColumn> VColsMap = HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
  Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
  Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
  List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
  List<String> scanColNames = scanRel.getRowType().getFieldNames();
  String tableAlias = scanRel.getConcatQbIDAlias();
  String colName;
  ColumnInfo colInfo;
  VirtualColumn vc;
  for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
    colName = scanColNames.get(index);
    if (VColsMap.containsKey(index)) {
      vc = VColsMap.get(index);
      virtualCols.add(vc);
      colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
      vcolsInCalcite.add(index);
    } else if (posToPartColInfo.containsKey(index)) {
      partColNames.add(colName);
      colInfo = posToPartColInfo.get(index);
      vcolsInCalcite.add(index);
    } else {
      colInfo = posToNonPartColInfo.get(index);
    }
    colInfos.add(colInfo);
    if (neededColIndxsFrmReloptHT.contains(index)) {
      neededColumnIDs.add(index);
      neededColumnNames.add(colName);
    }
  }
  // 1.2 Create TableScanDesc
  TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
  // 1.3. Set Partition cols in TSDesc
  tsd.setPartColumns(partColNames);
  // 1.4. Set needed cols in TSDesc
  tsd.setNeededColumnIDs(neededColumnIDs);
  tsd.setNeededColumns(neededColumnNames);
  // 2. Setup TableScan
  TableScanOperator ts = (TableScanOperator) OperatorFactory.get(semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
  // if a table scan with the same alias already exists, make this alias unique
  if (topOps.get(tableAlias) != null) {
    tableAlias = tableAlias + this.uniqueCounter;
  }
  topOps.put(tableAlias, ts);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
  }
  return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
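The TableScanDesc built here is the same descriptor that HiveInputFormat.pushFilters (the first example on this page) later reads the filter from. A hedged sketch of that connection follows; it is not HiveOpConverter code, and it assumes the predicate pushdown noted in the TODO has already produced an ExprNodeGenericFuncDesc somewhere in the optimizer.

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

final class AttachFilterSketch {

  /** Records a pushed predicate on the descriptor, then serializes it into the JobConf. */
  static void attachAndPush(TableScanOperator ts,
                            ExprNodeGenericFuncDesc pushedPredicate,
                            JobConf jobConf) {
    TableScanDesc tsd = ts.getConf();
    // Store the predicate on the descriptor so later stages can find it.
    tsd.setFilterExpr(pushedPredicate);
    // Same helper shown in the first example on this page.
    HiveInputFormat.pushFilters(jobConf, ts);
  }
}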
Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project parquet-mr by apache.
The class Hive012Binding, method pushFilters.
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {
  final TableScanDesc scanDesc = tableScan.getConf();
  if (scanDesc == null) {
    LOG.debug("Not pushing filters because TableScanDesc is null");
    return;
  }
  // construct column name list for reference by filter push down
  Utilities.setColumnNameList(jobConf, tableScan);
  // push down filters
  final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
  if (filterExpr == null) {
    LOG.debug("Not pushing filters because FilterExpr is null");
    return;
  }
  final String filterText = filterExpr.getExprString();
  final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
  jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
  jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExprSerialized);
}
Use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
The class DruidStorageHandler, method getOperatorDescProperties.
@Override
public Map<String, String> getOperatorDescProperties(OperatorDesc operatorDesc, Map<String, String> initialProps) {
  if (operatorDesc instanceof TableScanDesc) {
    TableScanDesc tableScanDesc = (TableScanDesc) operatorDesc;
    ExprNodeGenericFuncDesc filterExpr = tableScanDesc.getFilterExpr();
    String druidQuery = initialProps.get(Constants.DRUID_QUERY_JSON);
    if (filterExpr != null && druidQuery != null) {
      try {
        Query query = DruidStorageHandlerUtils.JSON_MAPPER.readValue(druidQuery, BaseQuery.class);
        Query queryWithDynamicFilters = DruidStorageHandlerUtils.addDynamicFilters(query, filterExpr, conf, false);
        Map<String, String> props = Maps.newHashMap(initialProps);
        props.put(Constants.DRUID_QUERY_JSON,
            DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(queryWithDynamicFilters));
        return props;
      } catch (IOException e) {
        LOG.error("Exception while deserializing druid query. Explain plan may not have final druid query", e);
      }
    }
  }
  // Case when we do not have any additional info to add.
  return initialProps;
}
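A hedged usage sketch of the override above (assumed for illustration, not Hive source): pass in a TableScanDesc that carries a filter together with the initial properties that already contain the Druid query JSON, then read back the possibly rewritten query. The handler and descriptor are assumed to be fully initialized elsewhere, and the import paths are assumptions.

import java.util.Map;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

final class DruidDescPropsSketch {

  /** Returns the Druid query JSON after dynamic filters were folded in, if any. */
  static String rewrittenDruidQuery(DruidStorageHandler handler,
                                    TableScanDesc scanDescWithFilter,
                                    Map<String, String> initialProps) {
    Map<String, String> props = handler.getOperatorDescProperties(scanDescWithFilter, initialProps);
    // Falls back to the original JSON when no filter or no query was present.
    return props.get(Constants.DRUID_QUERY_JSON);
  }
}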