use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class HiveTableScanVisitor method visit.
/**
* TODO: 1. PPD needs to get pushed in to TS.
*/
@Override
OpAttr visit(HiveTableScan scanRel) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName()
        + " with row type: [" + scanRel.getRowType() + "]");
  }
  RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable();
  // 1. Setup TableScan Desc
  // 1.1 Build col details used by scan
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>();
  List<Integer> neededColumnIDs = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  Set<Integer> vcolsInCalcite = new HashSet<Integer>();
  List<String> partColNames = new ArrayList<String>();
  Map<Integer, VirtualColumn> vColsMap =
      HiveCalciteUtil.getVColsMap(ht.getVirtualCols(), ht.getNoOfNonVirtualCols());
  Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap();
  Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap();
  List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT();
  List<String> scanColNames = scanRel.getRowType().getFieldNames();
  String tableAlias = scanRel.getConcatQbIDAlias();
  String colName;
  ColumnInfo colInfo;
  VirtualColumn vc;
  for (int index = 0; index < scanRel.getRowType().getFieldList().size(); index++) {
    colName = scanColNames.get(index);
    if (vColsMap.containsKey(index)) {
      vc = vColsMap.get(index);
      virtualCols.add(vc);
      colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden());
      vcolsInCalcite.add(index);
    } else if (posToPartColInfo.containsKey(index)) {
      partColNames.add(colName);
      colInfo = posToPartColInfo.get(index);
      vcolsInCalcite.add(index);
    } else {
      colInfo = posToNonPartColInfo.get(index);
    }
    colInfos.add(colInfo);
    if (neededColIndxsFrmReloptHT.contains(index)) {
      neededColumnIDs.add(index);
      neededColumnNames.add(colName);
    }
  }
  // 1.2 Create TableScanDesc
  TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
  // 1.3. Set Partition cols in TSDesc
  tsd.setPartColumns(partColNames);
  // 1.4. Set needed cols in TSDesc
  tsd.setNeededColumnIDs(neededColumnIDs);
  tsd.setNeededColumns(neededColumnNames);
  // 2. Setup TableScan
  TableScanOperator ts = (TableScanOperator) OperatorFactory.get(
      hiveOpConverter.getSemanticAnalyzer().getOpContext(), tsd, new RowSchema(colInfos));
  // If a table scan with the same alias is already registered in topOps, make this alias unique.
  if (hiveOpConverter.getTopOps().get(tableAlias) != null) {
    tableAlias = tableAlias + hiveOpConverter.getUniqueCounter();
  }
  hiveOpConverter.getTopOps().put(tableAlias, ts);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]");
  }
  return new OpAttr(tableAlias, vcolsInCalcite, ts);
}
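The visitor above copies column, partition, and virtual-column details from the Calcite RelOptHiveTable into the TableScanDesc before registering the operator in topOps. As a rough sketch of what ends up on the descriptor, assuming the standard TableScanDesc getters and a hypothetical debugging helper:

// Hypothetical helper, only to illustrate which fields the visitor populates.
private static void dumpScanDesc(TableScanOperator ts) {
  TableScanDesc tsd = ts.getConf();
  // Alias used as the key into topOps (possibly suffixed with a unique counter)
  LOG.debug("alias = " + tsd.getAlias());
  // Columns set via setNeededColumnIDs/setNeededColumns
  LOG.debug("needed column ids = " + tsd.getNeededColumnIDs());
  LOG.debug("needed column names = " + tsd.getNeededColumns());
  // Partition and virtual columns collected while walking the scan's row type
  LOG.debug("partition columns = " + tsd.getPartColumns());
  LOG.debug("virtual columns = " + tsd.getVirtualCols());
}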
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class HiveInputFormat method pushFiltersAndAsOf.
public static void pushFiltersAndAsOf(JobConf jobConf, TableScanOperator tableScan, final MapWork mrwork) {
  // Push as of information
  pushAsOf(jobConf, tableScan);
  // ensure filters are not set from previous pushFilters
  jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
  jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
  Utilities.unsetSchemaEvolution(jobConf);
  TableScanDesc scanDesc = tableScan.getConf();
  if (scanDesc == null) {
    return;
  }
  Utilities.addTableSchemaToConf(jobConf, tableScan);
  // construct column name list and types for reference by filter push down
  Utilities.setColumnNameList(jobConf, tableScan);
  Utilities.setColumnTypeList(jobConf, tableScan);
  // push down filters
  ExprNodeGenericFuncDesc filterExpr = scanDesc.getFilterExpr();
  String pruningFilter = jobConf.get(TableScanDesc.PARTITION_PRUNING_FILTER);
  // If we have a pruning filter then combine it with the original
  if (pruningFilter != null) {
    ExprNodeGenericFuncDesc pruningExpr = SerializationUtilities.deserializeExpression(pruningFilter);
    if (filterExpr != null) {
      // Combine the 2 filters with AND
      filterExpr = ExprNodeDescUtils.and(filterExpr, pruningExpr);
    } else {
      // Use the pruning filter if there was no filter before
      filterExpr = pruningExpr;
    }
    // Set the combined filter in the TableScanDesc and remove the pruning filter
    scanDesc.setFilterExpr(filterExpr);
    scanDesc.setSerializedFilterExpr(SerializationUtilities.serializeExpression(filterExpr));
    jobConf.unset(TableScanDesc.PARTITION_PRUNING_FILTER);
  }
  if (filterExpr == null) {
    return;
  }
  // Skip pushdown when the MR plan has more than one table alias, since the jobConf is not
  // cloned per alias; tables whose storage handler supports predicate pushdown are exempt.
  if (mrwork != null && mrwork.getAliases() != null && mrwork.getAliases().size() > 1
      && jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname).equals("mr")
      && (scanDesc.getTableMetadata() == null
          || !(scanDesc.getTableMetadata().getStorageHandler() instanceof HiveStoragePredicateHandler))) {
    return;
  }
  String serializedFilterObj = scanDesc.getSerializedFilterObject();
  String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
  boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
  if (!hasObj) {
    Serializable filterObject = scanDesc.getFilterObject();
    if (filterObject != null) {
      serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
    }
  }
  if (serializedFilterObj != null) {
    jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
  }
  if (!hasExpr) {
    serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
  }
  String filterText = filterExpr.getExprString();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + filterExpr
        + ", serializedFilterExpr = " + serializedFilterExpr + " (" + (hasExpr ? "desc" : "new") + ")"
        + (serializedFilterObj == null ? "" : (", serializedFilterObj = " + serializedFilterObj
            + " (" + (hasObj ? "desc" : "new") + ")")));
  }
  jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
  jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
}
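After pushFiltersAndAsOf runs, the pushed filter is available to readers through the job configuration under the TableScanDesc keys set at the end of the method. A minimal sketch of how a reader might recover it, assuming it sees the same JobConf (the helper name is made up):

// Hypothetical helper: recover the filter that pushFiltersAndAsOf stored in the JobConf.
static ExprNodeGenericFuncDesc readPushedFilter(JobConf jobConf) {
  // FILTER_TEXT_CONF_STR holds the human-readable form and is useful only for logging.
  String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (serialized == null) {
    // No filter was pushed down for this scan.
    return null;
  }
  return SerializationUtilities.deserializeExpression(serialized);
}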
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class OpProcFactory method pushFilterToStorageHandler.
/**
* Attempts to push a predicate down into a storage handler. For
* native tables, this is a no-op.
*
* @param tableScanOp table scan against which predicate applies
*
* @param originalPredicate predicate to be pushed down
*
* @param owi object walk info
*
* @param hiveConf Hive configuration
*
* @return portion of predicate which needs to be evaluated
* by Hive as a post-filter, or null if it was possible
* to push down the entire predicate
*/
private static ExprNodeGenericFuncDesc pushFilterToStorageHandler(TableScanOperator tableScanOp, ExprNodeGenericFuncDesc originalPredicate, OpWalkerInfo owi, HiveConf hiveConf) throws SemanticException {
  TableScanDesc tableScanDesc = tableScanOp.getConf();
  Table tbl = tableScanDesc.getTableMetadata();
  if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
    // attach the original predicate to the table scan operator for index
    // optimizations that require the pushed predicate before pcr & later
    // optimizations are applied
    tableScanDesc.setFilterExpr(originalPredicate);
  }
  if (!tbl.isNonNative()) {
    return originalPredicate;
  }
  HiveStorageHandler storageHandler = tbl.getStorageHandler();
  if (!(storageHandler instanceof HiveStoragePredicateHandler)) {
    // The storage handler does not provide predicate decomposition
    // support, so we'll implement the entire filter in Hive. However,
    // we still provide the full predicate to the storage handler in
    // case it wants to do any of its own prefiltering.
    tableScanDesc.setFilterExpr(originalPredicate);
    return originalPredicate;
  }
  HiveStoragePredicateHandler predicateHandler = (HiveStoragePredicateHandler) storageHandler;
  JobConf jobConf = new JobConf(owi.getParseContext().getConf());
  Utilities.setColumnNameList(jobConf, tableScanOp);
  Utilities.setColumnTypeList(jobConf, tableScanOp);
  try {
    Utilities.copyTableJobPropertiesToConf(Utilities.getTableDesc(tbl), jobConf);
  } catch (Exception e) {
    throw new SemanticException(e);
  }
  Deserializer deserializer = tbl.getDeserializer();
  HiveStoragePredicateHandler.DecomposedPredicate decomposed =
      predicateHandler.decomposePredicate(jobConf, deserializer, originalPredicate);
  if (decomposed == null) {
    // not able to push anything down
    if (LOG.isDebugEnabled()) {
      LOG.debug("No pushdown possible for predicate: " + originalPredicate.getExprString());
    }
    return originalPredicate;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Original predicate: " + originalPredicate.getExprString());
    if (decomposed.pushedPredicate != null) {
      LOG.debug("Pushed predicate: " + decomposed.pushedPredicate.getExprString());
    }
    if (decomposed.residualPredicate != null) {
      LOG.debug("Residual predicate: " + decomposed.residualPredicate.getExprString());
    }
  }
  tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
  tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
  return decomposed.residualPredicate;
}
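The decomposition contract used above is HiveStoragePredicateHandler.decomposePredicate, which splits the predicate into a part the storage layer will evaluate and a residual part Hive must evaluate as a post-filter. A minimal sketch of a handler that pushes nothing, with a hypothetical class name and parameter types assumed to match the call site above:

// Hypothetical handler that opts out of pushdown: Hive evaluates the whole predicate.
public class NoPushdownStorageHandler extends DefaultStorageHandler implements HiveStoragePredicateHandler {
  @Override
  public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer deserializer,
      ExprNodeDesc predicate) {
    DecomposedPredicate decomposed = new DecomposedPredicate();
    // Nothing is handed to the storage layer ...
    decomposed.pushedPredicate = null;
    decomposed.pushedPredicateObject = null;
    // ... so the entire predicate comes back to Hive as the residual post-filter.
    decomposed.residualPredicate = (ExprNodeGenericFuncDesc) predicate;
    return decomposed;
  }
}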
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class ColumnPrunerProcFactory method setupNeededColumns.
/**
* Sets up needed columns for TSOP. Mainly, transfers column names from input
* RowSchema as well as the needed virtual columns, into TableScanDesc.
*/
public static void setupNeededColumns(TableScanOperator scanOp, RowSchema inputRS, List<FieldNode> cols) throws SemanticException {
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<String> neededNestedColumnPaths = new ArrayList<>();
  List<String> referencedColumnNames = new ArrayList<String>();
  TableScanDesc desc = scanOp.getConf();
  List<VirtualColumn> virtualCols = desc.getVirtualCols();
  List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
  // add virtual columns for ANALYZE TABLE
  if (scanOp.getConf().isGatherStats()) {
    cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
  }
  for (FieldNode fn : cols) {
    String column = fn.getFieldName();
    ColumnInfo colInfo = inputRS.getColumnInfo(column);
    if (colInfo == null) {
      continue;
    }
    referencedColumnNames.add(column);
    if (colInfo.getIsVirtualCol()) {
      // keep only the virtual columns that are actually referenced
      for (int j = 0; j < virtualCols.size(); j++) {
        VirtualColumn vc = virtualCols.get(j);
        if (vc.getName().equals(colInfo.getInternalName())) {
          newVirtualCols.add(vc);
        }
      }
      // no need to pass virtual columns to reader.
      continue;
    }
    int position = inputRS.getPosition(column);
    if (position >= 0) {
      // get the needed columns by id and name
      neededColumnIds.add(position);
      neededColumnNames.add(column);
      neededNestedColumnPaths.addAll(fn.toPaths());
    }
  }
  desc.setVirtualCols(newVirtualCols);
  scanOp.setNeededColumnIDs(neededColumnIds);
  scanOp.setNeededColumns(neededColumnNames);
  scanOp.setNeededNestedColumnPaths(neededNestedColumnPaths);
  scanOp.setReferencedColumns(referencedColumnNames);
}
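setupNeededColumns is driven by the pruned column list computed for the scan; any name that does not resolve against the input RowSchema is silently skipped. A small sketch of a call site, with hypothetical column names:

// Hypothetical call site: restrict a scan to the two columns a query actually references.
static void pruneScan(TableScanOperator scanOp) throws SemanticException {
  List<FieldNode> prunedCols = new ArrayList<FieldNode>();
  prunedCols.add(new FieldNode("id"));
  prunedCols.add(new FieldNode("name"));
  // The scan's own output schema is used to resolve the names.
  ColumnPrunerProcFactory.setupNeededColumns(scanOp, scanOp.getSchema(), prunedCols);
}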
use of org.apache.hadoop.hive.ql.plan.TableScanDesc in project hive by apache.
the class GenMapRedUtils method createTemporaryTableScanOperator.
public static TableScanOperator createTemporaryTableScanOperator(CompilationOpContext ctx, RowSchema rowSchema) {
  TableScanOperator tableScanOp =
      (TableScanOperator) OperatorFactory.get(ctx, new TableScanDesc(null), rowSchema);
  // Set needed columns for this dummy TableScanOperator
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
  for (int i = 0; i < parentColumnInfos.size(); i++) {
    neededColumnIds.add(i);
    neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
  }
  tableScanOp.setNeededColumnIDs(neededColumnIds);
  tableScanOp.setNeededColumns(neededColumnNames);
  tableScanOp.setReferencedColumns(neededColumnNames);
  return tableScanOp;
}
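createTemporaryTableScanOperator marks every column of the supplied RowSchema as needed, which makes it convenient for wiring intermediate stages together. A hedged usage sketch, with made-up column names and types:

// Hypothetical usage: build a two-column row schema and wrap it in a dummy scan.
static TableScanOperator makeDummyScan(CompilationOpContext ctx) {
  ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
  signature.add(new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, "dummy", false));
  signature.add(new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "dummy", false));
  // Every column in the schema becomes a needed column of the temporary scan.
  return GenMapRedUtils.createTemporaryTableScanOperator(ctx, new RowSchema(signature));
}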