Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class HiveInputFormat, the method pushProjectionsAndFiltersAndAsOf.
protected void pushProjectionsAndFiltersAndAsOf(JobConf jobConf, Path splitPath) {
  Path splitPathWithNoSchema = Path.getPathWithoutSchemeAndAuthority(splitPath);
  if (this.mrwork == null) {
    init(job);
  }
  if (this.mrwork.getPathToAliases() == null) {
    return;
  }
  ArrayList<String> aliases = new ArrayList<String>();
  Iterator<Entry<Path, List<String>>> iterator = this.mrwork.getPathToAliases().entrySet().iterator();
  Set<Path> splitParentPaths = null;
  int pathsSize = this.mrwork.getPathToAliases().entrySet().size();
  while (iterator.hasNext()) {
    Entry<Path, List<String>> entry = iterator.next();
    Path key = entry.getKey();
    // Note for HIVE-1903: for non-native tables we might only see a table location provided as path in splitPath.
    // In this case the code part below should still work, as the "key" will be an exact match for splitPath.
    // Also: we should not anticipate table paths to be under other tables' locations.
    boolean match;
    if (pathsSize > 1) {
      // In such cases, use pre-computed paths for comparison
      if (splitParentPaths == null) {
        splitParentPaths = new HashSet<>();
        FileUtils.populateParentPaths(splitParentPaths, splitPath);
        FileUtils.populateParentPaths(splitParentPaths, splitPathWithNoSchema);
      }
      match = splitParentPaths.contains(key);
    } else {
      match = FileUtils.isPathWithinSubtree(splitPath, key)
          || FileUtils.isPathWithinSubtree(splitPathWithNoSchema, key);
    }
    if (match) {
      List<String> list = entry.getValue();
      for (String val : list) {
        aliases.add(val);
      }
    }
  }
  for (String alias : aliases) {
    Operator<? extends OperatorDesc> op = this.mrwork.getAliasToWork().get(alias);
    if (op instanceof TableScanOperator) {
      TableScanOperator ts = (TableScanOperator) op;
      // push down projections.
      ColumnProjectionUtils.appendReadColumns(jobConf, ts.getNeededColumnIDs(),
          ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
      // push down filters and as of information
      pushFiltersAndAsOf(jobConf, ts, this.mrwork);
      AcidUtils.setAcidOperationalProperties(job, ts.getConf().isTranscationalTable(),
          ts.getConf().getAcidOperationalProperties());
      AcidUtils.setValidWriteIdList(job, ts.getConf());
    }
  }
}
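For context, a minimal hypothetical sketch (not from the Hive source) of how a record reader might inspect what this method leaves behind in the JobConf. The property names are assumptions based on Hive's ColumnProjectionUtils and TableScanDesc constants and may vary between versions.

import org.apache.hadoop.mapred.JobConf;

public class ProjectionInspector {
  // Assumed property names; Hive normally exposes them as constants on
  // ColumnProjectionUtils and TableScanDesc.
  private static final String READ_COLUMN_IDS = "hive.io.file.readcolumn.ids";
  private static final String READ_COLUMN_NAMES = "hive.io.file.readcolumn.names";
  private static final String FILTER_EXPR = "hive.io.filter.expr.serialized";

  public static void logPushedState(JobConf jobConf) {
    // Column ids/names appended by ColumnProjectionUtils.appendReadColumns(...)
    String ids = jobConf.get(READ_COLUMN_IDS, "");
    String names = jobConf.get(READ_COLUMN_NAMES, "");
    // Serialized filter expression set by the filter push-down, if any
    boolean filterPushed = jobConf.get(FILTER_EXPR) != null;
    System.out.println("projected ids=" + ids + ", names=" + names + ", filter pushed=" + filterPushed);
  }
}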
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class OpProcFactory, the method createFilter.
protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) throws SemanticException {
  RowSchema inputRS = op.getSchema();
  // combine all predicates into a single expression
  List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
  Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
  while (iterator.hasNext()) {
    for (ExprNodeDesc pred : iterator.next()) {
      preds = ExprNodeDescUtils.split(pred, preds);
    }
  }
  if (preds.isEmpty()) {
    return null;
  }
  ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
  if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
    boolean pushFilterToStorage;
    HiveConf hiveConf = owi.getParseContext().getConf();
    pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
    if (pushFilterToStorage) {
      condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
      if (condn == null) {
        // we pushed the whole thing down
        return null;
      }
    }
  }
  // add new filter op
  List<Operator<? extends OperatorDesc>> originalChilren = op.getChildOperators();
  op.setChildOperators(null);
  Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false),
      new RowSchema(inputRS.getSignature()), op);
  output.setChildOperators(originalChilren);
  for (Operator<? extends OperatorDesc> ch : originalChilren) {
    List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
    int pos = parentOperators.indexOf(op);
    assert pos != -1;
    parentOperators.remove(pos);
    // add the new op as the old
    parentOperators.add(pos, output);
  }
  if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    // remove the candidate filter ops
    removeCandidates(op, owi);
  }
  // push down current ppd context to newly added filter
  ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
  if (walkerInfo != null) {
    walkerInfo.getNonFinalCandidates().clear();
    owi.putPrunedPreds(output, walkerInfo);
  }
  return output;
}
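The interesting part of createFilter is the splice: the new FilterOperator is inserted between op and its original children, and each child's parent list is patched at the same index. Below is a minimal, self-contained sketch of that pattern, using a hypothetical Node type rather than Hive's Operator hierarchy.

import java.util.ArrayList;
import java.util.List;

class Node {
  final List<Node> children = new ArrayList<>();
  final List<Node> parents = new ArrayList<>();
}

class Splice {
  static void insertBetween(Node op, Node filter) {
    // Detach the original children, mirroring op.setChildOperators(null) above
    List<Node> originalChildren = new ArrayList<>(op.children);
    op.children.clear();
    // Wire op -> filter -> original children
    op.children.add(filter);
    filter.parents.add(op);
    filter.children.addAll(originalChildren);
    // Replace op with filter in each child's parent list, keeping the same position
    for (Node child : originalChildren) {
      int pos = child.parents.indexOf(op);
      child.parents.set(pos, filter);
    }
  }
}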
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class AbstractCreateViewAnalyzer, the method addInputs.
private void addInputs(SemanticAnalyzer analyzer) {
  inputs.addAll(analyzer.getInputs());
  for (Map.Entry<String, TableScanOperator> entry : analyzer.getTopOps().entrySet()) {
    String alias = entry.getKey();
    TableScanOperator topOp = entry.getValue();
    ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(alias, analyzer.getViewAliasToInput());
    // Adds tables only for create view (PPD filter can be appended by outer query)
    Table table = topOp.getConf().getTableMetadata();
    PlanUtils.addInput(inputs, new ReadEntity(table, parentViewInfo));
  }
}
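A hypothetical helper (not part of Hive) that mirrors the loop above: collect the fully qualified names of the tables behind each top-level TableScanOperator of an analyzed query.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;

public class TopOpTables {
  public static List<String> collect(Map<String, TableScanOperator> topOps) {
    List<String> names = new ArrayList<>();
    for (Map.Entry<String, TableScanOperator> entry : topOps.entrySet()) {
      // getConf().getTableMetadata() exposes the Table object backing this scan
      names.add(entry.getValue().getConf().getTableMetadata().getFullyQualifiedName());
    }
    return names;
  }
}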
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class ConvertJoinMapJoin, the method hasExternalTableAncestor.
private static boolean hasExternalTableAncestor(Operator op, StringBuilder sb) {
  boolean result = false;
  Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class);
  if (ancestor != null) {
    TableScanOperator ts = (TableScanOperator) ancestor;
    if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
      sb.append(ts.getConf().getTableMetadata().getFullyQualifiedName());
      return true;
    }
  }
  return result;
}
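OperatorUtils.findSingleOperatorUpstream does the walk that makes this check possible. Here is a rough, self-contained sketch of that idea, using a hypothetical Op type and simplified to single-parent chains (the real utility handles more cases).

import java.util.List;

class Op {
  List<Op> parents;
}

class UpstreamSearch {
  // Walk parent links from start and return the first ancestor (or start itself)
  // of the requested type; give up at branch points in this simplified sketch.
  static <T extends Op> T findSingleUpstream(Op start, Class<T> clazz) {
    Op current = start;
    while (current != null) {
      if (clazz.isInstance(current)) {
        return clazz.cast(current);
      }
      if (current.parents == null || current.parents.size() != 1) {
        return null;
      }
      current = current.parents.get(0);
    }
    return null;
  }
}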
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
From the class DynamicPartitionPruningOptimization, the method disableSemiJoinOptDueToExternalTable.
private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOperator ts, DynamicListContext ctx) {
  boolean disableSemiJoin = false;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS)) {
    // We already have the TableScan for one side of the join. Check this now.
    if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
      LOG.debug("Disabling semijoin optimization on {} since it is an external table.",
          ts.getConf().getTableMetadata().getFullyQualifiedName());
      disableSemiJoin = true;
    } else {
      // Check the other side of the join, using the DynamicListContext
      ExprNodeDesc exprNodeDesc = ctx.getKeyCol();
      ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc);
      if (colExpr != null) {
        // fetch table alias
        ExprNodeDescUtils.ColumnOrigin columnOrigin = ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator);
        if (columnOrigin != null && columnOrigin.op instanceof TableScanOperator) {
          // Join key origin has been traced to a table column. Check if the table is external.
          TableScanOperator joinKeyTs = (TableScanOperator) columnOrigin.op;
          if (MetaStoreUtils.isExternalTable(joinKeyTs.getConf().getTableMetadata().getTTable())) {
            LOG.debug("Join key {} is from {} which is an external table. Disabling semijoin optimization.",
                columnOrigin.col, joinKeyTs.getConf().getTableMetadata().getFullyQualifiedName());
            disableSemiJoin = true;
          }
        }
      }
    }
  }
  return disableSemiJoin;
}
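Both branches above reduce to the same question: is the table behind a TableScanOperator external? A hypothetical helper (not in Hive) that centralizes that check is sketched below; the MetaStoreUtils import path is assumed and may differ between Hive releases.

// Import path assumed; some releases keep MetaStoreUtils in org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;

public class ExternalTableChecks {
  public static boolean scansExternalTable(TableScanOperator ts) {
    // getTableMetadata() returns the ql-level Table; getTTable() the thrift-level object
    return ts.getConf().getTableMetadata() != null
        && MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable());
  }
}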