Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class NullScanTaskDispatcher, method processAlias.
private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) {
  ArrayList<String> aliases = new ArrayList<String>();
  for (TableScanOperator tso : tableScans) {
    // should not apply this for non-native table
    if (tso.getConf().getTableMetadata().getStorageHandler() != null) {
      continue;
    }
    String alias = getAliasForTableScanOperator(work, tso);
    aliases.add(alias);
    tso.getConf().setIsMetadataOnly(true);
  }
  // group path alias according to work
  LinkedHashMap<Path, ArrayList<String>> candidates = new LinkedHashMap<>();
  for (Path path : work.getPaths()) {
    ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
    if (aliasesAffected != null && aliasesAffected.size() > 0) {
      candidates.put(path, aliasesAffected);
    }
  }
  for (Entry<Path, ArrayList<String>> entry : candidates.entrySet()) {
    processAlias(work, entry.getKey(), entry.getValue(), aliases);
  }
}
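For context, processAlias follows a two-phase pattern: it first marks every native-table scan as metadata-only, then groups the work's paths by the aliases they feed before rewriting each group. Below is a minimal, self-contained sketch of that grouping step using only plain Java collections; the path strings and the pathToAliases map are hypothetical stand-ins for illustration, not Hive APIs.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PathGrouping {
  public static void main(String[] args) {
    // Hypothetical path -> aliases mapping, standing in for the work's path-to-alias table.
    Map<String, List<String>> pathToAliases = new LinkedHashMap<String, List<String>>();
    pathToAliases.put("/warehouse/t1/part=1", Arrays.asList("t1"));
    pathToAliases.put("/warehouse/t1/part=2", Arrays.asList("t1"));
    pathToAliases.put("/warehouse/t2", new ArrayList<String>()); // no aliases -> skipped

    // Keep only paths that actually feed an alias; LinkedHashMap preserves insertion order,
    // so the candidate groups are visited deterministically, as in processAlias above.
    Map<String, List<String>> candidates = new LinkedHashMap<String, List<String>>();
    for (Map.Entry<String, List<String>> e : pathToAliases.entrySet()) {
      if (e.getValue() != null && !e.getValue().isEmpty()) {
        candidates.put(e.getKey(), e.getValue());
      }
    }
    for (Map.Entry<String, List<String>> e : candidates.entrySet()) {
      System.out.println(e.getKey() + " -> " + e.getValue());
    }
  }
}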
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class SamplingOptimizer, method resolve.
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
  for (Task<?> task : pctx.getRootTasks()) {
    if (!(task instanceof MapRedTask) || !((MapRedTask) task).getWork().isFinalMapRed()) {
      // this could be replaced by bucketing on RS + bucketed fetcher for next MR
      continue;
    }
    MapredWork mrWork = ((MapRedTask) task).getWork();
    MapWork mapWork = mrWork.getMapWork();
    ReduceWork reduceWork = mrWork.getReduceWork();
    if (reduceWork == null || reduceWork.getNumReduceTasks() != 1 || mapWork.getAliasToWork().size() != 1 || mapWork.getSamplingType() > 0 || reduceWork.getReducer() == null) {
      continue;
    }
    // GROUPBY operator in reducer may not be processed in parallel. Skip optimizing.
    if (OperatorUtils.findSingleOperator(reduceWork.getReducer(), GroupByOperator.class) != null) {
      continue;
    }
    Operator<?> operator = mapWork.getAliasToWork().values().iterator().next();
    if (!(operator instanceof TableScanOperator)) {
      continue;
    }
    ReduceSinkOperator child = OperatorUtils.findSingleOperator(operator, ReduceSinkOperator.class);
    if (child == null || child.getConf().getNumReducers() != 1 || !child.getConf().getPartitionCols().isEmpty()) {
      continue;
    }
    child.getConf().setNumReducers(-1);
    reduceWork.setNumReduceTasks(-1);
    mapWork.setSamplingType(MapWork.SAMPLING_ON_START);
  }
  return pctx;
}
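The resolve method leans on OperatorUtils.findSingleOperator to probe the operator tree: it looks for a GroupByOperator in the reducer (whose presence, per the source comment, makes it skip the optimization) and for the single ReduceSinkOperator under the table scan. The sketch below only illustrates that kind of tree search over a hypothetical Node class; it is not the Hive utility itself and makes no claim about its exact semantics.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.Deque;
import java.util.List;

public class OperatorTreeSearch {
  // Hypothetical operator node; Hive operators similarly expose their child operators.
  static class Node {
    final String kind;
    final List<Node> children;
    Node(String kind, List<Node> children) {
      this.kind = kind;
      this.children = children;
    }
  }

  // Breadth-first search returning the first node of the requested kind, or null if absent.
  static Node findSingle(Node root, String kind) {
    Deque<Node> queue = new ArrayDeque<Node>();
    queue.add(root);
    while (!queue.isEmpty()) {
      Node n = queue.poll();
      if (n.kind.equals(kind)) {
        return n;
      }
      queue.addAll(n.children);
    }
    return null;
  }

  public static void main(String[] args) {
    Node rs = new Node("ReduceSink", Collections.<Node>emptyList());
    Node ts = new Node("TableScan", Arrays.asList(new Node("Filter", Arrays.asList(rs))));
    System.out.println(findSingle(ts, "ReduceSink") == rs); // true
    System.out.println(findSingle(ts, "GroupBy"));          // null, so the optimization is not skipped
  }
}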
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class SemanticAnalyzer, method genPlan.
@SuppressWarnings("nls")
public Operator genPlan(QB qb, boolean skipAmbiguityCheck) throws SemanticException {
  // First generate all the opInfos for the elements in the from clause
  // Must be deterministic order map - see HIVE-8707
  Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>();
  // Recurse over the subqueries to fill the subquery part of the plan
  for (String alias : qb.getSubqAliases()) {
    QBExpr qbexpr = qb.getSubqForAlias(alias);
    Operator operator = genPlan(qb, qbexpr);
    aliasToOpInfo.put(alias, operator);
    if (qb.getViewToTabSchema().containsKey(alias)) {
      // we set viewProjectToTableSchema so that we can leverage ColumnPruner.
      if (operator instanceof SelectOperator) {
        if (this.viewProjectToTableSchema == null) {
          this.viewProjectToTableSchema = new LinkedHashMap<>();
        }
        viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema().get(alias));
      } else {
        throw new SemanticException("View " + alias + " is corresponding to " + operator.getType().name() + ", rather than a SelectOperator.");
      }
    }
  }
  // Recurse over all the source tables
  for (String alias : qb.getTabAliases()) {
    Operator op = genTablePlan(alias, qb);
    aliasToOpInfo.put(alias, op);
  }
  if (aliasToOpInfo.isEmpty()) {
    qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
    TableScanOperator op = (TableScanOperator) genTablePlan(DUMMY_TABLE, qb);
    op.getConf().setRowLimit(1);
    qb.addAlias(DUMMY_TABLE);
    qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
    aliasToOpInfo.put(DUMMY_TABLE, op);
  }
  Operator srcOpInfo = null;
  Operator lastPTFOp = null;
  if (queryProperties.hasPTF()) {
    // After processing subqueries and source tables, process
    // partitioned table functions
    HashMap<ASTNode, PTFInvocationSpec> ptfNodeToSpec = qb.getPTFNodeToSpec();
    if (ptfNodeToSpec != null) {
      for (Entry<ASTNode, PTFInvocationSpec> entry : ptfNodeToSpec.entrySet()) {
        ASTNode ast = entry.getKey();
        PTFInvocationSpec spec = entry.getValue();
        String inputAlias = spec.getQueryInputName();
        Operator inOp = aliasToOpInfo.get(inputAlias);
        if (inOp == null) {
          throw new SemanticException(generateErrorMessage(ast, "Cannot resolve input Operator for PTF invocation"));
        }
        lastPTFOp = genPTFPlan(spec, inOp);
        String ptfAlias = spec.getFunction().getAlias();
        if (ptfAlias != null) {
          aliasToOpInfo.put(ptfAlias, lastPTFOp);
        }
      }
    }
  }
  // For all the source tables that have a lateral view, attach the
  // appropriate operators to the TS
  genLateralViewPlans(aliasToOpInfo, qb);
  // process join
  if (qb.getParseInfo().getJoinExpr() != null) {
    ASTNode joinExpr = qb.getParseInfo().getJoinExpr();
    if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
      QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo);
      qb.setQbJoinTree(joinTree);
    } else {
      QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo);
      qb.setQbJoinTree(joinTree);
      /*
       * if there is only one destination in the query, try to push where predicates
       * as join conditions
       */
      Set<String> dests = qb.getParseInfo().getClauseNames();
      if (dests.size() == 1 && joinTree.getNoOuterJoin()) {
        String dest = dests.iterator().next();
        ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
        if (whereClause != null) {
          extractJoinCondsFromWhereClause(joinTree, qb, dest, (ASTNode) whereClause.getChild(0), aliasToOpInfo);
        }
      }
      if (!disableJoinMerge) {
        mergeJoinTree(qb);
      }
    }
    // if any filters are present in the join tree, push them on top of the
    // table
    pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
    srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
  } else {
    // Now if there are more than 1 sources then we have a join case
    // later we can extend this to the union all case as well
    srcOpInfo = aliasToOpInfo.values().iterator().next();
    // with PTFs, there may be more (note for PTF chains:
    // 1 PTF invocation may entail multiple PTF operators)
    srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo;
  }
  Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created Plan for Query Block " + qb.getId());
  }
  if (qb.getAlias() != null) {
    rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck);
  }
  setQB(qb);
  return bodyOpInfo;
}
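One detail worth noting in genPlan is the LinkedHashMap used for aliasToOpInfo, kept in deterministic order per the HIVE-8707 comment: when the first source operator is later taken with values().iterator().next(), the result must not depend on hash ordering. Below is a small, self-contained illustration of the difference between the two map types (not Hive code).

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class DeterministicOrder {
  public static void main(String[] args) {
    Map<String, String> hashed = new HashMap<String, String>();
    Map<String, String> linked = new LinkedHashMap<String, String>();
    for (String alias : new String[] {"t1", "src", "b", "a"}) {
      hashed.put(alias, "op-" + alias);
      linked.put(alias, "op-" + alias);
    }
    // Insertion order is preserved, so the "first" alias is stable across runs and JVMs.
    System.out.println(linked.keySet());                   // [t1, src, b, a]
    System.out.println(linked.values().iterator().next()); // op-t1
    // A plain HashMap gives no such guarantee; its iteration order may differ.
    System.out.println(hashed.keySet());
  }
}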
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class SemanticAnalyzer, method genTablePlan.
@SuppressWarnings("nls")
private Operator genTablePlan(String alias, QB qb) throws SemanticException {
  String alias_id = getAliasId(alias, qb);
  Table tab = qb.getMetaData().getSrcForAlias(alias);
  RowResolver rwsch;
  // is the table already present
  TableScanOperator top = topOps.get(alias_id);
  // Obtain table props in query
  Map<String, String> properties = qb.getTabPropsForAlias(alias);
  if (top == null) {
    // Determine row schema for TSOP.
    // Include column names from SerDe, the partition and virtual columns.
    rwsch = new RowResolver();
    try {
      // Including parameters passed in the query
      if (properties != null) {
        for (Entry<String, String> prop : properties.entrySet()) {
          if (tab.getSerdeParam(prop.getKey()) != null) {
            LOG.warn("SerDe property in input query overrides stored SerDe property");
          }
          tab.setSerdeParam(prop.getKey(), prop.getValue());
        }
      }
      // Obtain inspector for schema
      StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer().getObjectInspector();
      List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
      for (int i = 0; i < fields.size(); i++) {
        /**
         * if the column is a skewed column, use ColumnInfo accordingly
         */
        ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), alias, false);
        colInfo.setSkewedCol((isSkewedCol(alias, qb, fields.get(i).getFieldName())) ? true : false);
        rwsch.put(alias, fields.get(i).getFieldName(), colInfo);
      }
    } catch (SerDeException e) {
      throw new RuntimeException(e);
    }
    // Finally add the partitioning columns
    for (FieldSchema part_col : tab.getPartCols()) {
      LOG.trace("Adding partition col: " + part_col);
      rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
    }
    // put all virtual columns in RowResolver.
    Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
    // use a list for easy customization
    List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
    while (vcs.hasNext()) {
      VirtualColumn vc = vcs.next();
      rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
      vcList.add(vc);
    }
    // Create the root of the operator tree
    TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
    setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
    SplitSample sample = nameToSplitSample.get(alias_id);
    if (sample != null && sample.getRowCount() != null) {
      tsDesc.setRowLimit(sample.getRowCount());
      nameToSplitSample.remove(alias_id);
    }
    top = (TableScanOperator) putOpInsertMap(OperatorFactory.get(getOpContext(), tsDesc, new RowSchema(rwsch.getColumnInfos())), rwsch);
    // Set insideView so that we can skip the column authorization for this.
    top.setInsideView(qb.isInsideView() || qb.getAliasInsideView().contains(alias.toLowerCase()));
    // Add this to the list of top operators - we always start from a table
    // scan
    topOps.put(alias_id, top);
    // Add a mapping from the table scan operator to Table
    topToTable.put(top, tab);
    if (properties != null) {
      topToTableProps.put(top, properties);
      tsDesc.setOpProps(properties);
    }
  } else {
    rwsch = opParseCtx.get(top).getRowResolver();
    top.setChildOperators(null);
  }
  // check if this table is sampled and needs more than input pruning
  Operator<? extends OperatorDesc> op = top;
  TableSample ts = qb.getParseInfo().getTabSample(alias);
  if (ts != null) {
    TableScanOperator tableScanOp = top;
    tableScanOp.getConf().setTableSample(ts);
    int num = ts.getNumerator();
    int den = ts.getDenominator();
    ArrayList<ASTNode> sampleExprs = ts.getExprs();
    // TODO: Do the type checking of the expressions
    List<String> tabBucketCols = tab.getBucketCols();
    int numBuckets = tab.getNumBuckets();
    // If there are no sample cols and no bucket cols then throw an error
    if (tabBucketCols.size() == 0 && sampleExprs.size() == 0) {
      throw new SemanticException(ErrorMsg.NON_BUCKETED_TABLE.getMsg() + " " + tab.getTableName());
    }
    if (num > den) {
      throw new SemanticException(ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " + tab.getTableName());
    }
    // check if a predicate is needed
    // predicate is needed if either input pruning is not enough
    // or if input pruning is not possible
    // check if the sample columns are the same as the table bucket columns
    boolean colsEqual = true;
    if ((sampleExprs.size() != tabBucketCols.size()) && (sampleExprs.size() != 0)) {
      colsEqual = false;
    }
    for (int i = 0; i < sampleExprs.size() && colsEqual; i++) {
      boolean colFound = false;
      for (int j = 0; j < tabBucketCols.size() && !colFound; j++) {
        if (sampleExprs.get(i).getToken().getType() != HiveParser.TOK_TABLE_OR_COL) {
          break;
        }
        if (((ASTNode) sampleExprs.get(i).getChild(0)).getText().equalsIgnoreCase(tabBucketCols.get(j))) {
          colFound = true;
        }
      }
      colsEqual = (colsEqual && colFound);
    }
    // Check if input can be pruned
    ts.setInputPruning((sampleExprs == null || sampleExprs.size() == 0 || colsEqual));
    // check if input pruning is enough
    if ((sampleExprs == null || sampleExprs.size() == 0 || colsEqual) && (num == den || (den % numBuckets == 0 || numBuckets % den == 0))) {
      // input pruning is enough; add the filter for the optimizer to use it
      // later
      LOG.info("No need for sample filter");
      ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
      FilterDesc filterDesc = new FilterDesc(samplePredicate, true, new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true));
      filterDesc.setGenerated(true);
      op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
    } else {
      // need to add filter
      // create tableOp to be filterDesc and set as child to 'top'
      LOG.info("Need sample filter");
      ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, qb.getMetaData(), null);
      FilterDesc filterDesc = new FilterDesc(samplePredicate, true);
      filterDesc.setGenerated(true);
      op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
    }
  } else {
    boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
    if (testMode) {
      String tabName = tab.getTableName();
      // has the user explicitly asked not to sample this table
      String unSampleTblList = conf.getVar(HiveConf.ConfVars.HIVETESTMODENOSAMPLE);
      String[] unSampleTbls = unSampleTblList.split(",");
      boolean unsample = false;
      for (String unSampleTbl : unSampleTbls) {
        if (tabName.equalsIgnoreCase(unSampleTbl)) {
          unsample = true;
        }
      }
      if (!unsample) {
        int numBuckets = tab.getNumBuckets();
        // If the input table is bucketed, choose the first bucket
        if (numBuckets > 0) {
          TableSample tsSample = new TableSample(1, numBuckets);
          tsSample.setInputPruning(true);
          qb.getParseInfo().setTabSample(alias, tsSample);
          ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, qb.getMetaData(), null);
          FilterDesc filterDesc = new FilterDesc(samplePred, true, new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true));
          filterDesc.setGenerated(true);
          op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
          LOG.info("No need for sample filter");
        } else {
          // The table is not bucketed, add a dummy filter :: rand()
          int freq = conf.getIntVar(HiveConf.ConfVars.HIVETESTMODESAMPLEFREQ);
          TableSample tsSample = new TableSample(1, freq);
          tsSample.setInputPruning(false);
          qb.getParseInfo().setTabSample(alias, tsSample);
          LOG.info("Need sample filter");
          ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415)));
          ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc);
          FilterDesc filterDesc = new FilterDesc(samplePred, true);
          filterDesc.setGenerated(true);
          op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top);
        }
      }
    }
  }
  Operator output = putOpInsertMap(op, rwsch);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created Table Plan for " + alias + " " + op.toString());
  }
  return output;
}
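When a TABLESAMPLE clause is present, genTablePlan decides between pure input pruning and an additional sample filter using the check (num == den || den % numBuckets == 0 || numBuckets % den == 0), provided the sample expressions line up with the bucket columns. The following is a small, self-contained sketch of just that arithmetic with made-up bucket counts; it mirrors the core condition but deliberately ignores the sample-expression handling around it.

public class SamplePruningCheck {
  // Mirrors the arithmetic part of the input-pruning test in genTablePlan above.
  static boolean pruningIsEnough(int num, int den, int numBuckets, boolean colsEqual) {
    return colsEqual && (num == den || den % numBuckets == 0 || numBuckets % den == 0);
  }

  public static void main(String[] args) {
    // BUCKET 1 OUT OF 2 on a table with 32 buckets: 32 % 2 == 0, so pruning alone suffices.
    System.out.println(pruningIsEnough(1, 2, 32, true));  // true
    // BUCKET 1 OUT OF 3 on a table with 32 buckets: neither divides the other, filter needed.
    System.out.println(pruningIsEnough(1, 3, 32, true));  // false
    // Sampling on non-bucket columns needs the filter regardless of the arithmetic.
    System.out.println(pruningIsEnough(1, 2, 32, false)); // false
  }
}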