Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
From the class LineageLogger, method getEdges:
/**
 * Based on the final select operator, find out all the target columns.
 * For each target column, find out its sources based on the dependency index.
 */
@VisibleForTesting
public static List<Edge> getEdges(QueryPlan plan, Index index) {
  LinkedHashMap<String, ObjectPair<SelectOperator, org.apache.hadoop.hive.ql.metadata.Table>> finalSelOps =
      index.getFinalSelectOps();
  Map<String, Vertex> vertexCache = new LinkedHashMap<String, Vertex>();
  List<Edge> edges = new ArrayList<Edge>();
  for (ObjectPair<SelectOperator, org.apache.hadoop.hive.ql.metadata.Table> pair : finalSelOps.values()) {
    List<FieldSchema> fieldSchemas = plan.getResultSchema().getFieldSchemas();
    SelectOperator finalSelOp = pair.getFirst();
    org.apache.hadoop.hive.ql.metadata.Table t = pair.getSecond();
    String destTableName = null;
    List<String> colNames = null;
    if (t != null) {
      destTableName = t.getFullyQualifiedName();
      fieldSchemas = t.getCols();
    } else {
      // Based on the plan outputs, find out the target table name and column names.
      for (WriteEntity output : plan.getOutputs()) {
        Entity.Type entityType = output.getType();
        if (entityType == Entity.Type.TABLE || entityType == Entity.Type.PARTITION) {
          t = output.getTable();
          destTableName = t.getFullyQualifiedName();
          List<FieldSchema> cols = t.getCols();
          if (cols != null && !cols.isEmpty()) {
            colNames = Utilities.getColumnNamesFromFieldSchema(cols);
          }
          break;
        }
      }
    }
    Map<ColumnInfo, Dependency> colMap = index.getDependencies(finalSelOp);
    List<Dependency> dependencies = colMap != null ? Lists.newArrayList(colMap.values()) : null;
    int fields = fieldSchemas.size();
    if (t != null && colMap != null && fields < colMap.size()) {
      // Dynamic partition keys should be added to field schemas.
      List<FieldSchema> partitionKeys = t.getPartitionKeys();
      int dynamicKeyCount = colMap.size() - fields;
      int keyOffset = partitionKeys.size() - dynamicKeyCount;
      if (keyOffset >= 0) {
        fields += dynamicKeyCount;
        for (int i = 0; i < dynamicKeyCount; i++) {
          FieldSchema field = partitionKeys.get(keyOffset + i);
          fieldSchemas.add(field);
          if (colNames != null) {
            colNames.add(field.getName());
          }
        }
      }
    }
    if (dependencies == null || dependencies.size() != fields) {
      log("Result schema has " + fields + " fields, but we don't get as many dependencies");
    } else {
      // Go through each target column, generate the lineage edges.
      Set<Vertex> targets = new LinkedHashSet<Vertex>();
      for (int i = 0; i < fields; i++) {
        Vertex target = getOrCreateVertex(vertexCache,
            getTargetFieldName(i, destTableName, colNames, fieldSchemas), Vertex.Type.COLUMN);
        targets.add(target);
        Dependency dep = dependencies.get(i);
        addEdge(vertexCache, edges, dep.getBaseCols(), target, dep.getExpr(), Edge.Type.PROJECTION);
      }
      Set<Predicate> conds = index.getPredicates(finalSelOp);
      if (conds != null && !conds.isEmpty()) {
        for (Predicate cond : conds) {
          addEdge(vertexCache, edges, cond.getBaseCols(), new LinkedHashSet<Vertex>(targets),
              cond.getExpr(), Edge.Type.PREDICATE);
        }
      }
    }
  }
  return edges;
}
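As a rough illustration of how the result might be consumed, here is a hedged sketch that prints each edge. The accessors getType(), getSources(), and getTargets() are hypothetical names invented for this illustration; the snippet above only shows that an Edge carries source columns, a target set, an expression, and a type.

// A minimal sketch of consuming the lineage edges, e.g. from a test.
// Assumes the same imports/context as the LineageLogger snippet above.
// NOTE: getType(), getSources(), and getTargets() are hypothetical
// accessors, used here for illustration only.
static void dumpLineage(QueryPlan plan, Index index) {
  for (Edge edge : LineageLogger.getEdges(plan, index)) {
    System.out.println(edge.getType() + ": "
        + edge.getSources() + " -> " + edge.getTargets());
  }
}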
Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
From the class SemanticAnalyzer, method genLateralViewPlans:
/**
 * Generates the operator DAG needed to implement lateral views and attaches
 * it to the TS operator.
 *
 * @param aliasToOpInfo
 *          A mapping from a table alias to the TS operator. This function
 *          replaces the operator mapping as necessary
 * @param qb
 * @throws SemanticException
 */
void genLateralViewPlans(Map<String, Operator> aliasToOpInfo, QB qb) throws SemanticException {
  Map<String, ArrayList<ASTNode>> aliasToLateralViews = qb.getParseInfo().getAliasToLateralViews();
  for (Entry<String, Operator> e : aliasToOpInfo.entrySet()) {
    String alias = e.getKey();
    // See if the alias has a lateral view. If so, chain the lateral view
    // operator on.
    ArrayList<ASTNode> lateralViews = aliasToLateralViews.get(alias);
    if (lateralViews != null) {
      Operator op = e.getValue();
      for (ASTNode lateralViewTree : aliasToLateralViews.get(alias)) {
        // There are 2 paths from the TS operator (or a previous LVJ operator)
        // to the same LateralViewJoinOperator:
        //   TS -> SelectOperator(*) -> LateralViewJoinOperator
        //   TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator
        //      -> LateralViewJoinOperator
        Operator lateralViewJoin = genLateralViewPlan(qb, op, lateralViewTree);
        op = lateralViewJoin;
      }
      e.setValue(op);
    }
  }
}
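To make the two-path shape concrete, here is a hypothetical example; the table and column names are invented for illustration:

// Hypothetical example. A query such as
//
//   SELECT t.*, c FROM t LATERAL VIEW explode(t.arr) lv AS c
//
// produces, for alias "t", the two paths described above:
//
//   TS[t] -> SEL(*) ----------------------> LVJ
//   TS[t] -> SEL(arr) -> UDTF(explode) ---> LVJ
//
// A second LATERAL VIEW clause repeats the pattern, with the first LVJ
// taking the place of the TS.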
Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
From the class SemanticAnalyzer, method genUnionPlan:
@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp,
    String rightalias, Operator rightOp) throws SemanticException {
  // Currently, the unions are not merged - each union has only 2 parents. So,
  // an n-way union will lead to (n-1) union operators.
  // This could easily be merged into 1 union.
  RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
  RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
  LinkedHashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
  LinkedHashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
  // Make sure the schemas of both sides are the same.
  ASTNode tabref = qb.getAliases().isEmpty() ? null
      : qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0));
  if (leftmap.size() != rightmap.size()) {
    throw new SemanticException("Schema of both sides of union should match.");
  }
  RowResolver unionoutRR = new RowResolver();
  Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
  Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
  while (lIter.hasNext()) {
    Map.Entry<String, ColumnInfo> lEntry = lIter.next();
    Map.Entry<String, ColumnInfo> rEntry = rIter.next();
    ColumnInfo lInfo = lEntry.getValue();
    ColumnInfo rInfo = rEntry.getValue();
    // Use the left alias (as MySQL and PostgreSQL do).
    String field = lEntry.getKey();
    // Try a widening conversion; otherwise fail the union.
    TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
    if (commonTypeInfo == null) {
      throw new SemanticException(generateErrorMessage(tabref,
          "Schema of both sides of union should match: Column " + field + " is of type "
              + lInfo.getType().getTypeName() + " on first table and type "
              + rInfo.getType().getTypeName() + " on second table"));
    }
    ColumnInfo unionColInfo = new ColumnInfo(lInfo);
    unionColInfo.setType(commonTypeInfo);
    unionoutRR.put(unionalias, field, unionColInfo);
  }
  // For Spark and Tez we rely on the generated SelectOperator to do the type casting.
  // Consider:
  //   SEL_1 (int)    SEL_2 (int)    SEL_3 (double)
  // If we first merge SEL_1 and SEL_2 into UNION_1, and then merge UNION_1
  // with SEL_3 to get UNION_2, no SelectOperator will be inserted, and an
  // error will occur afterwards. The solution is to insert one after UNION_1
  // that casts int to double.
  boolean isMR = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
  if (!isMR || !(leftOp instanceof UnionOperator)) {
    leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
  }
  if (!isMR || !(rightOp instanceof UnionOperator)) {
    rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
  }
  // If one of the children is already a union operator (possibly behind an
  // identity SelectOperator), merge with it; else create a new one below.
  if (leftOp instanceof UnionOperator
      || (leftOp instanceof SelectOperator
          && leftOp.getParentOperators() != null
          && !leftOp.getParentOperators().isEmpty()
          && leftOp.getParentOperators().get(0) instanceof UnionOperator
          && ((SelectOperator) leftOp).isIdentitySelect())) {
    if (!(leftOp instanceof UnionOperator)) {
      Operator oldChild = leftOp;
      leftOp = (Operator) leftOp.getParentOperators().get(0);
      leftOp.removeChildAndAdoptItsChildren(oldChild);
    }
    // Make left a child of right.
    List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(leftOp);
    rightOp.setChildOperators(child);
    List<Operator<? extends OperatorDesc>> parent = leftOp.getParentOperators();
    parent.add(rightOp);
    UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
    uDesc.setNumInputs(uDesc.getNumInputs() + 1);
    return putOpInsertMap(leftOp, unionoutRR);
  }
  if (rightOp instanceof UnionOperator
      || (rightOp instanceof SelectOperator
          && rightOp.getParentOperators() != null
          && !rightOp.getParentOperators().isEmpty()
          && rightOp.getParentOperators().get(0) instanceof UnionOperator
          && ((SelectOperator) rightOp).isIdentitySelect())) {
    if (!(rightOp instanceof UnionOperator)) {
      Operator oldChild = rightOp;
      rightOp = (Operator) rightOp.getParentOperators().get(0);
      rightOp.removeChildAndAdoptItsChildren(oldChild);
    }
    // Make right a child of left.
    List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(rightOp);
    leftOp.setChildOperators(child);
    List<Operator<? extends OperatorDesc>> parent = rightOp.getParentOperators();
    parent.add(leftOp);
    UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
    uDesc.setNumInputs(uDesc.getNumInputs() + 1);
    return putOpInsertMap(rightOp, unionoutRR);
  }
  // Create a new union operator.
  Operator<? extends OperatorDesc> unionforward = OperatorFactory.getAndMakeChild(
      getOpContext(), new UnionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
  // Set the union operator as the child of each of leftOp and rightOp.
  List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
  child.add(unionforward);
  rightOp.setChildOperators(child);
  child = new ArrayList<Operator<? extends OperatorDesc>>();
  child.add(unionforward);
  leftOp.setChildOperators(child);
  List<Operator<? extends OperatorDesc>> parent = new ArrayList<Operator<? extends OperatorDesc>>();
  parent.add(leftOp);
  parent.add(rightOp);
  unionforward.setParentOperators(parent);
  // Create the operator info list to return.
  return putOpInsertMap(unionforward, unionoutRR);
}
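The widening rule the loop above applies can be exercised directly. A minimal sketch follows, using only classes already referenced by the snippet (FunctionRegistry and the serde2 TypeInfo machinery); the class name UnionWideningSketch is invented:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionWideningSketch {
  public static void main(String[] args) {
    // int UNION ALL double widens to double, so the union can proceed
    // once a SelectOperator casts the int side.
    TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
    System.out.println(common.getTypeName()); // expected: double

    // Non-unifiable sides may return null, which genUnionPlan turns
    // into a SemanticException.
    TypeInfo none = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.dateTypeInfo);
    System.out.println(none); // may be null for non-unifiable types
  }
}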
Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
From the class SemanticAnalyzer, method genPlan:
@SuppressWarnings("nls")
public Operator genPlan(QB qb, boolean skipAmbiguityCheck) throws SemanticException {
  // First generate all the opInfos for the elements in the from clause.
  // Must be a deterministic-order map - see HIVE-8707.
  Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>();
  // Recurse over the subqueries to fill the subquery part of the plan.
  for (String alias : qb.getSubqAliases()) {
    QBExpr qbexpr = qb.getSubqForAlias(alias);
    Operator<?> operator = genPlan(qb, qbexpr);
    aliasToOpInfo.put(alias, operator);
    if (qb.getViewToTabSchema().containsKey(alias)) {
      // We set viewProjectToTableSchema so that we can leverage ColumnPruner.
      if (operator instanceof LimitOperator) {
        // This can happen if the view was created with a LIMIT;
        // fetch the parent operator.
        operator = operator.getParentOperators().get(0);
      }
      if (operator instanceof SelectOperator) {
        if (this.viewProjectToTableSchema == null) {
          this.viewProjectToTableSchema = new LinkedHashMap<>();
        }
        viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema().get(alias));
      } else {
        throw new SemanticException("View " + alias + " is corresponding to "
            + operator.getType().name() + ", rather than a SelectOperator.");
      }
    }
  }
  // Recurse over all the source tables.
  for (String alias : qb.getTabAliases()) {
    if (alias.equals(DUMMY_TABLE)) {
      continue;
    }
    Operator op = genTablePlan(alias, qb);
    aliasToOpInfo.put(alias, op);
  }
  if (aliasToOpInfo.isEmpty()) {
    qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
    TableScanOperator op = (TableScanOperator) genTablePlan(DUMMY_TABLE, qb);
    op.getConf().setRowLimit(1);
    qb.addAlias(DUMMY_TABLE);
    qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
    aliasToOpInfo.put(DUMMY_TABLE, op);
  }
  Operator srcOpInfo = null;
  Operator lastPTFOp = null;
  if (queryProperties.hasPTF()) {
    // After processing subqueries and source tables, process
    // partitioned table functions.
    HashMap<ASTNode, PTFInvocationSpec> ptfNodeToSpec = qb.getPTFNodeToSpec();
    if (ptfNodeToSpec != null) {
      for (Entry<ASTNode, PTFInvocationSpec> entry : ptfNodeToSpec.entrySet()) {
        ASTNode ast = entry.getKey();
        PTFInvocationSpec spec = entry.getValue();
        String inputAlias = spec.getQueryInputName();
        Operator inOp = aliasToOpInfo.get(inputAlias);
        if (inOp == null) {
          throw new SemanticException(generateErrorMessage(ast,
              "Cannot resolve input Operator for PTF invocation"));
        }
        lastPTFOp = genPTFPlan(spec, inOp);
        String ptfAlias = spec.getFunction().getAlias();
        if (ptfAlias != null) {
          aliasToOpInfo.put(ptfAlias, lastPTFOp);
        }
      }
    }
  }
  // For all the source tables that have a lateral view, attach the
  // appropriate operators to the TS.
  genLateralViewPlans(aliasToOpInfo, qb);
  // Process joins.
  if (qb.getParseInfo().getJoinExpr() != null) {
    ASTNode joinExpr = qb.getParseInfo().getJoinExpr();
    if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
      QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo);
      qb.setQbJoinTree(joinTree);
    } else {
      QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo);
      qb.setQbJoinTree(joinTree);
      /*
       * If there is only one destination in the query, try to push where
       * predicates as join conditions.
       */
      Set<String> dests = qb.getParseInfo().getClauseNames();
      if (dests.size() == 1 && joinTree.getNoOuterJoin()) {
        String dest = dests.iterator().next();
        ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
        if (whereClause != null) {
          extractJoinCondsFromWhereClause(joinTree, qb, dest,
              (ASTNode) whereClause.getChild(0), aliasToOpInfo);
        }
      }
      if (!disableJoinMerge) {
        mergeJoinTree(qb);
      }
    }
    // If any filters are present in the join tree, push them on top of the table.
    pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
    srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
  } else {
    // If there were more than one source, we would have a join case;
    // later this can be extended to the union all case as well.
    srcOpInfo = aliasToOpInfo.values().iterator().next();
    // With PTFs, there may be more (note for PTF chains:
    // one PTF invocation may entail multiple PTF operators).
    srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo;
  }
  Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created Plan for Query Block " + qb.getId());
  }
  if (qb.getAlias() != null) {
    rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck);
  }
  setQB(qb);
  return bodyOpInfo;
}
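For the empty-from-clause branch above, a hypothetical illustration:

// Hypothetical illustration of the DUMMY_TABLE branch: a query with no
// FROM clause, e.g.
//
//   SELECT 1 + 1;
//
// leaves aliasToOpInfo empty, so genPlan scans the dummy table with a
// row limit of 1 and the query produces exactly one row.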
Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
From the class TezCompiler, method removeSemiJoinCyclesDueToMapsideJoins:
private static void removeSemiJoinCyclesDueToMapsideJoins(OptimizeTezProcContext procCtx)
    throws SemanticException {
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)
      || procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
    return;
  }
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1",
      MapJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R2",
      MapJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R3",
      CommonMergeJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R4",
      CommonMergeJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  SemiJoinCycleRemovalDueTOMapsideJoinContext ctx = new SemiJoinCycleRemovalDueTOMapsideJoinContext();
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  // Process the collected child/parent join pairs.
  ParseContext pCtx = procCtx.parseContext;
  for (Operator<?> parentJoin : ctx.childParentMap.keySet()) {
    Operator<?> childJoin = ctx.childParentMap.get(parentJoin);
    if (parentJoin.getChildOperators().size() == 1) {
      continue;
    }
    for (Operator<?> child : parentJoin.getChildOperators()) {
      if (!(child instanceof SelectOperator)) {
        continue;
      }
      while (child.getChildOperators().size() > 0) {
        child = child.getChildOperators().get(0);
      }
      if (!(child instanceof ReduceSinkOperator)) {
        continue;
      }
      ReduceSinkOperator rs = ((ReduceSinkOperator) child);
      SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo == null) {
        continue;
      }
      TableScanOperator ts = sjInfo.getTsOp();
      // Check whether this semijoin branch forms a cycle with childJoin.
      for (Operator<?> parent : childJoin.getParentOperators()) {
        if (parent == parentJoin) {
          continue;
        }
        assert parent instanceof ReduceSinkOperator;
        while (parent.getParentOperators().size() > 0) {
          parent = parent.getParentOperators().get(0);
        }
        if (parent == ts) {
          // We have a cycle!
          if (sjInfo.getIsHint()) {
            throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins "
                + rs + " : " + ts);
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin "
                + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
          }
          GenTezUtils.removeBranch(rs);
          GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
        }
      }
    }
  }
}
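The method descends first-child and first-parent chains in place. A hedged sketch factoring out the downward walk follows; OperatorWalkSketch and findLeafOperator are invented names, not part of Hive:

import org.apache.hadoop.hive.ql.exec.Operator;

final class OperatorWalkSketch {
  // Follow the first-child chain from op down to its leaf, mirroring
  // the descent loops in the method above.
  static Operator<?> findLeafOperator(Operator<?> op) {
    while (op.getChildOperators() != null && !op.getChildOperators().isEmpty()) {
      op = op.getChildOperators().get(0);
    }
    return op;
  }
}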