Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class HiveOpConverter, method genJoin:
private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions,
    List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children,
    String[] baseSrc, String tabAlias) throws SemanticException {
  // 1. Extract join type
  JoinCondDesc[] joinCondns;
  boolean semiJoin;
  boolean noOuterJoin;
  if (join instanceof HiveMultiJoin) {
    HiveMultiJoin hmj = (HiveMultiJoin) join;
    joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
    for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
      joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left,
          hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
    }
    semiJoin = false;
    noOuterJoin = !hmj.isOuterJoin();
  } else {
    joinCondns = new JoinCondDesc[1];
    semiJoin = join instanceof SemiJoin;
    JoinType joinType;
    if (semiJoin) {
      joinType = JoinType.LEFTSEMI;
    } else {
      joinType = extractJoinType((Join) join);
    }
    joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
    noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER
        && joinType != JoinType.RIGHTOUTER;
  }
  // 2. We create the join aux structures
  ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
  ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
  Operator<?>[] childOps = new Operator[children.size()];
  Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
  Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
  int outputPos = 0;
  for (int pos = 0; pos < children.size(); pos++) {
    // 2.1. Backtracking from RS
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    if (inputRS.getNumParent() != 1) {
      throw new SemanticException("RS should have single parent");
    }
    Operator<?> parent = inputRS.getParentOperators().get(0);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    int[] index = inputRS.getValueIndex();
    Byte tag = (byte) rsDesc.getTag();
    // 2.1.1. If semijoin...
    if (semiJoin && pos != 0) {
      exprMap.put(tag, new ArrayList<ExprNodeDesc>());
      childOps[pos] = inputRS;
      continue;
    }
    posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos,
        outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
    List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
    for (int i = 0; i < index.length; i++) {
      ColumnInfo info = new ColumnInfo(parentColumns.get(i));
      info.setInternalName(outputColumnNames.get(outputPos));
      info.setTabAlias(tabAlias);
      outputColumns.add(info);
      reversedExprs.put(outputColumnNames.get(outputPos), tag);
      outputPos++;
    }
    exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
    colExprMap.putAll(descriptors);
    childOps[pos] = inputRS;
  }
  // 3. We populate the filters and filterMap structure needed in the join descriptor
  List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
  int[][] filterMap = new int[children.size()][];
  for (int i = 0; i < children.size(); i++) {
    filtersPerInput.add(new ArrayList<ExprNodeDesc>());
  }
  // 3.1. Route each filter expression to the join input it references
  for (int i = 0; i < filterExpressions.size(); i++) {
    int leftPos = joinCondns[i].getLeft();
    int rightPos = joinCondns[i].getRight();
    for (ExprNodeDesc expr : filterExpressions.get(i)) {
      // We need to update the exprNodes: they currently refer to columns in
      // the output of the join, but they should refer to the columns
      // produced by the RS
      int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
      if (inputPos == -1) {
        inputPos = leftPos;
      }
      filtersPerInput.get(inputPos).add(expr);
      if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN
          || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN
          || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
        if (inputPos == leftPos) {
          updateFilterMap(filterMap, leftPos, rightPos);
        } else {
          updateFilterMap(filterMap, rightPos, leftPos);
        }
      }
    }
  }
  for (int pos = 0; pos < children.size(); pos++) {
    ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
    ReduceSinkDesc rsDesc = inputRS.getConf();
    Byte tag = (byte) rsDesc.getTag();
    filters.put(tag, filtersPerInput.get(pos));
  }
  // 4. We create the join operator with its descriptor
  JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns,
      filters, joinExpressions, null);
  desc.setReversedExprs(reversedExprs);
  desc.setFilterMap(filterMap);
  JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(
      childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
  joinOp.setColumnExprMap(colExprMap);
  joinOp.setPosToAliasMap(posToAliasMap);
  joinOp.getConf().setBaseSrc(baseSrc);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
  }
  return joinOp;
}
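The step-3 bookkeeping above is dense, so a minimal self-contained sketch may help. It uses plain JDK collections only; FilterRoutingSketch and its variable names are illustrative, not Hive API, and the real logic works on ExprNodeDesc trees rather than strings. The idea: each filter expression is assigned to the join input whose columns it references, and for outer joins the filter map records which other input that filter interacts with.

  import java.util.*;

  // Toy model of genJoin's step 3: route each join filter to the input whose
  // columns it references, and record cross-input dependencies for outer joins.
  public class FilterRoutingSketch {
    public static void main(String[] args) {
      int numInputs = 2;
      List<List<String>> filtersPerInput = new ArrayList<>();
      List<List<Integer>> filterMap = new ArrayList<>(); // other input per filter
      for (int i = 0; i < numInputs; i++) {
        filtersPerInput.add(new ArrayList<String>());
        filterMap.add(new ArrayList<Integer>());
      }

      int leftPos = 0;
      int rightPos = 1;
      // Pretend the expression-rewrite step resolved this filter to input 1;
      // -1 would mean "unresolved", which genJoin defaults to the left input.
      int inputPos = 1;
      String filter = "t2.c > 10";
      if (inputPos == -1) {
        inputPos = leftPos;
      }
      filtersPerInput.get(inputPos).add(filter);
      // For outer joins, remember which other input this filter touches.
      int otherPos = (inputPos == leftPos) ? rightPos : leftPos;
      filterMap.get(inputPos).add(otherPos);

      System.out.println("filtersPerInput = " + filtersPerInput); // [[], [t2.c > 10]]
      System.out.println("filterMap       = " + filterMap);       // [[], [0]]
    }
  }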
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ColumnPrunerProcFactory, method pruneJoinOperator:
private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op,
    JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap,
    Map<Byte, List<Integer>> retainMap, boolean mapJoin) throws SemanticException {
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
  LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
  if (cppCtx.genColLists(op) == null) {
    return;
  }
  List<FieldNode> neededColList = new ArrayList<>(cppCtx.genColLists(op));
  Map<Byte, List<FieldNode>> prunedColLists = new HashMap<>();
  for (byte tag : conf.getTagOrder()) {
    prunedColLists.put(tag, new ArrayList<FieldNode>());
  }
  // add the columns in join filters
  Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
  Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
  while (iter.hasNext()) {
    Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
    Byte tag = entry.getKey();
    for (ExprNodeDesc desc : entry.getValue()) {
      List<FieldNode> cols = prunedColLists.get(tag);
      cols = mergeFieldNodesWithDesc(cols, desc);
      prunedColLists.put(tag, cols);
    }
  }
  // add the columns in residual filters
  if (conf.getResidualFilterExprs() != null) {
    for (ExprNodeDesc desc : conf.getResidualFilterExprs()) {
      neededColList = mergeFieldNodesWithDesc(neededColList, desc);
    }
  }
  RowSchema joinRS = op.getSchema();
  ArrayList<String> outputCols = new ArrayList<String>();
  ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
    String internalName = conf.getOutputColumnNames().get(i);
    ExprNodeDesc desc = columnExprMap.get(internalName);
    Byte tag = conf.getReversedExprs().get(internalName);
    if (lookupColumn(neededColList, internalName) == null) {
      int index = conf.getExprs().get(tag).indexOf(desc);
      if (index < 0) {
        continue;
      }
      conf.getExprs().get(tag).remove(desc);
      if (retainMap != null) {
        retainMap.get(tag).remove(index);
      }
    } else {
      List<FieldNode> prunedRSList = prunedColLists.get(tag);
      if (prunedRSList == null) {
        prunedRSList = new ArrayList<>();
        prunedColLists.put(tag, prunedRSList);
      }
      prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedRSList, desc));
      outputCols.add(internalName);
      newColExprMap.put(internalName, desc);
    }
  }
  if (mapJoin) {
    // regenerate the valueTableDesc
    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
      List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
      StringBuilder keyOrder = new StringBuilder();
      for (int i = 0; i < valueCols.size(); i++) {
        keyOrder.append("+");
      }
      TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(
          PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
      valueTableDescs.add(valueTableDesc);
    }
    ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
    while (iters.hasNext()) {
      Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
      List<ExprNodeDesc> lists = entry.getValue();
      for (int j = 0; j < lists.size(); j++) {
        ExprNodeDesc desc = lists.get(j);
        Byte tag = entry.getKey();
        List<FieldNode> cols = prunedColLists.get(tag);
        cols = mergeFieldNodesWithDesc(cols, desc);
        prunedColLists.put(tag, cols);
      }
    }
  }
  for (Operator<? extends OperatorDesc> child : childOperators) {
    if (child instanceof ReduceSinkOperator) {
      boolean[] flags = getPruneReduceSinkOpRetainFlags(toColumnNames(neededColList),
          (ReduceSinkOperator) child);
      pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
    }
  }
  for (int i = 0; i < outputCols.size(); i++) {
    String internalName = outputCols.get(i);
    ColumnInfo col = joinRS.getColumnInfo(internalName);
    rs.add(col);
  }
  LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
  op.setColumnExprMap(newColExprMap);
  conf.setOutputColumnNames(outputCols);
  op.getSchema().setSignature(rs);
  cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
}
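As a rough illustration of what this pruning accomplishes, here is a toy sketch using plain JDK types; JoinPruneSketch, neededCols and the VALUE._colN strings are made up for the sketch, not Hive API. The core move is to walk the join's output columns in order, drop any column nobody downstream needs, and keep the output-name list and the column-to-expression map aligned.

  import java.util.*;

  // Toy model of pruneJoinOperator: drop join output columns nobody needs,
  // keeping the output-name list and the column->expression map in sync.
  public class JoinPruneSketch {
    public static void main(String[] args) {
      List<String> outputCols = Arrays.asList("_col0", "_col1", "_col2");
      Map<String, String> colExprMap = new LinkedHashMap<>();
      colExprMap.put("_col0", "VALUE._col0"); // from input 0
      colExprMap.put("_col1", "VALUE._col1"); // from input 0
      colExprMap.put("_col2", "VALUE._col0"); // from input 1
      Set<String> neededCols = new HashSet<>(Arrays.asList("_col0", "_col2"));

      List<String> prunedCols = new ArrayList<>();
      Map<String, String> prunedExprMap = new LinkedHashMap<>();
      for (String col : outputCols) {
        if (neededCols.contains(col)) { // keep only needed columns, in order
          prunedCols.add(col);
          prunedExprMap.put(col, colExprMap.get(col));
        }
      }
      System.out.println("pruned output cols: " + prunedCols); // [_col0, _col2]
      System.out.println("pruned exprMap:     " + prunedExprMap);
    }
  }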
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ColumnPrunerProcFactory, method preserveColumnOrder:
/**
 * The pruning needs to preserve the order of columns in the input schema.
 * @param op the operator whose input schema dictates the column order
 * @param cols the pruned columns, possibly out of schema order
 * @return cols reordered to follow op's input schema, or cols unchanged if
 *         the operator has no schema
 * @throws SemanticException
 */
private static List<FieldNode> preserveColumnOrder(Operator<? extends OperatorDesc> op,
    List<FieldNode> cols) throws SemanticException {
  RowSchema inputSchema = op.getSchema();
  if (inputSchema != null) {
    ArrayList<FieldNode> rs = new ArrayList<>();
    ArrayList<ColumnInfo> inputCols = inputSchema.getSignature();
    for (ColumnInfo i : inputCols) {
      FieldNode fn = lookupColumn(cols, i.getInternalName());
      if (fn != null) {
        rs.add(fn);
      }
    }
    return rs;
  } else {
    return cols;
  }
}
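A stand-alone rendering of the same idea, with plain strings in place of FieldNode and ColumnInfo (all names here are illustrative, not Hive API): the kept columns are re-emitted in the order the input schema lists them, not the order the pruner discovered them.

  import java.util.*;

  // Toy version of preserveColumnOrder: reorder a pruned column set so it
  // follows the operator's input schema rather than discovery order.
  public class PreserveOrderSketch {
    static List<String> preserveColumnOrder(List<String> schemaCols, Set<String> keptCols) {
      List<String> result = new ArrayList<>();
      for (String col : schemaCols) { // schema order drives the output order
        if (keptCols.contains(col)) {
          result.add(col);
        }
      }
      return result;
    }

    public static void main(String[] args) {
      List<String> schema = Arrays.asList("a", "b", "c", "d");
      Set<String> kept = new HashSet<>(Arrays.asList("d", "b")); // discovery order
      System.out.println(preserveColumnOrder(schema, kept)); // prints [b, d]
    }
  }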
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ConstantPropagateProcCtx, method getPropagatedConstants:
/**
* Get propagated constant map from parents.
*
* Traverse all parents of current operator, if there is propagated constant (determined by
* assignment expression like column=constant value), resolve the column using RowResolver and add
* it to current constant map.
*
* @param op
* operator getting the propagated constants.
* @return map of ColumnInfo to ExprNodeDesc. The values of that map must be either
* ExprNodeConstantDesc or ExprNodeNullDesc.
*/
public Map<ColumnInfo, ExprNodeDesc> getPropagatedConstants(Operator<? extends Serializable> op) {
  // this map should map ColumnInfo to ExprNodeConstantDesc
  Map<ColumnInfo, ExprNodeDesc> constants = new HashMap<ColumnInfo, ExprNodeDesc>();
  if (op.getSchema() == null) {
    return constants;
  }
  RowSchema rs = op.getSchema();
  LOG.debug("Getting constants of op:" + op + " with rs:" + rs);
  if (op.getParentOperators() == null) {
    return constants;
  }
  // A previous solution was based on tableAlias and colAlias, which is
  // unsafe, esp. when CBO generates derived table names. See HIVE-13602.
  // For correctness purposes, we only trust colExprMap.
  // We assume that CBO can do the constant propagation before this function is
  // called, to help improve performance.
  // UnionOperator, LimitOperator and FilterOperator are special: they should
  // already be column-position aligned.
  List<Map<Integer, ExprNodeDesc>> parentsToConstant = new ArrayList<>();
  boolean areAllParentsContainConstant = true;
  boolean noParentsContainConstant = true;
  for (Operator<?> parent : op.getParentOperators()) {
    Map<ColumnInfo, ExprNodeDesc> constMap = opToConstantExprs.get(parent);
    if (constMap == null) {
      LOG.debug("Constant of Op " + parent.getOperatorId() + " is not found");
      areAllParentsContainConstant = false;
    } else {
      noParentsContainConstant = false;
      Map<Integer, ExprNodeDesc> map = new HashMap<>();
      for (Entry<ColumnInfo, ExprNodeDesc> entry : constMap.entrySet()) {
        map.put(parent.getSchema().getPosition(entry.getKey().getInternalName()),
            entry.getValue());
      }
      parentsToConstant.add(map);
      LOG.debug("Constant of Op " + parent.getOperatorId() + " " + constMap);
    }
  }
  if (noParentsContainConstant) {
    return constants;
  }
  ArrayList<ColumnInfo> signature = op.getSchema().getSignature();
  if (op instanceof LimitOperator || op instanceof FilterOperator) {
    // there should be only one parent.
    if (op.getParentOperators().size() == 1) {
      Map<Integer, ExprNodeDesc> parentToConstant = parentsToConstant.get(0);
      for (int index = 0; index < signature.size(); index++) {
        if (parentToConstant.containsKey(index)) {
          constants.put(signature.get(index), parentToConstant.get(index));
        }
      }
    }
  } else if (op instanceof UnionOperator && areAllParentsContainConstant) {
    for (int index = 0; index < signature.size(); index++) {
      ExprNodeDesc constant = null;
      for (Map<Integer, ExprNodeDesc> parentToConstant : parentsToConstant) {
        if (!parentToConstant.containsKey(index)) {
          // if this parent does not contain a constant at this position, we
          // continue to look at other positions.
          constant = null;
          break;
        } else {
          if (constant == null) {
            constant = parentToConstant.get(index);
          } else {
            // compare if they are the same constant.
            ExprNodeDesc nextConstant = parentToConstant.get(index);
            if (!nextConstant.isSame(constant)) {
              // they are not the same constant. for example, union all of 1
              // and 2.
              constant = null;
              break;
            }
          }
        }
      }
      // we have checked all the parents for the "index" position.
      if (constant != null) {
        constants.put(signature.get(index), constant);
      }
    }
  } else if (op instanceof JoinOperator) {
    JoinOperator joinOp = (JoinOperator) op;
    Iterator<Entry<Byte, List<ExprNodeDesc>>> itr =
        joinOp.getConf().getExprs().entrySet().iterator();
    while (itr.hasNext()) {
      Entry<Byte, List<ExprNodeDesc>> e = itr.next();
      int tag = e.getKey();
      Operator<?> parent = op.getParentOperators().get(tag);
      List<ExprNodeDesc> exprs = e.getValue();
      if (exprs == null) {
        continue;
      }
      for (ExprNodeDesc expr : exprs) {
        // we are only interested in ExprNodeColumnDesc
        if (expr instanceof ExprNodeColumnDesc) {
          String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
          // find this parentColName in its parent's rs
          int parentPos = parent.getSchema().getPosition(parentColName);
          if (parentsToConstant.get(tag).containsKey(parentPos)) {
            // reverse look up colExprMap to find the childColName
            if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
              for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
                if (entry.getValue().isSame(expr)) {
                  // now propagate the constant from the parent to the child
                  constants.put(signature.get(op.getSchema().getPosition(entry.getKey())),
                      parentsToConstant.get(tag).get(parentPos));
                }
              }
            }
          }
        }
      }
    }
  } else {
    // there should be only one parent.
    if (op.getParentOperators().size() == 1) {
      Operator<?> parent = op.getParentOperators().get(0);
      if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
        for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
          if (op.getSchema().getPosition(entry.getKey()) == -1) {
            // Not present
            continue;
          }
          ExprNodeDesc expr = entry.getValue();
          if (expr instanceof ExprNodeColumnDesc) {
            String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
            // find this parentColName in its parent's rs
            int parentPos = parent.getSchema().getPosition(parentColName);
            if (parentsToConstant.get(0).containsKey(parentPos)) {
              // this position in parent is a constant
              // now propagate the constant from the parent to the child
              constants.put(signature.get(op.getSchema().getPosition(entry.getKey())),
                  parentsToConstant.get(0).get(parentPos));
            }
          }
        }
      }
    }
  }
  LOG.debug("Offering constants " + constants.keySet() + " to operator " + op.toString());
  return constants;
}
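The UnionOperator branch is the subtle case, so here is a compact, self-contained model of it (plain JDK maps with strings for constants; propagateThroughUnion is a made-up name, not Hive API): a column position keeps its constant only if every parent reports a constant there and all of them agree.

  import java.util.*;

  // Toy model of the UnionOperator branch of getPropagatedConstants: a column
  // position keeps its constant only if every parent reports the same one.
  public class UnionConstantSketch {
    static Map<Integer, String> propagateThroughUnion(List<Map<Integer, String>> parents) {
      Map<Integer, String> result = new HashMap<>();
      if (parents.isEmpty()) {
        return result;
      }
      for (Map.Entry<Integer, String> e : parents.get(0).entrySet()) {
        String constant = e.getValue();
        for (Map<Integer, String> other : parents) {
          // A missing position or a different constant kills propagation.
          if (!constant.equals(other.get(e.getKey()))) {
            constant = null;
            break;
          }
        }
        if (constant != null) {
          result.put(e.getKey(), constant);
        }
      }
      return result;
    }

    public static void main(String[] args) {
      Map<Integer, String> p1 = new HashMap<>();
      p1.put(0, "1");
      p1.put(1, "x");
      Map<Integer, String> p2 = new HashMap<>();
      p2.put(0, "1");
      p2.put(1, "y"); // disagrees with p1 at position 1
      System.out.println(propagateThroughUnion(Arrays.asList(p1, p2))); // {0=1}
    }
  }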
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
From the class ConstantPropagateProcFactory, method evaluateColumn:
/**
 * Evaluate a column reference, replacing it with a constant if possible.
 *
 * @param desc the column expression to evaluate
 * @param cppCtx the constant propagation context, holding each operator's
 *        known constants
 * @param parent the parent operator whose schema is used to resolve the column
 * @return the constant expression to substitute, or null if the column cannot
 *         be resolved to a constant
 */
private static ExprNodeDesc evaluateColumn(ExprNodeColumnDesc desc,
    ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> parent) {
  RowSchema rs = parent.getSchema();
  ColumnInfo ci = rs.getColumnInfo(desc.getColumn());
  if (ci == null) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Reverse look up of column " + desc + " error!");
    }
    ci = rs.getColumnInfo(desc.getTabAlias(), desc.getColumn());
  }
  if (ci == null) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn());
    }
    return null;
  }
  ExprNodeDesc constant = null;
  // Additional work for union operator, see union27.q
  if (ci.getAlias() == null) {
    for (Entry<ColumnInfo, ExprNodeDesc> e : cppCtx.getOpToConstantExprs().get(parent).entrySet()) {
      if (e.getKey().getInternalName().equals(ci.getInternalName())) {
        constant = e.getValue();
        break;
      }
    }
  } else {
    constant = cppCtx.getOpToConstantExprs().get(parent).get(ci);
  }
  if (constant != null) {
    if (constant instanceof ExprNodeConstantDesc
        && !constant.getTypeInfo().equals(desc.getTypeInfo())) {
      return typeCast(constant, desc.getTypeInfo());
    }
    return constant;
  } else {
    return null;
  }
}
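To make the lookup concrete, here is a stripped-down analogue (a hand-rolled Constant type stands in for ExprNodeConstantDesc, and this evaluateColumn is a sketch, not the Hive method): resolve the column in the parent's constant map and, if the declared types differ, cast the constant to the expected type.

  import java.util.*;

  // Toy model of evaluateColumn: if the parent marked this column as a
  // constant, return it, "casting" when the declared types differ.
  public class EvaluateColumnSketch {
    static final class Constant {
      final String type;
      final String value;
      Constant(String type, String value) { this.type = type; this.value = value; }
      @Override public String toString() { return value + " (" + type + ")"; }
    }

    static Constant evaluateColumn(String column, String expectedType,
        Map<String, Constant> parentConstants) {
      Constant c = parentConstants.get(column);
      if (c == null) {
        return null; // not a known constant; the caller keeps the column reference
      }
      if (!c.type.equals(expectedType)) {
        // stand-in for the typeCast(...) call in the real method
        return new Constant(expectedType, c.value);
      }
      return c;
    }

    public static void main(String[] args) {
      Map<String, Constant> parentConstants = new HashMap<>();
      parentConstants.put("_col0", new Constant("int", "42"));
      System.out.println(evaluateColumn("_col0", "bigint", parentConstants)); // 42 (bigint)
      System.out.println(evaluateColumn("_col1", "int", parentConstants));    // null
    }
  }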