use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class HiveOpConverter method genReduceSinkAndBacktrackSelect.
/**
 * Creates a ReduceSink operator for {@code input} via {@code genReduceSink} and then a
 * Select operator as a child of that ReduceSink, and returns the Select.
 *
 * <p>The single table alias handed to {@code genReduceSink} is derived from the table names
 * of the input's schema: null names are ignored, an empty name is only retained when no
 * non-empty name is present, and two different non-empty names are rejected.
 *
 * @param keepColNames column names passed to {@code buildBacktrackFromReduceSink} and
 *          {@code createColInfosSubset} to form the Select's expressions and row schema
 * @return the newly created Select operator
 * @throws SemanticException if the input schema exposes no table name at all, or more than
 *           one distinct non-empty table name
 */
private static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List<String> keepColNames) throws SemanticException {
  // Step 1: ReduceSink.
  // Step 1.1: settle on one table alias. Empty aliases (only legal for virtual columns)
  // are a fallback that any non-empty alias overrides.
  String tableAlias = null;
  for (String candidate : input.getSchema().getTableNames()) {
    if (candidate == null) {
      continue;
    }
    if (candidate.isEmpty()) {
      // Remember the empty alias only if nothing has been seen so far.
      if (tableAlias == null) {
        tableAlias = candidate;
      }
      continue;
    }
    if (tableAlias == null || tableAlias.isEmpty()) {
      tableAlias = candidate;
    } else if (!candidate.equals(tableAlias)) {
      throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
    }
  }
  if (tableAlias == null) {
    throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
  }

  // Step 1.2: build the ReduceSink itself.
  ReduceSinkOperator reduceSink = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf);

  // Step 2: build the Select that sits on the ReduceSink.
  Map<String, ExprNodeDesc> backtracked = buildBacktrackFromReduceSink(keepColNames, reduceSink.getConf().getOutputKeyColumnNames(), reduceSink.getConf().getOutputValueColumnNames(), reduceSink.getValueIndex(), input);
  ArrayList<ExprNodeDesc> selectExprs = new ArrayList<ExprNodeDesc>(backtracked.values());
  ArrayList<String> selectNames = new ArrayList<String>(backtracked.keySet());
  SelectDesc selectDesc = new SelectDesc(selectExprs, selectNames);
  ArrayList<ColumnInfo> keptColInfos = createColInfosSubset(input, keepColNames);
  RowSchema selectSchema = new RowSchema(keptColInfos);
  SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, selectSchema, reduceSink);
  selectOp.setColumnExprMap(backtracked);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
  }
  return selectOp;
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class TableAccessAnalyzer method genColNameMap.
/*
 * This method takes in an input operator and a subset of its output
 * column names, and generates the input column names for the operator
 * corresponding to those outputs. If the mapping from the input column
 * name to the output column name is not simple, the method returns
 * false, else it returns true. The list of output column names is
 * modified by this method to be the list of corresponding input column
 * names.
 *
 * Only SelectOperators that are not "select * without compute" rewrite the
 * list; every other operator leaves it untouched and yields true.
 *
 * Each entry of currColNames is translated exactly once against the
 * select's output names. This matters when the select reuses names across
 * its inputs and outputs (e.g. outputs [a, b] fed by inputs [b, a]): an
 * entry that has already been rewritten to its input name must not be
 * matched again by a later output column of the same name. Repeated
 * entries in currColNames are each translated as well.
 *
 * Entries whose select expression is a constant are removed from the list.
 * When false is returned the list may have been partially rewritten.
 */
private static boolean genColNameMap(Operator<? extends OperatorDesc> op, List<String> currColNames) {
  assert (op.columnNamesRowResolvedCanBeObtained());
  if (!(op instanceof SelectOperator)) {
    return true;
  }
  SelectDesc selectDesc = ((SelectOperator) op).getConf();
  if (selectDesc.isSelStarNoCompute()) {
    return true;
  }
  List<ExprNodeDesc> colList = selectDesc.getColList();
  List<String> outputColNames = selectDesc.getOutputColumnNames();
  int exprCount = colList.size();
  if (exprCount == 0) {
    // Nothing is selected, so there is nothing to translate.
    return true;
  }

  // Walk the requested names (not the select's columns) so that each entry is
  // looked up against the select's outputs once and never re-examined.
  int idx = 0;
  while (idx < currColNames.size()) {
    String requested = currColNames.get(idx);
    int pos = outputColNames.indexOf(requested);
    if (pos < 0 || pos >= exprCount) {
      // Not produced by one of this select's expressions; keep the name as is.
      idx++;
      continue;
    }
    ExprNodeDesc colExpr = colList.get(pos);
    if (colExpr instanceof ExprNodeConstantDesc) {
      // Constants have no input column; drop the entry (index-based removal,
      // the following entry slides into idx).
      currColNames.remove(idx);
      continue;
    }
    if (!(colExpr instanceof ExprNodeColumnDesc)) {
      // Only columns and constants can be selected;
      // the column map can not be generated
      return false;
    }
    currColNames.set(idx, ((ExprNodeColumnDesc) colExpr).getColumn());
    idx++;
  }
  return true;
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class ColumnPrunerProcCtx method handleFilterUnionChildren.
/**
 * If the input filter operator has direct child(ren) which are union operators,
 * and the number of pruned columns recorded for the filter differs from the number
 * of columns generated for the union, create a select operator between them.
 * The select operator projects exactly those columns of the union's schema that
 * appear in the union's pruned column list.
 *
 * <p>Nothing is done when {@code curOp} is not a {@link FilterOperator}, has no
 * child list, or has no (or an empty) pruned column list.
 *
 * @param curOp
 *          The filter operator which need to handle children.
 * @throws SemanticException
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
  if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
    return;
  }
  List<FieldNode> parentPrunList = prunedColLists.get(curOp);
  if (parentPrunList == null || parentPrunList.size() == 0) {
    return;
  }
  // Iterate over a snapshot: the loop body detaches the union from curOp and attaches a
  // new select to it, which would structurally modify the list being iterated if
  // getChildOperators() hands out the operator's live child list.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>(curOp.getChildOperators());
  for (Operator<? extends OperatorDesc> child : children) {
    if (!(child instanceof UnionOperator)) {
      continue;
    }
    List<FieldNode> prunList = genColLists(child);
    // Only the column counts are compared; equal counts are treated as "nothing to do".
    if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
      continue;
    }
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    for (ColumnInfo colInfo : child.getSchema().getSignature()) {
      // Keep only the union columns that survived pruning.
      if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
        continue;
      }
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputColNames.add(colInfo.getInternalName());
      ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
      newCol.setAlias(colInfo.getAlias());
      outputRS.add(newCol);
      colExprMap.put(colInfo.getInternalName(), colDesc);
    }
    SelectDesc select = new SelectDesc(exprs, outputColNames, false);
    // Splice the select in: filter -> select -> union.
    curOp.removeChild(child);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
    OperatorFactory.makeChild(sel, child);
    sel.setColumnExprMap(colExprMap);
  }
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class ConvertJoinMapJoin method removeCycleCreatingSemiJoinOps.
// Remove any semijoin branch associated with hashjoin's parent's operator
// pipeline which can cause a cycle after hashjoin optimization.
//
// For every SelectOperator child of parentSelectOpOfBigTable the first-child chain is
// followed down to its last operator. When that operator is a ReduceSink registered in
// parseContext.getRsOpToTsOpMap(), and its TableScan is also found upstream of one of the
// map join's ReduceSink parents, the branch is collected and removed afterwards.
private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator<?> parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException {
  Map<ReduceSinkOperator, TableScanOperator> cyclicBranches = new HashMap<ReduceSinkOperator, TableScanOperator>();

  for (Operator<?> child : parentSelectOpOfBigTable.getChildOperators()) {
    if (child instanceof SelectOperator) {
      // Descend along the first child until an operator without children is reached.
      Operator<?> leaf = child;
      while (!leaf.getChildOperators().isEmpty()) {
        leaf = leaf.getChildOperators().get(0);
      }

      // Only a terminating ReduceSink can be a semijoin branch.
      if (leaf instanceof ReduceSinkOperator) {
        ReduceSinkOperator semiJoinRs = (ReduceSinkOperator) leaf;
        TableScanOperator targetTs = parseContext.getRsOpToTsOpMap().get(semiJoinRs);

        // A missing mapping means this is not a semijoin branch.
        if (targetTs != null) {
          for (Operator<?> mapJoinParent : mapjoinOp.getParentOperators()) {
            if (mapJoinParent instanceof ReduceSinkOperator) {
              Set<TableScanOperator> upstreamScans = OperatorUtils.findOperatorsUpstream(mapJoinParent, TableScanOperator.class);
              for (TableScanOperator upstreamTs : upstreamScans) {
                // The very same table scan feeding the map join means a cycle.
                if (upstreamTs == targetTs) {
                  cyclicBranches.put(semiJoinRs, targetTs);
                  break;
                }
              }
            }
          }
        }
      }
    }
  }

  // Removal happens only after the scan above has finished.
  for (Map.Entry<ReduceSinkOperator, TableScanOperator> branch : cyclicBranches.entrySet()) {
    ReduceSinkOperator branchRs = branch.getKey();
    GenTezUtils.removeBranch(branchRs);
    GenTezUtils.removeSemiJoinOperator(parseContext, branchRs, branch.getValue());
  }
}
use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.
the class BucketingSortingOpProcFactory method extractTraits.
/**
 * Records bucketed and sorted columns for {@code childop} in {@code bctx}, based on the
 * ReduceSink {@code rop}.
 *
 * <p>Only a {@link SelectOperator} child is handled: its select expressions are backtracked
 * from {@code childop} to {@code rop}, and nothing is recorded when that yields no
 * expressions. Bucketed and sorted columns are each stored only when non-empty.
 *
 * @throws SemanticException propagated from the helpers this method calls
 */
static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop) throws SemanticException {
  // Any other kind of child contributes no output values, so there is nothing to do.
  if (!(childop instanceof SelectOperator)) {
    return;
  }
  SelectDesc selectConf = ((SelectOperator) childop).getConf();
  List<ExprNodeDesc> outputValues = ExprNodeDescUtils.backtrack(selectConf.getColList(), childop, rop);
  if (outputValues.isEmpty()) {
    return;
  }

  // Partition columns of the ReduceSink, matched against the values: the bucketed columns.
  List<BucketCol> bucketCols = extractBucketCols(rop, outputValues);
  // Key columns of the ReduceSink, matched against the values: the sorted columns.
  List<SortCol> sortCols = extractSortCols(rop, outputValues);

  List<ColumnInfo> childSignature = childop.getSchema().getSignature();
  if (!bucketCols.isEmpty()) {
    bctx.setBucketedCols(childop, getNewBucketCols(bucketCols, childSignature));
  }
  if (!sortCols.isEmpty()) {
    bctx.setSortedCols(childop, getNewSortCols(sortCols, childSignature));
  }
}
Aggregations