Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
From the class SemanticAnalyzer, method genSelectAllDesc:
private Operator genSelectAllDesc(Operator input) {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  List<ColumnInfo> columns = inputRR.getColumnInfos();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  for (ColumnInfo col : columns) {
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(col.getInternalName());
    columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
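For orientation, genSelectAllDesc amounts to an identity projection over every input column. Below is a minimal sketch of just the SelectDesc construction, assuming a single string column with the invented internal name _col0 (RowResolver and operator wiring omitted; it uses the same Hive planner classes as the snippet above plus TypeInfoFactory from serde2):

  // Hedged sketch, not the actual Hive code path: one invented input column.
  ColumnInfo ci = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false);
  List<ExprNodeDesc> colList = new ArrayList<>();
  List<String> columnNames = new ArrayList<>();
  colList.add(new ExprNodeColumnDesc(ci));      // pass the column through unchanged
  columnNames.add(ci.getInternalName());        // output name == internal name
  // the trailing boolean records that this SELECT is a plain SELECT *
  SelectDesc selectStarDesc = new SelectDesc(colList, columnNames, true);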
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
From the class SemanticAnalyzer, method genLateralViewPlan:
private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree)
    throws SemanticException {
  RowResolver lvForwardRR = new RowResolver();
  RowResolver source = opParseCtx.get(op).getRowResolver();
  Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
  Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> colNames = new ArrayList<String>();
  for (ColumnInfo col : source.getColumnInfos()) {
    String[] tabCol = source.reverseLookup(col.getInternalName());
    lvForwardRR.put(tabCol[0], tabCol[1], col);
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(col);
    colList.add(colExpr);
    colNames.add(colExpr.getColumn());
    lvfColExprMap.put(col.getInternalName(), colExpr);
    selColExprMap.put(col.getInternalName(), colExpr.clone());
  }
  Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
  lvForward.setColumnExprMap(lvfColExprMap);
  // The order in which the two paths are added is important. The
  // lateral view join operator depends on having the select operator
  // give it the row first.
  // Get the all path by making a select(*).
  RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
  // Operator allPath = op;
  SelectDesc sDesc = new SelectDesc(colList, colNames, false);
  sDesc.setSelStarNoCompute(true);
  Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
      sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
  allPath.setColumnExprMap(selColExprMap);
  int allColumns = allPathRR.getColumnInfos().size();
  // Get the UDTF path
  QB blankQb = new QB(null, null, false);
  Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb,
      lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
  // add udtf aliases to QB
  for (String udtfAlias : blankQb.getAliases()) {
    qb.addAlias(udtfAlias);
  }
  RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
  // Merge the two into the lateral view join.
  // The cols of the merged result will be the combination of both the
  // cols of the UDTF path and the cols of the all path. The internal
  // names have to be changed to avoid conflicts.
  RowResolver lateralViewRR = new RowResolver();
  List<String> outputInternalColNames = new ArrayList<String>();
  // For PPD, we need a column to expression map so that during the walk,
  // the processor knows how to transform the internal col names.
  // The following steps are dependent on the fact that we called
  // LVmergeRowResolvers in the above order.
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewJoinDesc(allColumns, outputInternalColNames),
      new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
  lateralViewJoin.setColumnExprMap(colExprMap);
  return lateralViewJoin;
}
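The getAndMakeChild calls above fix the shape of the lateral view plan. A sketch of the resulting operator DAG, inferred from the parent operators passed to OperatorFactory.getAndMakeChild (names in parentheses are the local variables above):

            op (input)
                 |
     LateralViewForward (lvForward)
        /                   \
  SEL select-*          SEL + UDTF
  (allPath)             (udtfPath)
        \                   /
     LateralViewJoin (lateralViewJoin)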
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
From the class SemanticAnalyzer, method genMaterializedViewDataOrgPlan:
private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos,
    List<ColumnInfo> distributeColInfos, RowResolver inputRR, Operator input) {
  // In this case, we will introduce a RS and immediately after a SEL that restores
  // the row schema to what follow-up operations are expecting
  Set<String> keys = sortColInfos.stream()
      .map(ColumnInfo::getInternalName)
      .collect(Collectors.toSet());
  Set<String> distributeKeys = distributeColInfos.stream()
      .map(ColumnInfo::getInternalName)
      .collect(Collectors.toSet());
  List<ExprNodeDesc> keyCols = new ArrayList<>();
  List<String> keyColNames = new ArrayList<>();
  StringBuilder order = new StringBuilder();
  StringBuilder nullOrder = new StringBuilder();
  List<ExprNodeDesc> valCols = new ArrayList<>();
  List<String> valColNames = new ArrayList<>();
  List<ExprNodeDesc> partCols = new ArrayList<>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
  Map<String, String> nameMapping = new HashMap<>();
  // map _col0 to KEY._col0, etc.
  for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) {
    ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
    String columnName = ci.getInternalName();
    if (keys.contains(columnName)) {
      // key (sort column)
      keyColNames.add(columnName);
      keyCols.add(e);
      colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e);
      nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName);
      order.append("+");
      nullOrder.append("a");
    } else {
      // value
      valColNames.add(columnName);
      valCols.add(e);
      colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e);
      nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName);
    }
    if (distributeKeys.contains(columnName)) {
      // distribute column
      partCols.add(e.clone());
    }
  }
  // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
  // the reduce operator will initialize Extract operator with information
  // from Key and Value TableDesc
  List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
  TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString());
  List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
  TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
  List<List<Integer>> distinctColumnIndices = new ArrayList<>();
  // Number of reducers is set to default (-1)
  ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames,
      distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable,
      Operation.NOT_ACID);
  RowResolver rsRR = new RowResolver();
  List<ColumnInfo> rsSignature = new ArrayList<>();
  for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
    ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
    String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
    colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
    rsSignature.add(colInfo);
    rsRR.put(nm[0], nm[1], colInfo);
    if (nm2 != null) {
      rsRR.addMappingOnly(nm2[0], nm2[1], colInfo);
    }
  }
  Operator<?> result = putOpInsertMap(
      OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR);
  result.setColumnExprMap(colExprMap);
  // Create SEL operator
  RowResolver selRR = new RowResolver();
  List<ColumnInfo> selSignature = new ArrayList<>();
  List<ExprNodeDesc> columnExprs = new ArrayList<>();
  List<String> colNames = new ArrayList<>();
  Map<String, ExprNodeDesc> selColExprMap = new HashMap<>();
  for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
    ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
    String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
    String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
    selSignature.add(colInfo);
    selRR.put(nm[0], nm[1], colInfo);
    if (nm2 != null) {
      selRR.addMappingOnly(nm2[0], nm2[1], colInfo);
    }
    String colName = colInfo.getInternalName();
    ExprNodeDesc exprNodeDesc;
    if (keys.contains(colName)) {
      exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(),
          ReduceField.KEY.toString() + "." + colName, null, false);
      columnExprs.add(exprNodeDesc);
    } else {
      exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(),
          ReduceField.VALUE.toString() + "." + colName, null, false);
      columnExprs.add(exprNodeDesc);
    }
    colNames.add(colName);
    selColExprMap.put(colName, exprNodeDesc);
  }
  SelectDesc selConf = new SelectDesc(columnExprs, colNames);
  result = putOpInsertMap(
      OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR);
  result.setColumnExprMap(selColExprMap);
  return result;
}
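To make the renaming concrete: suppose the incoming schema carries one sort column and one ordinary column, with the invented internal names _col0 and _col1. The ReduceSink built above exposes them to the reduce side as KEY._col0 and VALUE._col1 (see the nameMapping loop), and the trailing SELECT projects them back under their original names. A hedged sketch of just that final projection:

  // Invented column names; mirrors the SelectDesc(columnExprs, colNames) call above.
  ExprNodeDesc keyExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
      ReduceField.KEY.toString() + "._col0", null, false);    // read the reduce-side key
  ExprNodeDesc valExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
      ReduceField.VALUE.toString() + "._col1", null, false);  // read the reduce-side value
  SelectDesc restoreSchema = new SelectDesc(
      Arrays.asList(keyExpr, valExpr),
      Arrays.asList("_col0", "_col1"));                       // re-expose the original names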
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
From the class TestExecDriver, method populateMapRedPlan5:
@SuppressWarnings("unchecked")
private void populateMapRedPlan5(Table src) throws SemanticException {
  // map-side work
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("0")),
      Utilities.makeList(getStringColumn("0"), getStringColumn("1")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
  Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(
      Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
  addMapWork(mr, src, "a", op4);
  ReduceWork rWork = new ReduceWork();
  mr.setReduceWork(rWork);
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
  // reduce-side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(TMPDIR + File.separator + "mapredplan5.out"), Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}
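The reduce-side SelectDesc above reads its two inputs by the names the ReduceSink assigns them. With outputColumns = [_col0, _col1], the concatenations expand to the literals shown below; this is a minimal restatement using the same getStringColumn test helper, not additional plan logic:

  // Sketch only: the expanded column names the reduce-side SELECT reads.
  List<ExprNodeDesc> cols = new ArrayList<>();
  cols.add(getStringColumn("KEY.reducesinkkey0"));   // the single reduce-sink key
  cols.add(getStringColumn("VALUE._col1"));          // the second forwarded value column
  Operator<SelectDesc> reducer = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);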
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
From the class TestExecDriver, method populateMapRedPlan2:
@SuppressWarnings("unchecked")
private void populateMapRedPlan2(Table src) throws Exception {
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("key")),
      Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
  addMapWork(mr, src, "a", op1);
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);
  // reduce-side work
  Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(TMPDIR + File.separator + "mapredplan2.out"), Utilities.defaultTd, false));
  Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}