Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
In the class SemanticAnalyzer, the method genUDTFPlan:
private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, ArrayList<String> colAliases, QB qb, Operator input, boolean outerLV) throws SemanticException {
// No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
QBParseInfo qbp = qb.getParseInfo();
if (!qbp.getDestToGroupBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
}
if (!qbp.getDestToDistributeBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
}
if (!qbp.getDestToSortBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
}
if (!qbp.getDestToClusterBy().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
}
if (!qbp.getAliasToLateralViews().isEmpty()) {
throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
}
if (LOG.isDebugEnabled()) {
LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
}
// Use the RowResolver from the input operator to generate an input
// ObjectInspector that can be used to initialize the UDTF. Then, the
// resulting output object inspector can be used to make the RowResolver
// for the UDTF operator
RowResolver selectRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> inputCols = selectRR.getColumnInfos();
// Create the object inspector for the input columns and initialize the UDTF
ArrayList<String> colNames = new ArrayList<String>();
ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
for (int i = 0; i < inputCols.size(); i++) {
colNames.add(inputCols.get(i).getInternalName());
colOIs[i] = inputCols.get(i).getObjectInspector();
}
StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
int numUdtfCols = outputOI.getAllStructFieldRefs().size();
if (colAliases.isEmpty()) {
// user did not specify alias names, infer names from outputOI
for (StructField field : outputOI.getAllStructFieldRefs()) {
colAliases.add(field.getFieldName());
}
}
// Make sure that the number of column aliases in the AS clause matches
// the number of columns output by the UDTF
int numSuppliedAliases = colAliases.size();
if (numUdtfCols != numSuppliedAliases) {
throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numUdtfCols + " aliases " + "but got " + numSuppliedAliases));
}
// Generate the output ColumnInfos / row resolver using internal names.
ArrayList<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
Iterator<String> colAliasesIter = colAliases.iterator();
for (StructField sf : outputOI.getAllStructFieldRefs()) {
String colAlias = colAliasesIter.next();
assert (colAlias != null);
// Since the UDTF operator feeds into an LVJ operator that will rename
// all the internal names, we can just use the field name from the UDTF's OI
// as the internal name
ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
udtfCols.add(col);
}
// Create the row resolver for this operator from the output columns
RowResolver out_rwsch = new RowResolver();
for (int i = 0; i < udtfCols.size(); i++) {
out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
}
// Add the UDTFOperator to the operator DAG
Operator<?> udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
return udtf;
}
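Every snippet in this listing ends with the same handoff: a RowResolver is populated with one ColumnInfo per output column, and its ordered column infos become the RowSchema passed to OperatorFactory.getAndMakeChild. A minimal, hypothetical sketch of that pattern (RowSchemaSketch and schemaFor are invented names, and the string-typed columns are placeholders, not what genUDTFPlan actually produces):
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class RowSchemaSketch {
  // Register one ColumnInfo per output column under the table alias, then wrap
  // the resolver's ordered ColumnInfos in the RowSchema for the new operator.
  public static RowSchema schemaFor(String tableAlias, ArrayList<String> colAliases) {
    RowResolver rr = new RowResolver();
    for (String alias : colAliases) {
      // string-typed, non-virtual columns purely for illustration
      rr.put(tableAlias, alias,
          new ColumnInfo(alias, TypeInfoFactory.stringTypeInfo, tableAlias, false));
    }
    return new RowSchema(rr.getColumnInfos());
  }
}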
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
In the class SemanticAnalyzer, the method genMapGroupByForSemijoin:
private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields /* the ASTNodes of the join keys, "tab.col" */, Operator inputOperatorInfo, GroupByDesc.Mode mode) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
qb.getParseInfo();
// join keys should only be columns, not arbitrary expressions
groupByOutputRowResolver.setIsExprResolver(true);
for (int i = 0; i < fields.size(); ++i) {
// get the group by keys to ColumnInfo
ASTNode colName = fields.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
// generate output column names
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
groupByOutputRowResolver.putExpression(colName, colInfo2);
// establish mapping from the output column to the input column
colExprMap.put(field, grpByExprNode);
}
// Generate group-by operator
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
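The two floats handed to GroupByDesc above are plain configuration reads. A small hedged example of reading the same knobs outside the analyzer (using a freshly constructed HiveConf instead of the analyzer's conf field):
import org.apache.hadoop.hive.conf.HiveConf;

public class GroupByMemoryKnobs {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // portion of task memory the map-side hash aggregation may use
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    // memory threshold at which the hash aggregation is flushed
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    System.out.println(groupByMemoryUsage + " / " + memoryThreshold);
  }
}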
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
In the class SemanticAnalyzer, the method genPTFPlanForComponentQuery:
private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
/*
* 1. Create the PTFDesc from the Qspec attached to this QB.
*/
RowResolver rr = opParseCtx.get(input).getRowResolver();
PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
/*
* 2. build Map-side Op Graph. Graph template is either:
* Input -> PTF_map -> ReduceSink
* or
* Input -> ReduceSink
*
* Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
*/
{
PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
/*
* a. add Map-side PTF Operator if needed
*/
if (tabDef.isTransformsRawInput()) {
RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
ptfDesc.setMapSide(true);
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
rr = opParseCtx.get(input).getRowResolver();
}
/*
* b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
*/
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
StringBuilder orderString = new StringBuilder();
StringBuilder nullOrderString = new StringBuilder();
/*
* Use the input RR of TableScanOperator in case there is no map-side
* reshape of input.
* If the parent of ReduceSinkOperator is PTFOperator, use its
* output RR.
*/
buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID);
}
/*
* 3. build Reduce-side Op Graph
*/
{
/*
* c. Rebuild the QueryDef.
* Why?
* - so that the ExprNodeDescriptors in the QueryDef are based on the
* Select Operator's RowResolver
*/
rr = opParseCtx.get(input).getRowResolver();
ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
/*
* d. Construct PTF Operator.
*/
RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
}
return input;
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
In the class SemanticAnalyzer, the method genReduceSinkPlan:
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
Operator dummy = Operator.createDummy();
dummy.setParentOperators(Arrays.asList(input));
ArrayList<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
StringBuilder newSortOrder = new StringBuilder();
StringBuilder newNullOrder = new StringBuilder();
ArrayList<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < sortCols.size(); i++) {
ExprNodeDesc sortCol = sortCols.get(i);
// keep this sort column unless we are pulling constants and it is a constant
if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
newSortCols.add(sortCol);
newSortOrder.append(sortOrder.charAt(i));
newNullOrder.append(nullOrder.charAt(i));
sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
}
}
// For the generation of the values expression just get the inputs
// signature and generate field expressions for those
RowResolver rsRR = new RowResolver();
ArrayList<String> outputColumns = new ArrayList<String>();
ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
ArrayList<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
ArrayList<ColumnInfo> columnInfos = inputRR.getColumnInfos();
int[] index = new int[columnInfos.size()];
for (int i = 0; i < index.length; i++) {
ColumnInfo colInfo = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
// backtrack can be null when input is script operator
ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
// ignore, it will be generated by SEL op
index[i] = Integer.MAX_VALUE;
constantCols.add(valueBack);
continue;
}
int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
if (kindex >= 0) {
index[i] = kindex;
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
continue;
}
int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
if (vindex >= 0) {
index[i] = -vindex - 1;
continue;
}
index[i] = -valueCols.size() - 1;
String outputColName = getColumnInternalName(valueCols.size());
valueCols.add(value);
valueColsBack.add(valueBack);
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
outputColumns.add(outputColName);
}
dummy.setParentOperators(null);
ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), numReducers, acidOp);
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
for (int i = 0; i < keyColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i));
}
List<String> valueColNames = rsdesc.getOutputValueColumnNames();
for (int i = 0; i < valueColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
}
interim.setColumnExprMap(colExprMap);
RowResolver selectRR = new RowResolver();
ArrayList<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
ArrayList<String> selOutputCols = new ArrayList<String>();
Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
Iterator<ExprNodeDesc> constants = constantCols.iterator();
for (int i = 0; i < index.length; i++) {
ColumnInfo prev = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(prev.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
ColumnInfo info = new ColumnInfo(prev);
ExprNodeDesc desc;
if (index[i] == Integer.MAX_VALUE) {
desc = constants.next();
} else {
String field;
if (index[i] >= 0) {
field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
} else {
field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
}
desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
}
selCols.add(desc);
String internalName = getColumnInternalName(i);
info.setInternalName(internalName);
selectRR.put(nm[0], nm[1], info);
if (nm2 != null) {
selectRR.addMappingOnly(nm2[0], nm2[1], info);
}
selOutputCols.add(internalName);
selColExprMap.put(internalName, desc);
}
SelectDesc select = new SelectDesc(selCols, selOutputCols);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
output.setColumnExprMap(selColExprMap);
return output;
}
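genReduceSinkPlan renames key columns to KEY.reducesinkkey<i> and value columns to VALUE.<internal name> before the trailing SelectOperator maps them back. A hypothetical helper (ReduceSinkNames is an invented name, and the _col<i> value names assume getColumnInternalName's usual convention) that reproduces those internal names:
import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceSinkNames {
  // key columns surface downstream as KEY.reducesinkkey<i>
  public static String keyName(int i) {
    return Utilities.ReduceField.KEY + ".reducesinkkey" + i;
  }
  // value columns surface downstream as VALUE._col<i>, assuming _col<i> internal names
  public static String valueName(int i) {
    return Utilities.ReduceField.VALUE + "._col" + i;
  }
}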
Use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
In the class SemanticAnalyzer, the method genUnionPlan:
@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp, String rightalias, Operator rightOp) throws SemanticException {
// Currently, the unions are not merged - each union has only 2 parents. So,
// an n-way union will lead to (n-1) union operators.
// These could easily be merged into a single union operator.
RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
LinkedHashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
LinkedHashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
// make sure the schemas of both sides are the same
ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0));
if (leftmap.size() != rightmap.size()) {
throw new SemanticException("Schema of both sides of union should match.");
}
RowResolver unionoutRR = new RowResolver();
Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
while (lIter.hasNext()) {
Map.Entry<String, ColumnInfo> lEntry = lIter.next();
Map.Entry<String, ColumnInfo> rEntry = rIter.next();
ColumnInfo lInfo = lEntry.getValue();
ColumnInfo rInfo = rEntry.getValue();
// use the left alias for the output column (as MySQL and PostgreSQL do)
String field = lEntry.getKey();
// try widening conversion, otherwise fail union
TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
if (commonTypeInfo == null) {
throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table"));
}
ColumnInfo unionColInfo = new ColumnInfo(lInfo);
unionColInfo.setType(commonTypeInfo);
unionoutRR.put(unionalias, field, unionColInfo);
}
// For Spark and Tez we rely on the generated SelectOperator to do the type casting.
// Consider:
// SEL_1 (int) SEL_2 (int) SEL_3 (double)
// If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1
// with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an error
// will happen afterwards. The solution here is to insert one after UNION_1, which
// casts int to double.
boolean isMR = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
if (!isMR || !(leftOp instanceof UnionOperator)) {
leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
}
if (!isMR || !(rightOp instanceof UnionOperator)) {
rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
}
// if one side is already a union (possibly behind an identity select), fold the
// other side into it as an extra parent; otherwise create a new union operator
if (leftOp instanceof UnionOperator || (leftOp instanceof SelectOperator && leftOp.getParentOperators() != null && !leftOp.getParentOperators().isEmpty() && leftOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) leftOp).isIdentitySelect())) {
if (!(leftOp instanceof UnionOperator)) {
Operator oldChild = leftOp;
leftOp = (Operator) leftOp.getParentOperators().get(0);
leftOp.removeChildAndAdoptItsChildren(oldChild);
}
// make left a child of right
List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
child.add(leftOp);
rightOp.setChildOperators(child);
List<Operator<? extends OperatorDesc>> parent = leftOp.getParentOperators();
parent.add(rightOp);
UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
uDesc.setNumInputs(uDesc.getNumInputs() + 1);
return putOpInsertMap(leftOp, unionoutRR);
}
if (rightOp instanceof UnionOperator || (rightOp instanceof SelectOperator && rightOp.getParentOperators() != null && !rightOp.getParentOperators().isEmpty() && rightOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) rightOp).isIdentitySelect())) {
if (!(rightOp instanceof UnionOperator)) {
Operator oldChild = rightOp;
rightOp = (Operator) rightOp.getParentOperators().get(0);
rightOp.removeChildAndAdoptItsChildren(oldChild);
}
// make right a child of left
List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
child.add(rightOp);
leftOp.setChildOperators(child);
List<Operator<? extends OperatorDesc>> parent = rightOp.getParentOperators();
parent.add(leftOp);
UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
uDesc.setNumInputs(uDesc.getNumInputs() + 1);
return putOpInsertMap(rightOp, unionoutRR);
}
// Create a new union operator
Operator<? extends OperatorDesc> unionforward = OperatorFactory.getAndMakeChild(getOpContext(), new UnionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
// set union operator as child of each of leftOp and rightOp
List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
child.add(unionforward);
rightOp.setChildOperators(child);
child = new ArrayList<Operator<? extends OperatorDesc>>();
child.add(unionforward);
leftOp.setChildOperators(child);
List<Operator<? extends OperatorDesc>> parent = new ArrayList<Operator<? extends OperatorDesc>>();
parent.add(leftOp);
parent.add(rightOp);
unionforward.setParentOperators(parent);
// create operator info list to return
return putOpInsertMap(unionforward, unionoutRR);
}
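The per-column type check in genUnionPlan delegates to FunctionRegistry.getCommonClassForUnionAll, which returns the widened common type or null when the two sides cannot be reconciled. A minimal hedged example (int and double are assumed to widen to double, matching the SEL_1/SEL_3 scenario in the comment above):
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionWidening {
  public static void main(String[] args) {
    // common type both branches can be cast to, or null if the union cannot be typed
    TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
    System.out.println(common); // expected: double
  }
}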