use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveOpConverter method genPTF.
private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticException {
  Operator<?> input = inputOpAf.inputs.get(0);
  wSpec.validateAndMakeEffective();
  WindowingComponentizer groups = new WindowingComponentizer(wSpec);
  RowResolver rr = new RowResolver();
  for (ColumnInfo ci : input.getSchema().getSignature()) {
    rr.put(inputOpAf.tabAlias, ci.getInternalName(), ci);
  }
  while (groups.hasNext()) {
    wSpec = groups.next(hiveConf, semanticAnalyzer, unparseTranslator, rr);
    // 1. Create RS and backtrack Select operator on top
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) {
      ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr);
      if (ExprNodeDescUtils.indexOf(partExpr, partCols) < 0) {
        keyCols.add(partExpr);
        partCols.add(partExpr);
        order.append('+');
        nullOrder.append('a');
      }
    }
    if (wSpec.getQueryOrderSpec() != null) {
      for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) {
        ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr);
        char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-';
        char nullOrderChar = orderCol.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z';
        int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols);
        if (index >= 0) {
          order.setCharAt(index, orderChar);
          nullOrder.setCharAt(index, nullOrderChar);
          continue;
        }
        keyCols.add(orderExpr);
        order.append(orderChar);
        nullOrder.append(nullOrderChar);
      }
    }
    SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input,
        keyCols.toArray(new ExprNodeDesc[keyCols.size()]), 0, partCols,
        order.toString(), nullOrder.toString(), -1, Operation.NOT_ACID, hiveConf);
    // 2. Finally create PTF
    PTFTranslator translator = new PTFTranslator();
    PTFDesc ptfDesc = translator.translate(wSpec, semanticAnalyzer, hiveConf, rr, unparseTranslator);
    RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
    Operator<?> ptfOp = OperatorFactory.getAndMakeChild(ptfDesc,
        new RowSchema(ptfOpRR.getColumnInfos()), selectOp);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Generated " + ptfOp + " with row schema: [" + ptfOp.getSchema() + "]");
    }
    // 3. Prepare for next iteration (if any)
    rr = ptfOpRR;
    input = ptfOp;
  }
  return inputOpAf.clone(input);
}
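The part of this example that actually touches RowSchema is the last step: the PTF operator's schema is built directly from the ColumnInfos of the output shape's RowResolver. Below is a minimal sketch of that handoff, assuming the same Hive classes as the snippet above are in scope; attachPtfOperator and its parameters are hypothetical names, not HiveOpConverter members.

  // Hypothetical helper: wrap the resolver's ColumnInfos in a RowSchema and let
  // OperatorFactory link the new PTF operator under its parent, as in step 2 above.
  static Operator<?> attachPtfOperator(PTFDesc ptfDesc, RowResolver outputRR, Operator<?> parentOp) {
    RowSchema schema = new RowSchema(outputRR.getColumnInfos());
    return OperatorFactory.getAndMakeChild(ptfDesc, schema, parentOp);
  }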
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveOpConverter method visit.
OpAttr visit(HiveUnion unionRel) throws SemanticException {
  // 1. Convert inputs
  List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
  OpAttr[] inputs = new OpAttr[inputsList.size()];
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = dispatch(inputsList.get(i));
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName()
        + " with row type: [" + unionRel.getRowType() + "]");
  }
  // 2. Create a new union operator
  UnionDesc unionDesc = new UnionDesc();
  unionDesc.setNumInputs(inputs.length);
  String tableAlias = getHiveDerivedTableAlias();
  ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
  Operator<?>[] children = new Operator<?>[inputs.length];
  for (int i = 0; i < children.length; i++) {
    if (i == 0) {
      children[i] = inputs[i].inputs.get(0);
    } else {
      Operator<?> op = inputs[i].inputs.get(0);
      // We need to check whether the other input branches of the union match the first branch;
      // we may need to cast the data types of specific columns.
      children[i] = genInputSelectForUnion(op, cinfoLst);
    }
  }
  Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(
      semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
  }
  // 3. Return result
  return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
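The union case shows the other common getAndMakeChild overload: an operator context and several children are passed along with the one RowSchema that all branches share. A minimal sketch under the same assumptions follows; buildUnion and its parameters are hypothetical names, and in the real method every non-first branch is first routed through genInputSelectForUnion so that its column types match the first branch's ColumnInfos.

  // Hypothetical helper: N parents, one UnionOperator, one shared RowSchema built from
  // the first branch's ColumnInfos.
  static Operator<? extends OperatorDesc> buildUnion(CompilationOpContext ctx,
      ArrayList<ColumnInfo> firstBranchCols, Operator<?>... children) {
    UnionDesc unionDesc = new UnionDesc();
    unionDesc.setNumInputs(children.length);
    // Each child is expected to already produce rows matching firstBranchCols; the example
    // above inserts a casting Select on the non-first branches to guarantee that.
    return OperatorFactory.getAndMakeChild(ctx, unionDesc, new RowSchema(firstBranchCols), children);
  }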
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGBRS.
private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<String> outputKeyColumnNames = new ArrayList<String>();
  List<String> outputValueColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
  ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1,
      outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
  int keyLength = reduceKeys.size();
  if (inclGrpSetInMapSide(gbInfo)) {
    addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true,
        reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
    keyLength++;
  }
  if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
    // NOTE: All distinct cols share a single output col name.
    reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(),
        mapGB.getConf().getKeys().size() - 1, outputKeyColumnNames, true, colInfoLst,
        colExprMap, false, false));
  } else if (!gbInfo.distColIndices.isEmpty()) {
    // This is the case where distinct cols are part of the GB keys, in which case
    // we still need to add them to the output col names.
    outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
  }
  ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(),
      outputValueColumnNames, colInfoLst, colExprMap, false, false);
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
          outputKeyColumnNames, outputValueColumnNames, true, -1,
          getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo),
          AcidUtils.Operation.NOT_ACID),
      new RowSchema(colInfoLst), mapGB);
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
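Here the RowSchema for the ReduceSinkOperator is accumulated as a side effect of building the reduce keys and values: each expression contributes an internal column name, a ColumnInfo for the schema, and a colExprMap entry that is later attached with setColumnExprMap. The sketch below isolates that per-column bookkeeping; registerColumn is a hypothetical helper and a simplification, since the real getReduceKeysForRS/getValueKeysForRS handle additional details such as distinct columns and grouping-set keys.

  // Hypothetical helper: one internal name feeds three parallel structures -- the output
  // name list, the ColumnInfo list backing the RowSchema, and the name-to-expression map.
  static void registerColumn(ExprNodeDesc expr, int position,
      List<String> outputNames, ArrayList<ColumnInfo> colInfoLst,
      Map<String, ExprNodeDesc> colExprMap) {
    String internalName = SemanticAnalyzer.getColumnInternalName(position);
    outputNames.add(internalName);
    colInfoLst.add(new ColumnInfo(internalName, expr.getTypeInfo(), "", false));
    colExprMap.put(internalName, expr);
  }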
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceSelectOperatorProcess.
/**
 * This method replaces the original SelectOperator with a new
 * SelectOperator that carries an additional column, indexed_key_column.
 */
private void replaceSelectOperatorProcess(SelectOperator operator) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  // We need to set the colList, outputColumnNames, colExprMap and rowSchema only for the
  // SelectOperator that precedes the GroupByOperator:
  // count(indexed_key_column) needs to be replaced by
  // sum(`_count_of_indexed_key_column`).
  List<ExprNodeDesc> selColList = operator.getConf().getColList();
  selColList.add(rewriteQueryCtx.getAggrExprNode());
  List<String> selOutputColNames = operator.getConf().getOutputColumnNames();
  selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());
  operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(),
      rewriteQueryCtx.getAggrExprNode());
  RowSchema selRS = operator.getSchema();
  List<ColumnInfo> selRSSignature = selRS.getSignature();
  // Need to create a new type for the Column[_count_of_indexed_key_column] node
  PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
  pti.setTypeName("bigint");
  ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false);
  selRSSignature.add(newCI);
  selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
  operator.setSchema(selRS);
}
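The pattern here is extending an existing operator's RowSchema in place: add a ColumnInfo to the signature list and write the schema back onto the operator. A minimal sketch of that pattern, with appendColumn and its parameters as hypothetical names:

  // Hypothetical helper: append one column to an operator's existing RowSchema, mirroring
  // the signature-add / setSignature / setSchema sequence above.
  static void appendColumn(Operator<?> op, String colName, TypeInfo type) {
    RowSchema schema = op.getSchema();
    List<ColumnInfo> signature = schema.getSignature();
    signature.add(new ColumnInfo(colName, type, "", false));
    schema.setSignature((ArrayList<ColumnInfo>) signature);
    op.setSchema(schema);
  }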
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceTableScanProcess.
/**
 * This method replaces the original TableScanOperator with the new
 * TableScanOperator and metadata that scans over the index table rather than
 * scanning over the original table.
 */
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  String alias = rewriteQueryCtx.getAlias();
  // Need to remove the original TableScanOperators from these data structures
  // and add new ones
  HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
  // remove original TableScanOperator
  topOps.remove(alias);
  String indexTableName = rewriteQueryCtx.getIndexName();
  Table indexTableHandle = null;
  try {
    indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
  } catch (HiveException e) {
    LOG.error("Error while getting the table handle for index table.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  // construct a new descriptor for the index table scan
  TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
  indexTableScanDesc.setGatherStats(false);
  String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
  indexTableScanDesc.setStatsAggPrefix(k);
  scanOperator.setConf(indexTableScanDesc);
  // Construct the new RowResolver for the new TableScanOperator
  ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
  try {
    StructObjectInspector rowObjectInspector =
        (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
    StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
    sigRS.add(new ColumnInfo(field.getFieldName(),
        TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
        indexTableName, false));
  } catch (SerDeException e) {
    LOG.error("Error while creating the RowResolver for new TableScanOperator.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  RowSchema rs = new RowSchema(sigRS);
  // Set row resolver for new table
  String newAlias = indexTableName;
  int index = alias.lastIndexOf(":");
  if (index >= 0) {
    newAlias = alias.substring(0, index) + ":" + indexTableName;
  }
  // Scan operator now points to other table
  scanOperator.getConf().setAlias(newAlias);
  scanOperator.setAlias(indexTableName);
  topOps.put(newAlias, scanOperator);
  rewriteQueryCtx.getParseContext().setTopOps(topOps);
  ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs,
      Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}
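This example derives the single index-key ColumnInfo from the index table's deserializer before wrapping it in a RowSchema. The sketch below shows the same derivation generalized to every struct field of a table; schemaFromTable is a hypothetical helper, not part of the rewrite context, and it assumes the same serde/typeinfo classes used in the snippet above.

  // Hypothetical helper: build a RowSchema for a table by walking its deserializer's
  // struct fields, as the example does for the single index key.
  static RowSchema schemaFromTable(Table tbl, String tabAlias) throws SemanticException {
    ArrayList<ColumnInfo> cols = new ArrayList<ColumnInfo>();
    try {
      StructObjectInspector rowOI =
          (StructObjectInspector) tbl.getDeserializer().getObjectInspector();
      for (StructField field : rowOI.getAllStructFieldRefs()) {
        cols.add(new ColumnInfo(field.getFieldName(),
            TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
            tabAlias, false));
      }
    } catch (SerDeException e) {
      throw new SemanticException(e.getMessage(), e);
    }
    return new RowSchema(cols);
  }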