Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class AnnotateRunTimeStatsOptimizer, method setRuntimeStatsDir.
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx) throws SemanticException {
  try {
    OperatorDesc conf = op.getConf();
    if (conf != null) {
      LOG.info("setRuntimeStatsDir for " + op.getOperatorId());
      String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(), op.getOperatorId()).toString();
      StatsPublisher statsPublisher = new FSStatsPublisher();
      StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf());
      runtimeStatsContext.setStatsTmpDir(path);
      if (!statsPublisher.init(runtimeStatsContext)) {
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
      }
      conf.setRuntimeStatsTmpDir(path);
    } else {
      LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null");
    }
  } catch (HiveException e) {
    throw new SemanticException(e);
  }
}
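A pattern worth noting here: SemanticException is the single checked exception surfaced to the parse/optimizer layer, so lower-level HiveExceptions are caught and rethrown with the original exception preserved as the cause. A minimal, self-contained sketch of that idiom follows; the class and helper names are hypothetical, and only the two exception types and the SemanticException(Throwable) constructor come from the snippet above.

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class WrapHiveExceptionExample {

  // Hypothetical lower-level operation that fails with a HiveException.
  private static void initStatsPublisher() throws HiveException {
    throw new HiveException("stats publisher could not be initialized");
  }

  // Callers in the parse layer only have to handle SemanticException;
  // the HiveException stays attached as the cause for diagnostics.
  public static void configure() throws SemanticException {
    try {
      initStatsPublisher();
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }
}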
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method genReduceSink.
@SuppressWarnings({ "rawtypes", "unchecked" })
private static ReduceSinkOperator genReduceSink(Operator<?> input, String tableAlias, ExprNodeDesc[] keys, int tag,
    ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers,
    Operation acidOperation, HiveConf hiveConf) throws SemanticException {
  // dummy for backtracking
  Operator dummy = Operator.createDummy();
  dummy.setParentOperators(Arrays.asList(input));
  ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
  // Compute join keys and store in reduceKeys
  for (ExprNodeDesc key : keys) {
    reduceKeys.add(key);
    reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input));
  }
  // Walk over the input schema and copy in the output
  ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
  ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ColumnInfo> inputColumns = input.getSchema().getSignature();
  ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
  List<String> outputColumnNames = new ArrayList<String>();
  int[] index = new int[inputColumns.size()];
  for (int i = 0; i < inputColumns.size(); i++) {
    ColumnInfo colInfo = inputColumns.get(i);
    String outputColName = colInfo.getInternalName();
    ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo);
    // backtrack can be null when input is script operator
    ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input);
    int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
    if (kindex >= 0) {
      ColumnInfo newColInfo = new ColumnInfo(colInfo);
      newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
      newColInfo.setAlias(outputColName);
      newColInfo.setTabAlias(tableAlias);
      outputColumns.add(newColInfo);
      index[i] = kindex;
      continue;
    }
    int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
    if (vindex >= 0) {
      index[i] = -vindex - 1;
      continue;
    }
    index[i] = -reduceValues.size() - 1;
    reduceValues.add(expr);
    reduceValuesBack.add(exprBack);
    ColumnInfo newColInfo = new ColumnInfo(colInfo);
    newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
    newColInfo.setAlias(outputColName);
    newColInfo.setTabAlias(tableAlias);
    outputColumns.add(newColInfo);
    outputColumnNames.add(outputColName);
  }
  dummy.setParentOperators(null);
  // Use only 1 reducer if no reduce keys
  if (reduceKeys.size() == 0) {
    numReducers = 1;
    // Cartesian product is not supported in strict mode
    String error = StrictChecks.checkCartesian(hiveConf);
    if (error != null) {
      throw new SemanticException(error);
    }
  }
  ReduceSinkDesc rsDesc;
  if (order.isEmpty()) {
    rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, reduceKeys.size(), numReducers, acidOperation);
  } else {
    rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, partitionCols, order, nullOrder, numReducers, acidOperation);
  }
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputColumns), input);
  List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
  for (int i = 0; i < keyColNames.size(); i++) {
    colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
  }
  List<String> valColNames = rsDesc.getOutputValueColumnNames();
  for (int i = 0; i < valColNames.size(); i++) {
    colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
  }
  rsOp.setValueIndex(index);
  rsOp.setColumnExprMap(colExprMap);
  rsOp.setInputAliases(input.getSchema().getTableNames().toArray(new String[input.getSchema().getTableNames().size()]));
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]");
  }
  return rsOp;
}
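The index array built above uses a compact sign-based encoding when it is handed to rsOp.setValueIndex: entries greater than or equal to zero point into the reduce KEY columns, while columns carried in VALUE are stored as -(value position) - 1 so that position 0 stays unambiguous. The standalone illustration below shows only that arithmetic; it is not Hive code, and the class and method names are made up for the example.

public class ValueIndexEncodingExample {

  // A key column keeps its key position k (k >= 0); a value column at position v
  // is encoded as -v - 1, which is always negative.
  static int encodeValuePosition(int valuePosition) {
    return -valuePosition - 1;
  }

  static boolean isKeyColumn(int encoded) {
    return encoded >= 0;
  }

  static int decodeValuePosition(int encoded) {
    return -encoded - 1;
  }

  public static void main(String[] args) {
    System.out.println(encodeValuePosition(0));  // -1
    System.out.println(decodeValuePosition(-1)); //  0
    System.out.println(isKeyColumn(2));          //  true
  }
}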
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method translateJoin.
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
  // 0. Additional data structures needed for the join optimization
  //    through Hive
  String[] baseSrc = new String[joinRel.getInputs().size()];
  String tabAlias = getHiveDerivedTableAlias();
  // 1. Convert inputs
  OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
  List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = dispatch(joinRel.getInput(i));
    children.add(inputs[i].inputs.get(0));
    baseSrc[i] = inputs[i].tabAlias;
  }
  // 2. Generate tags
  for (int tag = 0; tag < children.size(); tag++) {
    ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
    reduceSinkOp.getConf().setTag(tag);
  }
  // 3. Virtual columns
  Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
  newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
  if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
    int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int i = 1; i < inputs.length; i++) {
      newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
      shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
  }
  // 4. Extract join key expressions from HiveSortExchange
  ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
  for (int i = 0; i < inputs.length; i++) {
    joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions();
  }
  // 5. Extract the rest of the join predicate info, i.e. the join conditions
  //    that are not part of the join key and will be added as filters
  List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }
  List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
  for (int i = 0; i < joinFilters.size(); i++) {
    List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
    if (joinFilters.get(i) != null) {
      for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
        ExprNodeDesc expr = convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
        filterExpressionsForInput.add(expr);
      }
    }
    filterExpressions.add(filterExpressionsForInput);
  }
  // 6. Generate Join operator
  JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
  // 7. Return result
  return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
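In step 3, the Calcite virtual-column ordinals of every input after the first are shifted by the combined schema width of the inputs to its left, so that they address the correct position in the flattened join row. The sketch below illustrates just that shift arithmetic, under the assumption that HiveCalciteUtil.shiftVColsSet offsets each ordinal by the given amount; the class and method names are illustrative, not Hive source.

import java.util.LinkedHashSet;
import java.util.Set;

public class VColShiftExample {

  // Offset every column ordinal of one join input by the total width of the
  // inputs that precede it in the flattened join schema.
  static Set<Integer> shift(Set<Integer> vcols, int shift) {
    Set<Integer> shifted = new LinkedHashSet<>();
    for (int col : vcols) {
      shifted.add(col + shift);
    }
    return shifted;
  }

  public static void main(String[] args) {
    Set<Integer> rightInputVcols = new LinkedHashSet<>();
    rightInputVcols.add(0);
    rightInputVcols.add(2);
    // If the left input has 5 columns, the right input's ordinals 0 and 2
    // become 5 and 7 in the joined row.
    System.out.println(shift(rightInputVcols, 5)); // [5, 7]
  }
}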
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method visit.
OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(exchangeRel.getInput());
  String tabAlias = inputOpAf.tabAlias;
  if (tabAlias == null || tabAlias.length() == 0) {
    tabAlias = getHiveDerivedTableAlias();
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
  }
  RelDistribution distribution = exchangeRel.getDistribution();
  if (distribution.getType() != Type.HASH_DISTRIBUTED) {
    throw new SemanticException("Only hash distribution supported for LogicalExchange");
  }
  ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()];
  for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) {
    expressions[index] = convertToExprNode(exchangeRel.getJoinKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf);
  }
  exchangeRel.setJoinExpressions(expressions);
  ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveConf);
  return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class RewriteQueryUsingAggregateIndexCtx, method replaceGroupByOperatorProcess.
/**
 * We need to replace the count(indexed_column_key) GenericUDAF aggregation
 * function of the group-by construct with the "sum" GenericUDAF. This method
 * builds a new operator tree for a sample query that creates a GroupByOperator
 * with a sum aggregation function, and then uses that GroupByOperator's
 * information to replace the aggregation information of the original
 * GroupByOperator: the AggregationDesc (aggregation descriptor) of the old
 * GroupByOperator is replaced with the new AggregationDesc of the new
 * GroupByOperator.
 */
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  // We need to replace the GroupByOperator which is before RS
  if (index == 0) {
    // the query contains the sum aggregation GenericUDAF
    String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
        + " from `" + rewriteQueryCtx.getIndexName() + "` group by " + rewriteQueryCtx.getIndexKey() + " ";
    // retrieve the operator tree for the query, and the required GroupByOperator from it
    Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(
        rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
    // we get our new GroupByOperator here
    GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
    if (newGbyOperator == null) {
      throw new SemanticException("Error replacing GroupBy operator.");
    }
    // we need this information to set the correct colList, outputColumnNames
    // in SelectOperator
    ExprNodeColumnDesc aggrExprNode = null;
    // Construct the new AggregationDesc to get rid of the current
    // internal names and replace them with new internal names
    // as required by the operator tree
    GroupByDesc newConf = newGbyOperator.getConf();
    List<AggregationDesc> newAggrList = newConf.getAggregators();
    if (newAggrList != null && newAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : newAggrList) {
        rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
        aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
        rewriteQueryCtx.setAggrExprNode(aggrExprNode);
      }
    }
    // Now the GroupByOperator has the new AggregationList;
    // sum(`_count_of_indexed_key`) instead of count(indexed_key)
    GroupByDesc oldConf = operator.getConf();
    oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
    operator.setConf(oldConf);
  } else {
    // we just need to reset the GenericUDAFEvaluator and its name for this
    // GroupByOperator whose parent is the ReduceSinkOperator
    GroupByDesc childConf = operator.getConf();
    List<AggregationDesc> childAggrList = childConf.getAggregators();
    if (childAggrList != null && childAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : childAggrList) {
        List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
        List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
        for (ExprNodeDesc expr : paraList) {
          parametersOIList.add(expr.getWritableObjectInspector());
        }
        GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
        aggregationDesc.setGenericUDAFEvaluator(evaluator);
        aggregationDesc.setGenericUDAFName("sum");
      }
    }
  }
}
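The else branch resolves a fresh "sum" evaluator through FunctionRegistry.getGenericUDAFEvaluator; consistent with the throws clause of the enclosing method above, the sketch below also declares SemanticException. It is a minimal, self-contained illustration of that lookup for a single bigint argument; the class name is hypothetical, and the writableLongObjectInspector parameter is an assumption chosen only so the example compiles on its own.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SumEvaluatorLookupExample {

  // Resolve the "sum" GenericUDAFEvaluator for one bigint parameter, mirroring the
  // call in the else branch above (isDistinct = false, isAllColumns = false).
  public static GenericUDAFEvaluator sumOfLong() throws SemanticException {
    List<ObjectInspector> parameterOIs = new ArrayList<>();
    parameterOIs.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
    return FunctionRegistry.getGenericUDAFEvaluator("sum", parameterOIs, false, false);
  }
}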