Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class Registry, method getQualifiedFunctionInfoUnderLock:
private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName) throws SemanticException {
  FunctionInfo info = mFunctions.get(qualifiedName);
  if (info != null && info.isBlockedFunction()) {
    throw new SemanticException("UDF " + qualifiedName + " is not allowed");
  }
  if (!isNative && info != null && info.isDiscarded()) {
    // the persistent function is discarded. try reload
    mFunctions.remove(qualifiedName);
    return null;
  }
  // and if necessary load the JARs in this thread.
  if (isNative && info != null && info.isPersistent()) {
    return registerToSessionRegistry(qualifiedName, info);
  }
  if (info != null || !isNative) {
    // We have the UDF, or we are in the session registry (or both).
    return info;
  }
  // If we are in the system registry and this feature is enabled, try to get it from metastore.
  SessionState ss = SessionState.get();
  HiveConf conf = (ss == null) ? null : ss.getConf();
  if (conf == null || !HiveConf.getBoolVar(conf, ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) {
    return null;
  }
  // This is a little bit weird. We'll do the MS call outside of the lock. Our caller calls us
  // under lock, so we'd preserve the lock state for them; their finally block will release the
  // lock correctly. See the comment on the lock field - the locking needs to be reworked.
  lock.unlock();
  try {
    return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf);
  } finally {
    lock.lock();
  }
}
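The notable pattern here is the lock handoff around the metastore call: the method is entered with the registry lock held, drops it for the slow external lookup, and re-acquires it so the caller's own finally block stays balanced. Below is a minimal standalone sketch of that pattern using only the JDK; the class, the Map-backed cache, and the Supplier standing in for the metastore call are all hypothetical, not Hive API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;

// Simplified sketch of the "unlock around a slow external call" pattern used above.
// The method is entered with the lock held and must return with the lock held.
public class LockHandoffSketch {
  private final ReentrantLock lock = new ReentrantLock();
  private final Map<String, String> cache = new ConcurrentHashMap<>();

  // Caller is expected to hold 'lock' when invoking this, mirroring getQualifiedFunctionInfoUnderLock.
  String lookupUnderLock(String name, Supplier<String> slowExternalLookup) {
    String cached = cache.get(name);
    if (cached != null) {
      return cached;
    }
    // Drop the lock for the slow call so other threads are not blocked on it...
    lock.unlock();
    try {
      String loaded = slowExternalLookup.get();
      if (loaded != null) {
        cache.put(name, loaded);
      }
      return loaded;
    } finally {
      // ...and re-acquire it so the caller's own finally { lock.unlock(); } stays balanced.
      lock.lock();
    }
  }

  public static void main(String[] args) {
    LockHandoffSketch sketch = new LockHandoffSketch();
    sketch.lock.lock();
    try {
      System.out.println(sketch.lookupUnderLock("my_udf", () -> "loaded-from-metastore"));
    } finally {
      sketch.lock.unlock();
    }
  }
}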
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class IndexWhereProcessor, method process:
/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  TableScanOperator operator = (TableScanOperator) nd;
  List<Node> opChildren = operator.getChildren();
  TableScanDesc operatorDesc = operator.getConf();
  if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
    return null;
  }
  List<Index> indexes = tsToIndices.get(operator);
  ExprNodeDesc predicate = operatorDesc.getFilterExpr();
  IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
  ParseContext pctx = context.getParseContext();
  LOG.info("Processing predicate for index optimization");
  if (predicate == null) {
    LOG.info("null predicate pushed down");
    return null;
  }
  LOG.info(predicate.getExprString());
  // check if we have indexes on all partitions in this table scan
  Set<Partition> queryPartitions;
  try {
    queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
    if (queryPartitions == null) {
      // partitions not covered
      return null;
    }
  } catch (HiveException e) {
    LOG.error("Fatal Error: problem accessing metastore", e);
    throw new SemanticException(e);
  }
  // we can only process MapReduce tasks to check input size
  if (!context.getCurrentTask().isMapRedTask()) {
    return null;
  }
  MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
  // get potential reentrant index queries from each index
  Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
  // make sure we have an index on the table being scanned
  TableDesc tblDesc = operator.getTableDesc();
  Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
  for (Index indexOnTable : indexes) {
    if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
      List<Index> newType = new ArrayList<Index>();
      newType.add(indexOnTable);
      indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
    } else {
      indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
    }
  }
  // choose the index type with the most indexes of the same type on the table
  // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
  List<Index> bestIndexes = indexesByType.values().iterator().next();
  for (List<Index> indexTypes : indexesByType.values()) {
    if (bestIndexes.size() < indexTypes.size()) {
      bestIndexes = indexTypes;
    }
  }
  // rewrite index queries for the chosen index type
  HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
  tmpQueryContext.setQueryPartitions(queryPartitions);
  rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
  List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
  if (indexTasks != null && indexTasks.size() > 0) {
    queryContexts.put(bestIndexes.get(0), tmpQueryContext);
  }
  // choose an index rewrite to use
  if (queryContexts.size() > 0) {
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    Index chosenIndex = queryContexts.keySet().iterator().next();
    // modify the parse context to use indexing
    // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
    HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
    // prepare the map reduce job to use indexing
    MapWork work = currentTask.getWork().getMapWork();
    work.setInputformat(queryContext.getIndexInputFormat());
    work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
    // modify inputs based on index query
    Set<ReadEntity> inputs = pctx.getSemanticInputs();
    inputs.addAll(queryContext.getAdditionalSemanticInputs());
    List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
    // add dependencies so index query runs first
    insertIndexQuery(pctx, context, chosenRewrite);
  }
  return null;
}
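The index selection above reduces to grouping the table's indexes by handler class and keeping the largest group, with no cost model (see the HIVE-2130 TODOs). Here is a standalone sketch of just that grouping step; FakeIndex and chooseBestIndexes are hypothetical stand-ins, not Hive classes.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch of the "group indexes by handler class, keep the biggest group" choice in process().
public class IndexGroupingSketch {
  // Stand-in for org.apache.hadoop.hive.metastore.api.Index; only the handler class matters here.
  static class FakeIndex {
    final String name;
    final String handlerClass;
    FakeIndex(String name, String handlerClass) {
      this.name = name;
      this.handlerClass = handlerClass;
    }
  }

  static List<FakeIndex> chooseBestIndexes(List<FakeIndex> indexes) {
    Map<String, List<FakeIndex>> byType = new HashMap<>();
    for (FakeIndex idx : indexes) {
      byType.computeIfAbsent(idx.handlerClass, k -> new ArrayList<>()).add(idx);
    }
    // Same heuristic as the processor: no costing, just the type with the most indexes.
    List<FakeIndex> best = null;
    for (List<FakeIndex> group : byType.values()) {
      if (best == null || group.size() > best.size()) {
        best = group;
      }
    }
    return best;
  }

  public static void main(String[] args) {
    List<FakeIndex> indexes = List.of(
        new FakeIndex("idx_a", "CompactIndexHandler"),
        new FakeIndex("idx_b", "CompactIndexHandler"),
        new FakeIndex("idx_c", "BitmapIndexHandler"));
    System.out.println(chooseBestIndexes(indexes).size() + " index(es) of the winning type");
  }
}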
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class TableBasedIndexHandler, method generateIndexBuildTaskList:
@Override
public List<Task<?>> generateIndexBuildTaskList(org.apache.hadoop.hive.ql.metadata.Table baseTbl, org.apache.hadoop.hive.metastore.api.Index index, List<Partition> indexTblPartitions, List<Partition> baseTblPartitions, org.apache.hadoop.hive.ql.metadata.Table indexTbl, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException {
  try {
    TableDesc desc = Utilities.getTableDesc(indexTbl);
    List<Partition> newBaseTblPartitions = new ArrayList<Partition>();
    List<Task<?>> indexBuilderTasks = new ArrayList<Task<?>>();
    if (!baseTbl.isPartitioned()) {
      // the table does not have any partition, then create index for the
      // whole table
      Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false, new PartitionDesc(desc, null), indexTbl.getTableName(), new PartitionDesc(Utilities.getTableDesc(baseTbl), null), baseTbl.getTableName(), indexTbl.getDbName());
      indexBuilderTasks.add(indexBuilder);
    } else {
      // the table is partitioned: build one index task per partition of the index table
      for (int i = 0; i < indexTblPartitions.size(); i++) {
        Partition indexPart = indexTblPartitions.get(i);
        Partition basePart = null;
        for (int j = 0; j < baseTblPartitions.size(); j++) {
          if (baseTblPartitions.get(j).getName().equals(indexPart.getName())) {
            basePart = baseTblPartitions.get(j);
            newBaseTblPartitions.add(baseTblPartitions.get(j));
            break;
          }
        }
        if (basePart == null) {
          throw new RuntimeException("Partitions of base table and index table are inconsistent.");
        }
        // for each partition, spawn a map reduce task.
        Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true, new PartitionDesc(indexPart), indexTbl.getTableName(), new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
        indexBuilderTasks.add(indexBuilder);
      }
    }
    return indexBuilderTasks;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
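The nested loop above pairs each index-table partition with the base-table partition of the same name and fails fast on any mismatch. Below is a simplified sketch of that pairing, with partitions reduced to their name strings (hypothetical helper, not Hive API); building a set of base partition names first avoids the quadratic scan.

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

// Sketch of the partition pairing done in generateIndexBuildTaskList: every index-table
// partition must have a base-table partition with the same name, otherwise the build aborts.
public class PartitionPairingSketch {
  static List<String> pairByName(List<String> indexPartNames, List<String> basePartNames) {
    // Index the base partition names once instead of re-scanning them for every index partition.
    Set<String> baseNames = new LinkedHashSet<>(basePartNames);
    List<String> matchedBaseParts = new ArrayList<>();
    for (String indexPart : indexPartNames) {
      if (!baseNames.contains(indexPart)) {
        throw new RuntimeException("Partitions of base table and index table are inconsistent.");
      }
      matchedBaseParts.add(indexPart);
    }
    return matchedBaseParts;
  }

  public static void main(String[] args) {
    System.out.println(pairByName(
        List.of("ds=2024-01-01"),
        List.of("ds=2024-01-01", "ds=2024-01-02")));
  }
}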
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method visit:
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(sortRel.getInput());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
    if (sortRel.getCollation() == RelCollations.EMPTY) {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
    } else if (sortRel.fetch == null) {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
    } else {
      LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
    }
  }
  Operator<?> inputOp = inputOpAf.inputs.get(0);
  Operator<?> resultOp = inputOpAf.inputs.get(0);
  // 1. If we need to sort tuples based on the value of some of their columns
  if (sortRel.getCollation() != RelCollations.EMPTY) {
    // In strict mode, in the presence of order by, limit must be specified.
    if (sortRel.fetch == null) {
      String error = StrictChecks.checkNoLimit(hiveConf);
      if (error != null) {
        throw new SemanticException(error);
      }
    }
    // 1.a. Extract order for each column from collation
    // Generate sortCols and order
    ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
    ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
    Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
    List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
      int sortColumnPos = sortInfo.getFieldIndex();
      ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
      ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
      sortCols.add(sortColumn);
      if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
        order.append("-");
      } else {
        order.append("+");
      }
      if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
        nullOrder.append("a");
      } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
        nullOrder.append("z");
      } else {
        // Default
        nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
      }
      if (obRefToCallMap != null) {
        RexNode obExpr = obRefToCallMap.get(sortColumnPos);
        sortColsPosBuilder.set(sortColumnPos);
        if (obExpr == null) {
          sortOutputColsPosBuilder.set(sortColumnPos);
        }
      }
    }
    // Use only 1 reducer for order by
    int numReducers = 1;
    // We keep only the columns that are part of the final output
    List<String> keepColumns = new ArrayList<String>();
    final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
    final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
    final ArrayList<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
    for (int pos = 0; pos < inputSchema.size(); pos++) {
      if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos)) || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
        keepColumns.add(inputSchema.get(pos).getInternalName());
      }
    }
    // 1.b. Generate reduce sink and project operator
    resultOp = genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns);
  }
  // 2. If we need to generate limit
  if (sortRel.fetch != null) {
    int limit = RexLiteral.intValue(sortRel.fetch);
    int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
    LimitDesc limitDesc = new LimitDesc(offset, limit);
    ArrayList<ColumnInfo> cinfoLst = createColInfos(resultOp);
    resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
    }
  }
  // 3. Return result
  return inputOpAf.clone(resultOp);
}
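The sort keys above are encoded as two parallel strings, one character per key: '+' or '-' for direction, and 'a' or 'z' for nulls-first or nulls-last, with the default derived from the direction. Below is a self-contained sketch of that encoding, with RelFieldCollation replaced by a hypothetical Collation stub.

import java.util.List;

// Sketch of how the sort key descriptors above are encoded: one character per key for
// direction ('+'/'-') and one for null ordering ('a' = nulls first, 'z' = nulls last).
public class SortOrderEncodingSketch {
  enum Direction { ASCENDING, DESCENDING }
  enum NullDirection { FIRST, LAST, UNSPECIFIED }

  // Stand-in for RelFieldCollation: just a direction and a null direction.
  static class Collation {
    final Direction direction;
    final NullDirection nullDirection;
    Collation(Direction direction, NullDirection nullDirection) {
      this.direction = direction;
      this.nullDirection = nullDirection;
    }
  }

  static String[] encode(List<Collation> collations) {
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    for (Collation c : collations) {
      order.append(c.direction == Direction.DESCENDING ? '-' : '+');
      if (c.nullDirection == NullDirection.FIRST) {
        nullOrder.append('a');
      } else if (c.nullDirection == NullDirection.LAST) {
        nullOrder.append('z');
      } else {
        // Default mirrors the code above: nulls come last for descending keys, first otherwise.
        nullOrder.append(c.direction == Direction.DESCENDING ? 'z' : 'a');
      }
    }
    return new String[] { order.toString(), nullOrder.toString() };
  }

  public static void main(String[] args) {
    String[] encoded = encode(List.of(
        new Collation(Direction.ASCENDING, NullDirection.UNSPECIFIED),
        new Collation(Direction.DESCENDING, NullDirection.FIRST)));
    System.out.println(encoded[0] + " / " + encoded[1]); // prints "+- / aa"
  }
}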
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class HiveOpConverter, method updateExprNode:
/*
 * This method updates the input expr, changing all the
 * ExprNodeColumnDesc in it to refer to columns given by the
 * colExprMap.
 *
 * For instance, "col_0 = 1" would become "VALUE.col_0 = 1";
 * the execution engine expects filters in the Join operators
 * to be expressed that way.
 */
private static int updateExprNode(ExprNodeDesc expr, final Map<String, Byte> reversedExprs, final Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
  int inputPos = -1;
  if (expr instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
    List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc functionChild : func.getChildren()) {
      if (functionChild instanceof ExprNodeColumnDesc) {
        String colRef = functionChild.getExprString();
        int pos = reversedExprs.get(colRef);
        if (pos != -1) {
          if (inputPos == -1) {
            inputPos = pos;
          } else if (inputPos != pos) {
            throw new SemanticException("UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
          }
        }
        newChildren.add(colExprMap.get(colRef));
      } else {
        int pos = updateExprNode(functionChild, reversedExprs, colExprMap);
        if (pos != -1) {
          if (inputPos == -1) {
            inputPos = pos;
          } else if (inputPos != pos) {
            throw new SemanticException("UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
          }
        }
        newChildren.add(functionChild);
      }
    }
    func.setChildren(newChildren);
  }
  return inputPos;
}
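updateExprNode walks a function expression, swaps every column reference for its mapped expression, and insists that all columns resolve to a single join input position. The sketch below reproduces that shape on a tiny hypothetical expression tree (Expr, Column, Const, Func are stand-ins, not Hive classes, and missing lookups default to -1 instead of the direct map access used above).

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Sketch of the recursive rewrite in updateExprNode: column references inside a function
// expression are replaced via a map, and every column seen must resolve to the same
// join input position, otherwise the rewrite is rejected.
public class ExprRewriteSketch {
  interface Expr { }
  static class Column implements Expr {
    final String name;
    Column(String name) { this.name = name; }
  }
  static class Const implements Expr {
    final Object value;
    Const(Object value) { this.value = value; }
  }
  static class Func implements Expr {
    final String op;
    List<Expr> children;
    Func(String op, List<Expr> children) { this.op = op; this.children = children; }
  }

  /** Returns the single input position referenced, or -1 if no column was seen. */
  static int rewrite(Expr expr, Map<String, Integer> positions, Map<String, Expr> replacements) {
    int inputPos = -1;
    if (expr instanceof Func) {
      Func func = (Func) expr;
      List<Expr> newChildren = new ArrayList<>();
      for (Expr child : func.children) {
        int pos;
        if (child instanceof Column) {
          String name = ((Column) child).name;
          pos = positions.getOrDefault(name, -1);
          newChildren.add(replacements.getOrDefault(name, child));
        } else {
          pos = rewrite(child, positions, replacements);
          newChildren.add(child);
        }
        if (pos != -1) {
          if (inputPos == -1) {
            inputPos = pos;
          } else if (inputPos != pos) {
            throw new IllegalStateException("Expression references more than one join input position");
          }
        }
      }
      func.children = newChildren;
    }
    return inputPos;
  }

  public static void main(String[] args) {
    // Mirrors the "col_0 = 1" example from the comment above.
    List<Expr> children = new ArrayList<>();
    children.add(new Column("col_0"));
    children.add(new Const(1));
    Func filter = new Func("=", children);
    Map<String, Integer> positions = Map.of("col_0", 1);
    Map<String, Expr> replacements = Map.of("col_0", new Column("VALUE.col_0"));
    System.out.println("join input position: " + rewrite(filter, positions, replacements));
  }
}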