Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class AbstractSMBJoinProc, method canConvertJoinToBucketMapJoin.
// Can the join operator be converted to a bucket map-merge join operator?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
// This has already been inspected and rejected
if (context.getRejectedJoinOps().contains(joinOp)) {
return false;
}
if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
return false;
}
Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
try {
String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
bigTableMatcherClass = JavaUtils.loadClass(selector);
} catch (ClassNotFoundException e) {
throw new SemanticException(e.getMessage());
}
BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
JoinDesc joinDesc = joinOp.getConf();
JoinCondDesc[] joinCondns = joinDesc.getConds();
Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
if (joinCandidates.isEmpty()) {
// There are no big-table candidates, i.e. this is a full outer join, which can never be a map-join of any type. So return false.
return false;
}
int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
if (bigTablePosition < 0) {
// The big-table matcher could not pick a position, e.g. because the join contains aliases from a sub-query.
return false;
}
context.setBigTablePosition(bigTablePosition);
String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
// get the join keys from parent ReduceSink operators
for (Operator<? extends OperatorDesc> parentOp : parentOps) {
ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
Byte tag = (byte) rsconf.getTag();
List<ExprNodeDesc> keys = rsconf.getKeyCols();
keyExprMap.put(tag, keys);
}
context.setKeyExprMap(keyExprMap);
// Make a deep copy of the aliases so that they are not changed in the context
String[] joinSrcs = joinOp.getConf().getBaseSrc();
String[] srcs = new String[joinSrcs.length];
for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
srcs[srcPos] = new String(joinSrcs[srcPos]);
}
// Given a candidate map-join, check whether this join can be converted. The candidate was derived from the pluggable sorted-merge-join big-table matcher.
return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
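The pivot of this check is the pluggable big-table selector: its class name comes from configuration, the class is loaded reflectively, and a ClassNotFoundException is rethrown as a SemanticException. Below is a minimal, self-contained sketch of that plugin-loading idiom; the Selector, ConfigException, and loadSelector names are hypothetical stand-ins, not Hive's API.

import java.util.Set;

// Hypothetical stand-in for BigTableSelectorForAutoSMJ.
interface Selector {
    int pickBigTable(Set<Integer> candidates);
}

// Hypothetical stand-in for SemanticException: a domain-level checked exception.
class ConfigException extends Exception {
    ConfigException(String message, Throwable cause) {
        super(message, cause);
    }
}

public class SelectorLoader {
    // Load the Selector implementation named in configuration, translating the
    // low-level reflection failure into a domain-level checked exception, just
    // as the Hive code rethrows ClassNotFoundException as a SemanticException.
    static Selector loadSelector(String className) throws ConfigException {
        try {
            return Class.forName(className)
                    .asSubclass(Selector.class)
                    .getDeclaredConstructor()
                    .newInstance();
        } catch (ReflectiveOperationException e) {
            throw new ConfigException("Cannot load selector: " + className, e);
        }
    }
}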
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class AccumuloPredicateHandler, method generateRanges.
/**
* Encapsulates the traversal over some {@link ExprNodeDesc} tree for the generation of Accumulo
* Ranges using expressions involving the Accumulo rowid-mapped Hive column.
*
* @param columnMapper
* Mapping of Hive to Accumulo columns for the query
* @param hiveRowIdColumnName
Name of the Hive column mapped to the Accumulo rowid
* @param root
Root of the ExprNodeDesc tree to traverse (the WHERE clause)
* @return An object representing the result from the ExprNodeDesc tree traversal using the
* AccumuloRangeGenerator
*/
protected Object generateRanges(ColumnMapper columnMapper, String hiveRowIdColumnName, ExprNodeDesc root) {
AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, columnMapper.getRowIdMapping(), hiveRowIdColumnName);
Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
GraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> roots = new ArrayList<Node>();
roots.add(root);
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
try {
ogw.startWalking(roots, nodeOutput);
} catch (SemanticException ex) {
throw new RuntimeException(ex);
}
return nodeOutput.get(root);
}
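The traversal uses Hive's generic rule-dispatch walker: a processor is wrapped in a dispatcher, a graph walker drives it over the tree, per-node results accumulate in the nodeOutput map, and the method returns whatever was recorded for the root. Below is a simplified, self-contained sketch of the same idea — dispatch a processor over every node, children first, and collect per-node outputs — with hypothetical ExprNode, NodeProcessor, and PostOrderWalker types rather than Hive's org.apache.hadoop.hive.ql.lib interfaces.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Hypothetical miniature of Hive's Node type.
interface ExprNode {
    List<ExprNode> children();
}

// Hypothetical miniature of Hive's NodeProcessor: receives a node plus the
// already-computed outputs of its children.
interface NodeProcessor {
    Object process(ExprNode node, List<Object> childResults) throws Exception;
}

class PostOrderWalker {
    private final NodeProcessor processor;

    PostOrderWalker(NodeProcessor processor) {
        this.processor = processor;
    }

    // Walk the tree bottom-up, recording every node's result in nodeOutput,
    // mirroring ogw.startWalking(roots, nodeOutput) in the Hive snippet; the
    // caller then reads nodeOutput.get(root) for the overall answer.
    void walk(ExprNode node, Map<ExprNode, Object> nodeOutput) throws Exception {
        List<Object> childResults = new ArrayList<>();
        for (ExprNode child : node.children()) {
            walk(child, nodeOutput);
            childResults.add(nodeOutput.get(child));
        }
        nodeOutput.put(node, processor.process(node, childResults));
    }
}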
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class RewriteQueryUsingAggregateIndexCtx, method replaceTableScanProcess.
/**
* This method replaces the original TableScanOperator with a new
* TableScanOperator and metadata that scan over the index table rather than
* over the original table.
*
*/
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
String alias = rewriteQueryCtx.getAlias();
// Need to remove the original TableScanOperators from these data structures
// and add new ones
HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
// remove original TableScanOperator
topOps.remove(alias);
String indexTableName = rewriteQueryCtx.getIndexName();
Table indexTableHandle = null;
try {
indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
} catch (HiveException e) {
LOG.error("Error while getting the table handle for index table.");
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
// construct a new descriptor for the index table scan
TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
indexTableScanDesc.setGatherStats(false);
String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
indexTableScanDesc.setStatsAggPrefix(k);
scanOperator.setConf(indexTableScanDesc);
// Construct the new RowResolver for the new TableScanOperator
ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
sigRS.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), indexTableName, false));
} catch (SerDeException e) {
LOG.error("Error while creating the RowResolver for new TableScanOperator.");
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
RowSchema rs = new RowSchema(sigRS);
// Set row resolver for new table
String newAlias = indexTableName;
int index = alias.lastIndexOf(":");
if (index >= 0) {
newAlias = alias.substring(0, index) + ":" + indexTableName;
}
// Scan operator now points to other table
scanOperator.getConf().setAlias(newAlias);
scanOperator.setAlias(indexTableName);
topOps.put(newAlias, scanOperator);
rewriteQueryCtx.getParseContext().setTopOps(topOps);
ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs, Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}
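A subtle step above is the alias rewrite: a Hive alias may carry a sub-query qualifier prefix separated by ':', and only the trailing table part should be replaced with the index table's name. A small runnable sketch of that string logic (rewriteAlias is a hypothetical helper, not part of Hive):

public class AliasRewrite {
    // Replace the table part of a possibly-qualified alias, keeping any
    // "subquery:" prefix intact, as the Hive snippet does with lastIndexOf(":").
    static String rewriteAlias(String alias, String indexTableName) {
        int index = alias.lastIndexOf(':');
        if (index >= 0) {
            return alias.substring(0, index) + ":" + indexTableName;
        }
        return indexTableName;
    }

    public static void main(String[] args) {
        System.out.println(rewriteAlias("t1", "idx_tbl"));       // idx_tbl
        System.out.println(rewriteAlias("subq1:t1", "idx_tbl")); // subq1:idx_tbl
    }
}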
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class CommonJoinTaskDispatcher, method mergeMapJoinTaskIntoItsChildMapRedTask.
/*
* A task and its child task have been converted from join to mapjoin.
* See if the two tasks can be merged.
*/
private void mergeMapJoinTaskIntoItsChildMapRedTask(MapRedTask mapJoinTask, Configuration conf) throws SemanticException {
// Step 1: Check whether mapJoinTask has a single child task. If so, check if we can merge mapJoinTask into that child.
if (mapJoinTask.getChildTasks() == null || mapJoinTask.getChildTasks().size() > 1) {
// There is no child task to merge into, or there are multiple child-tasks, in which case we don't want to do anything.
return;
}
Task<? extends Serializable> childTask = mapJoinTask.getChildTasks().get(0);
if (!(childTask instanceof MapRedTask)) {
// Nothing to do if it is not a MapReduce task.
return;
}
MapRedTask childMapRedTask = (MapRedTask) childTask;
MapWork mapJoinMapWork = mapJoinTask.getWork().getMapWork();
MapWork childMapWork = childMapRedTask.getWork().getMapWork();
Map<String, Operator<? extends OperatorDesc>> mapJoinAliasToWork = mapJoinMapWork.getAliasToWork();
if (mapJoinAliasToWork.size() > 1) {
// Do not merge if the MapredWork of MapJoin has multiple input aliases.
return;
}
Entry<String, Operator<? extends OperatorDesc>> mapJoinAliasToWorkEntry = mapJoinAliasToWork.entrySet().iterator().next();
String mapJoinAlias = mapJoinAliasToWorkEntry.getKey();
TableScanOperator mapJoinTaskTableScanOperator = OperatorUtils.findSingleOperator(mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class);
if (mapJoinTaskTableScanOperator == null) {
throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + mapJoinAlias + ". Found a " + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
}
FileSinkOperator mapJoinTaskFileSinkOperator = OperatorUtils.findSingleOperator(mapJoinTaskTableScanOperator, FileSinkOperator.class);
if (mapJoinTaskFileSinkOperator == null) {
throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() + " operator at the last operator of the MapJoin Task.");
}
// The mapJoinTaskFileSinkOperator writes to a different directory
Path childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName();
List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
if (childMRAliases == null || childMRAliases.size() != 1) {
return;
}
String childMRAlias = childMRAliases.get(0);
// Sanity check to make sure there is no alias conflict after merge.
for (Entry<Path, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
Path path = entry.getKey();
List<String> aliases = entry.getValue();
if (path.equals(childMRPath)) {
continue;
}
if (aliases.contains(mapJoinAlias)) {
// An alias conflict should not happen here.
return;
}
}
MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapRedLocalWork();
MapredLocalWork childLocalWork = childMapWork.getMapRedLocalWork();
if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
// Right now we do not handle the case where either side carries a bucket map join context. We should relax this constraint with a follow-up jira.
return;
}
// Do not merge unless the total size of the local tables after the merge is under the limit.
if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)) {
// Do not merge.
return;
}
TableScanOperator childMRTaskTableScanOperator = OperatorUtils.findSingleOperator(childMapWork.getAliasToWork().get(childMRAlias.toString()), TableScanOperator.class);
if (childMRTaskTableScanOperator == null) {
throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() + " operator as the work associated with alias " + childMRAlias + ". Found a " + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
}
List<Operator<? extends OperatorDesc>> parentsInMapJoinTask = mapJoinTaskFileSinkOperator.getParentOperators();
List<Operator<? extends OperatorDesc>> childrenInChildMRTask = childMRTaskTableScanOperator.getChildOperators();
if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
// Do not merge if we do not know how to connect two operator trees.
return;
}
// Step 2: Merge mapJoinTask into the Map-side of its child.
// Step 2.1: Connect the operator trees of two MapRedTasks.
Operator<? extends OperatorDesc> parentInMapJoinTask = parentsInMapJoinTask.get(0);
Operator<? extends OperatorDesc> childInChildMRTask = childrenInChildMRTask.get(0);
parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask);
childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);
// Step 2.2: Replace the corresponding part childMRWork's MapWork.
GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias.toString(), mapJoinMapWork, childMapWork);
// Step 2.3: Fill up stuff in local work
if (mapJoinLocalWork != null) {
if (childLocalWork == null) {
childMapWork.setMapRedLocalWork(mapJoinLocalWork);
} else {
childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
}
}
// Step 2.4: Remove this MapJoin task
List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
mapJoinTask.setParentTasks(null);
mapJoinTask.setChildTasks(null);
childMapRedTask.getParentTasks().remove(mapJoinTask);
if (parentTasks != null) {
childMapRedTask.getParentTasks().addAll(parentTasks);
for (Task<? extends Serializable> parentTask : parentTasks) {
parentTask.getChildTasks().remove(mapJoinTask);
if (!parentTask.getChildTasks().contains(childMapRedTask)) {
parentTask.getChildTasks().add(childMapRedTask);
}
}
} else {
if (physicalContext.getRootTasks().contains(mapJoinTask)) {
physicalContext.removeFromRootTask(mapJoinTask);
if (childMapRedTask.getParentTasks() != null && childMapRedTask.getParentTasks().size() == 0 && !physicalContext.getRootTasks().contains(childMapRedTask)) {
physicalContext.addToRootTask(childMapRedTask);
}
}
}
if (childMapRedTask.getParentTasks().size() == 0) {
childMapRedTask.setParentTasks(null);
}
}
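Step 2.4 amounts to a general DAG splice: the merged task is cut out of the task graph and each former parent is rewired to the surviving child, avoiding duplicate edges and promoting the child to a root task when no parents remain. Below is a self-contained sketch of that relinking, using a hypothetical Task class rather than Hive's task hierarchy.

import java.util.ArrayList;
import java.util.List;

// Hypothetical miniature of a task DAG node.
class Task {
    final List<Task> parents = new ArrayList<>();
    final List<Task> children = new ArrayList<>();

    // Remove 'merged' from the DAG and attach its parents directly to
    // 'survivor', mirroring Step 2.4 of the Hive code.
    static void splice(Task merged, Task survivor, List<Task> rootTasks) {
        survivor.parents.remove(merged);
        for (Task parent : new ArrayList<>(merged.parents)) {
            parent.children.remove(merged);
            if (!parent.children.contains(survivor)) {
                parent.children.add(survivor); // avoid duplicate edges
            }
            if (!survivor.parents.contains(parent)) {
                survivor.parents.add(parent);
            }
        }
        // If the merged task was a root and the survivor is now parentless,
        // the survivor becomes a root in its place.
        if (rootTasks.remove(merged) && survivor.parents.isEmpty()
                && !rootTasks.contains(survivor)) {
            rootTasks.add(survivor);
        }
        merged.parents.clear();
        merged.children.clear();
    }
}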
Use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.
The class PcrExprProcFactory, method evalExprWithPart.
static Object evalExprWithPart(ExprNodeDesc expr, Partition p, List<VirtualColumn> vcs) throws SemanticException {
StructObjectInspector rowObjectInspector;
Table tbl = p.getTable();
try {
rowObjectInspector = (StructObjectInspector) tbl.getDeserializer().getObjectInspector();
} catch (SerDeException e) {
throw new SemanticException(e);
}
try {
return PartExprEvalUtils.evalExprWithPart(expr, p, vcs, rowObjectInspector);
} catch (HiveException e) {
throw new SemanticException(e);
}
}
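Note that both catch blocks here wrap the cause with the SemanticException(Throwable) constructor, which preserves the original stack trace, whereas the first snippet's new SemanticException(e.getMessage()) keeps only the message. A minimal sketch of the cause-preserving translation idiom, with hypothetical LowLevelException and DomainException types standing in for SerDeException/HiveException and SemanticException:

// Hypothetical stand-in for SerDeException or HiveException.
class LowLevelException extends Exception {
    LowLevelException(String message) { super(message); }
}

// Hypothetical stand-in for SemanticException.
class DomainException extends Exception {
    DomainException(Throwable cause) { super(cause); }  // keeps the stack trace
    DomainException(String message) { super(message); } // drops it
}

public class ExceptionTranslation {
    static Object risky() throws LowLevelException {
        throw new LowLevelException("deserializer failure");
    }

    static Object evaluate() throws DomainException {
        try {
            return risky();
        } catch (LowLevelException e) {
            // Preferred: wrap with the cause so callers can still see where
            // the low-level failure originated.
            throw new DomainException(e);
        }
    }
}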