use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
the class ColumnAccessAnalyzer method analyzeColumnAccess.
public ColumnAccessInfo analyzeColumnAccess(ColumnAccessInfo columnAccessInfo) throws SemanticException {
  if (columnAccessInfo == null) {
    columnAccessInfo = new ColumnAccessInfo();
  }
  Collection<TableScanOperator> topOps = pGraphContext.getTopOps().values();
  for (TableScanOperator top : topOps) {
    // If a table is inside a view, we do not care about its authorization.
    if (!top.isInsideView()) {
      Table table = top.getConf().getTableMetadata();
      String tableName = table.getCompleteName();
      List<String> referenced = top.getReferencedColumns();
      for (String column : referenced) {
        columnAccessInfo.add(tableName, column);
      }
      if (table.isPartitioned()) {
        PrunedPartitionList parts = pGraphContext.getPrunedPartitions(table.getTableName(), top);
        if (parts.getReferredPartCols() != null) {
          for (String partKey : parts.getReferredPartCols()) {
            columnAccessInfo.add(tableName, partKey);
          }
        }
      }
    }
  }
  return columnAccessInfo;
}
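For orientation, here is a minimal sketch of how the analyzer might be driven. The `pctx` variable and the `getTableToColumnAccessMap()` accessor on ColumnAccessInfo are assumptions for illustration, not taken from the snippet above.

import java.util.List;
import java.util.Map;

// Hypothetical driver: run the analyzer over an already-compiled plan and
// dump every (table -> columns) entry it recorded. Assumes a ParseContext
// `pctx` from semantic analysis is in scope, and that ColumnAccessInfo
// exposes its table-to-columns map via getTableToColumnAccessMap().
ColumnAccessAnalyzer analyzer = new ColumnAccessAnalyzer(pctx);
ColumnAccessInfo info = analyzer.analyzeColumnAccess(null);
for (Map.Entry<String, List<String>> e : info.getTableToColumnAccessMap().entrySet()) {
  System.out.println(e.getKey() + " -> " + e.getValue());
}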
use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceTableScanProcess.
/**
 * This method replaces the original TableScanOperator with a new
 * TableScanOperator and metadata that scans over the index table rather
 * than over the original table.
 */
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  String alias = rewriteQueryCtx.getAlias();
  // We need to remove the original TableScanOperator from these data
  // structures and add the new one.
  HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
  // Remove the original TableScanOperator.
  topOps.remove(alias);
  String indexTableName = rewriteQueryCtx.getIndexName();
  Table indexTableHandle = null;
  try {
    indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
  } catch (HiveException e) {
    LOG.error("Error while getting the table handle for index table.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  // Construct a new descriptor for the index table scan.
  TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
  indexTableScanDesc.setGatherStats(false);
  String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
  indexTableScanDesc.setStatsAggPrefix(k);
  scanOperator.setConf(indexTableScanDesc);
  // Construct the new RowResolver for the new TableScanOperator.
  ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
  try {
    StructObjectInspector rowObjectInspector =
        (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
    StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
    sigRS.add(new ColumnInfo(field.getFieldName(),
        TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
        indexTableName, false));
  } catch (SerDeException e) {
    LOG.error("Error while creating the RowResolver for new TableScanOperator.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  RowSchema rs = new RowSchema(sigRS);
  // Set the row resolver for the new table, preserving any alias prefix.
  String newAlias = indexTableName;
  int index = alias.lastIndexOf(":");
  if (index >= 0) {
    newAlias = alias.substring(0, index) + ":" + indexTableName;
  }
  // The scan operator now points to the index table.
  scanOperator.getConf().setAlias(newAlias);
  scanOperator.setAlias(indexTableName);
  topOps.put(newAlias, scanOperator);
  rewriteQueryCtx.getParseContext().setTopOps(topOps);
  ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs,
      Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}
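One subtle piece above is the alias bookkeeping: the alias may carry a scope prefix separated by ":", so the rewrite must keep that prefix and swap only the trailing table name. That string logic in isolation, as a runnable plain-Java sketch (no Hive dependencies; the method name and example values are mine):

// Standalone illustration of the newAlias computation above: keep any
// "scope:name" prefix and substitute the index table name for the
// trailing segment.
static String rewriteAlias(String alias, String indexTableName) {
  int idx = alias.lastIndexOf(':');
  if (idx >= 0) {
    return alias.substring(0, idx) + ":" + indexTableName;
  }
  return indexTableName;
}
// rewriteAlias("subq1:src", "default__src_idx__") -> "subq1:default__src_idx__"
// rewriteAlias("src",       "default__src_idx__") -> "default__src_idx__"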
use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
the class ExprProcFactory method findSourceColumn.
private static boolean findSourceColumn(LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
  for (Map.Entry<String, TableScanOperator> topOpMap : lctx.getParseCtx().getTopOps().entrySet()) {
    TableScanOperator tableScanOp = topOpMap.getValue();
    Table tbl = tableScanOp.getConf().getTableMetadata();
    if (tbl.getTableName().equals(tabAlias) || tabAlias.equals(tableScanOp.getConf().getAlias())) {
      for (FieldSchema column : tbl.getCols()) {
        if (column.getName().equals(alias)) {
          TableAliasInfo table = new TableAliasInfo();
          table.setTable(tbl.getTTable());
          table.setAlias(tabAlias);
          BaseColumnInfo colInfo = new BaseColumnInfo();
          colInfo.setColumn(column);
          colInfo.setTabAlias(table);
          cond.getBaseCols().add(colInfo);
          return true;
        }
      }
    }
  }
  return false;
}
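The method is a linear first-match scan over the top-level table scans: a table matches if either its physical name or the scan's query alias equals tabAlias, and then the first column with the requested name wins. The same control flow, reduced to plain Java with hypothetical stand-in types (not Hive's classes):

import java.util.List;

// Illustrative stand-in, not a Hive type.
class SourceTable {
  String tableName;   // physical table name
  String queryAlias;  // alias used in the query, may be null
  List<String> columns;
}

// First-match lookup mirroring findSourceColumn: match the table by
// name or alias, then match the column by name.
static boolean containsSourceColumn(List<SourceTable> tables, String tabAlias, String colName) {
  for (SourceTable t : tables) {
    if (t.tableName.equals(tabAlias) || tabAlias.equals(t.queryAlias)) {
      for (String c : t.columns) {
        if (c.equals(colName)) {
          return true;
        }
      }
    }
  }
  return false;
}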
use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
the class CommonJoinTaskDispatcher method mergeMapJoinTaskIntoItsChildMapRedTask.
/*
 * A task and its child task have been converted from join to mapjoin.
 * See if the two tasks can be merged.
 */
private void mergeMapJoinTaskIntoItsChildMapRedTask(MapRedTask mapJoinTask, Configuration conf) throws SemanticException {
  // Check if mapJoinTask has a single child; if so, see if we can merge
  // mapJoinTask into that child. With no child task, or more than one
  // child task, we don't want to do anything.
  if (mapJoinTask.getChildTasks() == null || mapJoinTask.getChildTasks().size() > 1) {
    return;
  }
  Task<? extends Serializable> childTask = mapJoinTask.getChildTasks().get(0);
  if (!(childTask instanceof MapRedTask)) {
    // Nothing to do if it is not a MapReduce task.
    return;
  }
  MapRedTask childMapRedTask = (MapRedTask) childTask;
  MapWork mapJoinMapWork = mapJoinTask.getWork().getMapWork();
  MapWork childMapWork = childMapRedTask.getWork().getMapWork();
  Map<String, Operator<? extends OperatorDesc>> mapJoinAliasToWork = mapJoinMapWork.getAliasToWork();
  if (mapJoinAliasToWork.size() > 1) {
    // Do not merge if the MapredWork of MapJoin has multiple input aliases.
    return;
  }
  Entry<String, Operator<? extends OperatorDesc>> mapJoinAliasToWorkEntry =
      mapJoinAliasToWork.entrySet().iterator().next();
  String mapJoinAlias = mapJoinAliasToWorkEntry.getKey();
  TableScanOperator mapJoinTaskTableScanOperator =
      OperatorUtils.findSingleOperator(mapJoinAliasToWorkEntry.getValue(), TableScanOperator.class);
  if (mapJoinTaskTableScanOperator == null) {
    throw new SemanticException("Expected a " + TableScanOperator.getOperatorName()
        + " operator as the work associated with alias " + mapJoinAlias + ". Found a "
        + mapJoinAliasToWork.get(mapJoinAlias).getName() + " operator.");
  }
  FileSinkOperator mapJoinTaskFileSinkOperator =
      OperatorUtils.findSingleOperator(mapJoinTaskTableScanOperator, FileSinkOperator.class);
  if (mapJoinTaskFileSinkOperator == null) {
    throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName()
        + " operator at the last operator of the MapJoin Task.");
  }
  // The mapJoinTaskFileSinkOperator writes to a different directory.
  Path childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName();
  List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
  if (childMRAliases == null || childMRAliases.size() != 1) {
    return;
  }
  String childMRAlias = childMRAliases.get(0);
  // Sanity check to make sure there is no alias conflict after the merge.
  for (Entry<Path, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    List<String> aliases = entry.getValue();
    if (path.equals(childMRPath)) {
      continue;
    }
    if (aliases.contains(mapJoinAlias)) {
      // An alias conflict should not happen here.
      return;
    }
  }
  MapredLocalWork mapJoinLocalWork = mapJoinMapWork.getMapRedLocalWork();
  MapredLocalWork childLocalWork = childMapWork.getMapRedLocalWork();
  if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null)
      || (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
    // We should relax this constraint with a follow-up jira.
    return;
  }
  // Check that the total size of the local tables after the merge
  // is under the limit.
  if (!isLocalTableTotalSizeUnderLimitAfterMerge(conf, mapJoinLocalWork, childLocalWork)) {
    // Do not merge.
    return;
  }
  TableScanOperator childMRTaskTableScanOperator = OperatorUtils.findSingleOperator(
      childMapWork.getAliasToWork().get(childMRAlias.toString()), TableScanOperator.class);
  if (childMRTaskTableScanOperator == null) {
    throw new SemanticException("Expected a " + TableScanOperator.getOperatorName()
        + " operator as the work associated with alias " + childMRAlias + ". Found a "
        + childMapWork.getAliasToWork().get(childMRAlias).getName() + " operator.");
  }
  List<Operator<? extends OperatorDesc>> parentsInMapJoinTask =
      mapJoinTaskFileSinkOperator.getParentOperators();
  List<Operator<? extends OperatorDesc>> childrenInChildMRTask =
      childMRTaskTableScanOperator.getChildOperators();
  if (parentsInMapJoinTask.size() > 1 || childrenInChildMRTask.size() > 1) {
    // Do not merge if we do not know how to connect two operator trees.
    return;
  }
  // Step 2: Merge mapJoinTask into the Map-side of its child.
  // Step 2.1: Connect the operator trees of the two MapRedTasks.
  Operator<? extends OperatorDesc> parentInMapJoinTask = parentsInMapJoinTask.get(0);
  Operator<? extends OperatorDesc> childInChildMRTask = childrenInChildMRTask.get(0);
  parentInMapJoinTask.replaceChild(mapJoinTaskFileSinkOperator, childInChildMRTask);
  childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);
  // Step 2.2: Replace the corresponding part of childMRWork's MapWork.
  GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias.toString(), mapJoinMapWork, childMapWork);
  // Step 2.3: Fill in the local work.
  if (mapJoinLocalWork != null) {
    if (childLocalWork == null) {
      childMapWork.setMapRedLocalWork(mapJoinLocalWork);
    } else {
      childLocalWork.getAliasToFetchWork().putAll(mapJoinLocalWork.getAliasToFetchWork());
      childLocalWork.getAliasToWork().putAll(mapJoinLocalWork.getAliasToWork());
    }
  }
  // Step 2.4: Remove this MapJoin task.
  List<Task<? extends Serializable>> parentTasks = mapJoinTask.getParentTasks();
  mapJoinTask.setParentTasks(null);
  mapJoinTask.setChildTasks(null);
  childMapRedTask.getParentTasks().remove(mapJoinTask);
  if (parentTasks != null) {
    childMapRedTask.getParentTasks().addAll(parentTasks);
    for (Task<? extends Serializable> parentTask : parentTasks) {
      parentTask.getChildTasks().remove(mapJoinTask);
      if (!parentTask.getChildTasks().contains(childMapRedTask)) {
        parentTask.getChildTasks().add(childMapRedTask);
      }
    }
  } else {
    if (physicalContext.getRootTasks().contains(mapJoinTask)) {
      physicalContext.removeFromRootTask(mapJoinTask);
      if (childMapRedTask.getParentTasks() != null
          && childMapRedTask.getParentTasks().size() == 0
          && !physicalContext.getRootTasks().contains(childMapRedTask)) {
        physicalContext.addToRootTask(childMapRedTask);
      }
    }
  }
  if (childMapRedTask.getParentTasks().size() == 0) {
    childMapRedTask.setParentTasks(null);
  }
}
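Step 2.4 above is generic DAG surgery: detach the merged task and reattach its former parents to the surviving child without duplicating edges. The same logic on a toy task class (illustrative only; this is not Hive's Task API):

import java.util.ArrayList;
import java.util.List;

// Toy model of the task-graph splice in Step 2.4: remove `merged` from
// the DAG and connect its former parents directly to `child`, avoiding
// duplicate edges in either direction.
class TaskNode {
  final String name;
  final List<TaskNode> parents = new ArrayList<>();
  final List<TaskNode> children = new ArrayList<>();
  TaskNode(String name) { this.name = name; }

  static void spliceOut(TaskNode merged, TaskNode child) {
    child.parents.remove(merged);
    for (TaskNode parent : merged.parents) {
      parent.children.remove(merged);
      if (!parent.children.contains(child)) {
        parent.children.add(child);
      }
      if (!child.parents.contains(parent)) {
        child.parents.add(parent);
      }
    }
    merged.parents.clear();
    merged.children.clear();
  }
}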
use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
the class RewriteGBUsingIndex method shouldApplyOptimization.
/**
 * We traverse the current operator tree to check for conditions in which the
 * optimization cannot be applied.
 *
 * At the end, we check if all conditions have passed for rewrite. If yes, we
 * determine if the index is usable for rewrite. Else, we log the condition which
 * did not meet the rewrite criterion.
 *
 * @return true if the rewrite should be applied to at least one operator tree
 * @throws SemanticException
 */
boolean shouldApplyOptimization() throws SemanticException {
  Map<Table, List<Index>> tableToIndex = getIndexesForRewrite();
  if (tableToIndex.isEmpty()) {
    LOG.debug("No valid index found to apply rewrite, skipping " + getName() + " optimization");
    return false;
  }
  /*
   * This code iterates over each TableScanOperator in the topOps map of ParseContext.
   * For each operator tree originating from a top TableScanOperator, we determine
   * whether the optimization can be applied. If yes, we add the name of the top
   * table to tsOpToProcess so the rewrite can be applied later on.
   */
  for (Map.Entry<String, TableScanOperator> entry : parseContext.getTopOps().entrySet()) {
    String alias = entry.getKey();
    TableScanOperator topOp = entry.getValue();
    Table table = topOp.getConf().getTableMetadata();
    List<Index> indexes = tableToIndex.get(table);
    if (indexes == null || indexes.isEmpty()) {
      continue;
    }
    if (table.isPartitioned()) {
      // Check whether the index is built on all partitions of the table.
      // If not, then we do not apply the optimization.
      if (!checkIfIndexBuiltOnAllTablePartitions(topOp, indexes)) {
        LOG.debug("Index is not built for all table partitions, skipping " + getName() + " optimization");
        continue;
      }
    }
    // Check if the rewrite can be applied to this operator tree
    // (the case where there are no partitions on the base table).
    checkIfRewriteCanBeApplied(alias, topOp, table, indexes);
  }
  return !tsOpToProcess.isEmpty();
}
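For context, this check typically gates the optimizer's transform entry point: the plan is rewritten only when at least one top TableScanOperator qualified. A minimal sketch of that shape, following Hive's Transform convention; the body here is an assumption for illustration, not copied from RewriteGBUsingIndex:

// Hypothetical sketch of the enclosing transform: run the checks, and
// only rewrite the plan if shouldApplyOptimization() collected at least
// one usable operator tree in tsOpToProcess.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  parseContext = pctx;
  hiveConf = parseContext.getConf();
  if (shouldApplyOptimization()) {
    // rewriteOriginalQuery() is assumed here: it would replace each
    // qualifying TableScanOperator with a scan over the index table.
    rewriteOriginalQuery();
  }
  return parseContext;
}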