Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class TezCompiler, method generateTaskTree.
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
  ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
  GenTezUtils utils = new GenTezUtils();
  GenTezWork genTezWork = new GenTezWork(utils);
  GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
  // create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack. The dispatcher generates the plan from the operator tree
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and"
      + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
  opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
  opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());
  opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  SemanticGraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
  // we need to specify the reserved memory for each work that contains Map Join
  for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
    for (BaseWork w : baseWorkList) {
      // work should be the smallest unit for memory allocation
      w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
    }
  }
  // we need to clone some operator plans and remove union operators still
  int indexForTezUnion = 0;
  for (BaseWork w : procCtx.workWithUnionOperators) {
    GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
  }
  // then we make sure the file sink operators are set up right
  for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
    GenTezUtils.processFileSink(procCtx, fileSink);
  }
  // Connect any edges required for min/max pushdown
  if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
    for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
      // Process min/max
      GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
    }
  }
  // and finally we hook up any events that need to be sent to the tez AM
  LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
  for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
    LOG.debug("Handling AppMasterEventOperator: " + event);
    GenTezUtils.processAppMasterEvent(procCtx, event);
  }
  perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
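The begin/end bracketing above is the standard way a compiler phase reports its timing: fetch the session's PerfLogger, open a named span, do the work, and close the span with an optional sub-phase label. A minimal sketch of just that pattern, using only the calls the method above already uses (the class and label names here are illustrative, not part of the Hive source):

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class TimedPhaseSketch {
  protected void runPhase() {
    // The PerfLogger is scoped to the current Hive session.
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    try {
      // ... the phase's actual work would go here ...
    } finally {
      // The optional third argument records a sub-phase label with the timing.
      perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "runPhase");
    }
  }
}

Note that generateTaskTree itself does not use try/finally, so a SemanticException thrown mid-phase leaves the span open; the sketch adds the guard as a defensive variant.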
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class TezCompiler, method optimizeTaskPlan.
@Override
protected void optimizeTaskPlan(List<Task<?>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
  PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
  if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
    physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping null scan query optimization");
  }
  if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
    physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping metadata only query optimization");
  }
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
    physicalCtx = new CrossProductHandler().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping cross product analysis");
  }
  if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
    physicalCtx = new LlapPreVectorizationPass().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping llap pre-vectorization pass");
  }
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
    physicalCtx = new Vectorizer().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping vectorization");
  }
  if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
    physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
  } else {
    LOG.debug("Skipping stage id rearranger");
  }
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)
      && conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) {
    physicalCtx = new MemoryDecider().resolve(physicalCtx);
  }
  if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
    LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
    physicalCtx = new LlapDecider(llapInfo).resolve(physicalCtx);
  } else {
    LOG.debug("Skipping llap decider");
  }
  // This optimizer will serialize all filters that made it to the
  // table scan operator to avoid having to do it multiple times on
  // the backend. If you have a physical optimization that changes
  // table scans or filters, you have to invoke it before this one.
  physicalCtx = new SerializeFilter().resolve(physicalCtx);
  if (physicalCtx.getContext().getExplainAnalyze() != null) {
    new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
  }
  perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
}
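Every optimization in the chain above follows the same convention: a pass accepts the PhysicalContext and returns it (possibly rewritten), which is what lets optimizeTaskPlan string the passes together behind feature flags. A minimal sketch of a pass skeleton under that convention (the class name is hypothetical; PhysicalPlanResolver is the interface these Hive passes implement):

import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical no-op pass showing the resolve-chain shape used above.
public class NoOpPass implements PhysicalPlanResolver {
  @Override
  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    // Inspect or rewrite the task plan held by pctx here, then return the
    // context so the next pass in the chain can pick it up.
    return pctx;
  }
}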
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class PartitionPruner, method getPartitionsFromServer.
private static PrunedPartitionList getPartitionsFromServer(Table tab, final String key, final ExprNodeDesc compactExpr, HiveConf conf, String alias, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter) throws SemanticException {
  try {
    // Finally, check the filter for non-built-in UDFs. If these are present, we cannot
    // do filtering on the server, and have to fall back to the client path.
    boolean doEvalClientSide = hasUserFunctions(compactExpr);
    // Now filter.
    List<Partition> partitions = new ArrayList<Partition>();
    boolean hasUnknownPartitions = false;
    PerfLogger perfLogger = SessionState.getPerfLogger();
    if (!doEvalClientSide) {
      perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
      try {
        hasUnknownPartitions = Hive.get().getPartitionsByExpr(tab, compactExpr, conf, partitions);
      } catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
        // TODO: backward compat for Hive <= 0.12. Can be removed later.
        LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
        doEvalClientSide = true;
      } finally {
        perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
      }
    }
    if (doEvalClientSide) {
      // Either we have user functions, or the metastore is an old version - filter names locally.
      hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);
    }
    // The partitions are "unknown" if the call says so due to the expression
    // evaluator returning null for a partition, or if we sent a partial expression to the
    // metastore and so some partitions may have no data based on other filters.
    return new PrunedPartitionList(tab, key, new LinkedHashSet<Partition>(partitions), new ArrayList<String>(partColsUsedInFilter), hasUnknownPartitions || !isPruningByExactFilter);
  } catch (SemanticException e) {
    throw e;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
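The control flow above is a server-first/client-fallback shape: push the filter to the metastore when possible, and only evaluate locally when the expression contains user functions or the metastore is too old to evaluate it. A self-contained sketch of just that shape (everything here is hypothetical illustration, not Hive API):

import java.util.ArrayList;
import java.util.List;

public class FallbackPruneSketch {
  static class IncompatibleServerException extends Exception { }

  // Fast path: pretend the server rejects the expression, as an old metastore would.
  static List<String> pruneOnServer(String expr) throws IncompatibleServerException {
    throw new IncompatibleServerException();
  }

  // Slow path: fetch all names and filter locally.
  static List<String> pruneOnClient(String expr) {
    List<String> names = new ArrayList<>();
    names.add("ds=2024-01-01");
    return names;
  }

  public static void main(String[] args) {
    String expr = "ds = '2024-01-01'";
    List<String> result;
    try {
      result = pruneOnServer(expr);
    } catch (IncompatibleServerException e) {
      // Same recovery Hive performs on IncompatibleMetastoreException.
      result = pruneOnClient(expr);
    }
    System.out.println(result);
  }
}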
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class PartitionPruner, method pruneBySequentialScan.
/**
 * Prunes partitions by first fetching the partition names and then evaluating
 * the pruning expression with the Hive expression evaluator on the client.
 * @param tab the table containing the partitions.
 * @param partitions the resulting partitions.
 * @param prunerExpr the SQL predicate that involves partition columns.
 * @param conf Hive configuration object; must not be null.
 * @return true iff the partition pruning expression contains non-partition columns.
 */
private static boolean pruneBySequentialScan(Table tab, List<Partition> partitions, ExprNodeDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
  List<String> partNames = Hive.get().getPartitionNames(tab.getDbName(), tab.getTableName(), (short) -1);
  String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
  List<String> partCols = extractPartColNames(tab);
  List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
  boolean hasUnknownPartitions = prunePartitionNames(partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  if (!partNames.isEmpty()) {
    partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames));
  }
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  return hasUnknownPartitions;
}
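Because the listing and retrieval stages run under distinct keys (PRUNE_LISTING and PARTITION_RETRIEVING), their costs can be read back separately afterwards. A short sketch of querying the session PerfLogger for those spans; this assumes PerfLogger exposes a getDuration accessor, which may vary across Hive versions:

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class PruneTimingReport {
  // Hedged sketch: report the per-stage timings recorded by pruneBySequentialScan.
  static void report() {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    long listingMs = perfLogger.getDuration(PerfLogger.PRUNE_LISTING);
    long retrievingMs = perfLogger.getDuration(PerfLogger.PARTITION_RETRIEVING);
    System.out.println("prune listing: " + listingMs + " ms, partition retrieval: " + retrievingMs + " ms");
  }
}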
Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.
Class PartitionPruner, method getAllPartitions.
private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  return result;
}
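One thing to note: as written, getAllPartitions never reaches perfLogEnd if getAllPartitionsOf throws, leaving the PARTITION_RETRIEVING span open. getPartitionsFromServer above guards against this with try/finally; the same guard applied here would look like the following (a suggested variant, not the Hive source):

private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  try {
    return Hive.get().getAllPartitionsOf(tab);
  } finally {
    // Close the timing span even if the metastore call throws.
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
  }
}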