
Example 41 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

The class TezCompiler, method generateTaskTree.

@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenTezUtils utils = new GenTezUtils();
    GenTezWork genTezWork = new GenTezWork(utils);
    GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and" + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
    opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());
    opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    SemanticGraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
    // we need to specify the reserved memory for each work that contains Map Join
    for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
        for (BaseWork w : baseWorkList) {
            // work should be the smallest unit for memory allocation
            w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
        }
    }
    // we need to clone some operator plans and remove union operators still
    int indexForTezUnion = 0;
    for (BaseWork w : procCtx.workWithUnionOperators) {
        GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
    }
    // then we make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
        GenTezUtils.processFileSink(procCtx, fileSink);
    }
    // Connect any edges required for min/max pushdown
    if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
        for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
            // Process min/max
            GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
        }
    }
    // and finally we hook up any events that need to be sent to the tez AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
        LOG.debug("Handling AppMasterEventOperator: " + event);
        GenTezUtils.processAppMasterEvent(procCtx, event);
    }
    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
Also used: Node (org.apache.hadoop.hive.ql.lib.Node), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), ArrayList (java.util.ArrayList), LinkedHashMap (java.util.LinkedHashMap), BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork), SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp), SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker), AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator), ReduceSinkMapJoinProc (org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc), CompositeProcessor (org.apache.hadoop.hive.ql.lib.CompositeProcessor), MergeJoinProc (org.apache.hadoop.hive.ql.optimizer.MergeJoinProc), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher), SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)
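The pattern above, reduced to its skeleton: fetch the session's PerfLogger, open a span before the phase, and close it with a label when the phase ends. A minimal sketch, assuming only the calls already shown in Example 41; the try/finally is an addition here so the span is closed even if the phase throws (the original relies on normal control flow).

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class TimedCompilerPhase {
    public void runPhase() {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        // Open a span under the TEZ_COMPILER key for this caller.
        perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
        try {
            // ... phase work: build rules, walk the operator tree, etc.
        } finally {
            // The three-argument overload (used at the end of generateTaskTree)
            // attaches a label identifying which phase the measurement covers.
            perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "runPhase");
        }
    }
}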

Example 42 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

The class TezCompiler, method optimizeTaskPlan.

@Override
protected void optimizeTaskPlan(List<Task<?>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        physicalCtx = new CrossProductHandler().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }
    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        physicalCtx = new LlapPreVectorizationPass().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping llap pre-vectorization pass");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        physicalCtx = new Vectorizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping vectorization");
    }
    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }
    if ((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)) && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) {
        physicalCtx = new MemoryDecider().resolve(physicalCtx);
    }
    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
        physicalCtx = new LlapDecider(llapInfo).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping llap decider");
    }
    // This optimizer will serialize all filters that made it to the
    // table scan operator to avoid having to do it multiple times on
    // the backend. If you have a physical optimization that changes
    // table scans or filters, you have to invoke it before this one.
    physicalCtx = new SerializeFilter().resolve(physicalCtx);
    if (physicalCtx.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }
    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
}
Also used: LlapDecider (org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider), LlapClusterStateForCompile (org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer), MemoryDecider (org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider), MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer), PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext), LlapPreVectorizationPass (org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass), NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer), Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer), SerializeFilter (org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter), CrossProductHandler (org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler), StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)
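optimizeTaskPlan times the whole resolver chain under a single TEZ_COMPILER span, so individual passes are indistinguishable in the log. A hedged sketch of per-pass timing, assuming PerfLogger keys are plain strings (they are passed as String in the calls above); the key "TezCompiler.Vectorizer" is a hypothetical label, not a Hive constant.

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.session.SessionState;

public class PerPassTiming {
    // Wraps a single resolver pass in its own PerfLogger span.
    PhysicalContext resolveVectorizerTimed(PhysicalContext physicalCtx) throws SemanticException {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.perfLogBegin(this.getClass().getName(), "TezCompiler.Vectorizer");
        try {
            return new Vectorizer().resolve(physicalCtx);
        } finally {
            // Closing in finally keeps the measurement valid even if the pass throws.
            perfLogger.perfLogEnd(this.getClass().getName(), "TezCompiler.Vectorizer");
        }
    }
}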

Example 43 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

The class PartitionPruner, method getPartitionsFromServer.

private static PrunedPartitionList getPartitionsFromServer(Table tab, final String key, final ExprNodeDesc compactExpr, HiveConf conf, String alias, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter) throws SemanticException {
    try {
        // Check the filter for non-built-in UDFs. If these are present, we cannot
        // do the filtering on the server and must fall back to the client path.
        boolean doEvalClientSide = hasUserFunctions(compactExpr);
        // Now filter.
        List<Partition> partitions = new ArrayList<Partition>();
        boolean hasUnknownPartitions = false;
        PerfLogger perfLogger = SessionState.getPerfLogger();
        if (!doEvalClientSide) {
            perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
            try {
                hasUnknownPartitions = Hive.get().getPartitionsByExpr(tab, compactExpr, conf, partitions);
            } catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
                // TODO: backward compat for Hive <= 0.12. Can be removed later.
                LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
                doEvalClientSide = true;
            } finally {
                perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
            }
        }
        if (doEvalClientSide) {
            // Either we have user functions, or metastore is old version - filter names locally.
            hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);
        }
        // Flag the list as containing unknown partitions if pruning reported any, or if the
        // filter was not exact, i.e. we sent a partial expression to the
        // metastore and so some partitions may have no data based on other filters.
        return new PrunedPartitionList(tab, key, new LinkedHashSet<Partition>(partitions), new ArrayList<String>(partColsUsedInFilter), hasUnknownPartitions || !isPruningByExactFilter);
    } catch (SemanticException e) {
        throw e;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), ArrayList (java.util.ArrayList), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList)
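Unlike Examples 41 and 42, this method closes its PARTITION_RETRIEVING span in a finally block, so the timer ends even when the metastore call throws and the code falls back to the client path. A minimal sketch of that fallback shape; fetchFromServer and fetchFromClient are hypothetical stand-ins for the metastore and client-side paths.

import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public class ServerFirstFetch {
    private static final String CLASS_NAME = ServerFirstFetch.class.getName();

    void fetch() {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        boolean fallBackToClient = false;
        perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
        try {
            fetchFromServer();          // fast path: filter in the metastore
        } catch (UnsupportedOperationException e) {
            fallBackToClient = true;    // server cannot evaluate the filter
        } finally {
            // End the span whether the server call succeeded or not.
            perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
        }
        if (fallBackToClient) {
            fetchFromClient();          // slow path: evaluate the filter client-side
        }
    }

    private void fetchFromServer() { /* hypothetical */ }
    private void fetchFromClient() { /* hypothetical */ }
}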

Example 44 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

The class PartitionPruner, method pruneBySequentialScan.

/**
 * Prunes partitions by first fetching the partition names and then pruning them with the Hive
 * expression evaluator on the client.
 * @param tab the table containing the partitions.
 * @param partitions the resulting partitions.
 * @param prunerExpr the SQL predicate that involves partition columns.
 * @param conf the Hive configuration object; must not be null.
 * @return true iff the partition pruning expression contains non-partition columns.
 */
private static boolean pruneBySequentialScan(Table tab, List<Partition> partitions, ExprNodeDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
    List<String> partNames = Hive.get().getPartitionNames(tab.getDbName(), tab.getTableName(), (short) -1);
    String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    List<String> partCols = extractPartColNames(tab);
    List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
    boolean hasUnknownPartitions = prunePartitionNames(partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    if (!partNames.isEmpty()) {
        partitions.addAll(Hive.get().getPartitionsByNames(tab, partNames));
    }
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return hasUnknownPartitions;
}
Also used: PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 45 with PerfLogger

Use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

The class PartitionPruner, method getAllPartitions.

private static Set<Partition> getAllPartitions(Table tab) throws HiveException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    // Time the metastore call that fetches every partition of the table.
    Set<Partition> result = Hive.get().getAllPartitionsOf(tab);
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
    return result;
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)
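Across Examples 41 through 45 the shape never changes: begin a span, do the work, end the span. A hedged sketch of a small helper that enforces the pairing; PerfSpans and timed are hypothetical, not part of Hive.

import java.util.concurrent.Callable;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.session.SessionState;

public final class PerfSpans {
    private PerfSpans() {
    }

    // Runs the body under the given PerfLogger key and always closes the span.
    public static <T> T timed(String caller, String method, Callable<T> body) throws Exception {
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.perfLogBegin(caller, method);
        try {
            return body.call();
        } finally {
            perfLogger.perfLogEnd(caller, method);
        }
    }
}

With it, Example 45 would shrink to a single call such as PerfSpans.timed(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING, () -> Hive.get().getAllPartitionsOf(tab)).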

Aggregations

PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger): 60 usages
ArrayList (java.util.ArrayList): 22 usages
IOException (java.io.IOException): 21 usages
LockException (org.apache.hadoop.hive.ql.lockmgr.LockException): 16 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 13 usages
ExecutionException (java.util.concurrent.ExecutionException): 11 usages
Path (org.apache.hadoop.fs.Path): 11 usages
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11 usages
TException (org.apache.thrift.TException): 11 usages
HiveMetaException (org.apache.hadoop.hive.metastore.HiveMetaException): 10 usages
AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException): 9 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 usages
FileNotFoundException (java.io.FileNotFoundException): 8 usages
UnknownHostException (java.net.UnknownHostException): 8 usages
LinkedList (java.util.LinkedList): 8 usages
JDODataStoreException (javax.jdo.JDODataStoreException): 8 usages
ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList): 8 usages
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 8 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 8 usages