Search in sources :

Example 1 with EmbeddedAmbroseHiveProgressReporter

use of com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter in project ambrose by twitter.

The following example is from the class AmbroseHiveFinishHook, method postDriverRun.

@Override
public void postDriverRun(HiveDriverRunHookContext hookContext) {
    // Runs after the Hive driver completes: persists workflow stats (when a
    // fingerprint/version is available), prints per-job statistics, and — after the
    // last command of the script — keeps the embedded REST server alive briefly so
    // clients can fetch final state before shutdown.
    Configuration conf = hookContext.getConf();
    EmbeddedAmbroseHiveProgressReporter reporter = getEmbeddedProgressReporter();
    String workflowVersion = reporter.getWorkflowVersion();
    String queryId = AmbroseHiveUtil.getHiveQueryId(conf);
    if (workflowVersion == null) {
        LOG.warn("ScriptFingerprint not set for this script - not saving stats.");
    } else {
        Workflow workflow = new Workflow(queryId, workflowVersion, reporter.getJobs());
        outputStatsData(workflow);
        reporter.flushJsonToDisk();
    }
    displayStatistics();
    // Only the final command of a multi-statement script triggers server shutdown.
    if (!isLastCommandProcessed(hookContext)) {
        return;
    }
    reporter.restoreEventStack();
    String sleepTime = System.getProperty(POST_SCRIPT_SLEEP_SECS_PARAM, "10");
    try {
        int sleepTimeSeconds = Integer.parseInt(sleepTime);
        LOG.info("Script complete but sleeping for " + sleepTimeSeconds + " seconds to keep the HiveStats REST server running. Hit ctrl-c to exit.");
        Thread.sleep(sleepTimeSeconds * 1000L);
        reporter.stopServer();
    } catch (NumberFormatException e) {
        LOG.warn(POST_SCRIPT_SLEEP_SECS_PARAM + " param is not a valid number, not sleeping: " + sleepTime);
    } catch (InterruptedException e) {
        LOG.warn("Sleep interrupted", e);
        // Restore the interrupt status so callers further up the stack can
        // observe it; swallowing it silently loses the cancellation signal.
        Thread.currentThread().interrupt();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Workflow(com.twitter.ambrose.model.Workflow) EmbeddedAmbroseHiveProgressReporter(com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)

Example 2 with EmbeddedAmbroseHiveProgressReporter

use of com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter in project ambrose by twitter.

The following example is from the class AmbroseHiveFinishHook, method displayStatistics.

private void displayStatistics() {
    // Logs a summary line for every MapReduce job launched by the current Hive
    // session, mapping each job id back to its Ambrose DAG node id.
    EmbeddedAmbroseHiveProgressReporter reporter = getEmbeddedProgressReporter();
    Map<String, String> jobIdToNodeId = reporter.getJobIdToNodeId();
    LOG.info("MapReduce Jobs Launched: ");
    List<MapRedStats> statsList = SessionState.get().getLastMapRedStatsList();
    int index = 0;
    for (MapRedStats stats : statsList) {
        String jobId = stats.getJobId();
        String nodeId = jobIdToNodeId.get(jobId);
        LOG.info("Job " + index + " (" + jobId + ", " + nodeId + "): " + stats);
        index++;
    }
}
Also used : MapRedStats(org.apache.hadoop.hive.ql.MapRedStats) EmbeddedAmbroseHiveProgressReporter(com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)

Example 3 with EmbeddedAmbroseHiveProgressReporter

use of com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter in project ambrose by twitter.

The following example is from the class AmbroseHiveFailHook, method run.

@Override
public void run(HookContext hookContext) throws Exception {
    // Failure hook: scans completed task runners for failed (non-running, non-zero
    // exit) MapReduce jobs, marks their DAG nodes as failed, pushes JobFailedEvents,
    // then keeps the embedded REST server up briefly so clients can observe the
    // failure before shutdown.
    HiveConf conf = hookContext.getConf();
    Properties allConfProps = conf.getAllProperties();
    String queryId = AmbroseHiveUtil.getHiveQueryId(conf);
    EmbeddedAmbroseHiveProgressReporter reporter = getEmbeddedProgressReporter();
    List<TaskRunner> completeTaskList = hookContext.getCompleteTaskList();
    // TaskRunner does not expose its TaskResult publicly, so it is read via
    // reflection; accessTaskResultField() makes the field accessible.
    Field taskResultField = accessTaskResultField();
    for (TaskRunner taskRunner : completeTaskList) {
        TaskResult taskResult = (TaskResult) taskResultField.get(taskRunner);
        // get non-running, failed jobs
        if (!taskResult.isRunning() && taskResult.getExitVal() != 0) {
            Task<? extends Serializable> task = taskRunner.getTask();
            String nodeId = AmbroseHiveUtil.getNodeIdFromNodeName(conf, task.getId());
            DAGNode<Job> dagNode = reporter.getDAGNodeFromNodeId(nodeId);
            HiveJob job = (HiveJob) dagNode.getJob();
            job.setConfiguration(allConfProps);
            MapReduceJobState mrJobState = getJobState(job);
            mrJobState.setSuccessful(false);
            reporter.addJob((Job) job);
            reporter.pushEvent(queryId, new Event.JobFailedEvent(dagNode));
        }
    }
    reporter.restoreEventStack();
    String sleepTime = System.getProperty(POST_SCRIPT_SLEEP_SECS_PARAM, "10");
    try {
        int sleepTimeSeconds = Integer.parseInt(sleepTime);
        LOG.info("Script failed but sleeping for " + sleepTimeSeconds + " seconds to keep the HiveStats REST server running. Hit ctrl-c to exit.");
        Thread.sleep(sleepTimeSeconds * 1000L);
        reporter.stopServer();
    } catch (NumberFormatException e) {
        LOG.warn(POST_SCRIPT_SLEEP_SECS_PARAM + " param is not a valid number, not sleeping: " + sleepTime);
    } catch (InterruptedException e) {
        LOG.warn("Sleep interrupted", e);
        // Restore the interrupt status so callers further up the stack can
        // observe it; swallowing it silently loses the cancellation signal.
        Thread.currentThread().interrupt();
    }
}
Also used : MapReduceJobState(com.twitter.ambrose.model.hadoop.MapReduceJobState) Properties(java.util.Properties) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner) Field(java.lang.reflect.Field) TaskResult(org.apache.hadoop.hive.ql.exec.TaskResult) Event(com.twitter.ambrose.model.Event) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Job(com.twitter.ambrose.model.Job) EmbeddedAmbroseHiveProgressReporter(com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)

Example 4 with EmbeddedAmbroseHiveProgressReporter

use of com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter in project ambrose by twitter.

The following example is from the class AmbroseHivePreHook, method run.

@Override
public void run(HookContext hookContext) throws Exception {
    // Pre-execution hook: reconciles the reporter's DAG with the plan Hive is
    // actually about to run, reporting any filtered-out jobs as complete, and then
    // publishes the fresh DAG for this query.
    String queryId = AmbroseHiveUtil.getHiveQueryId(hookContext.getConf());
    EmbeddedAmbroseHiveProgressReporter reporter = getEmbeddedProgressReporter();
    HiveDAGTransformer transformer = new HiveDAGTransformer(hookContext);
    // conditional tasks may be filtered out by Hive at runtime. We mark them as
    // 'complete'
    Map<String, DAGNode<Job>> nodeIdToDAGNode = reporter.getNodeIdToDAGNode();
    sendFilteredJobsStatus(queryId, reporter, nodeIdToDAGNode);
    // No MR jobs in the transformed plan: nothing further to publish for this hook.
    if (transformer.getTotalMRJobs() == 0) {
        return;
    }
    // NOTE(review): waitBetween presumably throttles between consecutive scripts
    // sharing the embedded server — confirm against AmbroseHivePreHook.
    waitBetween(hookContext, reporter, queryId);
    // Replace the reporter's DAG state with the newly transformed plan and
    // broadcast the node-name map for this query.
    nodeIdToDAGNode = transformer.getNodeIdToDAGNode();
    reporter.setNodeIdToDAGNode(nodeIdToDAGNode);
    reporter.setTotalMRJobs(transformer.getTotalMRJobs());
    reporter.sendDagNodeNameMap(queryId, nodeIdToDAGNode);
}
Also used : DAGNode(com.twitter.ambrose.model.DAGNode) EmbeddedAmbroseHiveProgressReporter(com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)

Example 5 with EmbeddedAmbroseHiveProgressReporter

use of com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter in project ambrose by twitter.

The following example is from the class AmbroseHiveStatPublisher, method send.

private void send(String jobIDStr, Map<String, Double> counterValues) {
    // Pushes job lifecycle events (started / progress / finished) and counter data
    // for the MapReduce job identified by jobIDStr to the embedded Ambrose reporter.
    // Relies on instance state: `nodeId` (the DAG node for this publisher) and
    // `jobProgress` (the tracked MapReduceJobState) — assumed set by the enclosing
    // class before this is called.
    EmbeddedAmbroseHiveProgressReporter reporter = getEmbeddedProgressReporter();
    Configuration conf = SessionState.get().getConf();
    String queryId = AmbroseHiveUtil.getHiveQueryId(conf);
    Map<String, DAGNode<Job>> nodeIdToDAGNode = reporter.getNodeIdToDAGNode();
    DAGNode<Job> dagNode = nodeIdToDAGNode.get(nodeId);
    if (dagNode == null) {
        // Fix: "unrecorgnized" -> "unrecognized" in the warning text.
        LOG.warn("jobStartedNotification - unrecognized operator name found for " + "jobId " + jobIDStr);
        return;
    }
    HiveJob job = (HiveJob) dagNode.getJob();
    // a job has been started
    if (job.getId() == null) {
        // job identifier on GUI
        job.setId(AmbroseHiveUtil.asDisplayId(queryId, jobIDStr, nodeId));
        reporter.addJobIdToNodeId(jobIDStr, nodeId);
        reporter.pushEvent(queryId, new Event.JobStartedEvent(dagNode));
    }
    try {
        boolean isUpdated = updateJobState();
        // Only emit follow-up events while the job is not already recorded complete.
        if (isUpdated && !reporter.getCompletedJobIds().contains(jobIDStr)) {
            Event<DAGNode<? extends Job>> event = null;
            job.setMapReduceJobState(jobProgress);
            if (jobProgress.isComplete()) {
                event = new Event.JobFinishedEvent(dagNode);
                // update reduce progress to 1 if we have no reducers
                int reducers = jobProgress.getTotalReducers();
                if (reducers == 0) {
                    jobProgress.setReduceProgress(1.0f);
                }
                // update job state
                job.setConfiguration(((HiveConf) conf).getAllProperties());
                job.setCounterGroupMap(AmbroseHiveUtil.counterGroupInfoMap(counterValues));
                reporter.addCompletedJobIds(jobIDStr);
                reporter.addJob(job);
            } else {
                event = new Event.JobProgressEvent(dagNode);
            }
            reporter.addJobIdToProgress(jobIDStr, getJobProgress());
            pushWorkflowProgress(queryId, reporter);
            reporter.pushEvent(queryId, event);
        }
    } catch (IOException e) {
        LOG.error("Error getting job info!", e);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) DAGNode(com.twitter.ambrose.model.DAGNode) Event(com.twitter.ambrose.model.Event) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(com.twitter.ambrose.model.Job) EmbeddedAmbroseHiveProgressReporter(com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)

Aggregations

EmbeddedAmbroseHiveProgressReporter (com.twitter.ambrose.hive.reporter.EmbeddedAmbroseHiveProgressReporter)5 DAGNode (com.twitter.ambrose.model.DAGNode)2 Event (com.twitter.ambrose.model.Event)2 Job (com.twitter.ambrose.model.Job)2 Configuration (org.apache.hadoop.conf.Configuration)2 Workflow (com.twitter.ambrose.model.Workflow)1 MapReduceJobState (com.twitter.ambrose.model.hadoop.MapReduceJobState)1 IOException (java.io.IOException)1 Field (java.lang.reflect.Field)1 Properties (java.util.Properties)1 HiveConf (org.apache.hadoop.hive.conf.HiveConf)1 MapRedStats (org.apache.hadoop.hive.ql.MapRedStats)1 TaskResult (org.apache.hadoop.hive.ql.exec.TaskResult)1 TaskRunner (org.apache.hadoop.hive.ql.exec.TaskRunner)1 RunningJob (org.apache.hadoop.mapred.RunningJob)1