Example 1 with JobTraceReader

Use of org.apache.hadoop.tools.rumen.JobTraceReader in project hadoop by apache.

From the class SLSRunner, the method startAMFromRumenTraces:

/**
 * Parse workload information from Rumen trace files.
 */
@SuppressWarnings("unchecked")
private void startAMFromRumenTraces(Resource containerResource, int heartbeatInterval) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    long baselineTimeMS = 0;
    for (String inputTrace : inputTraces) {
        File fin = new File(inputTrace);
        JobTraceReader reader = new JobTraceReader(new Path(fin.getAbsolutePath()), conf);
        try {
            LoggedJob job = null;
            while ((job = reader.getNext()) != null) {
                // only support MapReduce currently
                String jobType = "mapreduce";
                String user = job.getUser() == null ? "default" : job.getUser().getValue();
                String jobQueue = job.getQueue().getValue();
                String oldJobId = job.getJobID().toString();
                long jobStartTimeMS = job.getSubmitTime();
                long jobFinishTimeMS = job.getFinishTime();
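                // normalize timestamps: the first job's submit time becomes time 0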
                if (baselineTimeMS == 0) {
                    baselineTimeMS = jobStartTimeMS;
                }
                jobStartTimeMS -= baselineTimeMS;
                jobFinishTimeMS -= baselineTimeMS;
                if (jobStartTimeMS < 0) {
                    LOG.warn("Warning: reset job " + oldJobId + " start time to 0.");
                    jobFinishTimeMS = jobFinishTimeMS - jobStartTimeMS;
                    jobStartTimeMS = 0;
                }
                boolean isTracked = trackedApps.contains(oldJobId);
                int queueSize = queueAppNumMap.containsKey(jobQueue) ? queueAppNumMap.get(jobQueue) : 0;
                queueSize++;
                queueAppNumMap.put(jobQueue, queueSize);
                List<ContainerSimulator> containerList = new ArrayList<ContainerSimulator>();
                // map tasks
                for (LoggedTask mapTask : job.getMapTasks()) {
                    if (mapTask.getAttempts().size() == 0) {
                        continue;
                    }
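                    // take the last recorded attempt as the task's representative run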
                    LoggedTaskAttempt taskAttempt = mapTask.getAttempts().get(mapTask.getAttempts().size() - 1);
                    String hostname = taskAttempt.getHostName().getValue();
                    long containerLifeTime = taskAttempt.getFinishTime() - taskAttempt.getStartTime();
                    containerList.add(new ContainerSimulator(containerResource, containerLifeTime, hostname, 10, "map"));
                }
                // reduce tasks
                for (LoggedTask reduceTask : job.getReduceTasks()) {
                    if (reduceTask.getAttempts().size() == 0) {
                        continue;
                    }
                    LoggedTaskAttempt taskAttempt = reduceTask.getAttempts().get(reduceTask.getAttempts().size() - 1);
                    String hostname = taskAttempt.getHostName().getValue();
                    long containerLifeTime = taskAttempt.getFinishTime() - taskAttempt.getStartTime();
                    containerList.add(new ContainerSimulator(containerResource, containerLifeTime, hostname, 20, "reduce"));
                }
                // create a new AM
                AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(amClassMap.get(jobType), conf);
                if (amSim != null) {
                    amSim.init(AM_ID++, heartbeatInterval, containerList, rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue, isTracked, oldJobId);
                    runner.schedule(amSim);
                    maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
                    numTasks += containerList.size();
                    amMap.put(oldJobId, amSim);
                }
            }
        } finally {
            reader.close();
        }
    }
}
Also used:
Path (org.apache.hadoop.fs.Path)
Configuration (org.apache.hadoop.conf.Configuration)
SLSConfiguration (org.apache.hadoop.yarn.sls.conf.SLSConfiguration)
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)
JobTraceReader (org.apache.hadoop.tools.rumen.JobTraceReader)
ArrayList (java.util.ArrayList)
LoggedJob (org.apache.hadoop.tools.rumen.LoggedJob)
ContainerSimulator (org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator)
LoggedTask (org.apache.hadoop.tools.rumen.LoggedTask)
AMSimulator (org.apache.hadoop.yarn.sls.appmaster.AMSimulator)
LoggedTaskAttempt (org.apache.hadoop.tools.rumen.LoggedTaskAttempt)
File (java.io.File)
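
The reading pattern in the excerpt reduces to a short loop. Below is a minimal, self-contained sketch of that pattern; the constructor and getNext() calls are exactly those used above, while the class name TraceJobCounter and the command-line argument are illustrative and not part of the Hadoop source:

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;

public class TraceJobCounter {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // read the trace from the local file system, as the examples above do
        conf.set("fs.defaultFS", "file:///");
        // args[0] is a placeholder for the path to a Rumen job trace
        Path trace = new Path(new File(args[0]).getAbsolutePath());
        JobTraceReader reader = new JobTraceReader(trace, conf);
        try {
            int jobs = 0;
            LoggedJob job;
            // getNext() returns null once the trace is exhausted
            while ((job = reader.getNext()) != null) {
                jobs++;
            }
            System.out.println("jobs in trace: " + jobs);
        } finally {
            reader.close();
        }
    }
}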

Example 2 with JobTraceReader

Use of org.apache.hadoop.tools.rumen.JobTraceReader in project hadoop by apache.

From the class SLSUtils, the method parseNodesFromRumenTrace:

/**
 * Parse a Rumen trace file and return the set of host names it references.
 */
public static Set<String> parseNodesFromRumenTrace(String jobTrace) throws IOException {
    Set<String> nodeSet = new HashSet<String>();
    File fin = new File(jobTrace);
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    JobTraceReader reader = new JobTraceReader(new Path(fin.getAbsolutePath()), conf);
    try {
        LoggedJob job = null;
        while ((job = reader.getNext()) != null) {
            for (LoggedTask mapTask : job.getMapTasks()) {
                // select the last attempt
                if (mapTask.getAttempts().size() == 0) {
                    continue;
                }
                LoggedTaskAttempt taskAttempt = mapTask.getAttempts().get(mapTask.getAttempts().size() - 1);
                nodeSet.add(taskAttempt.getHostName().getValue());
            }
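            // likewise, take the last attempt of each reduce task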
            for (LoggedTask reduceTask : job.getReduceTasks()) {
                if (reduceTask.getAttempts().size() == 0) {
                    continue;
                }
                LoggedTaskAttempt taskAttempt = reduceTask.getAttempts().get(reduceTask.getAttempts().size() - 1);
                nodeSet.add(taskAttempt.getHostName().getValue());
            }
        }
    } finally {
        reader.close();
    }
    return nodeSet;
}
Also used:
Path (org.apache.hadoop.fs.Path)
LoggedTask (org.apache.hadoop.tools.rumen.LoggedTask)
Configuration (org.apache.hadoop.conf.Configuration)
JobTraceReader (org.apache.hadoop.tools.rumen.JobTraceReader)
LoggedTaskAttempt (org.apache.hadoop.tools.rumen.LoggedTaskAttempt)
File (java.io.File)
LoggedJob (org.apache.hadoop.tools.rumen.LoggedJob)
HashSet (java.util.HashSet)
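
A caller would typically feed the returned host names into the simulated cluster topology. A minimal usage sketch, assuming the SLS tool's SLSUtils class (org.apache.hadoop.yarn.sls.utils in the Hadoop tree) is on the classpath; the class name NodeLister and the command-line argument are illustrative:

import java.io.IOException;
import java.util.Set;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;

public class NodeLister {
    public static void main(String[] args) throws IOException {
        // args[0] is a placeholder for the path to a Rumen job trace
        Set<String> nodes = SLSUtils.parseNodesFromRumenTrace(args[0]);
        for (String node : nodes) {
            System.out.println(node);
        }
    }
}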

Aggregations

File (java.io.File) 2
Configuration (org.apache.hadoop.conf.Configuration) 2
Path (org.apache.hadoop.fs.Path) 2
JobTraceReader (org.apache.hadoop.tools.rumen.JobTraceReader) 2
LoggedJob (org.apache.hadoop.tools.rumen.LoggedJob) 2
LoggedTask (org.apache.hadoop.tools.rumen.LoggedTask) 2
LoggedTaskAttempt (org.apache.hadoop.tools.rumen.LoggedTaskAttempt) 2
ArrayList (java.util.ArrayList) 1
HashSet (java.util.HashSet) 1
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 1
AMSimulator (org.apache.hadoop.yarn.sls.appmaster.AMSimulator) 1
SLSConfiguration (org.apache.hadoop.yarn.sls.conf.SLSConfiguration) 1
ContainerSimulator (org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator) 1