Search in sources :

Example 1 with OrbExceptionEvent

use of org.goldenorb.event.OrbExceptionEvent in project goldenorb by jzachr.

The class JobManager, method launchJob.

/**
 * Prepares the ZooKeeper nodes for a job, asks the resource allocator for partition assignments
 * and, if assignments were returned, sends a partition request to every OrbTracker member.
 *
 * <p>If the allocator throws {@link InvalidJobConfException} the error is logged and the job is
 * treated as having no assignments, in which case {@code jobComplete(job)} is called. Any
 * {@link OrbZKFailure} raised along the way is logged and reported through
 * {@code fireEvent(new OrbExceptionEvent(e))}; it is not rethrown to the caller.
 *
 * @param job the job to launch
 */
private void launchJob(OrbJob job) {
    try {
        // All per-job nodes live below <jobsInProgressPath>/<jobNumber>.
        final String jobNodePath = jobsInProgressPath + "/" + job.getJobNumber();
        ZookeeperUtils.notExistCreateNode(zk, jobNodePath);
        ZookeeperUtils.notExistCreateNode(zk, jobNodePath + "/OrbPartitionLeaderGroup");
        ZookeeperUtils.notExistCreateNode(zk, jobNodePath + "/messages");
        // The heartbeat node starts at 0 and is created as a persistent node.
        ZookeeperUtils.tryToCreateNode(zk, jobNodePath + "/messages/heartbeat", new LongWritable(0), CreateMode.PERSISTENT);

        // allocate resources and if enough, start the job
        logger.info("checking for available OrbTracker resources");
        Map<M, Integer[]> assignments = null;
        try {
            assignments = resourceAllocator.assignResources(job.getOrbConf());
        } catch (InvalidJobConfException e) {
            // Pass the exception itself so the stack trace ends up in the log;
            // assignments stays null and is handled by the capacity branch below.
            logger.error(e.getMessage(), e);
        }
        logger.info("Starting Job");
        logger.info("********** Job {} started: {}", job.getJobNumber(), System.currentTimeMillis());

        if (assignments != null) {
            logger.info("Allocating partitions");
            // Running offset: each tracker's partitions are numbered after the previous tracker's.
            int basePartitionID = 0;
            for (M tracker : orbTrackerMembers) {
                logger.debug("OrbTracker - " + tracker.getHostname() + ":" + tracker.getPort());
                Integer[] assignment = assignments.get(tracker);
                tracker.initProxy(getOrbConf());
                try {
                    logger.debug("jobConf().getHDFSdistributedFiles(): {}", job.getOrbConf().getHDFSdistributedFiles());
                    tracker.getRequiredFiles(job.getOrbConf());
                } catch (OrbZKFailure e) {
                    logger.error("EXCEPTION : An OrbTrackerMember failed to copy files from HDFS to local machine");
                    logger.error(e.getMessage(), e);
                    // Rethrown so the outer handler aborts the launch and fires the exception event.
                    throw e;
                }

                PartitionRequest request = new PartitionRequest();
                request.setActivePartitions(assignment[ResourceAllocator.TRACKER_AVAILABLE]);
                request.setReservedPartitions(assignment[ResourceAllocator.TRACKER_RESERVED]);
                request.setJobID(job.getJobNumber());
                request.setBasePartitionID(basePartitionID);
                request.setJobConf(job.getOrbConf());
                basePartitionID += assignment[ResourceAllocator.TRACKER_AVAILABLE];

                logger.debug("requesting partitions");
                tracker.requestPartitions(request);
                logger.info(request.toString());

                // NOTE(review): the liveness check thread, activeJobs registration, death/complete
                // watch and heartbeat are set up once per tracker iteration rather than once per
                // job. This looks like it may be unintended; behaviour is preserved here - confirm.
                JobStillActiveCheck jobStillActiveCheck = new JobStillActiveCheck(job);
                job.setJobStillActiveInterface(jobStillActiveCheck);
                new Thread(jobStillActiveCheck).start();
                activeJobs.add(job.getJobNumber());
                checkForDeathComplete(job);
                heartbeat(job);
            }
        } else {
            logger.error("not enough capacity for this job");
            jobComplete(job);
        }
    } catch (OrbZKFailure e) {
        // Log through the logger (with stack trace) instead of printing to stderr.
        logger.error(e.getMessage(), e);
        fireEvent(new OrbExceptionEvent(e));
    }
}
Also used : OrbExceptionEvent(org.goldenorb.event.OrbExceptionEvent) PartitionRequest(org.goldenorb.jet.PartitionRequest) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) Date(java.util.Date) OrbZKFailure(org.goldenorb.zookeeper.OrbZKFailure) LongWritable(org.apache.hadoop.io.LongWritable)

Example 2 with OrbExceptionEvent

use of org.goldenorb.event.OrbExceptionEvent in project goldenorb by jzachr.

The class JobManager, method getJobsInQueue.

/**
 * Synchronizes the local {@code jobs} map with the children of the job queue node in ZooKeeper
 * and then calls {@code tryToLaunchJob()}.
 *
 * <p>Entries in {@code jobs} whose key is no longer a child of the queue node are removed from
 * both {@code jobs} and {@code activeJobs}; children not yet present in {@code jobs} are read
 * from ZooKeeper and added. The whole synchronization runs while holding the lock on
 * {@code jobs}.
 *
 * <p>If the children cannot be listed, an {@link OrbExceptionEvent} is fired and the method
 * returns without touching {@code jobs} and without trying to launch a job, because the queue
 * state is unknown at that point.
 */
private void getJobsInQueue() {
    logger.info("getting jobs in queue.");
    synchronized (jobs) {
        final List<String> jobQueueChildren;
        try {
            jobQueueChildren = zk.getChildren(jobQueuePath, jobsInQueueWatcher);
        } catch (KeeperException e) {
            // Without the child list there is nothing to reconcile against; continuing would
            // dereference a null list.
            fireEvent(new OrbExceptionEvent(e));
            return;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            fireEvent(new OrbExceptionEvent(e));
            return;
        }

        // Collect first, remove afterwards, so the key set is not modified while iterating it.
        List<String> jobsToRemove = new ArrayList<String>();
        for (String jobPath : jobs.keySet()) {
            if (!jobQueueChildren.contains(jobPath)) {
                jobsToRemove.add(jobPath);
                // Either a job has completed or been removed by someone else this should fire an event.
                // This should really not occur since it should only be removed by the JobManager itself.
                // In reality does an event really even need to be thrown?
            }
        }
        for (String job : jobsToRemove) {
            logger.debug("Removing job: " + job);
            jobs.remove(job);
            activeJobs.remove(job);
        }

        for (String jobPath : jobQueueChildren) {
            try {
                OrbConfiguration jobConf = (OrbConfiguration) ZookeeperUtils.getNodeWritable(zk, jobQueuePath + "/" + jobPath, OrbConfiguration.class, orbConf);
                if (jobConf == null) {
                    logger.debug("Job is not a valid job.");
                } else if (!jobs.containsKey(jobPath)) {
                    logger.debug("Adding job: " + jobPath);
                    jobs.put(jobPath, new OrbJob(jobPath, jobConf));
                    // Here we have a new job--once again an event should be fired.
                    // Although I am not sure that an event really needs to be fired at this point. We will see.
                }
            } catch (OrbZKFailure e) {
                // A failure reading one job node is reported but does not stop the remaining nodes
                // from being processed.
                fireEvent(new OrbExceptionEvent(e));
            }
        }
    }
    tryToLaunchJob();
}
Also used : OrbExceptionEvent(org.goldenorb.event.OrbExceptionEvent) OrbConfiguration(org.goldenorb.conf.OrbConfiguration) ArrayList(java.util.ArrayList) OrbZKFailure(org.goldenorb.zookeeper.OrbZKFailure) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

OrbExceptionEvent (org.goldenorb.event.OrbExceptionEvent)2 OrbZKFailure (org.goldenorb.zookeeper.OrbZKFailure)2 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 LongWritable (org.apache.hadoop.io.LongWritable)1 InvalidJobConfException (org.apache.hadoop.mapred.InvalidJobConfException)1 KeeperException (org.apache.zookeeper.KeeperException)1 OrbConfiguration (org.goldenorb.conf.OrbConfiguration)1 PartitionRequest (org.goldenorb.jet.PartitionRequest)1