Search in sources :

Example 1 with JobClusterAlreadyExistsException

use of io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException in project mantis by Netflix.

the class JobClusterActor method onJobClusterInitialize.

/**
 * Initialize cluster request sent by JCM. Called in following cases.
 * 1. Master bootup : Already exists in DB
 * 2. new cluster is being created : Requires the createInStore flag to be set. If writing to DB fails a
 *    failure message is sent back. The caller should then kill this
 *    actor and inform upstream of the failure
 *
 * <p>If the cluster is disabled, completed jobs are cached, a bounded number of jobs still in a
 * non-terminal state are marked completed and archived, a SUCCESS response is sent and the actor
 * switches to its disabled behavior. Otherwise the cluster is optionally persisted, the cron
 * manager is created and running jobs are initialized.
 *
 * @param initReq the initialization request; supplies the cluster definition, the last job number,
 *                the disabled flag, the active and completed job lists and the createInStore flag
 */
@Override
public void onJobClusterInitialize(JobClusterProto.InitializeJobClusterRequest initReq) {
    // Capture the sender up front so every reply goes to the original requester.
    ActorRef sender = getSender();
    logger.info("In onJobClusterInitialize {}", this.name);
    if (logger.isDebugEnabled()) {
        logger.debug("Init Request {}", initReq);
    }
    jobClusterMetadata = new JobClusterMetadataImpl.Builder().withLastJobCount(initReq.lastJobNumber).withIsDisabled(initReq.isDisabled).withJobClusterDefinition(initReq.jobClusterDefinition).build();
    // create sla enforcer
    slaEnforcer = new SLAEnforcer(jobClusterMetadata.getJobClusterDefinition().getSLA());
    long expireFrequency = ConfigurationProvider.getConfig().getCompletedJobPurgeFrequencySeqs();
    // If cluster is disabled
    if (jobClusterMetadata.isDisabled()) {
        logger.info("Cluster {} initialized but is Disabled", jobClusterMetadata.getJobClusterDefinition().getName());
        // add completed jobs to cache to use when / if cluster is reenabled
        jobManager.addCompletedJobsToCache(initReq.completedJobsList);
        // Upper bound on how many lingering jobs are force-completed during a single init.
        final int maxCleanupCount = 50;
        int remainingCleanups = maxCleanupCount;
        if (!initReq.jobList.isEmpty()) {
            logger.info("Cluster {} is disabled however it has {} active/accepted jobs", jobClusterMetadata.getJobClusterDefinition().getName(), initReq.jobList.size());
            for (IMantisJobMetadata jobMeta : initReq.jobList) {
                try {
                    if (remainingCleanups == 0) {
                        logger.info("Max cleanup limit of {} reached abort", maxCleanupCount);
                        break;
                    }
                    if (!JobState.isTerminalState(jobMeta.getState())) {
                        logger.info("Job {} is in non terminal state {} for disabled cluster {}. Marking it complete", jobMeta.getJobId(), jobMeta.getState(), jobClusterMetadata.getJobClusterDefinition().getName());
                        // Budget is consumed before the attempt, so failed attempts count too.
                        remainingCleanups--;
                        jobManager.markCompletedDuringStartup(jobMeta.getJobId(), System.currentTimeMillis(), jobMeta, JobState.Completed);
                        jobStore.archiveJob(jobMeta);
                    }
                } catch (Exception e) {
                    // Best effort: one failing job must not abort initialization. Keep the stack trace.
                    logger.error("Exception {} archiving job {} during init ", e.getMessage(), jobMeta.getJobId(), e);
                }
            }
        }
        sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SUCCESS, String.format("JobCluster %s initialized successfully. But is currently disabled", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
        logger.info("Job expiry check frequency set to {}", expireFrequency);
        setExpiredJobsTimer(expireFrequency);
        getContext().become(disabledBehavior);
        return;
    }
    // new cluster initialization
    if (initReq.createInStore) {
        try {
            jobStore.createJobCluster(jobClusterMetadata);
            eventPublisher.publishAuditEvent(new LifecycleEventsProto.AuditEvent(LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_CREATE, jobClusterMetadata.getJobClusterDefinition().getName(), "saved job cluster " + name));
            logger.info("successfully saved job cluster {}", name);
            numJobClustersInitialized.increment();
        } catch (final JobClusterAlreadyExistsException exists) {
            numJobClusterInitializeFailures.increment();
            logger.error("job cluster not created");
            sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, CLIENT_ERROR, String.format("JobCluster %s already exists", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
            // TODO: handle case when job cluster exists in store but Job cluster actor is not running
            return;
        } catch (final Exception e) {
            numJobClusterInitializeFailures.increment();
            logger.error("job cluster not created due to {}", e.getMessage(), e);
            sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SERVER_ERROR, String.format("JobCluster %s not created due to %s", initReq.jobClusterDefinition.getName(), e.getMessage()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
            // so we don't send back 2 InitJobClusterResponses
            return;
        }
    }
    try {
        cronManager = new CronManager(name, getSelf(), jobClusterMetadata.getJobClusterDefinition().getSLA());
    } catch (Exception e) {
        // Cron setup failure is tolerated; fill the placeholder and attach the stack trace.
        logger.warn("Exception initializing cron {}", e.getMessage(), e);
    }
    // NOTE(review): presumably initRunningJobs replies to the sender for this path - confirm.
    initRunningJobs(initReq, sender);
    setExpiredJobsTimer(expireFrequency);
    logger.info("Job expiry check frequency set to {}", expireFrequency);
    try {
        jobManager.addCompletedJobsToCache(initReq.completedJobsList);
    } catch (Exception e) {
        // Best effort: a failure here is logged, with its stack trace, and otherwise ignored.
        logger.warn("Exception initializing completed jobs {}", e.getMessage(), e);
    }
}
Also used : JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) LifecycleEventsProto(io.mantisrx.master.events.LifecycleEventsProto)

Example 2 with JobClusterAlreadyExistsException

use of io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException in project mantis by Netflix.

the class SimpleCachedFileStorageProvider method createJobCluster.

/**
 * Persists a new job cluster as a file named after the cluster under {@code JOB_CLUSTERS_DIR}.
 *
 * <p>The file is created atomically with {@link File#createNewFile()}, so an existing file with
 * the same name is treated as "cluster already exists".
 *
 * @param jobCluster metadata of the cluster to store; its definition name is used as the file name
 * @throws JobClusterAlreadyExistsException if a file for this cluster name already exists
 * @throws IOException if the file cannot be created or the metadata cannot be written to it
 */
@Override
public void createJobCluster(IJobClusterMetadata jobCluster) throws JobClusterAlreadyExistsException, IOException {
    String name = jobCluster.getJobClusterDefinition().getName();
    File tmpFile = new File(JOB_CLUSTERS_DIR + "/" + name);
    logger.info("Storing job cluster " + name + " to file " + tmpFile.getAbsolutePath());
    if (!tmpFile.createNewFile()) {
        throw new JobClusterAlreadyExistsException(name);
    }
    // try-with-resources guarantees the file handle is released even if serialization throws.
    // NOTE(review): the mapper may already close the writer depending on its configuration;
    // closing a PrintWriter twice is harmless.
    try (PrintWriter pwrtr = new PrintWriter(tmpFile)) {
        mapper.writeValue(pwrtr, jobCluster);
        // PrintWriter never throws IOException on write; it only records an error flag.
        // Surface it so a failed write is not reported as a successful store.
        if (pwrtr.checkError()) {
            throw new IOException("Failed to write job cluster " + name + " to file " + tmpFile.getAbsolutePath());
        }
    }
    logger.info("Stored job cluster " + name + " to file " + tmpFile.getAbsolutePath());
}
Also used : File(java.io.File) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) PrintWriter(java.io.PrintWriter)

Aggregations

JobClusterAlreadyExistsException (io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)2 ActorRef (akka.actor.ActorRef)1 SchedulerException (com.netflix.fenzo.triggers.exceptions.SchedulerException)1 TriggerNotFoundException (com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException)1 LifecycleEventsProto (io.mantisrx.master.events.LifecycleEventsProto)1 IMantisJobMetadata (io.mantisrx.master.jobcluster.job.IMantisJobMetadata)1 JobClusterProto (io.mantisrx.master.jobcluster.proto.JobClusterProto)1 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1