Use of io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException in project mantis by Netflix.
From the class JobClusterActor, method onJobClusterInitialize.
/**
 * Handles the initialize-cluster request sent by the JCM. Called in the following cases:
 * <ol>
 *   <li>Master bootup: the cluster already exists in the DB.</li>
 *   <li>A new cluster is being created: requires the {@code createInStore} flag to be set. If
 *       writing to the store fails, a failure response is sent back to the sender. The caller
 *       should then kill this actor and inform upstream of the failure.</li>
 * </ol>
 *
 * <p>Disabled cluster: completed jobs are cached, up to 50 non-terminal jobs are marked
 * {@code Completed} and archived, a SUCCESS response is sent, the expired-jobs timer is set and
 * the actor switches to {@code disabledBehavior}.
 *
 * <p>Enabled cluster: the cluster is optionally persisted, the cron manager is created
 * (failures are only logged), running jobs are initialized, the expired-jobs timer is set and
 * completed jobs are cached. NOTE(review): the success response for this path is presumably
 * sent from {@code initRunningJobs}, which receives the sender - confirm.
 *
 * @param initReq the initialization request carrying the cluster definition, its job lists and
 *                the {@code createInStore} flag
 */
@Override
public void onJobClusterInitialize(JobClusterProto.InitializeJobClusterRequest initReq) {
    ActorRef sender = getSender();
    logger.info("In onJobClusterInitialize {}", this.name);
    if (logger.isDebugEnabled()) {
        logger.debug("Init Request {}", initReq);
    }
    jobClusterMetadata = new JobClusterMetadataImpl.Builder()
            .withLastJobCount(initReq.lastJobNumber)
            .withIsDisabled(initReq.isDisabled)
            .withJobClusterDefinition(initReq.jobClusterDefinition)
            .build();
    // create sla enforcer
    slaEnforcer = new SLAEnforcer(jobClusterMetadata.getJobClusterDefinition().getSLA());
    long expireFrequency = ConfigurationProvider.getConfig().getCompletedJobPurgeFrequencySeqs();

    // If cluster is disabled
    if (jobClusterMetadata.isDisabled()) {
        logger.info("Cluster {} initialized but is Disabled", jobClusterMetadata.getJobClusterDefinition().getName());
        // add completed jobs to cache to use when / if cluster is reenabled
        jobManager.addCompletedJobsToCache(initReq.completedJobsList);
        // Remaining budget of non-terminal jobs that may be force-completed in this pass.
        int count = 50;
        if (!initReq.jobList.isEmpty()) {
            logger.info("Cluster {} is disabled however it has {} active/accepted jobs", jobClusterMetadata.getJobClusterDefinition().getName(), initReq.jobList.size());
            for (IMantisJobMetadata jobMeta : initReq.jobList) {
                try {
                    if (count == 0) {
                        logger.info("Max cleanup limit of 50 reached abort");
                        break;
                    }
                    if (!JobState.isTerminalState(jobMeta.getState())) {
                        logger.info("Job {} is in non terminal state {} for disabled cluster {}." + "Marking it complete", jobMeta.getJobId(), jobMeta.getState(), jobClusterMetadata.getJobClusterDefinition().getName());
                        // The budget is consumed before the attempt, so a failing job still counts.
                        count--;
                        jobManager.markCompletedDuringStartup(jobMeta.getJobId(), System.currentTimeMillis(), jobMeta, JobState.Completed);
                        jobStore.archiveJob(jobMeta);
                    }
                } catch (Exception e) {
                    // Best effort: keep going with the remaining jobs, but keep the stack trace.
                    logger.error("Exception {} archiving job {} during init ", e.getMessage(), jobMeta.getJobId(), e);
                }
            }
        }
        sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SUCCESS, String.format("JobCluster %s initialized successfully. But is currently disabled", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
        logger.info("Job expiry check frquency set to {}", expireFrequency);
        setExpiredJobsTimer(expireFrequency);
        getContext().become(disabledBehavior);
        return;
    }

    // new cluster initialization
    if (initReq.createInStore) {
        try {
            jobStore.createJobCluster(jobClusterMetadata);
            eventPublisher.publishAuditEvent(new LifecycleEventsProto.AuditEvent(LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_CREATE, jobClusterMetadata.getJobClusterDefinition().getName(), "saved job cluster " + name));
            logger.info("successfully saved job cluster {}", name);
            numJobClustersInitialized.increment();
        } catch (final JobClusterAlreadyExistsException exists) {
            numJobClusterInitializeFailures.increment();
            logger.error("job cluster not created");
            sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, CLIENT_ERROR, String.format("JobCluster %s already exists", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
            // TODO: handle case when job cluster exists in store but Job cluster actor is not running
            return;
        } catch (final Exception e) {
            numJobClusterInitializeFailures.increment();
            logger.error("job cluster not created due to {}", e.getMessage(), e);
            sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SERVER_ERROR, String.format("JobCluster %s not created due to %s", initReq.jobClusterDefinition.getName(), e.getMessage()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
            // so we don't send back 2 InitJobClusterResponses
            return;
        }
    }

    try {
        cronManager = new CronManager(name, getSelf(), jobClusterMetadata.getJobClusterDefinition().getSLA());
    } catch (Exception e) {
        // A cron failure is logged but does not abort initialization.
        logger.warn("Exception initializing cron {}", e.getMessage(), e);
    }
    initRunningJobs(initReq, sender);
    setExpiredJobsTimer(expireFrequency);
    logger.info("Job expiry check frquency set to {}", expireFrequency);
    try {
        jobManager.addCompletedJobsToCache(initReq.completedJobsList);
    } catch (Exception e) {
        // Failing to cache completed jobs is logged but does not abort initialization.
        logger.warn("Exception initializing completed jobs {}", e.getMessage(), e);
    }
}
Use of io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException in project mantis by Netflix.
From the class SimpleCachedFileStorageProvider, method createJobCluster.
/**
 * Stores a new job cluster as a file named after the cluster under {@code JOB_CLUSTERS_DIR}.
 *
 * @param jobCluster metadata of the cluster to store; the name from its job cluster definition
 *                   is used as the file name
 * @throws JobClusterAlreadyExistsException if a file for this cluster name already exists
 * @throws IOException if the file cannot be created or written
 */
@Override
public void createJobCluster(IJobClusterMetadata jobCluster) throws JobClusterAlreadyExistsException, IOException {
    String name = jobCluster.getJobClusterDefinition().getName();
    File tmpFile = new File(JOB_CLUSTERS_DIR + "/" + name);
    logger.info("Storing job cluster " + name + " to file " + tmpFile.getAbsolutePath());
    // createNewFile returns false when the file already exists, i.e. the cluster was stored before.
    if (!tmpFile.createNewFile()) {
        throw new JobClusterAlreadyExistsException(name);
    }
    // try-with-resources guarantees the writer is flushed and closed even if serialization fails.
    try (PrintWriter pwrtr = new PrintWriter(tmpFile)) {
        mapper.writeValue(pwrtr, jobCluster);
    }
    logger.info("Stored job cluster " + name + " to file " + tmpFile.getAbsolutePath());
}
Aggregations