Use of io.mantisrx.master.jobcluster.job.IMantisJobMetadata in the project mantis by Netflix:
class SimpleCachedFileStorageProvider, method loadAllArchivedJobs.
/**
 * Loads the metadata of every archived job found in {@code ARCHIVE_DIR}.
 * <p>
 * Files named {@code Job-<jobId>} are read via {@code loadJob}; entries that fail to
 * parse are logged and skipped so one corrupt file does not abort the whole scan.
 *
 * @return an Observable emitting the metadata of each successfully loaded archived job
 */
@Override
public Observable<IMantisJobMetadata> loadAllArchivedJobs() {
    List<IMantisJobMetadata> jobList = Lists.newArrayList();
    createDir(ARCHIVE_DIR);
    File archiveDirFile = new File(ARCHIVE_DIR);
    // File.listFiles() returns null on I/O error or when the path is not a directory;
    // guard against it instead of NPE-ing in the for-each.
    File[] jobFiles = archiveDirFile.listFiles((dir, name) -> name.startsWith("Job-"));
    if (jobFiles != null) {
        for (File jobFile : jobFiles) {
            try {
                // File name layout is "Job-<jobId>"; strip the prefix to recover the id.
                String jobId = jobFile.getName().substring("Job-".length());
                Optional<IMantisJobMetadata> jobMetaOp = loadJob(ARCHIVE_DIR, jobId);
                jobMetaOp.ifPresent(jobList::add);
            } catch (IOException e) {
                // Log the full stack trace (not just the message) and continue with the next file.
                logger.error("Error reading job metadata from file {}", jobFile.getName(), e);
            }
        }
    }
    return Observable.from(jobList);
}
Use of io.mantisrx.master.jobcluster.job.IMantisJobMetadata in the project mantis by Netflix:
class SimpleCachedFileStorageProvider, method loadAllJobs.
/**
 * Loads the metadata of every active (spooled) job found in {@code SPOOL_DIR}.
 * <p>
 * Files named {@code Job-<jobId>} are read via {@code loadJob}; entries that fail to
 * parse are logged and skipped so one corrupt file does not abort the whole scan.
 * Also ensures {@code ARCHIVE_DIR} exists since archival may follow a load.
 *
 * @return the list of successfully loaded job metadata (possibly empty, never null)
 */
@Override
public List<IMantisJobMetadata> loadAllJobs() {
    List<IMantisJobMetadata> jobList = Lists.newArrayList();
    createDir(SPOOL_DIR);
    createDir(ARCHIVE_DIR);
    File spoolDirFile = new File(SPOOL_DIR);
    // File.listFiles() returns null on I/O error or when the path is not a directory;
    // guard against it instead of NPE-ing in the for-each.
    File[] jobFiles = spoolDirFile.listFiles((dir, name) -> name.startsWith("Job-"));
    if (jobFiles != null) {
        for (File jobFile : jobFiles) {
            try {
                // File name layout is "Job-<jobId>"; strip the prefix to recover the id.
                String jobId = jobFile.getName().substring("Job-".length());
                Optional<IMantisJobMetadata> jobMetaOp = loadJob(SPOOL_DIR, jobId);
                jobMetaOp.ifPresent(jobList::add);
            } catch (IOException e) {
                // Log the full stack trace (not just the message) and continue with the next file.
                logger.error("Error reading job metadata from file {}", jobFile.getName(), e);
            }
        }
    }
    return jobList;
}
Use of io.mantisrx.master.jobcluster.job.IMantisJobMetadata in the project mantis by Netflix:
class TestHelpers, method createFakeScheduleRequest.
/**
 * Builds a {@link ScheduleRequest} backed by minimal fake job metadata, for use in tests.
 * <p>
 * The fake job has a single stage (stage 0) with one instance using the supplied
 * machine definition, a perpetual SLA, and an artifact named {@code "jar"}.
 * NOTE(review): on any construction failure this prints the stack trace and returns
 * {@code null} — callers must null-check.
 *
 * @param workerId          identifies the worker (its job id seeds the fake metadata)
 * @param stageNum          stage number to schedule
 * @param numStages         total number of stages in the fake job
 * @param machineDefinition resources requested for the worker
 * @return the fake schedule request, or {@code null} if construction failed
 */
public static ScheduleRequest createFakeScheduleRequest(final WorkerId workerId, final int stageNum, final int numStages, final MachineDefinition machineDefinition) {
    try {
        // One stage, one instance, no placement constraints.
        StageSchedulingInfo stageInfo = StageSchedulingInfo.builder()
                .numberOfInstances(1)
                .machineDefinition(machineDefinition)
                .hardConstraints(Collections.emptyList())
                .softConstraints(Collections.emptyList())
                .build();
        SchedulingInfo schedulingInfo = new SchedulingInfo(Collections.singletonMap(0, stageInfo));
        JobDefinition jobDefinition = new JobDefinition.Builder()
                .withArtifactName("jar")
                .withSchedulingInfo(schedulingInfo)
                .withJobSla(new JobSla(0, 0, null, MantisJobDurationType.Perpetual, null))
                .build();
        IMantisJobMetadata fakeJobMeta = new MantisJobMetadataImpl.Builder()
                .withJobId(JobId.fromId(workerId.getJobId()).get())
                .withJobDefinition(jobDefinition)
                .build();
        // Project the fake metadata into the scheduler-facing JobMetadata view.
        JobMetadata schedulerView = new JobMetadata(
                fakeJobMeta.getJobId().getId(),
                fakeJobMeta.getJobJarUrl(),
                fakeJobMeta.getTotalStages(),
                fakeJobMeta.getUser(),
                fakeJobMeta.getSchedulingInfo(),
                fakeJobMeta.getParameters(),
                fakeJobMeta.getSubscriptionTimeoutSecs(),
                fakeJobMeta.getMinRuntimeSecs());
        return new ScheduleRequest(
                workerId,
                stageNum,
                numStages,
                schedulerView,
                fakeJobMeta.getSla().get().getDurationType(),
                machineDefinition,
                Collections.emptyList(),
                Collections.emptyList(),
                0,
                Optional.empty());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Use of io.mantisrx.master.jobcluster.job.IMantisJobMetadata in the project mantis by Netflix:
class JobClusterActor, method onJobClusterInitialize.
/**
 * Handles the initialize-cluster request sent by the JobClustersManager (JCM). Called in
 * the following cases:
 * <ol>
 * <li>Master bootup: the cluster already exists in the DB.</li>
 * <li>A new cluster is being created: requires {@code createInStore} to be set. If writing
 * to the DB fails, a failure message is sent back; the caller should then kill this actor
 * and inform upstream of the failure.</li>
 * </ol>
 *
 * @param initReq the initialization request carrying the cluster definition, job lists,
 *                and the requestor to reply to
 */
@Override
public void onJobClusterInitialize(JobClusterProto.InitializeJobClusterRequest initReq) {
    ActorRef sender = getSender();
    logger.info("In onJobClusterInitialize {}", this.name);
    if (logger.isDebugEnabled()) {
        logger.debug("Init Request {}", initReq);
    }
    jobClusterMetadata = new JobClusterMetadataImpl.Builder().withLastJobCount(initReq.lastJobNumber).withIsDisabled(initReq.isDisabled).withJobClusterDefinition(initReq.jobClusterDefinition).build();
    // create sla enforcer
    slaEnforcer = new SLAEnforcer(jobClusterMetadata.getJobClusterDefinition().getSLA());
    long expireFrequency = ConfigurationProvider.getConfig().getCompletedJobPurgeFrequencySeqs();
    // If cluster is disabled
    if (jobClusterMetadata.isDisabled()) {
        logger.info("Cluster {} initialized but is Disabled", jobClusterMetadata.getJobClusterDefinition().getName());
        // add completed jobs to cache to use when / if cluster is reenabled
        jobManager.addCompletedJobsToCache(initReq.completedJobsList);
        // Cap on how many stale non-terminal jobs get force-completed in one init pass.
        int count = 50;
        if (!initReq.jobList.isEmpty()) {
            logger.info("Cluster {} is disabled however it has {} active/accepted jobs", jobClusterMetadata.getJobClusterDefinition().getName(), initReq.jobList.size());
            for (IMantisJobMetadata jobMeta : initReq.jobList) {
                try {
                    if (count == 0) {
                        logger.info("Max cleanup limit of 50 reached abort");
                        break;
                    }
                    // A disabled cluster should have no running jobs; mark leftovers complete and archive them.
                    if (!JobState.isTerminalState(jobMeta.getState())) {
                        logger.info("Job {} is in non terminal state {} for disabled cluster {}." + "Marking it complete", jobMeta.getJobId(), jobMeta.getState(), jobClusterMetadata.getJobClusterDefinition().getName());
                        count--;
                        jobManager.markCompletedDuringStartup(jobMeta.getJobId(), System.currentTimeMillis(), jobMeta, JobState.Completed);
                        jobStore.archiveJob(jobMeta);
                    }
                } catch (Exception e) {
                    // Best-effort cleanup: log with stack trace and continue with the next job.
                    logger.error("Exception archiving job {} during init", jobMeta.getJobId(), e);
                }
            }
        }
        sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SUCCESS, String.format("JobCluster %s initialized successfully. But is currently disabled", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
        logger.info("Job expiry check frequency set to {}", expireFrequency);
        setExpiredJobsTimer(expireFrequency);
        // Disabled clusters get a restricted message-handling behavior.
        getContext().become(disabledBehavior);
        return;
    } else {
        // new cluster initialization
        if (initReq.createInStore) {
            try {
                jobStore.createJobCluster(jobClusterMetadata);
                eventPublisher.publishAuditEvent(new LifecycleEventsProto.AuditEvent(LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_CREATE, jobClusterMetadata.getJobClusterDefinition().getName(), "saved job cluster " + name));
                logger.info("successfully saved job cluster {}", name);
                numJobClustersInitialized.increment();
            } catch (final JobClusterAlreadyExistsException exists) {
                numJobClusterInitializeFailures.increment();
                logger.error("job cluster not created", exists);
                sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, CLIENT_ERROR, String.format("JobCluster %s already exists", initReq.jobClusterDefinition.getName()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
                // TODO: handle case when job cluster exists in store but Job cluster actor is not running
                return;
            } catch (final Exception e) {
                numJobClusterInitializeFailures.increment();
                logger.error("job cluster not created due to {}", e.getMessage(), e);
                sender.tell(new JobClusterProto.InitializeJobClusterResponse(initReq.requestId, SERVER_ERROR, String.format("JobCluster %s not created due to %s", initReq.jobClusterDefinition.getName(), e.getMessage()), initReq.jobClusterDefinition.getName(), initReq.requestor), getSelf());
                // so we don't send back 2 InitJobClusterResponses
                return;
            }
        }
        try {
            cronManager = new CronManager(name, getSelf(), jobClusterMetadata.getJobClusterDefinition().getSLA());
        } catch (Exception e) {
            // Cron is optional; log with stack trace and carry on without it.
            logger.warn("Exception initializing cron for cluster {}", name, e);
        }
        initRunningJobs(initReq, sender);
        setExpiredJobsTimer(expireFrequency);
        logger.info("Job expiry check frequency set to {}", expireFrequency);
        try {
            jobManager.addCompletedJobsToCache(initReq.completedJobsList);
        } catch (Exception e) {
            // Cache priming is best-effort; log with stack trace and continue.
            logger.warn("Exception initializing completed jobs", e);
        }
    }
}
Use of io.mantisrx.master.jobcluster.job.IMantisJobMetadata in the project mantis by Netflix:
class JobClusterActor, method getFilteredTerminalJobList.
/**
 * Returns an Observable of completed-job views matching the given criteria, or an empty
 * Observable when the filters exclude terminal jobs. Decision table:
 * <pre>
 * JobState    ActiveOnly   Execute?
 * None        None         Y
 * None        TRUE         N
 * None        FALSE        Y
 * Active      None         N
 * Active      TRUE         N
 * Active      FALSE        N
 * Terminal    None         Y
 * Terminal    TRUE         Y
 * Terminal    FALSE        Y
 * </pre>
 * @param request  the list criteria (state/activeOnly filters, limit, worker filters)
 * @param jobIdSet optional explicit set of job ids to restrict the lookup to
 * @return an Observable of {@link MantisJobMetadataView} for the matching completed jobs
 */
private Observable<MantisJobMetadataView> getFilteredTerminalJobList(ListJobCriteria request, Set<JobId> jobIdSet) {
    if (logger.isTraceEnabled()) {
        logger.trace("JobClusterActor:getFilteredTerminalJobList");
    }
    // Skip entirely when the caller asked for a non-terminal state, or asked for
    // active-only without naming a state (rows marked 'N' in the table above).
    boolean wantsNonTerminalState = request.getJobState().isPresent() && !request.getJobState().get().equals(JobState.MetaState.Terminal);
    boolean activeOnlyNoState = !request.getJobState().isPresent() && request.getActiveOnly().isPresent() && request.getActiveOnly().get();
    if (wantsNonTerminalState || activeOnlyNoState) {
        if (logger.isTraceEnabled()) {
            logger.trace("Exit JobClusterActor:getFilteredTerminalJobList with empty");
        }
        return Observable.empty();
    }
    // Restrict to the explicit id set when given; otherwise take every completed job.
    List<CompletedJob> completedJobs;
    if (jobIdSet.isEmpty()) {
        completedJobs = jobManager.getCompletedJobsList();
    } else {
        completedJobs = jobIdSet.stream().map(jobManager::getCompletedJob).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
    }
    // Honor the requested limit (defaulting to DEFAULT_LIMIT).
    int limit = Math.min(completedJobs.size(), request.getLimit().orElse(DEFAULT_LIMIT));
    List<CompletedJob> limitedJobs = completedJobs.subList(0, limit);
    return Observable.from(limitedJobs).flatMap((completed) -> {
        try {
            if (logger.isDebugEnabled()) {
                logger.debug("Fetching details for completed job {}", completed);
            }
            Optional<IMantisJobMetadata> metadataOp = jobManager.getJobDataForCompletedJob(completed.getJobId());
            if (metadataOp.isPresent()) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Fetched details for completed job {} -> {}", completed, metadataOp.get());
                }
                return Observable.just(new MantisJobMetadataView(metadataOp.get(), completed.getTerminatedAt(), request.getStageNumberList(), request.getWorkerIndexList(), request.getWorkerNumberList(), request.getWorkerStateList(), false));
            }
        } catch (Exception e) {
            logger.error("caught exception", e);
            return Observable.empty();
        }
        // No metadata available for this completed job — emit nothing for it.
        return Observable.empty();
    });
}
Aggregations