Use of io.mantisrx.master.jobcluster.IJobClusterMetadata in project mantis by Netflix.
The class SimpleCachedFileStorageProvider, method loadJobCluster.
// @Override
public Optional<IJobClusterMetadata> loadJobCluster(String clusterName) {
    File jobClusterFile = new File(JOB_CLUSTERS_DIR + "/" + clusterName);
    if (jobClusterFile.exists()) {
        try (FileInputStream fis = new FileInputStream(jobClusterFile)) {
            IJobClusterMetadata jobClusterMeta = mapper.readValue(fis, JobClusterMetadataImpl.class);
            return Optional.ofNullable(jobClusterMeta);
        } catch (Exception e) {
            logger.error("skipped file {} due to exception when loading job cluster", jobClusterFile.getName(), e);
        }
    }
    logger.warn("No such job cluster {}", clusterName);
    return Optional.empty();
}
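A minimal caller sketch for this method; the provider constructor is the no-arg one used in the test further below, while the variable and cluster names here are illustrative, not from the Mantis source:
// Hypothetical usage sketch: load one cluster and fall back gracefully.
// "sProvider" and "myCluster" are illustrative names only.
SimpleCachedFileStorageProvider sProvider = new SimpleCachedFileStorageProvider();
Optional<IJobClusterMetadata> metaO = sProvider.loadJobCluster("myCluster");
if (metaO.isPresent()) {
    System.out.println("Loaded cluster: " + metaO.get().getJobClusterDefinition().getName());
} else {
    // loadJobCluster returns empty both when the file is missing and when deserialization fails
    System.out.println("Cluster not found or failed to deserialize");
}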
Use of io.mantisrx.master.jobcluster.IJobClusterMetadata in project mantis by Netflix.
The class JobClustersManagerActor, method initialize.
private void initialize(JobClustersManagerInitialize initMsg) {
    ActorRef sender = getSender();
    try {
        logger.info("In JobClustersManagerActor:initialize");
        this.jobListHelperActor = getContext().actorOf(JobListHelperActor.props(), "JobListHelperActor");
        getContext().watch(jobListHelperActor);
        mantisScheduler = initMsg.getScheduler();
        Map<String, IJobClusterMetadata> jobClusterMap = new HashMap<>();
        this.jobClusterInfoManager = new JobClusterInfoManager(jobStore, mantisScheduler, eventPublisher);
        if (!initMsg.isLoadJobsFromStore()) {
            getContext().become(initializedBehavior);
            sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
        } else {
            List<IJobClusterMetadata> jobClusters = jobStore.loadAllJobClusters();
            logger.info("Read {} job clusters from storage", jobClusters.size());
            List<IMantisJobMetadata> activeJobs = jobStore.loadAllActiveJobs();
            logger.info("Read {} jobs from storage", activeJobs.size());
            List<CompletedJob> completedJobs = jobStore.loadAllCompletedJobs();
            logger.info("Read {} completed jobs from storage", completedJobs.size());
            for (IJobClusterMetadata jobClusterMeta : jobClusters) {
                String clusterName = jobClusterMeta.getJobClusterDefinition().getName();
                jobClusterMap.put(clusterName, jobClusterMeta);
            }
            // group active and completed jobs by cluster name
            Map<String, List<IMantisJobMetadata>> clusterToJobMap = new HashMap<>();
            Map<String, List<CompletedJob>> clusterToCompletedJobMap = new HashMap<>();
            for (IMantisJobMetadata jobMeta : activeJobs) {
                String clusterName = jobMeta.getClusterName();
                clusterToJobMap.computeIfAbsent(clusterName, k -> new ArrayList<>()).add(jobMeta);
            }
            for (CompletedJob jobMeta : completedJobs) {
                String clusterName = jobMeta.getName();
                clusterToCompletedJobMap.computeIfAbsent(clusterName, k -> new ArrayList<>()).add(jobMeta);
            }
            // leave a 60-second buffer below the master init timeout when possible
            long masterInitTimeoutSecs = ConfigurationProvider.getConfig().getMasterInitTimeoutSecs();
            long timeout = (masterInitTimeoutSecs - 60) > 0 ? (masterInitTimeoutSecs - 60) : masterInitTimeoutSecs;
            Observable.from(jobClusterMap.values())
                .filter((jobClusterMeta) -> jobClusterMeta != null && jobClusterMeta.getJobClusterDefinition() != null)
                .flatMap((jobClusterMeta) -> {
                    Duration t = Duration.ofSeconds(timeout);
                    Optional<JobClusterInfo> jobClusterInfoO =
                        jobClusterInfoManager.createClusterActorAndRegister(jobClusterMeta.getJobClusterDefinition());
                    if (!jobClusterInfoO.isPresent()) {
                        logger.info("skipping job cluster {} on bootstrap as actor creation failed", jobClusterMeta.getJobClusterDefinition().getName());
                        return Observable.empty();
                    }
                    JobClusterInfo jobClusterInfo = jobClusterInfoO.get();
                    List<IMantisJobMetadata> jobList = Lists.newArrayList();
                    List<IMantisJobMetadata> jList = clusterToJobMap.get(jobClusterMeta.getJobClusterDefinition().getName());
                    if (jList != null) {
                        jobList.addAll(jList);
                    }
                    List<CompletedJob> completedJobsList = Lists.newArrayList();
                    List<CompletedJob> cList = clusterToCompletedJobMap.get(jobClusterMeta.getJobClusterDefinition().getName());
                    if (cList != null) {
                        completedJobsList.addAll(cList);
                    }
                    JobClusterProto.InitializeJobClusterRequest req = new JobClusterProto.InitializeJobClusterRequest(
                        (JobClusterDefinitionImpl) jobClusterMeta.getJobClusterDefinition(),
                        jobClusterMeta.isDisabled(),
                        jobClusterMeta.getLastJobCount(),
                        jobList,
                        completedJobsList,
                        "system",
                        getSelf(),
                        false);
                    return jobClusterInfoManager.initializeCluster(jobClusterInfo, req, t);
                })
                .filter(Objects::nonNull)
                .toBlocking()
                .subscribe((clusterInit) -> {
                    logger.info("JobCluster {} inited with code {}", clusterInit.jobClusterName, clusterInit.responseCode);
                    numJobClusterInitSuccesses.increment();
                }, (error) -> {
                    logger.warn("Exception initializing clusters {}", error.getMessage(), error);
                    logger.error("JobClusterManagerActor had errors during initialization; NOT transitioning to initialized behavior");
                    // getContext().become(initializedBehavior);
                    sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, "JobClustersManager inited with errors"), getSelf());
                }, () -> {
                    logger.info("JobClusterManagerActor transitioning to initialized behavior");
                    getContext().become(initializedBehavior);
                    sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SUCCESS, "JobClustersManager successfully inited"), getSelf());
                });
            getTimers().startPeriodicTimer(CHECK_CLUSTERS_TIMER_KEY, new ReconcileJobCluster(), Duration.ofSeconds(checkAgainInSecs));
            // kick off loading of archived jobs asynchronously
            logger.info("Kicking off archived job load asynchronously");
            jobStore.loadAllArchivedJobsAsync();
        }
    } catch (Exception e) {
        logger.error("caught exception", e);
        sender.tell(new JobClustersManagerInitializeResponse(initMsg.requestId, SERVER_ERROR, e.getMessage()), getSelf());
    }
    logger.info("JobClustersManagerActor:initialize ends");
}
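The grouping loops above rely on Map.computeIfAbsent to build per-cluster lists in a single pass. A minimal standalone sketch of the same idiom; the class and data here are illustrative, not from Mantis:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByClusterSketch {
    public static void main(String[] args) {
        // Illustrative input: {jobId, clusterName} pairs.
        List<String[]> jobs = List.of(
            new String[] {"job-1", "clusterA"},
            new String[] {"job-2", "clusterA"},
            new String[] {"job-3", "clusterB"});
        Map<String, List<String>> byCluster = new HashMap<>();
        for (String[] job : jobs) {
            // computeIfAbsent creates the list the first time a cluster name is seen
            byCluster.computeIfAbsent(job[1], k -> new ArrayList<>()).add(job[0]);
        }
        System.out.println(byCluster); // {clusterA=[job-1, job-2], clusterB=[job-3]}
    }
}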
Use of io.mantisrx.master.jobcluster.IJobClusterMetadata in project mantis by Netflix.
The class SimpleCachedFileStorageProviderTest, method testGetAllJobClusters.
@Test
public void testGetAllJobClusters() throws IOException, JobClusterAlreadyExistsException {
    SimpleCachedFileStorageProvider sProvider = new SimpleCachedFileStorageProvider();
    String clusterPrefix = "testGetAllJobClustersCluster";
    for (int i = 0; i < 5; i++) {
        JobClusterDefinitionImpl jobClusterDefn = createFakeJobClusterDefn(clusterPrefix + "_" + i, Lists.newArrayList());
        IJobClusterMetadata jobCluster = new JobClusterMetadataImpl.Builder()
            .withLastJobCount(0)
            .withJobClusterDefinition(jobClusterDefn)
            .build();
        sProvider.createJobCluster(jobCluster);
    }
    List<IJobClusterMetadata> jobClusterList = sProvider.loadAllJobClusters();
    // >= 5 rather than == 5: the backing directory may already hold clusters created elsewhere
    assertTrue(jobClusterList.size() >= 5);
    Map<String, IJobClusterMetadata> clustersMap = Maps.newHashMap();
    for (IJobClusterMetadata cluster : jobClusterList) {
        clustersMap.put(cluster.getJobClusterDefinition().getName(), cluster);
    }
    for (int i = 0; i < 5; i++) {
        assertTrue(clustersMap.containsKey(clusterPrefix + "_" + i));
    }
}
Use of io.mantisrx.master.jobcluster.IJobClusterMetadata in project mantis by Netflix.
The class DataFormatAdapterTest, method jobClusterMetadataConversionTest.
@Test
public void jobClusterMetadataConversionTest() {
    String artifactName = "artifact1";
    String version = "0.0.1";
    List<Parameter> parameterList = new ArrayList<>();
    Parameter parameter = new Parameter("param1", "value1");
    parameterList.add(parameter);
    List<Label> labels = new ArrayList<>();
    Label label = new Label("label1", "labelvalue1");
    labels.add(label);
    long uAt = 1234L;
    JobClusterConfig jobClusterConfig = new JobClusterConfig.Builder()
        .withArtifactName(artifactName)
        .withSchedulingInfo(DEFAULT_SCHED_INFO)
        .withVersion(version)
        .withUploadedAt(uAt)
        .build();
    String clusterName = "clusterName1";
    JobOwner owner = new JobOwner("Neeraj", "Mantis", "desc", "nma@netflix.com", "repo");
    boolean isReadyForMaster = true;
    SLA sla = new SLA(1, 10, null, null);
    JobClusterDefinitionImpl clusterDefn = new JobClusterDefinitionImpl.Builder()
        .withJobClusterConfig(jobClusterConfig)
        .withName(clusterName)
        .withUser("user1")
        .withIsReadyForJobMaster(isReadyForMaster)
        .withOwner(owner)
        .withMigrationConfig(WorkerMigrationConfig.DEFAULT)
        .withSla(sla)
        .withParameters(parameterList)
        .withLabels(labels)
        .build();
    int lastJobCnt = 10;
    boolean disabled = false;
    IJobClusterMetadata clusterMeta = new JobClusterMetadataImpl.Builder()
        .withJobClusterDefinition(clusterDefn)
        .withLastJobCount(lastJobCnt)
        .withIsDisabled(disabled)
        .build();
    // convert to the legacy NamedJob representation and verify each field
    NamedJob namedJob = DataFormatAdapter.convertJobClusterMetadataToNamedJob(clusterMeta);
    assertEquals(disabled, namedJob.getDisabled());
    assertEquals(clusterName, namedJob.getName());
    assertEquals(lastJobCnt, namedJob.getLastJobCount());
    assertEquals(1, namedJob.getLabels().size());
    assertEquals(label, namedJob.getLabels().get(0));
    assertEquals(owner, namedJob.getOwner());
    assertEquals(isReadyForMaster, namedJob.getIsReadyForJobMaster());
    assertEquals(WorkerMigrationConfig.DEFAULT, namedJob.getMigrationConfig());
    // assert parameters
    assertEquals(parameterList.size(), namedJob.getParameters().size());
    assertEquals(parameter, namedJob.getParameters().get(0));
    // assert sla
    assertEquals(sla.getMin(), namedJob.getSla().getMin());
    assertEquals(sla.getMax(), namedJob.getSla().getMax());
    // assert jar info
    assertEquals(1, namedJob.getJars().size());
    NamedJob.Jar jar = namedJob.getJars().get(0);
    assertEquals(uAt, jar.getUploadedAt());
    assertEquals(DEFAULT_SCHED_INFO, jar.getSchedulingInfo());
    assertEquals(version, jar.getVersion());
    assertEquals(artifactName, DataFormatAdapter.extractArtifactName(jar.getUrl()).orElse(""));
    // convert back and verify the round trip preserves every field checked above
    IJobClusterMetadata reconvertedJobCluster = DataFormatAdapter.convertNamedJobToJobClusterMetadata(namedJob);
    assertEquals(disabled, reconvertedJobCluster.isDisabled());
    assertEquals(clusterName, reconvertedJobCluster.getJobClusterDefinition().getName());
    assertEquals(lastJobCnt, reconvertedJobCluster.getLastJobCount());
    assertEquals(1, reconvertedJobCluster.getJobClusterDefinition().getLabels().size());
    assertEquals(label, reconvertedJobCluster.getJobClusterDefinition().getLabels().get(0));
    assertEquals(owner, reconvertedJobCluster.getJobClusterDefinition().getOwner());
    assertEquals(isReadyForMaster, reconvertedJobCluster.getJobClusterDefinition().getIsReadyForJobMaster());
    assertEquals(WorkerMigrationConfig.DEFAULT, reconvertedJobCluster.getJobClusterDefinition().getWorkerMigrationConfig());
    assertEquals(parameterList.size(), reconvertedJobCluster.getJobClusterDefinition().getParameters().size());
    assertEquals(parameter, reconvertedJobCluster.getJobClusterDefinition().getParameters().get(0));
    assertEquals(sla.getMin(), reconvertedJobCluster.getJobClusterDefinition().getSLA().getMin());
    assertEquals(sla.getMax(), reconvertedJobCluster.getJobClusterDefinition().getSLA().getMax());
    JobClusterConfig clusterConfig1 = reconvertedJobCluster.getJobClusterDefinition().getJobClusterConfig();
    assertEquals(uAt, clusterConfig1.getUploadedAt());
    assertEquals(DEFAULT_SCHED_INFO, clusterConfig1.getSchedulingInfo());
    assertEquals(version, clusterConfig1.getVersion());
    assertEquals(artifactName, clusterConfig1.getArtifactName());
}
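The round trip this test verifies reduces to two adapter calls. A compact sketch, reusing the clusterMeta instance built in the test above:
// Round trip: modern metadata -> legacy NamedJob -> modern metadata.
NamedJob legacy = DataFormatAdapter.convertJobClusterMetadataToNamedJob(clusterMeta);
IJobClusterMetadata restored = DataFormatAdapter.convertNamedJobToJobClusterMetadata(legacy);
// Fields such as name, lastJobCount, labels, owner, and SLA bounds survive the
// round trip, as asserted field by field in the test above.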
Use of io.mantisrx.master.jobcluster.IJobClusterMetadata in project mantis by Netflix.
The class SimpleCachedFileStorageProvider, method loadAllJobClusters.
@Override
public List<IJobClusterMetadata> loadAllJobClusters() {
    createDir(JOB_CLUSTERS_DIR);
    File jobClustersDir = new File(JOB_CLUSTERS_DIR);
    final List<IJobClusterMetadata> jobClusterMetadataList = new ArrayList<>();
    for (File jobClusterFile : jobClustersDir.listFiles()) {
        try (FileInputStream fis = new FileInputStream(jobClusterFile)) {
            jobClusterMetadataList.add(mapper.readValue(fis, JobClusterMetadataImpl.class));
        } catch (Exception e) {
            logger.error("skipped file {} due to exception when loading job cluster", jobClusterFile.getName(), e);
        }
    }
    return jobClusterMetadataList;
}
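Both storage methods shown here deserialize through a Jackson ObjectMapper field named mapper whose setup is not part of these excerpts. A plausible configuration, offered strictly as an assumption:
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

// Assumed setup for the "mapper" field used above; the real Mantis provider
// may register additional modules or use different deserialization settings.
private final ObjectMapper mapper = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);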