use of com.netflix.titus.api.jobmanager.model.job.Container in project titus-control-plane by Netflix.
the class DefaultTaintTolerationFactoryTest method newGpuJob.
/**
 * Creates a batch job fixture whose container requests one GPU.
 *
 * <p>The job starts from {@code JobGenerator.oneBatchJob()}; only the GPU value of its
 * container resources is replaced, everything else is copied through the builders.
 *
 * @return a copy of the generated batch job with the GPU resource set to 1
 */
private Job<BatchJobExt> newGpuJob() {
    Job<BatchJobExt> baseJob = JobGenerator.oneBatchJob();
    JobDescriptor<BatchJobExt> baseDescriptor = baseJob.getJobDescriptor();
    Container baseContainer = baseDescriptor.getContainer();

    // Rebuild from the innermost object outwards: resources -> container -> descriptor -> job.
    Container gpuContainer = baseContainer.toBuilder()
            .withContainerResources(baseContainer.getContainerResources().toBuilder().withGpu(1).build())
            .build();
    JobDescriptor<BatchJobExt> gpuDescriptor = baseDescriptor.toBuilder()
            .withContainer(gpuContainer)
            .build();

    return baseJob.toBuilder()
            .withJobDescriptor(gpuDescriptor)
            .build();
}
use of com.netflix.titus.api.jobmanager.model.job.Container in project titus-control-plane by Netflix.
the class V1SpecPodFactoryTest method testEFSMountsHandlesDuplicateVolumes.
/**
 * Three EFS mounts that point at the same server and relative path must be backed by a
 * single shared NFS volume, while each mount still gets its own volume mount entry with
 * its own read-only flag.
 */
@Test
public void testEFSMountsHandlesDuplicateVolumes() {
    final String sharedVolumeName = "1-2-3-4-relative-vol";

    Job<BatchJobExt> job = JobGenerator.oneBatchJob();
    BatchJobTask task = JobGenerator.oneBatchTask();

    // Same server and relative path on all three; only mount point and permission differ.
    List<EfsMount> efsMounts = Arrays.asList(
            new EfsMount("1.2.3.4", "/mountpoint", EfsMount.MountPerm.RO, "/relative"),
            new EfsMount("1.2.3.4", "/mountpoint2", EfsMount.MountPerm.RO, "/relative"),
            new EfsMount("1.2.3.4", "/mountpoint3", EfsMount.MountPerm.RW, "/relative")
    );

    Container originalContainer = job.getJobDescriptor().getContainer();
    ContainerResources originalResources = originalContainer.getContainerResources();
    // NOTE(review): newBuilder() is invoked through the instance here; confirm whether it
    // carries over the existing resource values or whether toBuilder() was intended.
    ContainerResources resourcesWithEfs = originalResources.newBuilder()
            .withEfsMounts(efsMounts)
            .build();
    Container containerWithEfs = originalContainer.toBuilder()
            .withContainerResources(resourcesWithEfs)
            .build();
    job = job.toBuilder()
            .withJobDescriptor(job.getJobDescriptor().toBuilder().withContainer(containerWithEfs).build())
            .build();

    // Stub the affinity factory for the final job instance before building the pod.
    when(podAffinityFactory.buildV1Affinity(job, task)).thenReturn(Pair.of(new V1Affinity(), new HashMap<>()));
    V1Pod pod = podFactory.buildV1Pod(job, task);

    // Part 1: There should only be *one* EFS volume to share
    List<V1Volume> podVolumes = pod.getSpec().getVolumes();
    // one for nfs, one for shm
    assertThat(podVolumes.size()).isEqualTo(2);
    V1Volume nfsVolume = podVolumes.get(0);
    assertThat(nfsVolume.getName()).isEqualTo(sharedVolumeName);
    assertThat(nfsVolume.getNfs().getServer()).isEqualTo("1.2.3.4");
    assertThat(nfsVolume.getNfs().getPath()).isEqualTo("/relative");
    // All NFS volumes that are generated like this should be RW, and
    // delegating the actual RO/RW state to the volume *mount*.
    assertThat(nfsVolume.getNfs().getReadOnly()).isEqualTo(false);

    // Part 2: there should be *3* volume mounts, all sharing the volume
    List<V1VolumeMount> containerMounts = pod.getSpec().getContainers().get(0).getVolumeMounts();
    // 3 for nfs, one for shm
    assertThat(containerMounts.size()).isEqualTo(4);

    // Expected mount path and read-only flag for the first three (NFS) mounts, in order.
    String[] expectedMountPaths = {"/mountpoint", "/mountpoint2", "/mountpoint3"};
    boolean[] expectedReadOnly = {true, true, false};
    for (int i = 0; i < expectedMountPaths.length; i++) {
        V1VolumeMount nfsMount = containerMounts.get(i);
        assertThat(nfsMount.getName()).isEqualTo(sharedVolumeName);
        assertThat(nfsMount.getMountPath()).isEqualTo(expectedMountPaths[i]);
        assertThat(nfsMount.getReadOnly()).isEqualTo(expectedReadOnly[i]);
    }
}
use of com.netflix.titus.api.jobmanager.model.job.Container in project titus-control-plane by Netflix.
the class V1SpecPodFactoryTest method testEFSMountsGetTransformedSafely.
/**
 * A single EFS mount whose relative path has a trailing slash must produce a well-formed
 * NFS volume and a matching volume mount on the first container of the pod spec.
 */
@Test
public void testEFSMountsGetTransformedSafely() {
    final String expectedVolumeName = "1-2-3-4-relative--vol";

    Job<BatchJobExt> job = JobGenerator.oneBatchJob();
    BatchJobTask task = JobGenerator.oneBatchTask();

    // Note the trailing slash in the relative path; it shows up as "--" in the volume name.
    EfsMount efsMount = new EfsMount("1.2.3.4", "/mountpoint", EfsMount.MountPerm.RO, "/relative/");

    Container originalContainer = job.getJobDescriptor().getContainer();
    ContainerResources originalResources = originalContainer.getContainerResources();
    // NOTE(review): newBuilder() is invoked through the instance here; confirm whether it
    // carries over the existing resource values or whether toBuilder() was intended.
    ContainerResources resourcesWithEfs = originalResources.newBuilder()
            .withEfsMounts(Collections.singletonList(efsMount))
            .build();
    Container containerWithEfs = originalContainer.toBuilder()
            .withContainerResources(resourcesWithEfs)
            .build();
    job = job.toBuilder()
            .withJobDescriptor(job.getJobDescriptor().toBuilder().withContainer(containerWithEfs).build())
            .build();

    // Stub the affinity factory for the final job instance before building the pod.
    when(podAffinityFactory.buildV1Affinity(job, task)).thenReturn(Pair.of(new V1Affinity(), new HashMap<>()));
    V1Pod pod = podFactory.buildV1Pod(job, task);

    // Part 1: the volume section needs to be well-formed
    List<V1Volume> podVolumes = pod.getSpec().getVolumes();
    // one for nfs, one for shm
    assertThat(podVolumes.size()).isEqualTo(2);
    V1Volume nfsVolume = podVolumes.get(0);
    assertThat(nfsVolume.getName()).isEqualTo(expectedVolumeName);
    assertThat(nfsVolume.getNfs().getServer()).isEqualTo("1.2.3.4");
    assertThat(nfsVolume.getNfs().getPath()).isEqualTo("/relative/");
    assertThat(nfsVolume.getNfs().getReadOnly()).isEqualTo(false);

    // Part 2: the volume mount section needs to be applied to the first container in the podspec
    List<V1VolumeMount> containerMounts = pod.getSpec().getContainers().get(0).getVolumeMounts();
    // one for nfs, one for shm
    assertThat(containerMounts.size()).isEqualTo(2);
    V1VolumeMount nfsMount = containerMounts.get(0);
    assertThat(nfsMount.getName()).isEqualTo(expectedVolumeName);
    assertThat(nfsMount.getMountPath()).isEqualTo("/mountpoint");
    assertThat(nfsMount.getReadOnly()).isEqualTo(true);
}
use of com.netflix.titus.api.jobmanager.model.job.Container in project titus-control-plane by Netflix.
the class TaskDocument method fromV3Task.
/**
 * Builds a {@link TaskDocument} from a V3 task and its job.
 *
 * <p>The document is filled from four sources: the job descriptor (container, image,
 * resources, job group, batch/service extension), the task itself (ids, state, message),
 * the task context map (agent placement attributes), and the task status history
 * (timestamps and the durations derived from them).
 *
 * @param task       task to convert
 * @param job        job the task belongs to; its descriptor supplies the job-level fields
 * @param dateFormat formatter handed to {@code doSafeDateFormat} for the timestamp fields
 * @param context    stored as-is in the document's {@code titusContext} field
 * @return the populated document
 */
public static TaskDocument fromV3Task(Task task, Job job, SimpleDateFormat dateFormat, Map<String, String> context) {
    TaskDocument doc = new TaskDocument();

    JobDescriptor descriptor = job.getJobDescriptor();
    Container container = descriptor.getContainer();
    Image image = container.getImage();
    ContainerResources resources = container.getContainerResources();
    JobGroupInfo groupInfo = descriptor.getJobGroupInfo();

    // ---- job descriptor / container level fields ----
    doc.name = descriptor.getApplicationName();
    doc.applicationName = image.getName();
    doc.appName = descriptor.getApplicationName();
    doc.user = descriptor.getOwner().getTeamEmail();
    doc.labels = sanitizeMap(container.getAttributes());
    doc.version = image.getTag();
    doc.digest = image.getDigest();
    doc.entryPoint = StringExt.concatenate(container.getEntryPoint(), " ");

    doc.cpu = resources.getCpu();
    doc.memory = resources.getMemoryMB();
    doc.networkMbps = resources.getNetworkMbps();
    doc.disk = resources.getDiskMB();
    doc.gpu = resources.getGpu();
    doc.shm = resources.getShmMB();
    doc.allocateIpAddress = resources.isAllocateIP();

    doc.env = sanitizeMap(container.getEnv());
    doc.iamProfile = container.getSecurityProfile().getIamRole();
    doc.securityGroups = container.getSecurityProfile().getSecurityGroups();
    // Only the constraint names are recorded, not their values.
    doc.softConstraints = new ArrayList<>(container.getSoftConstraints().keySet());
    doc.hardConstraints = new ArrayList<>(container.getHardConstraints().keySet());

    doc.capacityGroup = descriptor.getCapacityGroup();
    doc.jobGroupStack = groupInfo.getStack();
    doc.jobGroupDetail = groupInfo.getDetail();
    doc.jobGroupSequence = groupInfo.getSequence();

    // ---- batch vs. service specific fields ----
    JobDescriptor.JobDescriptorExt extensions = descriptor.getExtensions();
    if (extensions instanceof BatchJobExt) {
        BatchJobExt batchExt = (BatchJobExt) extensions;
        // NOTE(review): a value read from getRuntimeLimitMs() is stored in a field named
        // runtimeLimitSecs — confirm the intended unit with the document consumers.
        doc.runtimeLimitSecs = batchExt.getRuntimeLimitMs();
        doc.type = TitusJobType.batch;
        doc.inService = false;
        // A batch job has a single size, reported for all four instance counters.
        doc.instances = batchExt.getSize();
        doc.instancesMin = batchExt.getSize();
        doc.instancesMax = batchExt.getSize();
        doc.instancesDesired = batchExt.getSize();
        doc.retries = batchExt.getRetryPolicy().getRetries();
        doc.restartOnSuccess = false;
    } else if (extensions instanceof ServiceJobExt) {
        ServiceJobExt serviceExt = (ServiceJobExt) extensions;
        doc.runtimeLimitSecs = 0L;
        doc.type = TitusJobType.service;
        doc.inService = serviceExt.isEnabled();
        Capacity capacity = serviceExt.getCapacity();
        doc.instances = capacity.getDesired();
        doc.instancesMin = capacity.getMin();
        doc.instancesMax = capacity.getMax();
        doc.instancesDesired = capacity.getDesired();
        doc.retries = serviceExt.getRetryPolicy().getRetries();
        doc.restartOnSuccess = false;
    }

    // ---- task level fields ----
    Map<String, String> taskAttributes = task.getTaskContext();
    doc.id = task.getId();
    doc.instanceId = task.getId();
    doc.jobId = task.getJobId();
    doc.state = toV2TaskState(task.getStatus()).name();
    doc.jobLabels = sanitizeMap(descriptor.getAttributes());
    doc.host = taskAttributes.get(TASK_ATTRIBUTES_AGENT_HOST);
    doc.tier = taskAttributes.getOrDefault(TASK_ATTRIBUTES_TIER, "Unknown");
    doc.computedFields = new ComputedFields();

    // Placement attributes are copied only when present, so an absent attribute leaves
    // the corresponding document field at whatever value it already had.
    String agentRegion = taskAttributes.get(TASK_ATTRIBUTES_AGENT_REGION);
    if (agentRegion != null) {
        doc.region = agentRegion;
    }
    String agentZone = taskAttributes.get(TASK_ATTRIBUTES_AGENT_ZONE);
    if (agentZone != null) {
        doc.zone = agentZone;
    }
    String agentAsg = taskAttributes.get(TASK_ATTRIBUTES_AGENT_ASG);
    if (agentAsg != null) {
        doc.asg = agentAsg;
    }
    String agentInstanceType = taskAttributes.get(TASK_ATTRIBUTES_AGENT_ITYPE);
    if (agentInstanceType != null) {
        doc.instanceType = agentInstanceType;
    }
    String agentInstanceId = taskAttributes.get(TASK_ATTRIBUTES_AGENT_INSTANCE_ID);
    if (agentInstanceId != null) {
        doc.hostInstanceId = agentInstanceId;
    }
    String ipAllocationId = taskAttributes.get(TASK_ATTRIBUTES_IP_ALLOCATION_ID);
    if (ipAllocationId != null) {
        doc.ipAddressAllocationId = ipAllocationId;
    }
    extractNetworkConfigurationData(taskAttributes, doc);

    // ---- lifecycle timestamps and derived durations ----
    // A value of 0 means no status entry was found for that state.
    long acceptedAt = statusTimestampOrZero(task, TaskState.Accepted);
    long launchedAt = statusTimestampOrZero(task, TaskState.Launched);
    long startingAt = statusTimestampOrZero(task, TaskState.StartInitiated);
    long startedAt = statusTimestampOrZero(task, TaskState.Started);
    long completedAt = statusTimestampOrZero(task, TaskState.Finished);

    // NOTE(review): each duration below is computed whenever its end timestamp exists, even
    // if the start timestamp is 0 (state not found) — confirm this is the intended behavior.
    if (acceptedAt > 0) {
        doc.submittedAt = doSafeDateFormat(dateFormat, new Date(acceptedAt));
    }
    if (launchedAt > 0) {
        doc.launchedAt = doSafeDateFormat(dateFormat, new Date(launchedAt));
        doc.computedFields.msFromSubmittedToLaunched = launchedAt - acceptedAt;
    }
    if (startingAt > 0) {
        doc.startingAt = doSafeDateFormat(dateFormat, new Date(startingAt));
        doc.computedFields.msFromLaunchedToStarting = startingAt - launchedAt;
        doc.computedFields.msToStarting = startingAt - acceptedAt;
    }
    if (startedAt > 0) {
        doc.startedAt = doSafeDateFormat(dateFormat, new Date(startedAt));
        doc.computedFields.msFromStartingToStarted = startedAt - startingAt;
        doc.computedFields.msToStarted = startedAt - acceptedAt;
    }
    if (completedAt > 0) {
        doc.finishedAt = doSafeDateFormat(dateFormat, new Date(completedAt));
        doc.computedFields.msFromStartedToFinished = completedAt - startedAt;
        doc.computedFields.msToFinished = completedAt - acceptedAt;
    }

    doc.message = task.getStatus().getReasonMessage();
    doc.titusContext = context;
    return doc;
}

/**
 * Looks up the task's status entry for {@code state} via {@code findTaskStatus} and returns
 * its timestamp, or 0 when no such entry is found.
 */
private static long statusTimestampOrZero(Task task, TaskState state) {
    return findTaskStatus(task, state).map(ExecutableStatus::getTimestamp).orElse(0L);
}
use of com.netflix.titus.api.jobmanager.model.job.Container in project titus-control-plane by Netflix.
the class V1SpecPodFactory method createV1SchemaPodAnnotations.
/**
 * Assembles the annotation map for a V1-schema pod built from the given job and task.
 *
 * <p>Annotations are written in a fixed order: job/workload identity, network and security
 * settings, optional job/task attributes, translated container attributes, and finally the
 * annotations contributed by the S3 writer role, EBS, performance tool and platform sidecar
 * helpers. The order matters because a later write replaces an earlier one for the same key.
 *
 * @param job  job whose descriptor and container supply most of the annotations
 * @param task task whose context may supply a static IP allocation id
 * @return a new mutable map of annotation key to value
 */
Map<String, String> createV1SchemaPodAnnotations(Job<?> job, Task task) {
    com.netflix.titus.api.jobmanager.model.job.JobDescriptor<?> jobDescriptor = job.getJobDescriptor();
    Container container = jobDescriptor.getContainer();
    Map<String, String> annotations = new HashMap<>();

    // Job identity
    annotations.put(POD_SCHEMA_VERSION, "1");
    annotations.put(JOB_ID, job.getId());
    annotations.put(JOB_TYPE, getJobType(job).name());

    // Workload identity
    JobGroupInfo groupInfo = jobDescriptor.getJobGroupInfo();
    annotations.put(WORKLOAD_NAME, jobDescriptor.getApplicationName());
    annotations.put(WORKLOAD_STACK, groupInfo.getStack());
    annotations.put(WORKLOAD_DETAIL, groupInfo.getDetail());
    annotations.put(WORKLOAD_SEQUENCE, groupInfo.getSequence());
    annotations.put(WORKLOAD_OWNER_EMAIL, jobDescriptor.getOwner().getTeamEmail());

    // The accepted timestamp is only recorded when an Accepted status is found for the job.
    JobFunctions.findJobStatus(job, JobState.Accepted).ifPresent(acceptedStatus ->
            annotations.put(JOB_ACCEPTED_TIMESTAMP_MS, String.valueOf(acceptedStatus.getTimestamp())));

    // The same bandwidth value is used for both directions.
    ContainerResources resources = container.getContainerResources();
    String bandwidth = resources.getNetworkMbps() + "M";
    annotations.put(EGRESS_BANDWIDTH, bandwidth);
    annotations.put(INGRESS_BANDWIDTH, bandwidth);

    // Security profile
    SecurityProfile securityProfile = container.getSecurityProfile();
    annotations.put(NETWORK_SECURITY_GROUPS, StringExt.concatenate(securityProfile.getSecurityGroups(), ","));
    annotations.put(IAM_ROLE, securityProfile.getIamRole());
    Evaluators.acceptNotNull(
            securityProfile.getAttributes().get(ATTRIBUTE_NETFLIX_APP_METADATA),
            appMetadata -> annotations.put(SECURITY_APP_METADATA, appMetadata));
    Evaluators.acceptNotNull(
            securityProfile.getAttributes().get(ATTRIBUTE_NETFLIX_APP_METADATA_SIG),
            appMetadataSignature -> annotations.put(SECURITY_APP_METADATA_SIG, appMetadataSignature));

    // Optional job / task level values
    Evaluators.acceptNotNull(
            jobDescriptor.getAttributes().get(JobAttributes.JOB_ATTRIBUTES_RUNTIME_PREDICTION_SEC),
            runtimeInSec -> annotations.put(KubeConstants.JOB_RUNTIME_PREDICTION, runtimeInSec + "s"));
    Evaluators.acceptNotNull(
            task.getTaskContext().get(TaskAttributes.TASK_ATTRIBUTES_IP_ALLOCATION_ID),
            id -> annotations.put(KubeConstants.STATIC_IP_ALLOCATION_ID, id));
    Evaluators.acceptNotNull(
            jobDescriptor.getNetworkConfiguration().getNetworkModeName(),
            modeName -> annotations.put(KubeConstants.NETWORK_MODE, modeName));

    // convert container attributes into annotations
    container.getAttributes().forEach((name, value) -> putContainerAttributeAnnotation(annotations, name, value));

    appendS3WriterRole(annotations, job, task);
    annotations.putAll(createEbsPodAnnotations(job, task));
    annotations.putAll(PerformanceToolUtil.toAnnotations(job));
    annotations.putAll(createPlatformSidecarAnnotations(job));
    return annotations;
}

/**
 * Translates one container attribute into a pod annotation.
 *
 * <p>Attributes with an empty name or value, or whose name does not start with
 * {@code TITUS_PARAMETER_AGENT_PREFIX}, are skipped. Recognized names are written under
 * their dedicated annotation key; any other prefixed attribute is copied under its own name.
 */
private void putContainerAttributeAnnotation(Map<String, String> annotations, String k, String v) {
    boolean eligible = !StringExt.isEmpty(k) && !StringExt.isEmpty(v) && k.startsWith(TITUS_PARAMETER_AGENT_PREFIX);
    if (!eligible) {
        return;
    }
    switch (k) {
        case JOB_PARAMETER_ATTRIBUTES_ALLOW_CPU_BURSTING:
            annotations.put(POD_CPU_BURSTING_ENABLED, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_ALLOW_NETWORK_BURSTING:
            annotations.put(NETWORK_BURSTING_ENABLED, v);
            break;
        case JOB_PARAMETER_ATTRIBUTE_EIP_POOL:
            annotations.put(NETWORK_ELASTIC_IP_POOL, v);
            break;
        case JOB_PARAMETER_ATTRIBUTE_EIPS:
            annotations.put(NETWORK_ELASTIC_IPS, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_SCHED_BATCH:
            // The attribute value is not used here; the policy is always written as "batch".
            annotations.put(POD_SCHED_POLICY, "batch");
            break;
        case JOB_CONTAINER_ATTRIBUTE_SUBNETS:
            annotations.put(NETWORK_SUBNET_IDS, v);
            break;
        case JOB_CONTAINER_ATTRIBUTE_ACCOUNT_ID:
            annotations.put(NETWORK_ACCOUNT_ID, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_HOSTNAME_STYLE:
            annotations.put(POD_HOSTNAME_STYLE, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_ALLOW_NETWORK_JUMBO:
            annotations.put(NETWORK_JUMBO_FRAMES_ENABLED, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_FUSE_ENABLED:
            annotations.put(POD_FUSE_ENABLED, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_ASSIGN_IPV6_ADDRESS:
            annotations.put(NETWORK_ASSIGN_IVP6_ADDRESS, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_LOG_UPLOAD_CHECK_INTERVAL:
            annotations.put(LOG_UPLOAD_CHECK_INTERVAL, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_LOG_STDIO_CHECK_INTERVAL:
            annotations.put(LOG_STDIO_CHECK_INTERVAL, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_LOG_UPLOAD_THRESHOLD_TIME:
            annotations.put(LOG_UPLOAD_THRESHOLD_TIME, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_LOG_KEEP_LOCAL_FILE_AFTER_UPLOAD:
            annotations.put(LOG_KEEP_LOCAL_FILE, v);
            break;
        case JOB_PARAMETER_ATTRIBUTES_LOG_UPLOAD_REGEXP:
            annotations.put(LOG_UPLOAD_REGEXP, v);
            break;
        case JOB_CONTAINER_ATTRIBUTE_S3_PATH_PREFIX:
            annotations.put(LOG_S3_PATH_PREFIX, v);
            break;
        case JOB_CONTAINER_ATTRIBUTE_SECCOMP_AGENT_PERF_ENABLED:
            annotations.put(POD_SECCOMP_AGENT_PERF_ENABLED, v);
            break;
        case JOB_CONTAINER_ATTRIBUTE_SECCOMP_AGENT_NET_ENABLED:
            annotations.put(POD_SECCOMP_AGENT_NET_ENABLED, v);
            break;
        case JOB_CONTAINER_ATTRIBUTE_IMDS_REQUIRE_TOKEN:
            annotations.put(NETWORK_IMDS_REQUIRE_TOKEN, v);
            break;
        default:
            // Unrecognized prefixed attributes pass through unchanged.
            annotations.put(k, v);
            break;
    }
}
Aggregations