Use of com.hartwig.pipeline.execution.PipelineStatus in project pipeline5 by hartwigmedical.
The class StageRunner, method run.
public <T extends StageOutput> T run(final M metadata, final Stage<T, M> stage) {
    final List<BashCommand> commands = commands(mode, metadata, stage);
    if (stage.shouldRun(arguments) && !commands.isEmpty()) {
        if (!startingPoint.usePersisted(stage.namespace())) {
            StageTrace trace = new StageTrace(stage.namespace(), metadata.name(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
            RuntimeBucket bucket = RuntimeBucket.from(storage, stage.namespace(), metadata, arguments, labels);
            BashStartupScript bash = BashStartupScript.of(bucket.name());
            bash.addCommands(stage.inputs())
                    .addCommands(OverrideReferenceGenomeCommand.overrides(arguments))
                    .addCommands(commands)
                    .addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
            // Submission is wrapped in a Failsafe retry with backoff, so transient compute-engine
            // failures are retried rather than failing the stage immediately.
            PipelineStatus status = Failsafe.with(DefaultBackoffPolicy.of(String.format("[%s] stage [%s]", metadata.name(), stage.namespace())))
                    .get(() -> computeEngine.submit(bucket, stage.vmDefinition(bash, resultsDirectory)));
            trace.stop();
            return stage.output(metadata, status, bucket, resultsDirectory);
        }
        // Starting point says this stage's output already exists from a prior run.
        return stage.persistedOutput(metadata);
    }
    return stage.skippedOutput(metadata);
}
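DefaultBackoffPolicy is pipeline5's own factory; assuming it wraps the Failsafe library's RetryPolicy (the delays and retry count below are illustrative assumptions, not the project's values), a roughly equivalent raw policy might look like this:

import java.time.temporal.ChronoUnit;
import com.hartwig.pipeline.execution.PipelineStatus;
import net.jodah.failsafe.Failsafe;
import net.jodah.failsafe.RetryPolicy;

// Hypothetical stand-in for DefaultBackoffPolicy.of(tag): retry on any exception with
// exponential backoff from 1s up to 60s between attempts, at most 5 retries (assumed values).
final class BackoffSketch {
    static RetryPolicy<PipelineStatus> backoffPolicy() {
        return new RetryPolicy<PipelineStatus>()
                .withBackoff(1, 60, ChronoUnit.SECONDS)
                .withMaxRetries(5);
    }
}

Failsafe.with(policy).get(supplier) then re-invokes computeEngine.submit until it returns normally or retries are exhausted, which is how the quota-exceeded RuntimeException thrown by GoogleComputeEngine.submit (last snippet below) gets retried rather than aborting the stage.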
Use of com.hartwig.pipeline.execution.PipelineStatus in project pipeline5 by hartwigmedical.
The class SageCaller, method outputBuilder.
protected ImmutableSageOutput.Builder outputBuilder(final SomaticRunMetadata metadata, final PipelineStatus jobStatus,
        final RuntimeBucket bucket, final ResultsDirectory resultsDirectory) {
    final String filteredOutputFile = sageConfiguration.filteredTemplate().apply(metadata);
    final String unfilteredOutputFile = sageConfiguration.unfilteredTemplate().apply(metadata);
    final String geneCoverageFile = sageConfiguration.geneCoverageTemplate().apply(metadata);
    final Optional<String> somaticRefSampleBqrPlot = referenceSampleBqrPlot(metadata);
    final Optional<String> somaticTumorSampleBqrPlot = tumorSampleBqrPlot(metadata);
    final ImmutableSageOutput.Builder builder = SageOutput.builder(namespace()).status(jobStatus);
    somaticRefSampleBqrPlot.ifPresent(s -> builder.maybeSomaticRefSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.reference().sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.reference().sampleName())));
    somaticTumorSampleBqrPlot.ifPresent(s -> builder.maybeSomaticTumorSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.tumor().sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.tumor().sampleName())));
    return builder.addFailedLogLocations(GoogleStorageLocation.of(bucket.name(), RunLogComponent.LOG_FILE))
            .maybeGermlineGeneCoverage(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(geneCoverageFile)))
            .maybeSomaticTumorSampleBqrPlot(somaticTumorSampleBqrPlot.map(t -> GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(t))))
            .maybeVariants(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(filteredOutputFile)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.sampleName()))
            .addReportComponents(vcfComponent(unfilteredOutputFile, bucket, resultsDirectory))
            .addReportComponents(vcfComponent(filteredOutputFile, bucket, resultsDirectory))
            .addReportComponents(singleFileComponent(geneCoverageFile, bucket, resultsDirectory))
            .addReportComponents(new RunLogComponent(bucket, namespace(), Folder.root(), resultsDirectory))
            .addReportComponents(new StartupScriptComponent(bucket, namespace(), Folder.root()))
            .addDatatypes(new AddDatatype(sageConfiguration.vcfDatatype(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), filteredOutputFile)))
            .addDatatypes(new AddDatatype(sageConfiguration.geneCoverageDatatype(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), geneCoverageFile)))
            // NB: the datatype pairing below crosses over (ref plot -> tumorSampleBqrPlot(),
            // tumor plot -> refSampleBqrPlot()); it looks like a latent swap but is reproduced as-is.
            .addAllDatatypes(somaticRefSampleBqrPlot.stream()
                    .map(r -> new AddDatatype(sageConfiguration.tumorSampleBqrPlot(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), r)))
                    .collect(Collectors.toList()))
            .addAllDatatypes(somaticTumorSampleBqrPlot.stream()
                    .map(t -> new AddDatatype(sageConfiguration.refSampleBqrPlot(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), t)))
                    .collect(Collectors.toList()));
}
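The builder is assembled conditionally: each Optional plot contributes its storage location and report components only when present. A minimal self-contained sketch of that ifPresent-driven pattern (hypothetical Report type, not pipeline5 code):

import java.util.Optional;

// An absent Optional contributes nothing to the builder; a present one adds both its
// location and its derived components, exactly as the BQR plots do above.
final class Report {
    static final class Builder {
        private final StringBuilder parts = new StringBuilder();
        Builder add(final String part) {
            parts.append(part).append('\n');
            return this;
        }
        String build() {
            return parts.toString();
        }
    }

    public static void main(final String[] args) {
        Optional<String> refPlot = Optional.of("ref.bqr.png");
        Optional<String> tumorPlot = Optional.empty();
        Builder builder = new Builder();
        refPlot.ifPresent(p -> builder.add("location: " + p).add("component: " + p + ".tsv"));
        tumorPlot.ifPresent(p -> builder.add("location: " + p)); // no-op: tumor plot absent
        System.out.print(builder.build()); // prints only the ref entries
    }
}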
Use of com.hartwig.pipeline.execution.PipelineStatus in project pipeline5 by hartwigmedical.
The class BwaAligner, method run.
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
    StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
    RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
    Sample sample = sampleSource.sample(metadata);
    if (sample.bam().isPresent()) {
        // A BAM was provided directly; split its gs://bucket/path URL and skip alignment entirely.
        String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
        int firstSlash = noPrefix.indexOf("/");
        String bucket = noPrefix.substring(0, firstSlash);
        String path = noPrefix.substring(firstSlash + 1);
        return AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(PipelineStatus.PROVIDED)
                .maybeAlignments(GoogleStorageLocation.of(bucket, path))
                .build();
    }
    final ResourceFiles resourceFiles = buildResourceFiles(arguments);
    sampleUpload.run(sample, rootBucket);
    List<Future<PipelineStatus>> futures = new ArrayList<>();
    List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
    List<ReportComponent> laneLogComponents = new ArrayList<>();
    List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
    // Submit one alignment VM per lane in parallel; each writes its BAM to a lane-specific bucket.
    for (Lane lane : sample.lanes()) {
        RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
        BashStartupScript bash = BashStartupScript.of(laneBucket.name());
        InputDownload first = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
        InputDownload second = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
        bash.addCommand(first).addCommand(second);
        bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
        SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(),
                resourceFiles.refGenomeFile(),
                first.getLocalTargetPath(),
                second.getLocalTargetPath(),
                metadata.sampleName(),
                lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
        perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
        bash.addCommands(alignment.bash())
                .addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
        futures.add(executorService.submit(() -> runWithRetries(metadata,
                laneBucket,
                VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
        laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
        laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
    }
    AlignmentOutput output;
    if (lanesSuccessfullyComplete(futures)) {
        // All lanes aligned; merge the per-lane BAMs and mark duplicates on a single VM.
        List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
        BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
        laneBams.forEach(mergeMarkdupsBash::addCommand);
        SubStageInputOutput merged = new MergeMarkDups(laneBams.stream()
                .map(InputDownload::getLocalTargetPath)
                .filter(path -> path.endsWith("bam"))
                .collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
        mergeMarkdupsBash.addCommands(merged.bash());
        mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
        PipelineStatus status = runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
        ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(status)
                .maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName())))
                .addAllReportComponents(laneLogComponents)
                .addAllFailedLogLocations(laneFailedLogs)
                .addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE))
                .addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
        if (!arguments.outputCram()) {
            outputBuilder.addReportComponents(
                            new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata),
                                    bam(metadata.sampleName()), bam(metadata.sampleName()), resultsDirectory),
                            new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata),
                                    bai(bam(metadata.sampleName())), bai(bam(metadata.sampleName())), resultsDirectory))
                    .addDatatypes(
                            new AddDatatype(DataType.ALIGNED_READS, metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))),
                            // bai(bam(...)) so the datatype path matches the .bam.bai file archived above
                            new AddDatatype(DataType.ALIGNED_READS_INDEX, metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(bam(metadata.sampleName())))));
        }
        output = outputBuilder.build();
    } else {
        output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
    }
    trace.stop();
    executorService.shutdown();
    return output;
}
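lanesSuccessfullyComplete is called above but not shown; a plausible implementation (an assumption, not the verified pipeline5 source) blocks on each lane's future and reports success only when every per-lane VM returned SUCCESS:

// Assumed helper: Future.get() blocks until the lane VM finishes, so this gates the
// merge-and-markdups stage on all parallel lane alignments completing successfully.
private boolean lanesSuccessfullyComplete(final List<Future<PipelineStatus>> futures) throws Exception {
    for (Future<PipelineStatus> future : futures) {
        if (future.get() != PipelineStatus.SUCCESS) {
            return false;
        }
    }
    return true;
}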
Use of com.hartwig.pipeline.execution.PipelineStatus in project pipeline5 by hartwigmedical.
The class QuotaConstrainedComputeEngineTest, method ensuresVmsDontExceedMaxCPUForRegion.
@Test
public void ensuresVmsDontExceedMaxCPUForRegion() throws Exception {
    ComputeEngine decorated = mock(ComputeEngine.class);
    ServiceUsage serviceUsage = mock(ServiceUsage.class);
    ServiceUsage.Services services = mock(ServiceUsage.Services.class);
    ServiceUsage.Services.ConsumerQuotaMetrics consumerQuotaMetrics = mock(ServiceUsage.Services.ConsumerQuotaMetrics.class);
    ServiceUsage.Services.ConsumerQuotaMetrics.Limits limits = mock(ServiceUsage.Services.ConsumerQuotaMetrics.Limits.class);
    ServiceUsage.Services.ConsumerQuotaMetrics.Limits.Get limitsGet = mock(ServiceUsage.Services.ConsumerQuotaMetrics.Limits.Get.class);
    when(serviceUsage.services()).thenReturn(services);
    when(services.consumerQuotaMetrics()).thenReturn(consumerQuotaMetrics);
    when(consumerQuotaMetrics.limits()).thenReturn(limits);
    ArgumentCaptor<String> quotaName = ArgumentCaptor.forClass(String.class);
    when(limits.get(quotaName.capture())).thenReturn(limitsGet);
    ConsumerQuotaLimit limit = new ConsumerQuotaLimit().setQuotaBuckets(List.of(new QuotaBucket().setEffectiveLimit(10L).setDimensions(Map.of(REGION, REGION))));
    when(limitsGet.execute()).thenReturn(limit);
    VirtualMachineJobDefinition jobDefinition = VirtualMachineJobDefinition.builder()
            .name("test")
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .startupCommand(BashStartupScript.of("empty"))
            .performanceProfile(VirtualMachinePerformanceProfile.custom(10, 10))
            .build();
    ArgumentCaptor<VirtualMachineJobDefinition> constrained = ArgumentCaptor.forClass(VirtualMachineJobDefinition.class);
    when(decorated.submit(any(), constrained.capture(), any())).thenReturn(PipelineStatus.SUCCESS);
    QuotaConstrainedComputeEngine victim = new QuotaConstrainedComputeEngine(decorated, serviceUsage, REGION, PROJECT, 0.6);
    PipelineStatus result = victim.submit(MockRuntimeBucket.test().getRuntimeBucket(), jobDefinition);
    assertThat(result).isEqualTo(PipelineStatus.SUCCESS);
    // The 10-CPU request must be constrained to 0.6 * quota(10) = 6 CPUs, with memory scaled to match.
    MachineType machineType = constrained.getValue().performanceProfile().machineType();
    assertThat(machineType.cpus()).isEqualTo(6);
    assertThat(machineType.memoryGB()).isEqualTo(6);
}
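A sketch of the scaling rule this test implies (an inference, not the verified decorator implementation): cap requested CPUs at ratio * regional CPU quota and scale memory to preserve the requested GB-per-CPU ratio.

// Self-contained illustration of the assumed constraint logic; the real decorator works
// on VirtualMachinePerformanceProfile, this uses plain ints for clarity.
final class QuotaScaling {
    static int[] constrain(final int cpus, final int memoryGB, final long effectiveLimit, final double ratio) {
        int cap = (int) (ratio * effectiveLimit); // 0.6 * 10 = 6
        if (cpus <= cap) {
            return new int[] { cpus, memoryGB };
        }
        double gbPerCpu = (double) memoryGB / cpus;
        return new int[] { cap, (int) (cap * gbPerCpu) }; // custom(10, 10) -> 6 CPUs, 6 GB
    }

    public static void main(final String[] args) {
        int[] scaled = constrain(10, 10, 10L, 0.6);
        System.out.println(scaled[0] + " cpus, " + scaled[1] + " GB"); // 6 cpus, 6 GB
    }
}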
Use of com.hartwig.pipeline.execution.PipelineStatus in project pipeline5 by hartwigmedical.
The class GoogleComputeEngine, method submit.
public PipelineStatus submit(final RuntimeBucket bucket, final VirtualMachineJobDefinition jobDefinition, final String discriminator) {
    String vmName = format("%s%s-%s", bucket.runId(), discriminator.isEmpty() ? "" : "-" + discriminator, jobDefinition.name());
    RuntimeFiles flags = RuntimeFiles.of(discriminator);
    PipelineStatus status = PipelineStatus.FAILED;
    try {
        // Check the bucket for success/failure flags left behind by a previous run of this job.
        BucketCompletionWatcher.State currentState = bucketWatcher.currentState(bucket, flags);
        if (currentState == BucketCompletionWatcher.State.SUCCESS) {
            LOGGER.info("Compute engine job [{}] already exists, and succeeded. Skipping job.", vmName);
            return PipelineStatus.SKIPPED;
        } else if (currentState == BucketCompletionWatcher.State.FAILURE) {
            LOGGER.info("Compute engine job [{}] already exists, but failed. Deleting state and restarting.", vmName);
            bucket.delete(flags.failure());
            bucket.delete(jobDefinition.namespacedResults().path());
        }
        String project = arguments.project();
        List<Zone> zones = fetchZones();
        zoneRandomizer.accept(zones);
        int index = 0;
        boolean keepTrying = !zones.isEmpty();
        // Cycle through zones until the job completes, retrying after preemption or zone exhaustion.
        while (keepTrying) {
            Zone currentZone = zones.get(index % zones.size());
            Instance instance = lifecycleManager.newInstance();
            instance.setName(vmName);
            instance.setZone(currentZone.getName());
            instance.setTags(new Tags().setItems(arguments.tags()));
            if (arguments.usePreemptibleVms()) {
                instance.setScheduling(new Scheduling().setPreemptible(true));
            }
            instance.setMachineType(machineType(currentZone.getName(), jobDefinition.performanceProfile().uri(), project));
            final Map<String, String> labelMap = labels.asMap(List.of(Map.entry("job_name", jobDefinition.name())));
            instance.setLabels(labelMap);
            addServiceAccount(instance);
            Image image = attachDisks(compute,
                    instance,
                    jobDefinition,
                    project,
                    vmName,
                    currentZone.getName(),
                    arguments.imageName().isPresent()
                            ? compute.images().get(arguments.imageProject().orElse(VirtualMachineJobDefinition.HMF_IMAGE_PROJECT), arguments.imageName().get()).execute()
                            : resolveLatestImage(compute, jobDefinition.imageFamily(), arguments.imageProject().orElse(project)),
                    labelMap);
            LOGGER.info("Submitting compute engine job [{}] using image [{}] in zone [{}]", vmName, image.getName(), currentZone.getName());
            String startupScript = arguments.useLocalSsds()
                    ? jobDefinition.startupCommand().asUnixString(new LocalSsdStorageStrategy(jobDefinition.localSsdCount()))
                    : jobDefinition.startupCommand().asUnixString(new PersistentStorageStrategy());
            addStartupCommand(instance, bucket, flags, startupScript);
            addNetworkInterface(instance, project);
            Operation result = lifecycleManager.deleteOldInstancesAndStart(instance, currentZone.getName(), vmName);
            if (result.getError() == null) {
                LOGGER.debug("Successfully initialised [{}]", vmName);
                status = waitForCompletion(bucket, flags, currentZone, instance);
                if (status != PipelineStatus.PREEMPTED) {
                    if (arguments.useLocalSsds()) {
                        // Instances with local SSDs cannot be stopped or restarted
                        lifecycleManager.delete(currentZone.getName(), vmName);
                    } else {
                        lifecycleManager.stop(currentZone.getName(), vmName);
                        if (status == PipelineStatus.SUCCESS) {
                            lifecycleManager.delete(currentZone.getName(), vmName);
                        } else {
                            lifecycleManager.disableStartupScript(currentZone.getName(), instance.getName());
                        }
                    }
                    LOGGER.info("Compute engine job [{}] is complete with status [{}]", vmName, status);
                    keepTrying = false;
                } else {
                    LOGGER.info("Instance [{}] in [{}] was pre-empted", vmName, currentZone.getName());
                }
            } else if (anyErrorMatch(result, ZONE_EXHAUSTED_ERROR_CODE)) {
                LOGGER.warn("Zone [{}] has insufficient resources to fulfill the request for [{}]. Trying next zone", currentZone.getName(), vmName);
            } else if (anyErrorMatch(result, UNSUPPORTED_OPERATION_ERROR_CODE)) {
                LOGGER.warn("Received unsupported operation from GCE for [{}], this likely means the instance was pre-empted before it could start, or another operation has yet to complete. Trying next zone.", vmName);
            } else if (anyErrorMatch(result, QUOTA_EXCEEDED)) {
                // Thrown rather than handled here, presumably so the Failsafe backoff policy in
                // StageRunner can retry the whole submission once quota frees up.
                throw new RuntimeException(String.format("Quota exceeded for [%s], will keep trying until resources are available. Quota [%s]", vmName, result.getError().getErrors().get(0).getMessage()));
            } else {
                throw new RuntimeException(result.getError().toPrettyString());
            }
            index++;
        }
    } catch (IOException e) {
        String message = format("An error occurred running job on compute engine [%s]", vmName);
        LOGGER.error(message, e);
        return PipelineStatus.FAILED;
    }
    return status;
}
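anyErrorMatch is used throughout the zone loop but not shown; a plausible implementation (an assumption, not the verified pipeline5 source) scans the operation's error list for a matching code:

// Assumed helper over com.google.api.services.compute.model.Operation: true when any error
// in the operation carries the given code (e.g. a zone-exhausted or quota-exceeded code).
private static boolean anyErrorMatch(final Operation result, final String errorCode) {
    return result.getError() != null && result.getError().getErrors() != null
            && result.getError().getErrors().stream().anyMatch(error -> errorCode.equals(error.getCode()));
}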