use of com.hartwig.pipeline.storage.RuntimeBucket in project pipeline5 by hartwigmedical.
the class StageRunner method run.
public <T extends StageOutput> T run(final M metadata, final Stage<T, M> stage) {
final List<BashCommand> commands = commands(mode, metadata, stage);
if (stage.shouldRun(arguments) && !commands.isEmpty()) {
if (!startingPoint.usePersisted(stage.namespace())) {
StageTrace trace = new StageTrace(stage.namespace(), metadata.name(), StageTrace.ExecutorType.COMPUTE_ENGINE);
RuntimeBucket bucket = RuntimeBucket.from(storage, stage.namespace(), metadata, arguments, labels);
BashStartupScript bash = BashStartupScript.of(bucket.name());
bash.addCommands(stage.inputs()).addCommands(OverrideReferenceGenomeCommand.overrides(arguments)).addCommands(commands).addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
PipelineStatus status = Failsafe.with(DefaultBackoffPolicy.of(String.format("[%s] stage [%s]", metadata.name(), stage.namespace()))).get(() -> computeEngine.submit(bucket, stage.vmDefinition(bash, resultsDirectory)));
trace.stop();
return stage.output(metadata, status, bucket, resultsDirectory);
}
return stage.persistedOutput(metadata);
}
return stage.skippedOutput(metadata);
}
use of com.hartwig.pipeline.storage.RuntimeBucket in project pipeline5 by hartwigmedical.
the class SageCaller method outputBuilder.
protected ImmutableSageOutput.Builder outputBuilder(final SomaticRunMetadata metadata, final PipelineStatus jobStatus, final RuntimeBucket bucket, final ResultsDirectory resultsDirectory) {
final String filteredOutputFile = sageConfiguration.filteredTemplate().apply(metadata);
final String unfilteredOutputFile = sageConfiguration.unfilteredTemplate().apply(metadata);
final String geneCoverageFile = sageConfiguration.geneCoverageTemplate().apply(metadata);
final Optional<String> somaticRefSampleBqrPlot = referenceSampleBqrPlot(metadata);
final Optional<String> somaticTumorSampleBqrPlot = tumorSampleBqrPlot(metadata);
final ImmutableSageOutput.Builder builder = SageOutput.builder(namespace()).status(jobStatus);
somaticRefSampleBqrPlot.ifPresent(s -> builder.maybeSomaticRefSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s))).addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.reference().sampleName())).addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.reference().sampleName())));
somaticTumorSampleBqrPlot.ifPresent(s -> builder.maybeSomaticTumorSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s))).addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.tumor().sampleName())).addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.tumor().sampleName())));
return builder.addFailedLogLocations(GoogleStorageLocation.of(bucket.name(), RunLogComponent.LOG_FILE)).maybeGermlineGeneCoverage(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(geneCoverageFile))).maybeSomaticTumorSampleBqrPlot(somaticTumorSampleBqrPlot.map(t -> GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(t)))).maybeVariants(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(filteredOutputFile))).addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.sampleName())).addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.sampleName())).addReportComponents(vcfComponent(unfilteredOutputFile, bucket, resultsDirectory)).addReportComponents(vcfComponent(filteredOutputFile, bucket, resultsDirectory)).addReportComponents(singleFileComponent(geneCoverageFile, bucket, resultsDirectory)).addReportComponents(new RunLogComponent(bucket, namespace(), Folder.root(), resultsDirectory)).addReportComponents(new StartupScriptComponent(bucket, namespace(), Folder.root())).addDatatypes(new AddDatatype(sageConfiguration.vcfDatatype(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), filteredOutputFile))).addDatatypes(new AddDatatype(sageConfiguration.geneCoverageDatatype(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), geneCoverageFile))).addAllDatatypes(somaticRefSampleBqrPlot.stream().map(r -> new AddDatatype(sageConfiguration.tumorSampleBqrPlot(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), r))).collect(Collectors.toList())).addAllDatatypes(somaticTumorSampleBqrPlot.stream().map(t -> new AddDatatype(sageConfiguration.refSampleBqrPlot(), metadata.barcode(), new ArchivePath(Folder.root(), namespace(), t))).collect(Collectors.toList()));
}
use of com.hartwig.pipeline.storage.RuntimeBucket in project pipeline5 by hartwigmedical.
the class BwaAligner method run.
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
Sample sample = sampleSource.sample(metadata);
if (sample.bam().isPresent()) {
String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
int firstSlash = noPrefix.indexOf("/");
String bucket = noPrefix.substring(0, firstSlash);
String path = noPrefix.substring(firstSlash + 1);
return AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.PROVIDED).maybeAlignments(GoogleStorageLocation.of(bucket, path)).build();
}
final ResourceFiles resourceFiles = buildResourceFiles(arguments);
sampleUpload.run(sample, rootBucket);
List<Future<PipelineStatus>> futures = new ArrayList<>();
List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
List<ReportComponent> laneLogComponents = new ArrayList<>();
List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
for (Lane lane : sample.lanes()) {
RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
BashStartupScript bash = BashStartupScript.of(laneBucket.name());
InputDownload first = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
InputDownload second = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
bash.addCommand(first).addCommand(second);
bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(), resourceFiles.refGenomeFile(), first.getLocalTargetPath(), second.getLocalTargetPath(), metadata.sampleName(), lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
bash.addCommands(alignment.bash()).addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
futures.add(executorService.submit(() -> runWithRetries(metadata, laneBucket, VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
}
AlignmentOutput output;
if (lanesSuccessfullyComplete(futures)) {
List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
laneBams.forEach(mergeMarkdupsBash::addCommand);
SubStageInputOutput merged = new MergeMarkDups(laneBams.stream().map(InputDownload::getLocalTargetPath).filter(path -> path.endsWith("bam")).collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
mergeMarkdupsBash.addCommands(merged.bash());
mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
PipelineStatus status = runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder().sample(metadata.sampleName()).status(status).maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName()))).addAllReportComponents(laneLogComponents).addAllFailedLogLocations(laneFailedLogs).addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE)).addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
if (!arguments.outputCram()) {
outputBuilder.addReportComponents(new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), bam(metadata.sampleName()), bam(metadata.sampleName()), resultsDirectory), new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), bai(bam(metadata.sampleName())), bai(bam(metadata.sampleName())), resultsDirectory)).addDatatypes(new AddDatatype(DataType.ALIGNED_READS, metadata.barcode(), new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))), new AddDatatype(DataType.ALIGNED_READS_INDEX, metadata.barcode(), new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(metadata.sampleName()))));
}
output = outputBuilder.build();
} else {
output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
}
trace.stop();
executorService.shutdown();
return output;
}
use of com.hartwig.pipeline.storage.RuntimeBucket in project pipeline5 by hartwigmedical.
the class GoogleStorageSampleSource method sample.
@Override
public Sample sample(final SingleSampleRunMetadata metadata) {
RuntimeBucket runtimeBucket = RuntimeBucket.from(storage, Aligner.NAMESPACE, metadata, arguments, labels);
Iterable<Blob> blobs = runtimeBucket.list("samples/");
if (Iterables.isEmpty(blobs)) {
throw new IllegalArgumentException(String.format("No sample data found in bucket [%s] so there is no input to process. " + "You cannot use the upload=false flag if no sample has already been uploaded", runtimeBucket.name()));
}
String sampleDirectory = "/samples/" + metadata.barcode();
String sampleNameWithPostfix = metadata.barcode();
List<Lane> lanes = FastqFiles.toLanes(StreamSupport.stream(blobs.spliterator(), false).map(Blob::getName).map(File::new).map(File::getName).collect(Collectors.toList()), sampleDirectory, sampleNameWithPostfix);
return Sample.builder(sampleNameWithPostfix).addAllLanes(lanes).build();
}
use of com.hartwig.pipeline.storage.RuntimeBucket in project pipeline5 by hartwigmedical.
the class StartupScriptComponentTest method copiesStartupScriptIntoReportBucket.
@Test
public void copiesStartupScriptIntoReportBucket() {
Storage storage = mock(Storage.class);
RuntimeBucket runtimeBucket = MockRuntimeBucket.test().getRuntimeBucket();
Bucket reportBucket = mock(Bucket.class);
when(reportBucket.getName()).thenReturn(REPORT_BUCKET);
ArgumentCaptor<String> sourceBlobCaptor = ArgumentCaptor.forClass(String.class);
ArgumentCaptor<String> targetBucketCaptor = ArgumentCaptor.forClass(String.class);
ArgumentCaptor<String> targetBlobCaptor = ArgumentCaptor.forClass(String.class);
StartupScriptComponent victim = new StartupScriptComponent(runtimeBucket, "test", Folder.from(TestInputs.referenceRunMetadata()));
victim.addToReport(storage, reportBucket, "test_set");
verify(runtimeBucket, times(1)).copyOutOf(sourceBlobCaptor.capture(), targetBucketCaptor.capture(), targetBlobCaptor.capture());
assertThat(sourceBlobCaptor.getValue()).isEqualTo("copy_of_startup_script_for_run.sh");
assertThat(targetBucketCaptor.getValue()).isEqualTo(REPORT_BUCKET);
assertThat(targetBlobCaptor.getValue()).isEqualTo("test_set/reference/test/run.sh");
}
Aggregations