Use of com.hartwig.pipeline.metadata.SingleSampleRunMetadata in project pipeline5 by hartwigmedical.
Class StagedOutputPublisher, method publish.
/**
 * Publishes a {@code PipelineComplete} event describing the outputs of a finished run.
 *
 * <p>Nothing is published when the pipeline status is {@code FAILED} or when no run is present. Otherwise every
 * blob handed over by the {@code OutputIterator} is looked up again in the source bucket by name and added to at
 * most one of three analyses (alignment, germline, somatic); the three analyses are then published together.
 *
 * @param state    final pipeline state; supplies the status and the datatypes declared by each stage output
 * @param metadata run metadata; supplies the set, the optional tumor and reference samples, and is passed to the
 *                 output iterator
 */
public void publish(final PipelineState state, final SomaticRunMetadata metadata) {
    if (state.status() == PipelineStatus.FAILED || !run.isPresent()) {
        return;
    }

    List<AddDatatype> knownDatatypes = state.stageOutputs()
            .stream()
            .flatMap(stageOutput -> stageOutput.datatypes().stream())
            .collect(Collectors.toList());
    SampleSet sampleSet = setResolver.resolve(metadata.set(), useOnlyDBSets);
    Optional<String> tumorName = metadata.maybeTumor().map(SingleSampleRunMetadata::sampleName);
    Optional<String> referenceName = metadata.maybeReference().map(SingleSampleRunMetadata::sampleName);

    ImmutableAnalysis.Builder alignment = eventBuilder(Type.ALIGNMENT);
    ImmutableAnalysis.Builder somatic = eventBuilder(Type.SOMATIC);
    ImmutableAnalysis.Builder germline = eventBuilder(Type.GERMLINE);

    OutputIterator.from(blob -> {
        // The first declared datatype whose path is a suffix of the blob name wins.
        Optional<AddDatatype> matchingDatatype =
                knownDatatypes.stream().filter(datatype -> blob.getName().endsWith(datatype.path())).findFirst();
        // Fetch the blob again from the source bucket; presumably this variant carries the MD5 — verify.
        Blob fetched = sourceBucket.get(blob.getName());
        if (isSecondary(fetched)) {
            alignment.addOutput(createBlob(tumorName, referenceName, matchingDatatype, fetched));
        } else if (isGermline(fetched)) {
            germline.addOutput(createBlob(tumorName, referenceName, matchingDatatype, fetched));
        } else if (notSecondary(fetched)) {
            somatic.addOutput(createBlob(tumorName, referenceName, matchingDatatype, fetched));
        }
    }, sourceBucket).iterate(metadata);

    // The event is keyed on the tumor sample when there is one, otherwise on the reference sample.
    String sampleName = tumorName.orElseGet(() -> referenceName.orElseThrow());
    publish(PipelineComplete.builder()
            .pipeline(ImmutablePipeline.builder()
                    .sample(sampleName)
                    .bucket(sourceBucket.getName())
                    .runId(run.get().getId())
                    .setId(sampleSet.getId())
                    .context(context)
                    .addAnalyses(alignment.build(), somatic.build(), germline.build())
                    .version(Versions.pipelineMajorMinorVersion())
                    .build())
            .build());
}
Use of com.hartwig.pipeline.metadata.SingleSampleRunMetadata in project pipeline5 by hartwigmedical.
Class BwaAligner, method run.
/**
 * Produces the alignment output for a single sample.
 *
 * <p>If the sample already references a BAM, no jobs are run: the {@code gs://bucket/path} location is split into
 * bucket and path and returned with status {@code PROVIDED}. Otherwise the sample is uploaded, one alignment job
 * per lane is submitted to {@code executorService}, and, once all lanes complete successfully, a single
 * merge/mark-duplicates job is run over the per-lane BAMs. If any lane fails, an output with status
 * {@code FAILED} is returned.
 *
 * @param metadata metadata of the sample to align; supplies the sample name and barcode
 * @return the alignment output, including report components and datatypes unless CRAM output is requested
 * @throws IllegalArgumentException if a provided BAM location has no {@code /} separating bucket and path
 * @throws Exception                declared by the signature; see the calls below for possible sources
 */
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
    StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
    RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
    Sample sample = sampleSource.sample(metadata);
    if (sample.bam().isPresent()) {
        // NOTE(review): this early return skips trace.stop() and executorService.shutdown() below — confirm
        // that is intended.
        String providedBam = sample.bam().orElseThrow();
        String noPrefix = providedBam.replace("gs://", "");
        int firstSlash = noPrefix.indexOf("/");
        if (firstSlash < 0) {
            // Without this check substring(0, -1) fails with an opaque StringIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "Provided BAM location [" + providedBam + "] must have the form gs://bucket/path");
        }
        String bucket = noPrefix.substring(0, firstSlash);
        String path = noPrefix.substring(firstSlash + 1);
        return AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(PipelineStatus.PROVIDED)
                .maybeAlignments(GoogleStorageLocation.of(bucket, path))
                .build();
    }

    final ResourceFiles resourceFiles = buildResourceFiles(arguments);
    sampleUpload.run(sample, rootBucket);

    List<Future<PipelineStatus>> futures = new ArrayList<>();
    List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
    List<ReportComponent> laneLogComponents = new ArrayList<>();
    List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
    for (Lane lane : sample.lanes()) {
        // Each lane gets its own runtime bucket and startup script; the FASTQ pair is read from the root bucket.
        RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
        BashStartupScript bash = BashStartupScript.of(laneBucket.name());
        InputDownload first =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
        InputDownload second =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
        bash.addCommand(first).addCommand(second);
        bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
        SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(),
                resourceFiles.refGenomeFile(),
                first.getLocalTargetPath(),
                second.getLocalTargetPath(),
                metadata.sampleName(),
                lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
        perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
        bash.addCommands(alignment.bash())
                .addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()),
                        RuntimeFiles.typical()));
        futures.add(executorService.submit(() -> runWithRetries(metadata,
                laneBucket,
                VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
        laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
        laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
    }

    AlignmentOutput output;
    if (lanesSuccessfullyComplete(futures)) {
        List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
        BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
        laneBams.forEach(mergeMarkdupsBash::addCommand);
        SubStageInputOutput merged = new MergeMarkDups(laneBams.stream()
                .map(InputDownload::getLocalTargetPath)
                .filter(path -> path.endsWith("bam"))
                .collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
        mergeMarkdupsBash.addCommands(merged.bash());
        mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()),
                RuntimeFiles.typical()));
        PipelineStatus status =
                runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
        ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(status)
                .maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName())))
                .addAllReportComponents(laneLogComponents)
                .addAllFailedLogLocations(laneFailedLogs)
                .addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE))
                .addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
        if (!arguments.outputCram()) {
            // The BAM and its index are only reported, and given datatypes, when CRAM output is not requested.
            // The index datatype path uses bai(bam(sample)) so that it names the same file as the index report
            // component; it previously used bai(sample), which did not agree with the reported index file.
            outputBuilder.addReportComponents(new SingleFileComponent(rootBucket,
                            Aligner.NAMESPACE,
                            Folder.from(metadata),
                            bam(metadata.sampleName()),
                            bam(metadata.sampleName()),
                            resultsDirectory),
                    new SingleFileComponent(rootBucket,
                            Aligner.NAMESPACE,
                            Folder.from(metadata),
                            bai(bam(metadata.sampleName())),
                            bai(bam(metadata.sampleName())),
                            resultsDirectory))
                    .addDatatypes(new AddDatatype(DataType.ALIGNED_READS,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))),
                            new AddDatatype(DataType.ALIGNED_READS_INDEX,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(bam(metadata.sampleName())))));
        }
        output = outputBuilder.build();
    } else {
        output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
    }
    trace.stop();
    executorService.shutdown();
    return output;
}
Use of com.hartwig.pipeline.metadata.SingleSampleRunMetadata in project pipeline5 by hartwigmedical.
Class PipelineMain, method start.
/**
 * Runs the full pipeline for the given arguments and returns its final state.
 *
 * <p>Resolves credentials, storage, publishers and the somatic metadata API, emits a started event, builds the
 * reference and tumor single-sample pipelines plus the somatic pipeline, runs them through {@code FullPipeline},
 * then emits a completed event carrying the final status.
 *
 * @param arguments pipeline arguments
 * @return the state returned by {@code FullPipeline.run()}
 * @throws RuntimeException wrapping any exception raised while setting up or running the pipeline
 */
public PipelineState start(final Arguments arguments) {
    LOGGER.info("Arguments are [{}]", arguments);
    Versions.printAll();
    try {
        GoogleCredentials googleCredentials = CredentialProvider.from(arguments).get();
        Storage storageClient = StorageProvider.from(arguments, googleCredentials).get();
        Publisher turquoise = PublisherProvider.from(arguments, googleCredentials).get("turquoise.events");
        Publisher completionPublisher = PublisherProvider.from(arguments, googleCredentials).get(PipelineComplete.TOPIC);
        SomaticMetadataApi metadataApi = SomaticMetadataApiProvider.from(arguments, storageClient, completionPublisher).get();
        SingleSampleEventListener referenceListener = new SingleSampleEventListener();
        SingleSampleEventListener tumorListener = new SingleSampleEventListener();
        SomaticRunMetadata runMetadata = metadataApi.get();
        InputMode inputMode = new ModeResolver().apply(runMetadata);
        LOGGER.info("Starting pipeline in [{}] mode", inputMode);

        // Run type reported in the events: single sample takes precedence over shallow, somatic is the fallback.
        final String runType;
        if (runMetadata.isSingleSample()) {
            runType = "single_sample";
        } else if (arguments.shallow()) {
            runType = "shallow";
        } else {
            runType = "somatic";
        }

        // Events are keyed on the tumor sample when present, otherwise on the reference sample.
        String eventSample = runMetadata.maybeTumor()
                .map(SingleSampleRunMetadata::sampleName)
                .orElseGet(() -> runMetadata.reference().sampleName());
        PipelineProperties eventProperties = PipelineProperties.builder()
                .sample(eventSample)
                .runId(arguments.sbpApiRunId())
                .set(runMetadata.set())
                .referenceBarcode(runMetadata.maybeReference().map(SingleSampleRunMetadata::barcode))
                .tumorBarcode(runMetadata.maybeTumor().map(SingleSampleRunMetadata::barcode))
                .type(runType)
                .build();

        metadataApi.start();
        startedEvent(eventProperties, turquoise, arguments.publishToTurquoise());

        // Capacity-one queues passed to both the single-sample pipelines and the somatic pipeline.
        BlockingQueue<BamMetricsOutput> referenceMetricsQueue = new ArrayBlockingQueue<>(1);
        BlockingQueue<BamMetricsOutput> tumorMetricsQueue = new ArrayBlockingQueue<>(1);
        BlockingQueue<FlagstatOutput> referenceFlagstatQueue = new ArrayBlockingQueue<>(1);
        BlockingQueue<FlagstatOutput> tumorFlagstatQueue = new ArrayBlockingQueue<>(1);
        BlockingQueue<GermlineCallerOutput> germlineCallerQueue = new ArrayBlockingQueue<>(1);

        StartingPoint startFrom = new StartingPoint(arguments);
        PersistedDataset dataset = arguments.biopsy()
                .<PersistedDataset>map(b -> new ApiPersistedDataset(SbpRestApi.newInstance(arguments.sbpApiUrl()),
                        ObjectMappers.get(),
                        b,
                        arguments.project()))
                .orElse(new NoopPersistedDataset());

        PipelineState finalState = new FullPipeline(
                singleSamplePipeline(arguments,
                        googleCredentials,
                        storageClient,
                        referenceListener,
                        runMetadata,
                        referenceMetricsQueue,
                        germlineCallerQueue,
                        referenceFlagstatQueue,
                        startFrom,
                        dataset,
                        inputMode),
                singleSamplePipeline(arguments,
                        googleCredentials,
                        storageClient,
                        tumorListener,
                        runMetadata,
                        tumorMetricsQueue,
                        germlineCallerQueue,
                        tumorFlagstatQueue,
                        startFrom,
                        dataset,
                        inputMode),
                somaticPipeline(arguments,
                        googleCredentials,
                        storageClient,
                        runMetadata,
                        referenceMetricsQueue,
                        tumorMetricsQueue,
                        referenceFlagstatQueue,
                        tumorFlagstatQueue,
                        startFrom,
                        dataset,
                        inputMode),
                Executors.newCachedThreadPool(),
                referenceListener,
                tumorListener,
                metadataApi,
                CleanupProvider.from(arguments, storageClient).get()).run();

        completedEvent(eventProperties, turquoise, finalState.status().toString(), arguments.publishToTurquoise());
        VmExecutionLogSummary.ofFailedStages(storageClient, finalState);
        return finalState;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Use of com.hartwig.pipeline.metadata.SingleSampleRunMetadata in project pipeline5 by hartwigmedical.
Class TestInputs, method defaultSomaticRunMetadata.
/**
 * Builds the default somatic run metadata used as test input.
 *
 * @return metadata with set {@code SET}, bucket {@code BUCKET}, the tumor from {@code tumorRunMetadata()} and the
 *         reference from {@code referenceRunMetadata()}
 */
public static SomaticRunMetadata defaultSomaticRunMetadata() {
    return SomaticRunMetadata.builder()
            .set(SET)
            .maybeTumor(tumorRunMetadata())
            .maybeReference(referenceRunMetadata())
            .bucket(BUCKET)
            .build();
}
Aggregations