Use of com.hartwig.pipeline.metadata.AddDatatype in project pipeline5 by hartwigmedical.
Class VirusAnalysis, method persistedOutput.
@Override
public VirusOutput persistedOutput(final SomaticRunMetadata metadata) {
    String vcf = vcf(metadata);
    String summary = summary(metadata);
    String annotated = annotatedVirusTsv(metadata);
    return VirusOutput.builder()
            .status(PipelineStatus.PERSISTED)
            .maybeAnnotatedVirusFile(persistedDataset.path(metadata.tumor().sampleName(), DataType.VIRUS_INTERPRETATION)
                    .orElse(GoogleStorageLocation.of(metadata.bucket(),
                            PersistedLocations.blobForSet(metadata.set(), namespace(), annotated))))
            .addDatatypes(new AddDatatype(DataType.VIRUSBREAKEND_SUMMARY,
                    metadata.barcode(),
                    new ArchivePath(Folder.root(), namespace(), summary)))
            .addDatatypes(new AddDatatype(DataType.VIRUS_INTERPRETATION,
                    metadata.barcode(),
                    new ArchivePath(Folder.root(), namespace(), annotated)))
            .addDatatypes(new AddDatatype(DataType.VIRUSBREAKEND_VARIANTS,
                    metadata.barcode(),
                    new ArchivePath(Folder.root(), namespace(), vcf)))
            .build();
}
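Every usage in this listing follows the same shape: an AddDatatype couples a DataType with the sample barcode and an ArchivePath built from a Folder, the stage namespace and the output file name. Below is a minimal sketch of that pattern pulled into a hypothetical helper; the constructor signatures are taken from the snippet above, and imports other than AddDatatype are omitted because the packages of DataType, ArchivePath and Folder are not shown anywhere in this listing.

import com.hartwig.pipeline.metadata.AddDatatype;
// DataType, ArchivePath and Folder are pipeline5 classes used throughout the
// snippets on this page; their packages are not shown here, so those imports
// are omitted in this sketch.

class VirusDatatypeSketch {

    // Hypothetical helper: one AddDatatype per output file, keyed by the sample
    // barcode and archived under the stage's namespace in the run's root folder.
    static AddDatatype virusInterpretation(String barcode, String namespace, String annotatedVirusTsv) {
        return new AddDatatype(
                DataType.VIRUS_INTERPRETATION,                                  // what the file contains
                barcode,                                                        // which sample the file belongs to
                new ArchivePath(Folder.root(), namespace, annotatedVirusTsv));  // where it is archived
    }
}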
Use of com.hartwig.pipeline.metadata.AddDatatype in project pipeline5 by hartwigmedical.
Class SageCaller, method outputBuilder.
protected ImmutableSageOutput.Builder outputBuilder(final SomaticRunMetadata metadata, final PipelineStatus jobStatus,
        final RuntimeBucket bucket, final ResultsDirectory resultsDirectory) {
    final String filteredOutputFile = sageConfiguration.filteredTemplate().apply(metadata);
    final String unfilteredOutputFile = sageConfiguration.unfilteredTemplate().apply(metadata);
    final String geneCoverageFile = sageConfiguration.geneCoverageTemplate().apply(metadata);
    final Optional<String> somaticRefSampleBqrPlot = referenceSampleBqrPlot(metadata);
    final Optional<String> somaticTumorSampleBqrPlot = tumorSampleBqrPlot(metadata);
    final ImmutableSageOutput.Builder builder = SageOutput.builder(namespace()).status(jobStatus);
    somaticRefSampleBqrPlot.ifPresent(s -> builder
            .maybeSomaticRefSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.reference().sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.reference().sampleName())));
    somaticTumorSampleBqrPlot.ifPresent(s -> builder
            .maybeSomaticTumorSampleBqrPlot(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(s)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.tumor().sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.tumor().sampleName())));
    return builder.addFailedLogLocations(GoogleStorageLocation.of(bucket.name(), RunLogComponent.LOG_FILE))
            .maybeGermlineGeneCoverage(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(geneCoverageFile)))
            .maybeSomaticTumorSampleBqrPlot(somaticTumorSampleBqrPlot.map(t -> GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(t))))
            .maybeVariants(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(filteredOutputFile)))
            .addReportComponents(bqrComponent("png", bucket, resultsDirectory, metadata.sampleName()))
            .addReportComponents(bqrComponent("tsv", bucket, resultsDirectory, metadata.sampleName()))
            .addReportComponents(vcfComponent(unfilteredOutputFile, bucket, resultsDirectory))
            .addReportComponents(vcfComponent(filteredOutputFile, bucket, resultsDirectory))
            .addReportComponents(singleFileComponent(geneCoverageFile, bucket, resultsDirectory))
            .addReportComponents(new RunLogComponent(bucket, namespace(), Folder.root(), resultsDirectory))
            .addReportComponents(new StartupScriptComponent(bucket, namespace(), Folder.root()))
            .addDatatypes(new AddDatatype(sageConfiguration.vcfDatatype(),
                    metadata.barcode(),
                    new ArchivePath(Folder.root(), namespace(), filteredOutputFile)))
            .addDatatypes(new AddDatatype(sageConfiguration.geneCoverageDatatype(),
                    metadata.barcode(),
                    new ArchivePath(Folder.root(), namespace(), geneCoverageFile)))
            .addAllDatatypes(somaticRefSampleBqrPlot.stream()
                    .map(r -> new AddDatatype(sageConfiguration.tumorSampleBqrPlot(),
                            metadata.barcode(),
                            new ArchivePath(Folder.root(), namespace(), r)))
                    .collect(Collectors.toList()))
            .addAllDatatypes(somaticTumorSampleBqrPlot.stream()
                    .map(t -> new AddDatatype(sageConfiguration.refSampleBqrPlot(),
                            metadata.barcode(),
                            new ArchivePath(Folder.root(), namespace(), t)))
                    .collect(Collectors.toList()));
}
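The two BQR plot files are optional, so the builder deals with them in two ways: ifPresent registers the storage location and report components, and Optional.stream() turns each plot into zero or one AddDatatype entries for addAllDatatypes. Here is a self-contained, JDK-only sketch of that zero-or-one mapping, with a plain String standing in for the AddDatatype instance.

import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

public class OptionalDatatypeSketch {
    public static void main(String[] args) {
        // Stand-in for an optional BQR plot file name; empty when the plot was not produced.
        Optional<String> maybePlot = Optional.of("tumor.sage.bqr.png");

        // Optional.stream() (Java 9+) yields zero or one element, so the mapped list
        // contains at most one entry - the same trick addAllDatatypes relies on above.
        List<String> datatypes = maybePlot.stream()
                .map(plot -> "AddDatatype(" + plot + ")")  // placeholder for new AddDatatype(...)
                .collect(Collectors.toList());

        System.out.println(datatypes);                              // [AddDatatype(tumor.sage.bqr.png)]
        System.out.println(Optional.<String>empty().stream().count()); // 0
    }
}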
Use of com.hartwig.pipeline.metadata.AddDatatype in project pipeline5 by hartwigmedical.
Class BwaAligner, method run.
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
    StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
    RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
    Sample sample = sampleSource.sample(metadata);
    if (sample.bam().isPresent()) {
        String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
        int firstSlash = noPrefix.indexOf("/");
        String bucket = noPrefix.substring(0, firstSlash);
        String path = noPrefix.substring(firstSlash + 1);
        return AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(PipelineStatus.PROVIDED)
                .maybeAlignments(GoogleStorageLocation.of(bucket, path))
                .build();
    }
    final ResourceFiles resourceFiles = buildResourceFiles(arguments);
    sampleUpload.run(sample, rootBucket);
    List<Future<PipelineStatus>> futures = new ArrayList<>();
    List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
    List<ReportComponent> laneLogComponents = new ArrayList<>();
    List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
    for (Lane lane : sample.lanes()) {
        RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
        BashStartupScript bash = BashStartupScript.of(laneBucket.name());
        InputDownload first =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
        InputDownload second =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
        bash.addCommand(first).addCommand(second);
        bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
        SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(),
                resourceFiles.refGenomeFile(),
                first.getLocalTargetPath(),
                second.getLocalTargetPath(),
                metadata.sampleName(),
                lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
        perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
        bash.addCommands(alignment.bash())
                .addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
        futures.add(executorService.submit(() -> runWithRetries(metadata,
                laneBucket,
                VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
        laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
        laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
    }
    AlignmentOutput output;
    if (lanesSuccessfullyComplete(futures)) {
        List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
        BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
        laneBams.forEach(mergeMarkdupsBash::addCommand);
        SubStageInputOutput merged = new MergeMarkDups(laneBams.stream()
                .map(InputDownload::getLocalTargetPath)
                .filter(path -> path.endsWith("bam"))
                .collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
        mergeMarkdupsBash.addCommands(merged.bash());
        mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
        PipelineStatus status = runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
        ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(status)
                .maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName())))
                .addAllReportComponents(laneLogComponents)
                .addAllFailedLogLocations(laneFailedLogs)
                .addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE))
                .addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
        if (!arguments.outputCram()) {
            outputBuilder.addReportComponents(new SingleFileComponent(rootBucket,
                            Aligner.NAMESPACE,
                            Folder.from(metadata),
                            bam(metadata.sampleName()),
                            bam(metadata.sampleName()),
                            resultsDirectory),
                    new SingleFileComponent(rootBucket,
                            Aligner.NAMESPACE,
                            Folder.from(metadata),
                            bai(bam(metadata.sampleName())),
                            bai(bam(metadata.sampleName())),
                            resultsDirectory))
                    .addDatatypes(new AddDatatype(DataType.ALIGNED_READS,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))),
                            new AddDatatype(DataType.ALIGNED_READS_INDEX,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(metadata.sampleName()))));
        }
        output = outputBuilder.build();
    } else {
        output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
    }
    trace.stop();
    executorService.shutdown();
    return output;
}
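The aligner fans out one virtual-machine job per lane through an ExecutorService and only merges and marks duplicates once every lane future has completed successfully. The following self-contained sketch shows that fan-out/fan-in shape using only JDK classes; the lane names, pool size and Status enum are placeholders, not pipeline5 types.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PerLaneFanOutSketch {

    enum Status { SUCCESS, FAILED }

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        List<Future<Status>> futures = new ArrayList<>();

        // Fan out: one task per lane, mirroring the per-lane alignment jobs above.
        for (String lane : List.of("L001", "L002", "L003")) {
            futures.add(executor.submit(() -> {
                System.out.println("aligning lane " + lane); // stand-in for runWithRetries(...) on a lane VM
                return Status.SUCCESS;
            }));
        }

        // Fan in: only proceed to merge when every lane completed successfully,
        // analogous to lanesSuccessfullyComplete(futures).
        boolean allSucceeded = true;
        for (Future<Status> future : futures) {
            allSucceeded &= future.get() == Status.SUCCESS;
        }
        System.out.println(allSucceeded ? "merge and mark duplicates" : "fail the run");
        executor.shutdown();
    }
}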
Use of com.hartwig.pipeline.metadata.AddDatatype in project pipeline5 by hartwigmedical.
Class Gripss, method output.
@Override
public GripssOutput output(final SomaticRunMetadata metadata, final PipelineStatus jobStatus, final RuntimeBucket bucket,
        final ResultsDirectory resultsDirectory) {
    String filteredVcfFile = filteredVcf(metadata);
    String unfilteredVcfFile = unfilteredVcf(metadata);
    return GripssOutput.builder(namespace())
            .status(jobStatus)
            .maybeFilteredVariants(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(basename(filteredVcfFile))))
            .maybeUnfilteredVariants(GoogleStorageLocation.of(bucket.name(), resultsDirectory.path(basename(unfilteredVcfFile))))
            .addFailedLogLocations(GoogleStorageLocation.of(bucket.name(), RunLogComponent.LOG_FILE))
            .addReportComponents(new ZippedVcfAndIndexComponent(bucket,
                    namespace(),
                    Folder.root(),
                    basename(unfilteredVcfFile),
                    basename(unfilteredVcfFile),
                    resultsDirectory))
            .addReportComponents(new ZippedVcfAndIndexComponent(bucket,
                    namespace(),
                    Folder.root(),
                    basename(filteredVcfFile),
                    basename(filteredVcfFile),
                    resultsDirectory))
            .addReportComponents(new RunLogComponent(bucket, namespace(), Folder.root(), resultsDirectory))
            .addReportComponents(new StartupScriptComponent(bucket, namespace(), Folder.root()))
            .addDatatypes(new AddDatatype(unfilteredDatatype(),
                            metadata.barcode(),
                            new ArchivePath(Folder.root(), namespace(), basename(unfilteredVcfFile))),
                    new AddDatatype(filteredDatatype(),
                            metadata.barcode(),
                            new ArchivePath(Folder.root(), namespace(), basename(filteredVcfFile))))
            .build();
}
Use of com.hartwig.pipeline.metadata.AddDatatype in project pipeline5 by hartwigmedical.
Class CramConversion, method output.
@Override
public CramOutput output(final SingleSampleRunMetadata metadata, final PipelineStatus jobStatus, final RuntimeBucket bucket,
        final ResultsDirectory resultsDirectory) {
    String cram = new File(outputCram).getName();
    String crai = FileTypes.crai(cram);
    Folder folder = Folder.from(metadata);
    return CramOutput.builder()
            .status(jobStatus)
            .addFailedLogLocations(GoogleStorageLocation.of(bucket.name(), RunLogComponent.LOG_FILE))
            .addReportComponents(new RunLogComponent(bucket, NAMESPACE, folder, resultsDirectory),
                    new StartupScriptComponent(bucket, NAMESPACE, folder),
                    new SingleFileComponent(bucket, NAMESPACE, folder, cram, cram, resultsDirectory),
                    new SingleFileComponent(bucket, NAMESPACE, folder, crai, crai, resultsDirectory))
            .addDatatypes(new AddDatatype(DataType.ALIGNED_READS,
                            metadata.barcode(),
                            new ArchivePath(Folder.from(metadata), namespace(), cram)),
                    new AddDatatype(DataType.ALIGNED_READS_INDEX,
                            metadata.barcode(),
                            new ArchivePath(Folder.from(metadata), namespace(), crai)))
            .build();
}
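Comparing this with the BwaAligner example above: both stages register DataType.ALIGNED_READS and DataType.ALIGNED_READS_INDEX, but the single-sample stages archive under Folder.from(metadata) while the somatic stages in this listing (VirusAnalysis, SageCaller, Gripss) use Folder.root(). The hypothetical pair of helpers below sketches that distinction, assuming only the constructors visible in this listing; pipeline5 imports are again omitted because their packages are not shown here.

class DatatypeFolderSketch {

    // Somatic (tumor/reference) stages archive under the run's root folder.
    static AddDatatype somaticDatatype(DataType type, String barcode, String namespace, String file) {
        return new AddDatatype(type, barcode, new ArchivePath(Folder.root(), namespace, file));
    }

    // Single-sample stages archive under a per-sample folder derived from the metadata.
    static AddDatatype singleSampleDatatype(DataType type, SingleSampleRunMetadata metadata, String namespace, String file) {
        return new AddDatatype(type, metadata.barcode(), new ArchivePath(Folder.from(metadata), namespace, file));
    }
}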