use of com.hartwig.pipeline.stages.SubStageInputOutput in project pipeline5 by hartwigmedical.
the class GermlineCaller method commands.
/**
 * Assembles the bash commands for germline calling of a single sample.
 *
 * <p>The chain is: {@code GatkGermlineCaller} followed by {@code GenotypeGVCFs}; the result is then
 * run through two {@code SelectVariants}/{@code VariantFiltration} branches (one labelled "snp", one
 * labelled "indels"), the branches are combined, and {@code CombineFilteredVariants} followed by
 * {@code GermlineZipIndex} produces the final file. Two trailing move commands relocate the final
 * file and its {@code .tbi} companion to {@code outputFile}.
 *
 * @param metadata metadata of the sample being processed; only its sample name is read here
 * @return immutable list of the stage commands followed by the two move commands
 */
@Override
public List<BashCommand> commands(final SingleSampleRunMetadata metadata) {
    final String refGenome = resourceFiles.refGenomeFile();
    final String sampleName = metadata.sampleName();

    // Calling + genotyping.
    final SubStageInputOutput called = new GatkGermlineCaller(bamDownload.getLocalTargetPath(), refGenome)
            .andThen(new GenotypeGVCFs(refGenome))
            .apply(SubStageInputOutput.empty(sampleName));

    // SNP branch continues directly from the caller output.
    final SubStageInputOutput filteredSnps = new SelectVariants("snp", Lists.newArrayList("SNP", "NO_VARIATION"), refGenome)
            .andThen(new VariantFiltration("snp", SNP_FILTER_EXPRESSION, refGenome))
            .apply(called);

    // Indel branch starts from the caller's output file but with an empty command list.
    // NOTE(review): presumably this avoids repeating the caller commands once both branches are
    // combined - confirm against SubStageInputOutput.combine.
    final SubStageInputOutput indelBranchInput =
            SubStageInputOutput.of(sampleName, called.outputFile(), Collections.emptyList());
    final SubStageInputOutput filteredIndels = new SelectVariants("indels", Lists.newArrayList("INDEL", "MIXED"), refGenome)
            .andThen(new VariantFiltration("indels", INDEL_FILTER_EXPRESSION, refGenome))
            .apply(indelBranchInput);

    // Merge both branches, then combine the filtered variants and zip/index the result.
    final SubStageInputOutput bothBranches = filteredSnps.combine(filteredIndels);
    final SubStageInputOutput zipped = new CombineFilteredVariants(filteredIndels.outputFile().path(), refGenome)
            .andThen(new GermlineZipIndex())
            .apply(bothBranches);

    final String producedPath = zipped.outputFile().path();
    final String destinationPath = outputFile.path();

    final ImmutableList.Builder<BashCommand> script = ImmutableList.builder();
    script.addAll(zipped.bash());
    script.add(new MvCommand(producedPath, destinationPath));
    script.add(new MvCommand(producedPath + ".tbi", destinationPath + ".tbi"));
    return script.build();
}
use of com.hartwig.pipeline.stages.SubStageInputOutput in project pipeline5 by hartwigmedical.
the class GridssRerun method execute.
/**
 * Populates the startup script for a GRIDSS re-run on one tumor/reference pair and returns the
 * structural-calling job definition.
 *
 * <p>The script downloads both input files and their indexes, converts them with {@code cramToBam}
 * when the local file name changes after replacing "cram" with "bam", runs the {@code Driver} +
 * {@code GridssAnnotation} stages, copies the resulting VCF and its {@code .tbi} index to the remote
 * archive location, and finally uploads the output to the "gridss" path of the runtime bucket.
 *
 * @param inputs         bundle providing "set", "tumor_sample", "reference_sample", "tumor_cram" and "ref_cram"
 * @param runtimeBucket  bucket whose name is used for the final output upload
 * @param commands       startup script that the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // ---- Resolve inputs ----
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorRemote = inputs.get("tumor_cram");
    final InputFileDescriptor referenceRemote = inputs.get("ref_cram");
    final InputFileDescriptor runDescriptor = inputs.get();

    // NOTE(review): the four downloads below are built but never referenced again in this method.
    // They are kept because the RemoteLocationsApi lookups may have effects not visible here.
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(runDescriptor.billedProject(), tumorName);
    InputDownload tumorBamDownload = new InputDownload(remoteLocations.getTumorAlignment());
    InputDownload tumorBamIndexDownload = new InputDownload(remoteLocations.getTumorAlignmentIndex());
    InputDownload referenceBamDownload = new InputDownload(remoteLocations.getReferenceAlignment());
    InputDownload referenceBamIndexDownload = new InputDownload(remoteLocations.getReferenceAlignmentIndex());

    final InputFileDescriptor tumorRemoteIndex = tumorRemote.index();
    final InputFileDescriptor referenceRemoteIndex = referenceRemote.index();
    final String tumorLocal = localFilename(tumorRemote);
    final String referenceLocal = localFilename(referenceRemote);
    // Every occurrence of "cram" in the local name is replaced, not only the extension.
    final String tumorBam = tumorLocal.replace("cram", "bam");
    final String referenceBam = referenceLocal.replace("cram", "bam");

    // ---- Build the GRIDSS stages ----
    final Driver gridssDriver = new Driver(resources, VmDirectories.outputFile(tumorName + ".assembly.bam"))
            .tumorSample(tumorName, tumorBam)
            .referenceSample(referenceName, referenceBam);
    final SubStageInputOutput gridssResult = gridssDriver
            .andThen(new GridssAnnotation(resources, false))
            .apply(SubStageInputOutput.empty(tumorName));

    final OutputFile vcf = gridssResult.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorName);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // ---- Assemble the script ----
    commands.addCommand(new ExportPathCommand(new BwaCommand()))
            .addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Downloads: tumor file + index, then reference file + index.
    commands.addCommand(() -> tumorRemote.toCommandForm(tumorLocal))
            .addCommand(() -> tumorRemoteIndex.toCommandForm(localFilename(tumorRemoteIndex)))
            .addCommand(() -> referenceRemote.toCommandForm(referenceLocal))
            .addCommand(() -> referenceRemoteIndex.toCommandForm(localFilename(referenceRemoteIndex)));

    // Conversion is only scheduled when the rename above actually changed the file name.
    final boolean tumorNeedsConversion = !tumorLocal.equals(tumorBam);
    if (tumorNeedsConversion) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean referenceNeedsConversion = !referenceLocal.equals(referenceBam);
    if (referenceNeedsConversion) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    commands.addCommands(gridssResult.bash());
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive))
            .addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));

    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "gridss");
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.stages.SubStageInputOutput in project pipeline5 by hartwigmedical.
the class SageRerunOld method execute.
/**
 * Populates the startup script for a SAGE somatic re-run on one tumor/reference pair and returns
 * the SAGE somatic calling job definition.
 *
 * <p>The script downloads the tumor and reference inputs (and, when the bundle contains an "rna"
 * entry, the RNA file as well), optionally converts the inputs with {@code cramToBam}, runs
 * {@code SageApplication} followed by {@code SageSomaticPostProcess}, copies the filtered and
 * unfiltered outputs, their {@code .tbi} indexes and the {@code bqrFile} "png"/"tsv" files for both
 * samples to the location returned by {@code sageArchiveDirectory}, and finally uploads the output
 * to the "sage" path of the runtime bucket.
 *
 * @param inputs         bundle providing "set", "tumor_sample", "ref_sample", "tumor_cram", "ref_cram" and optionally "rna"
 * @param runtimeBucket  bucket whose name is used for the final output upload
 * @param commands       startup script that the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return SAGE somatic calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // ---- Resolve inputs ----
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("ref_sample").inputValue();
    final InputFileDescriptor tumorRemote = inputs.get("tumor_cram");
    final InputFileDescriptor referenceRemote = inputs.get("ref_cram");
    final InputFileDescriptor tumorRemoteIndex = tumorRemote.index();
    final InputFileDescriptor referenceRemoteIndex = referenceRemote.index();
    final String tumorLocal = localFilename(tumorRemote);
    final String referenceLocal = localFilename(referenceRemote);

    // With conversion switched off, SAGE is pointed at the downloaded files directly.
    final String tumorForSage;
    final String referenceForSage;
    if (CONVERT_TO_BAM) {
        tumorForSage = tumorLocal.replace("cram", "bam");
        referenceForSage = referenceLocal.replace("cram", "bam");
    } else {
        tumorForSage = tumorLocal;
        referenceForSage = referenceLocal;
    }
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // ---- Downloads: tumor file + index, then reference file + index ----
    commands.addCommand(() -> tumorRemote.toCommandForm(tumorLocal))
            .addCommand(() -> tumorRemoteIndex.toCommandForm(localFilename(tumorRemoteIndex)))
            .addCommand(() -> referenceRemote.toCommandForm(referenceLocal))
            .addCommand(() -> referenceRemoteIndex.toCommandForm(localFilename(referenceRemoteIndex)));

    // ---- Configure SAGE ----
    final SageCommandBuilder sageBuilder = new SageCommandBuilder(resources)
            .addReference(referenceName, referenceForSage)
            .addTumor(tumorName, tumorForSage);
    if (PANEL_ONLY) {
        sageBuilder.panelOnly();
    }

    if (inputs.contains("rna")) {
        final InputFileDescriptor rnaRemote = inputs.get("rna");
        final InputFileDescriptor rnaRemoteIndex = rnaRemote.index();
        final String rnaLocal = localFilename(rnaRemote);

        commands.addCommand(() -> rnaRemote.toCommandForm(rnaLocal))
                .addCommand(() -> rnaRemoteIndex.toCommandForm(localFilename(rnaRemoteIndex)));

        // The RNA file is registered as an additional reference named <reference sample> + "NA".
        // NOTE(review): the suffix looks like a naming convention for the RNA sample - confirm.
        sageBuilder.addReference(referenceName + "NA", rnaLocal);
    }

    // ---- Convert inputs when the file handed to SAGE differs from the downloaded one ----
    final boolean tumorNeedsConversion = !tumorLocal.equals(tumorForSage);
    if (tumorNeedsConversion) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean referenceNeedsConversion = !referenceLocal.equals(referenceForSage);
    if (referenceNeedsConversion) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    // ---- Run SAGE + post-processing ----
    final SageApplication sage = new SageApplication(sageBuilder);
    final SageSomaticPostProcess postProcess = new SageSomaticPostProcess(tumorName, resources);
    final SubStageInputOutput postProcessed = sage.andThen(postProcess).apply(SubStageInputOutput.empty(tumorName));
    commands.addCommands(postProcessed.bash());

    // ---- Archive outputs ----
    final GoogleStorageLocation archive = sageArchiveDirectory(setName);
    final OutputFile filteredVcf = postProcessed.outputFile();
    final OutputFile filteredVcfIndex = filteredVcf.index(".tbi");
    // The application stage is applied on its own a second time only to obtain its output file;
    // the commands of that second application are not added to the script.
    final OutputFile unfilteredVcf = sage.apply(SubStageInputOutput.empty(tumorName)).outputFile();
    final OutputFile unfilteredVcfIndex = unfilteredVcf.index(".tbi");

    commands.addCommand(() -> filteredVcf.copyToRemoteLocation(archive))
            .addCommand(() -> filteredVcfIndex.copyToRemoteLocation(archive))
            .addCommand(() -> unfilteredVcf.copyToRemoteLocation(archive))
            .addCommand(() -> unfilteredVcfIndex.copyToRemoteLocation(archive));
    commands.addCommand(() -> bqrFile(tumorName, "png").copyToRemoteLocation(archive))
            .addCommand(() -> bqrFile(tumorName, "tsv").copyToRemoteLocation(archive))
            .addCommand(() -> bqrFile(referenceName, "png").copyToRemoteLocation(archive))
            .addCommand(() -> bqrFile(referenceName, "tsv").copyToRemoteLocation(archive));

    // ---- Store output ----
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "sage");
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.sageSomaticCalling(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.stages.SubStageInputOutput in project pipeline5 by hartwigmedical.
the class BwaAligner method run.
/**
 * Aligns all lanes of a sample, merges the per-lane results and reports the merged alignment.
 *
 * <p>When the sample already carries a BAM location, no work is scheduled: the {@code gs://} URL is
 * split into bucket and path and returned with status {@code PROVIDED}. Otherwise one job per lane
 * is submitted to the executor service, and - if {@code lanesSuccessfullyComplete} accepts the lane
 * futures - a {@code MergeMarkDups} job is run over the per-lane BAMs. If it does not, an output
 * with status {@code FAILED} is returned.
 *
 * <p>The BAM index is both published and registered under {@code bai(bam(sampleName))}, so the
 * {@code ALIGNED_READS_INDEX} datatype points at the same file name as the published index
 * component (previously the datatype used {@code bai(sampleName)}, which did not match).
 *
 * @param metadata metadata of the sample to align
 * @return the alignment output describing status, alignment location, report components and datatypes
 * @throws Exception propagated from the calls made while uploading, submitting and running the jobs
 */
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
    StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
    RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
    Sample sample = sampleSource.sample(metadata);

    // Pre-aligned sample: hand back the provided location instead of aligning.
    // NOTE(review): this early return skips trace.stop() and executorService.shutdown() below -
    // confirm that is intended.
    if (sample.bam().isPresent()) {
        String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
        int firstSlash = noPrefix.indexOf("/");
        String bucket = noPrefix.substring(0, firstSlash);
        String path = noPrefix.substring(firstSlash + 1);
        return AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(PipelineStatus.PROVIDED)
                .maybeAlignments(GoogleStorageLocation.of(bucket, path))
                .build();
    }

    final ResourceFiles resourceFiles = buildResourceFiles(arguments);
    sampleUpload.run(sample, rootBucket);

    List<Future<PipelineStatus>> futures = new ArrayList<>();
    List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
    List<ReportComponent> laneLogComponents = new ArrayList<>();
    List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();

    // One bucket, one startup script and one submitted job per lane.
    for (Lane lane : sample.lanes()) {
        RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
        BashStartupScript bash = BashStartupScript.of(laneBucket.name());

        // Both FASTQ files of the pair are downloaded from the root bucket.
        InputDownload first =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
        InputDownload second =
                new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
        bash.addCommand(first).addCommand(second);
        bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));

        SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(),
                resourceFiles.refGenomeFile(),
                first.getLocalTargetPath(),
                second.getLocalTargetPath(),
                metadata.sampleName(),
                lane).apply(SubStageInputOutput.empty(metadata.sampleName()));

        // Remember where this lane's BAM will be so the merge job can download it later.
        perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
        bash.addCommands(alignment.bash())
                .addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));

        futures.add(executorService.submit(() -> runWithRetries(metadata,
                laneBucket,
                VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
        laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
        laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
    }

    AlignmentOutput output;
    if (lanesSuccessfullyComplete(futures)) {
        // Merge job: download every per-lane BAM into the root bucket's VM and merge them.
        List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
        BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
        laneBams.forEach(mergeMarkdupsBash::addCommand);

        SubStageInputOutput merged = new MergeMarkDups(laneBams.stream()
                .map(InputDownload::getLocalTargetPath)
                .filter(path -> path.endsWith("bam"))
                .collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
        mergeMarkdupsBash.addCommands(merged.bash());
        mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()),
                RuntimeFiles.typical()));

        PipelineStatus status =
                runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));

        ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder()
                .sample(metadata.sampleName())
                .status(status)
                .maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName())))
                .addAllReportComponents(laneLogComponents)
                .addAllFailedLogLocations(laneFailedLogs)
                .addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE))
                .addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));

        // The BAM and its index are only published and registered when CRAM output is disabled.
        if (!arguments.outputCram()) {
            // Single source of truth for both file names, so that the published components and the
            // registered datatypes cannot drift apart.
            String bamFileName = bam(metadata.sampleName());
            String bamIndexFileName = bai(bamFileName);
            outputBuilder.addReportComponents(
                    new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), bamFileName, bamFileName, resultsDirectory),
                    new SingleFileComponent(rootBucket,
                            Aligner.NAMESPACE,
                            Folder.from(metadata),
                            bamIndexFileName,
                            bamIndexFileName,
                            resultsDirectory))
                    .addDatatypes(
                            new AddDatatype(DataType.ALIGNED_READS,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bamFileName)),
                            new AddDatatype(DataType.ALIGNED_READS_INDEX,
                                    metadata.barcode(),
                                    new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bamIndexFileName)));
        }
        output = outputBuilder.build();
    } else {
        output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
    }

    trace.stop();
    executorService.shutdown();
    return output;
}
use of com.hartwig.pipeline.stages.SubStageInputOutput in project pipeline5 by hartwigmedical.
the class Gridss method gridssCommands.
/**
 * Builds the GRIDSS command list for one sample and records the path of the resulting VCF.
 *
 * <p>The given driver is chained with {@code RepeatMasker} and {@code GridssAnnotation} and applied
 * to an empty input for {@code sampleName}. As a side effect the {@code unfilteredVcf} field is set
 * to the path of the chain's output file.
 *
 * @param driver     the first stage of the chain
 * @param sampleName sample name used to create the empty stage input
 * @return mutable list holding the two path-export commands followed by the chain's bash commands
 */
private List<BashCommand> gridssCommands(final Driver driver, final String sampleName) {
    final SubStageInputOutput annotated = driver
            .andThen(new RepeatMasker())
            .andThen(new GridssAnnotation(resourceFiles, false))
            .apply(SubStageInputOutput.empty(sampleName));

    // Remember where the annotated VCF ends up; the field is read outside this method.
    unfilteredVcf = annotated.outputFile().path();

    final BashCommand exportBwa = new ExportPathCommand(new BwaCommand());
    final BashCommand exportSamtools = new ExportPathCommand(new SamtoolsCommand());

    final List<BashCommand> script = new ArrayList<>();
    script.add(exportBwa);
    script.add(exportSamtools);
    script.addAll(annotated.bash());
    return script;
}
Aggregations