use of com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation in project pipeline5 by hartwigmedical.
the class GridssRerun method execute.
/**
 * Fills the given startup script with the commands for a GRIDSS run on a tumor/reference pair
 * and returns the structural-calling job definition built from that script.
 *
 * <p>The script exports the BWA and samtools paths, fetches both CRAMs and their indexes, converts
 * them to BAM when the derived BAM path differs from the fetched file, runs the GRIDSS driver
 * followed by the annotation stage, copies the unfiltered VCF and its {@code .tbi} index to the
 * archive location, and finally uploads the output to the {@code gridss} folder of the runtime bucket.
 *
 * @param inputs         bundle providing the {@code set}, {@code tumor_sample}, {@code reference_sample},
 *                       {@code tumor_cram} and {@code ref_cram} inputs
 * @param runtimeBucket  bucket whose name is used as the upload destination
 * @param commands       startup script that the commands are appended to
 * @param executionFlags runtime files handed to the final output upload
 * @return the structural-calling job definition for the populated script
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // --- Inputs -------------------------------------------------------------------------------
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final String referenceSample = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor referenceCram = inputs.get("ref_cram");

    // NOTE(review): nothing created in this group is read again further down in this method.
    // The calls are kept because they may have side effects - confirm whether they are still needed.
    final InputFileDescriptor runDescriptor = inputs.get();
    final RemoteLocationsApi locations = new RemoteLocationsApi(runDescriptor.billedProject(), tumorSample);
    InputDownload tumorAlignmentDownload = new InputDownload(locations.getTumorAlignment());
    InputDownload tumorAlignmentIndexDownload = new InputDownload(locations.getTumorAlignmentIndex());
    InputDownload referenceAlignmentDownload = new InputDownload(locations.getReferenceAlignment());
    InputDownload referenceAlignmentIndexDownload = new InputDownload(locations.getReferenceAlignmentIndex());

    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final InputFileDescriptor referenceCramIndex = referenceCram.index();
    final String tumorLocalPath = localFilename(tumorCram);
    final String referenceLocalPath = localFilename(referenceCram);

    // The BAM path is the local path with every occurrence of "cram" swapped for "bam".
    // NOTE(review): this is a plain substring replace, so "cram" anywhere in the path is rewritten too.
    final String tumorBam = tumorLocalPath.replace("cram", "bam");
    final String referenceBam = referenceLocalPath.replace("cram", "bam");

    // --- GRIDSS driver followed by annotation ---------------------------------------------------
    final Driver assemblyDriver = new Driver(resources, VmDirectories.outputFile(tumorSample + ".assembly.bam"))
            .tumorSample(tumorSample, tumorBam)
            .referenceSample(referenceSample, referenceBam);
    final SubStageInputOutput gridssResult = assemblyDriver
            .andThen(new GridssAnnotation(resources, false))
            .apply(SubStageInputOutput.empty(tumorSample));

    final OutputFile vcf = gridssResult.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorSample);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // --- Commands -----------------------------------------------------------------------------
    // Tool paths first, so the later commands can find bwa and samtools.
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Fetch the alignments and their indexes: tumor first, then reference.
    commands.addCommand(() -> tumorCram.toCommandForm(tumorLocalPath));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));
    commands.addCommand(() -> referenceCram.toCommandForm(referenceLocalPath));
    commands.addCommand(() -> referenceCramIndex.toCommandForm(localFilename(referenceCramIndex)));

    // A conversion is only scheduled when the derived BAM path is not the fetched file itself.
    final boolean tumorNeedsConversion = !tumorLocalPath.equals(tumorBam);
    if (tumorNeedsConversion) {
        commands.addCommands(cramToBam(tumorLocalPath));
    }
    final boolean referenceNeedsConversion = !referenceLocalPath.equals(referenceBam);
    if (referenceNeedsConversion) {
        commands.addCommands(cramToBam(referenceLocalPath));
    }

    // Run the driver + annotation stages, then archive the resulting VCF and its index.
    commands.addCommands(gridssResult.bash());
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive));
    commands.addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));

    // Upload the output to the "gridss" folder of the runtime bucket.
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "gridss");
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation in project pipeline5 by hartwigmedical.
the class Gridss method gridssCommands.
/**
 * Chains the given driver with a repeat-masker stage and an annotation stage, records the path of
 * the resulting output file in {@code unfilteredVcf}, and returns the bash commands for the run.
 *
 * <p>The returned list starts with the path exports for BWA and samtools, followed by the commands
 * produced by the chained stages.
 *
 * @param driver     the first stage of the chain
 * @param sampleName sample name used to seed the empty stage input
 * @return a new mutable list holding the path exports and the stage commands, in that order
 */
private List<BashCommand> gridssCommands(final Driver driver, final String sampleName) {
    final SubStageInputOutput stageResult = driver
            .andThen(new RepeatMasker())
            .andThen(new GridssAnnotation(resourceFiles, false))
            .apply(SubStageInputOutput.empty(sampleName));

    // Side effect: remember where the chained stages write their output file.
    unfilteredVcf = stageResult.outputFile().path();

    final List<BashCommand> script = new ArrayList<>();
    script.add(new ExportPathCommand(new BwaCommand()));
    script.add(new ExportPathCommand(new SamtoolsCommand()));
    script.addAll(stageResult.bash());
    return script;
}
use of com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation in project pipeline5 by hartwigmedical.
the class GridssBackport method execute.
/**
 * Fills the given startup script with the commands that re-derive an annotated, unfiltered GRIDSS
 * VCF from a previously stored assembly BAM and unfiltered VCF, and returns the structural-calling
 * job definition built from that script.
 *
 * <p>The inputs are read from fixed {@code gs://hmf-gridss/...} locations derived from the
 * {@code set} and {@code tumor_sample} input values. Two header-only BAMs, named after the shell
 * variables {@code sample0} and {@code sample1}, are created from the assembly BAM header and
 * passed to the evidence-allocation step together with the re-processed assembly BAM.
 *
 * @param inputs         bundle providing the {@code set} and {@code tumor_sample} inputs
 * @param runtimeBucket  bucket whose name is used as the upload destination
 * @param startupScript  startup script that the commands are appended to
 * @param executionFlags runtime files handed to the final output upload
 * @return the structural-calling job definition for the populated script
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // The "set" descriptor doubles as the template every derived descriptor is copied from.
    final InputFileDescriptor setDescriptor = inputs.get("set");
    final String setName = inputs.get("set").inputValue();
    final String sampleName = inputs.get("tumor_sample").inputValue();

    final String assemblyBamUrl = String.format("gs://hmf-gridss/assembly/%s/%s.assembly.bam.sv.bam", setName, sampleName);
    final String originalVcfUrl = String.format("gs://hmf-gridss/original/%s/%s.gridss.unfiltered.vcf.gz", setName, sampleName);

    final InputFileDescriptor assemblyBam = ImmutableInputFileDescriptor.builder()
            .from(setDescriptor)
            .inputValue(assemblyBamUrl)
            .build();
    final InputFileDescriptor assemblyBamIndex = assemblyBam.index();
    final InputFileDescriptor originalVcf = ImmutableInputFileDescriptor.builder()
            .from(setDescriptor)
            .inputValue(originalVcfUrl)
            .build();
    final InputFileDescriptor originalVcfIndex = originalVcf.index();

    // Step 1: export the tool paths.
    startupScript.addCommand(new ExportPathCommand(new BwaCommand()));
    startupScript.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Step 2: copy the assembly BAM, the original VCF and both indexes onto the VM.
    startupScript.addCommand(assemblyBam::copyToLocalDestinationCommand);
    startupScript.addCommand(assemblyBamIndex::copyToLocalDestinationCommand);
    startupScript.addCommand(originalVcf::copyToLocalDestinationCommand);
    startupScript.addCommand(originalVcfIndex::copyToLocalDestinationCommand);

    // Step 3: take the first line of the VCF that matches CHROM and store its 10th and 11th
    // space-separated fields in the shell variables sample0 and sample1.
    startupScript.addCommand(() -> format("sampleNames=$(zgrep -m1 CHROM %s)", originalVcf.localDestination()));
    startupScript.addCommand(() -> "sample0=$(echo $sampleNames | cut -d \" \" -f 10)");
    startupScript.addCommand(() -> "sample1=$(echo $sampleNames | cut -d \" \" -f 11)");

    // Step 4: create header-only BAMs named after those variables, plus copies in their working directories.
    final String firstEmptyBam = String.format("%s/${%s}", VmDirectories.INPUT, "sample0");
    final String firstEmptyBamWorking = workingDir(firstEmptyBam) + ".sv.bam";
    final String secondEmptyBam = String.format("%s/${%s}", VmDirectories.INPUT, "sample1");
    final String secondEmptyBamWorking = workingDir(secondEmptyBam) + ".sv.bam";

    startupScript.addCommand(() -> format("samtools view -H %s | samtools view -o %s", assemblyBam.localDestination(), firstEmptyBam));
    startupScript.addCommand(() -> format("samtools view -H %s | samtools view -o %s", assemblyBam.localDestination(), secondEmptyBam));
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(firstEmptyBamWorking)));
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(secondEmptyBamWorking)));
    startupScript.addCommand(() -> format("cp %s %s", firstEmptyBam, firstEmptyBamWorking));
    startupScript.addCommand(() -> format("cp %s %s", secondEmptyBam, secondEmptyBamWorking));

    // Step 5: run SoftClipsToSplitReads on the assembly BAM, writing into its working directory.
    final String reprocessedAssemblyBam = workingDir(assemblyBam.localDestination());
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(reprocessedAssemblyBam)));
    startupScript.addCommand(new SoftClipsToSplitReads(assemblyBam.localDestination(), resources.refGenomeFile(), reprocessedAssemblyBam));

    // Step 6: allocate evidence into a new raw VCF whose name carries the GRIDSS version with dots turned into underscores.
    final String rawVcfSuffix = "gridss_" + Versions.GRIDSS.replace(".", "_") + ".raw";
    final OutputFile rawVcf = OutputFile.of(sampleName, rawVcfSuffix, FileTypes.GZIPPED_VCF);
    startupScript.addCommand(new AllocateEvidence(firstEmptyBam,
            secondEmptyBam,
            reprocessedAssemblyBam,
            originalVcf.localDestination(),
            rawVcf.path(),
            resources.refGenomeFile(),
            resources.gridssPropertiesFile()));

    // Step 7: run the annotation stage on the raw VCF.
    final SubStageInputOutput annotated = new GridssAnnotation(resources, true)
            .apply(SubStageInputOutput.of(sampleName, rawVcf, Collections.emptyList()));
    startupScript.addCommands(annotated.bash());

    // Step 8: copy the annotated VCF and its .tbi index to the archive location.
    final OutputFile annotatedVcf = annotated.outputFile();
    final OutputFile annotatedVcfIndex = annotatedVcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, sampleName);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");
    startupScript.addCommand(() -> annotatedVcf.copyToRemoteLocation(vcfArchive));
    startupScript.addCommand(() -> annotatedVcfIndex.copyToRemoteLocation(vcfIndexArchive));

    // Step 9: upload the output to the "gridss" folder of the runtime bucket.
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "gridss");
    startupScript.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(startupScript, ResultsDirectory.defaultDirectory());
}
Aggregations