Search in sources :

Example 1 with SamtoolsCommand

use of com.hartwig.pipeline.calling.command.SamtoolsCommand in project pipeline5 by hartwigmedical.

In class GridssRerun, method execute:

/**
 * Builds the VM job definition for a GRIDSS re-run on one tumor/reference pair.
 *
 * <p>Commands are appended to {@code commands} in this order: export-path commands for bwa and
 * samtools, the command forms of the tumor and reference files and their indexes, a conversion
 * step for each file whose local name changes when "cram" is replaced by "bam", the bash of the
 * GRIDSS driver chained with annotation, copies of the unfiltered VCF and its ".tbi" index to the
 * archive location, and the final upload to "gridss" in the runtime bucket.
 *
 * @param inputs batch inputs; the keys "set", "tumor_sample", "reference_sample", "tumor_cram" and
 *               "ref_cram" are read, as well as the key-less default input
 * @param runtimeBucket bucket whose name is used as the target of the final output upload
 * @param commands startup script that receives every command
 * @param executionFlags runtime files handed to the output upload
 * @return the structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final ResourceFiles refResources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Named batch inputs.
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorRemote = inputs.get("tumor_cram");
    final InputFileDescriptor referenceRemote = inputs.get("ref_cram");

    // NOTE(review): none of the six values below is read again in this method. They are still
    // constructed in case the constructors have side effects - confirm before removing them.
    final InputFileDescriptor defaultInput = inputs.get();
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(defaultInput.billedProject(), tumorName);
    final InputDownload tumorAlignment = new InputDownload(remoteLocations.getTumorAlignment());
    final InputDownload tumorAlignmentIndex = new InputDownload(remoteLocations.getTumorAlignmentIndex());
    final InputDownload referenceAlignment = new InputDownload(remoteLocations.getReferenceAlignment());
    final InputDownload referenceAlignmentIndex = new InputDownload(remoteLocations.getReferenceAlignmentIndex());

    // Local file names; the BAM name is the local name with every "cram" replaced by "bam".
    final InputFileDescriptor tumorRemoteIndex = tumorRemote.index();
    final InputFileDescriptor referenceRemoteIndex = referenceRemote.index();
    final String tumorLocal = localFilename(tumorRemote);
    final String referenceLocal = localFilename(referenceRemote);
    final String tumorBam = tumorLocal.replace("cram", "bam");
    final String referenceBam = referenceLocal.replace("cram", "bam");

    // GRIDSS driver chained with annotation.
    final Driver gridssDriver = new Driver(refResources, VmDirectories.outputFile(tumorName + ".assembly.bam"))
            .tumorSample(tumorName, tumorBam)
            .referenceSample(referenceName, referenceBam);
    final SubStageInputOutput gridssResult = gridssDriver
            .andThen(new GridssAnnotation(refResources, false))
            .apply(SubStageInputOutput.empty(tumorName));

    // Local outputs and where they are archived.
    final OutputFile resultVcf = gridssResult.outputFile();
    final OutputFile resultVcfIndex = resultVcf.index(".tbi");
    final GoogleStorageLocation archivedVcf = remoteUnfilteredVcfArchivePath(setName, tumorName);
    final GoogleStorageLocation archivedVcfIndex = index(archivedVcf, ".tbi");

    // Tools on the path.
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Input files and their indexes.
    commands.addCommand(() -> tumorRemote.toCommandForm(tumorLocal));
    commands.addCommand(() -> tumorRemoteIndex.toCommandForm(localFilename(tumorRemoteIndex)));
    commands.addCommand(() -> referenceRemote.toCommandForm(referenceLocal));
    commands.addCommand(() -> referenceRemoteIndex.toCommandForm(localFilename(referenceRemoteIndex)));

    // Convert only when the derived BAM name differs from the local file name.
    final boolean convertTumor = !tumorLocal.equals(tumorBam);
    if (convertTumor) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean convertReference = !referenceLocal.equals(referenceBam);
    if (convertReference) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    // Run GRIDSS, archive the unfiltered VCF with its index, then upload the output.
    commands.addCommands(gridssResult.bash());
    commands.addCommand(() -> resultVcf.copyToRemoteLocation(archivedVcf));
    commands.addCommand(() -> resultVcfIndex.copyToRemoteLocation(archivedVcfIndex));
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) Driver(com.hartwig.pipeline.calling.structural.gridss.stage.Driver) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi) BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 2 with SamtoolsCommand

use of com.hartwig.pipeline.calling.command.SamtoolsCommand in project pipeline5 by hartwigmedical.

In class Gridss, method gridssCommands:

/**
 * Assembles the bash commands for a GRIDSS run on one sample.
 *
 * <p>The given driver is chained with a {@link RepeatMasker} stage and a {@link GridssAnnotation}
 * stage and applied to an empty input for {@code sampleName}. The path of the resulting output file
 * is stored in the {@code unfilteredVcf} field as a side effect.
 *
 * @param driver the GRIDSS driver stage that starts the chain
 * @param sampleName sample name used to create the empty stage input
 * @return export-path commands for bwa and samtools, followed by the bash of the stage chain
 */
private List<BashCommand> gridssCommands(final Driver driver, final String sampleName) {
    final SubStageInputOutput stageResult = driver
            .andThen(new RepeatMasker())
            .andThen(new GridssAnnotation(resourceFiles, false))
            .apply(SubStageInputOutput.empty(sampleName));

    // Remember where the unfiltered VCF ends up.
    unfilteredVcf = stageResult.outputFile().path();

    final List<BashCommand> script = new ArrayList<>();
    script.add(new ExportPathCommand(new BwaCommand()));
    script.add(new ExportPathCommand(new SamtoolsCommand()));
    script.addAll(stageResult.bash());
    return script;
}
Also used : BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) BashCommand(com.hartwig.pipeline.execution.vm.BashCommand) ArrayList(java.util.ArrayList) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) RepeatMasker(com.hartwig.pipeline.calling.structural.gridss.stage.RepeatMasker) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation)

Example 3 with SamtoolsCommand

use of com.hartwig.pipeline.calling.command.SamtoolsCommand in project pipeline5 by hartwigmedical.

In class GridssBackport, method execute:

/**
 * Builds the VM job definition that backports an existing GRIDSS result.
 *
 * <p>The script, in order: exports the bwa and samtools paths; copies the assembly BAM, the original
 * unfiltered VCF and both indexes to their local destinations; reads two sample names from the VCF
 * header line into the shell variables {@code sample0} and {@code sample1}; creates header-only BAMs
 * named after those variables plus ".sv.bam" copies in their working directories; runs
 * SoftClipsToSplitReads on the assembly BAM; runs AllocateEvidence; applies GRIDSS annotation;
 * archives the unfiltered VCF with its ".tbi" index; and uploads the output to "gridss" in the
 * runtime bucket.
 *
 * @param inputs batch inputs; the keys "set" and "tumor_sample" are read
 * @param runtimeBucket bucket whose name is used as the target of the final output upload
 * @param startupScript startup script that receives every command
 * @param executionFlags runtime files handed to the output upload
 * @return the structural-calling job definition wrapping {@code startupScript}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final ResourceFiles refResources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // The "set" descriptor doubles as the template for the derived input descriptors.
    final InputFileDescriptor setDescriptor = inputs.get("set");
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();

    final String assemblyBamUrl = String.format("gs://hmf-gridss/assembly/%s/%s.assembly.bam.sv.bam", setName, tumorSample);
    final String originalVcfUrl = String.format("gs://hmf-gridss/original/%s/%s.gridss.unfiltered.vcf.gz", setName, tumorSample);
    final InputFileDescriptor assemblyBam = ImmutableInputFileDescriptor.builder().from(setDescriptor).inputValue(assemblyBamUrl).build();
    final InputFileDescriptor assemblyBamIndex = assemblyBam.index();
    final InputFileDescriptor originalVcf = ImmutableInputFileDescriptor.builder().from(setDescriptor).inputValue(originalVcfUrl).build();
    final InputFileDescriptor originalVcfIndex = originalVcf.index();

    // Step 1: tools on the path.
    startupScript.addCommand(new ExportPathCommand(new BwaCommand()));
    startupScript.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Step 2: copy the inputs to their local destinations.
    startupScript.addCommand(assemblyBam::copyToLocalDestinationCommand);
    startupScript.addCommand(assemblyBamIndex::copyToLocalDestinationCommand);
    startupScript.addCommand(originalVcf::copyToLocalDestinationCommand);
    startupScript.addCommand(originalVcfIndex::copyToLocalDestinationCommand);

    // Step 3: take the sample names from fields 10 and 11 of the first VCF line matching CHROM.
    startupScript.addCommand(() -> format("sampleNames=$(zgrep -m1 CHROM %s)", originalVcf.localDestination()));
    startupScript.addCommand(() -> "sample0=$(echo $sampleNames | cut -d \" \" -f 10)");
    startupScript.addCommand(() -> "sample1=$(echo $sampleNames | cut -d \" \" -f 11)");

    // Step 4: header-only BAMs named after the shell variables, plus copies in their working directories.
    final String firstEmptyBam = String.format("%s/${%s}", VmDirectories.INPUT, "sample0");
    final String firstEmptyBamWorking = workingDir(firstEmptyBam) + ".sv.bam";
    final String secondEmptyBam = String.format("%s/${%s}", VmDirectories.INPUT, "sample1");
    final String secondEmptyBamWorking = workingDir(secondEmptyBam) + ".sv.bam";
    startupScript.addCommand(() -> format("samtools view -H %s | samtools view -o %s", assemblyBam.localDestination(), firstEmptyBam));
    startupScript.addCommand(() -> format("samtools view -H %s | samtools view -o %s", assemblyBam.localDestination(), secondEmptyBam));
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(firstEmptyBamWorking)));
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(secondEmptyBamWorking)));
    startupScript.addCommand(() -> format("cp %s %s", firstEmptyBam, firstEmptyBamWorking));
    startupScript.addCommand(() -> format("cp %s %s", secondEmptyBam, secondEmptyBamWorking));

    // Step 5: SoftClipsToSplitReads writes into the working directory of the assembly BAM.
    final String splitReadAssemblyBam = workingDir(assemblyBam.localDestination());
    startupScript.addCommand(() -> format("mkdir -p %s", dirname(splitReadAssemblyBam)));
    startupScript.addCommand(new SoftClipsToSplitReads(assemblyBam.localDestination(), refResources.refGenomeFile(), splitReadAssemblyBam));

    // Step 6: AllocateEvidence produces the raw VCF; the GRIDSS version is embedded with dots replaced by underscores.
    final String rawVcfStep = "gridss_" + Versions.GRIDSS.replace(".", "_") + ".raw";
    final OutputFile rawVcf = OutputFile.of(tumorSample, rawVcfStep, FileTypes.GZIPPED_VCF);
    startupScript.addCommand(new AllocateEvidence(firstEmptyBam,
            secondEmptyBam,
            splitReadAssemblyBam,
            originalVcf.localDestination(),
            rawVcf.path(),
            refResources.refGenomeFile(),
            refResources.gridssPropertiesFile()));

    // Step 7: GRIDSS annotation on the raw VCF.
    final SubStageInputOutput annotated = new GridssAnnotation(refResources, true)
            .apply(SubStageInputOutput.of(tumorSample, rawVcf, Collections.emptyList()));
    startupScript.addCommands(annotated.bash());

    // Step 8: archive the unfiltered VCF and its index.
    final OutputFile resultVcf = annotated.outputFile();
    final OutputFile resultVcfIndex = resultVcf.index(".tbi");
    final GoogleStorageLocation archivedVcf = remoteUnfilteredVcfArchivePath(setName, tumorSample);
    final GoogleStorageLocation archivedVcfIndex = index(archivedVcf, ".tbi");
    startupScript.addCommand(() -> resultVcf.copyToRemoteLocation(archivedVcf));
    startupScript.addCommand(() -> resultVcfIndex.copyToRemoteLocation(archivedVcfIndex));

    // Step 9: upload all output.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(startupScript, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) ImmutableInputFileDescriptor(com.hartwig.batch.input.ImmutableInputFileDescriptor) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation) BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation) AllocateEvidence(com.hartwig.pipeline.calling.structural.gridss.command.AllocateEvidence) SoftClipsToSplitReads(com.hartwig.pipeline.calling.structural.gridss.command.SoftClipsToSplitReads)

Example 4 with SamtoolsCommand

use of com.hartwig.pipeline.calling.command.SamtoolsCommand in project pipeline5 by hartwigmedical.

In class GridssPanelTumor, method execute:

/**
 * Builds the VM job definition that runs GRIDSS on a single panel tumor BAM.
 *
 * <p>The script, in order: copies every object whose name starts with
 * {@code <sampleId>.non_umi_dedup.bam} from the panel BAM bucket into the input directory; exports
 * the bwa and samtools paths; makes the GRIDSS jar executable; runs the {@code gridss} script;
 * runs {@code gridss_annotate_vcf_repeatmasker} on the driver VCF; runs
 * {@code gridss.AnnotateInsertedSequence} on the repeat-masked VCF; and uploads the output to
 * "gridss" in the runtime bucket.
 *
 * @param inputs batch inputs; the key-less default input supplies the sample id
 * @param runtimeBucket bucket whose name is used as the target of the final output upload
 * @param commands startup script that receives every command
 * @param executionFlags runtime files handed to the output upload
 * @return the structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();

    // Download the tumor BAM. The trailing '*' in the gsutil pattern also matches a ".bai" index
    // stored next to the BAM, so no separate index file name is needed.
    final String tumorBam = String.format("%s.non_umi_dedup.bam", sampleId);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s* %s", PANEL_BAM_BUCKET, tumorBam, VmDirectories.INPUT));

    // Resources and tools on the path.
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V38);
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Run GRIDSS variant calling.
    final String gridssToolDir = String.format("%s/%s/%s", VmDirectories.TOOLS, GRIDSS_TOOL_DIR, Versions.GRIDSS);
    final String gridssJar = String.format("%s/gridss.jar", gridssToolDir);
    commands.addCommand(() -> format("chmod a+x %s", gridssJar));

    final String gridssOutputVcf = String.format("%s/%s.gridss.driver.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner gridssArgs = new StringJoiner(" ");
    gridssArgs.add(String.format("--output %s", gridssOutputVcf));
    // NOTE(review): the example invocation below passes "<sample>.assembly.bam" to --assembly,
    // whereas this file name ends in ".gridss.assembly.vcf.gz" - confirm which one is intended.
    gridssArgs.add(String.format("--assembly %s/%s.gridss.assembly.vcf.gz", VmDirectories.OUTPUT, sampleId));
    gridssArgs.add(String.format("--workingdir %s/gridss_working", VmDirectories.OUTPUT));
    gridssArgs.add(String.format("--reference %s", resourceFiles.refGenomeFile()));
    gridssArgs.add(String.format("--jar %s", gridssJar));
    gridssArgs.add(String.format("--blacklist %s", resourceFiles.gridssBlacklistBed()));
    gridssArgs.add(String.format("--configuration %s", resourceFiles.gridssPropertiesFile()));
    gridssArgs.add(String.format("--labels %s", sampleId));
    gridssArgs.add(String.format("--threads %s", Bash.allCpus()));
    gridssArgs.add("--jvmheap 31G");
    gridssArgs.add("--externalaligner");
    gridssArgs.add(String.format("%s/%s", VmDirectories.INPUT, tumorBam));
    // Example of a manual invocation this command was modelled on:
    // nohup /data/tools/gridss/2.13.2/gridss
    // --output ./FR16648841.gridss.driver.vcf.gz
    // --assembly ./FR16648841.assembly.bam
    // --workingdir ./gridss_working
    // --reference /data/resources/bucket/reference_genome/38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
    // --jar /data/tools/gridss/2.13.2/gridss.jar
    // --blacklist /data/resources/public/gridss_repeatmasker_db/38/ENCFF001TDO.38.bed
    // --configuration /data/resources/public/gridss_config/gridss.properties
    // --labels FR16648841
    // --jvmheap 31G
    // --threads 4
    // --externalaligner
    // FR16648841.chr21_slice1.bam &
    commands.addCommand(() -> format("%s/gridss %s", gridssToolDir, gridssArgs.toString()));

    // Repeat-masker annotation of the driver VCF. Example of the equivalent VersionedToolCommand bash:
    // /opt/tools/gridss/2.13.2/gridss_annotate_vcf_repeatmasker
    // --output /data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    // --jar /opt/tools/gridss/2.13.2/gridss.jar
    // -w /data/output
    // --rm /opt/tools/repeatmasker/4.1.1/RepeatMasker
    // /data/output/CPCT12345678T.gridss.driver.vcf.gz
    final String rmOutputVcf = String.format("%s/%s.gridss.repeatmasker.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner rmArgs = new StringJoiner(" ");
    rmArgs.add(String.format("--output %s", rmOutputVcf));
    rmArgs.add(String.format("--jar %s", gridssJar));
    rmArgs.add(String.format("-w %s", VmDirectories.OUTPUT));
    rmArgs.add(String.format("--rm %s", REPEAT_MASKER_TOOL));
    rmArgs.add(gridssOutputVcf);
    commands.addCommand(() -> format("%s/gridss_annotate_vcf_repeatmasker %s", gridssToolDir, rmArgs.toString()));

    // Inserted-sequence annotation of the repeat-masked VCF. Example of the equivalent bash:
    // java -Xmx8G -Dsamjdk.create_index=true
    // -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true
    // -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304
    // -cp /opt/tools/gridss/2.13.2/gridss.jar gridss.AnnotateInsertedSequence
    // REFERENCE_SEQUENCE=/opt/resources/virus_reference_genome/human_virus.fa
    // INPUT=/data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    // OUTPUT=/data/output/CPCT12345678T.gridss.unfiltered.vcf.gz
    // ALIGNMENT=APPEND WORKER_THREADS=12
    final String finalOutputVcf = String.format("%s/%s.gridss.unfiltered.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner vmArgs = new StringJoiner(" ");
    GridssCommand.JVM_ARGUMENTS.forEach(vmArgs::add);
    final StringJoiner annInsSeqArgs = new StringJoiner(" ");
    annInsSeqArgs.add(String.format("REFERENCE_SEQUENCE=%s", resourceFiles.gridssVirusRefGenomeFile()));
    annInsSeqArgs.add(String.format("INPUT=%s", rmOutputVcf));
    annInsSeqArgs.add(String.format("OUTPUT=%s", finalOutputVcf));
    annInsSeqArgs.add(String.format("ALIGNMENT=APPEND WORKER_THREADS=%s", Bash.allCpus()));
    commands.addCommand(() -> format("java -Xmx8G -Dsamjdk.create_index=true %s -cp %s gridss.AnnotateInsertedSequence %s", vmArgs.toString(), gridssJar, annInsSeqArgs.toString()));

    // Upload everything in the output directory.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));
    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner)

Aggregations

BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand)4 SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand)4 ExportPathCommand (com.hartwig.pipeline.execution.vm.unix.ExportPathCommand)4 InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor)3 GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation)3 OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload)3 ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles)3 SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput)3 OutputFile (com.hartwig.pipeline.execution.vm.OutputFile)2 GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation)2 RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi)1 ImmutableInputFileDescriptor (com.hartwig.batch.input.ImmutableInputFileDescriptor)1 AllocateEvidence (com.hartwig.pipeline.calling.structural.gridss.command.AllocateEvidence)1 SoftClipsToSplitReads (com.hartwig.pipeline.calling.structural.gridss.command.SoftClipsToSplitReads)1 Driver (com.hartwig.pipeline.calling.structural.gridss.stage.Driver)1 RepeatMasker (com.hartwig.pipeline.calling.structural.gridss.stage.RepeatMasker)1 BashCommand (com.hartwig.pipeline.execution.vm.BashCommand)1 InputDownload (com.hartwig.pipeline.execution.vm.InputDownload)1 ArrayList (java.util.ArrayList)1 StringJoiner (java.util.StringJoiner)1