Search in sources :

Example 21 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class GridssPanelTumor method execute.

/**
 * Builds the VM job that runs GRIDSS structural variant calling on a single panel tumor BAM.
 *
 * <p>Commands are appended to {@code commands} in this order: copy the sample's {@code non_umi_dedup} BAM from
 * {@code PANEL_BAM_BUCKET} into the VM input directory, export the BWA and samtools paths, run the GRIDSS driver
 * script, annotate the driver VCF with {@code gridss_annotate_vcf_repeatmasker}, run
 * {@code gridss.AnnotateInsertedSequence} against the virus reference genome, and upload the output directory to the
 * {@code gridss} location of the runtime bucket.
 *
 * @param inputs         batch inputs; the value of the single input is used as the sample id
 * @param runtimeBucket  bucket whose name is used as the upload destination
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return a structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();

    // download tumor BAM; the trailing wildcard in the copy also matches a "<bam>.bai" index stored next to it,
    // so no separate index file name is needed
    final String tumorBam = String.format("%s.non_umi_dedup.bam", sampleId);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s* %s", PANEL_BAM_BUCKET, tumorBam, VmDirectories.INPUT));

    // Inputs
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V38);
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // run Gridss variant calling
    final String gridssToolDir = String.format("%s/%s/%s", VmDirectories.TOOLS, GRIDSS_TOOL_DIR, Versions.GRIDSS);
    final String gridssJar = String.format("%s/gridss.jar", gridssToolDir);
    commands.addCommand(() -> format("chmod a+x %s", gridssJar));

    final String gridssOutputVcf = String.format("%s/%s.gridss.driver.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner gridssArgs = new StringJoiner(" ");
    gridssArgs.add(String.format("--output %s", gridssOutputVcf));
    // NOTE(review): the reference invocation below passes an ".assembly.bam" path to --assembly, whereas this
    // passes a ".vcf.gz" name - confirm the extension is intended
    gridssArgs.add(String.format("--assembly %s/%s.gridss.assembly.vcf.gz", VmDirectories.OUTPUT, sampleId));
    gridssArgs.add(String.format("--workingdir %s/gridss_working", VmDirectories.OUTPUT));
    gridssArgs.add(String.format("--reference %s", resourceFiles.refGenomeFile()));
    gridssArgs.add(String.format("--jar %s", gridssJar));
    gridssArgs.add(String.format("--blacklist %s", resourceFiles.gridssBlacklistBed()));
    gridssArgs.add(String.format("--configuration %s", resourceFiles.gridssPropertiesFile()));
    gridssArgs.add(String.format("--labels %s", sampleId));
    gridssArgs.add(String.format("--threads %s", Bash.allCpus()));
    gridssArgs.add("--jvmheap 31G");
    gridssArgs.add("--externalaligner");
    // the input BAM is the trailing positional argument
    gridssArgs.add(String.format("%s/%s", VmDirectories.INPUT, tumorBam));

    // Reference invocation this command was modelled on:
    // nohup /data/tools/gridss/2.13.2/gridss
    // --output ./FR16648841.gridss.driver.vcf.gz
    // --assembly ./FR16648841.assembly.bam
    // --workingdir ./gridss_working
    // --reference /data/resources/bucket/reference_genome/38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
    // --jar /data/tools/gridss/2.13.2/gridss.jar
    // --blacklist /data/resources/public/gridss_repeatmasker_db/38/ENCFF001TDO.38.bed
    // --configuration /data/resources/public/gridss_config/gridss.properties
    // --labels FR16648841
    // --jvmheap 31G
    // --threads 4
    // --externalaligner
    // FR16648841.chr21_slice1.bam &
    commands.addCommand(() -> format("%s/gridss %s", gridssToolDir, gridssArgs.toString()));

    // RepeatMasker annotation of the driver VCF. Reference invocation:
    // /opt/tools/gridss/2.13.2/gridss_annotate_vcf_repeatmasker
    // --output /data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    // --jar /opt/tools/gridss/2.13.2/gridss.jar
    // -w /data/output
    // --rm /opt/tools/repeatmasker/4.1.1/RepeatMasker
    // /data/output/CPCT12345678T.gridss.driver.vcf.gz
    final String rmOutputVcf = String.format("%s/%s.gridss.repeatmasker.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner rmArgs = new StringJoiner(" ");
    rmArgs.add(String.format("--output %s", rmOutputVcf));
    rmArgs.add(String.format("--jar %s", gridssJar));
    rmArgs.add(String.format("-w %s", VmDirectories.OUTPUT));
    rmArgs.add(String.format("--rm %s", REPEAT_MASKER_TOOL));
    rmArgs.add(gridssOutputVcf);
    commands.addCommand(() -> format("%s/gridss_annotate_vcf_repeatmasker %s", gridssToolDir, rmArgs.toString()));

    // AnnotateInsertedSequence on the RepeatMasker output. Reference invocation:
    // java -Xmx8G -Dsamjdk.create_index=true
    // -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true
    // -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304
    // -cp /opt/tools/gridss/2.13.2/gridss.jar gridss.AnnotateInsertedSequence
    // REFERENCE_SEQUENCE=/opt/resources/virus_reference_genome/human_virus.fa
    // INPUT=/data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    // OUTPUT=/data/output/CPCT12345678T.gridss.unfiltered.vcf.gz
    // ALIGNMENT=APPEND WORKER_THREADS=12
    final String finalOutputVcf = String.format("%s/%s.gridss.unfiltered.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner vmArgs = new StringJoiner(" ");
    GridssCommand.JVM_ARGUMENTS.forEach(vmArgs::add);
    final StringJoiner annInsSeqArgs = new StringJoiner(" ");
    annInsSeqArgs.add(String.format("REFERENCE_SEQUENCE=%s", resourceFiles.gridssVirusRefGenomeFile()));
    annInsSeqArgs.add(String.format("INPUT=%s", rmOutputVcf));
    annInsSeqArgs.add(String.format("OUTPUT=%s", finalOutputVcf));
    annInsSeqArgs.add(String.format("ALIGNMENT=APPEND WORKER_THREADS=%s", Bash.allCpus()));
    commands.addCommand(() -> format("java -Xmx8G -Dsamjdk.create_index=true %s -cp %s gridss.AnnotateInsertedSequence %s", vmArgs.toString(), gridssJar, annInsSeqArgs.toString()));

    // upload everything in the output directory
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));
    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner)

Example 22 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class GripssGermline method execute.

/**
 * Builds the VM job that runs GRIPSS, followed by the GRIPSS hard filter, on a sample's GRIDSS structural variants
 * using the V37 resource set.
 *
 * <p>The reference sample id resolved through {@code LocalLocations} is the one passed as {@code -tumor} and used to
 * name both output VCFs. The output directory is uploaded to the {@code gripss} location of the runtime bucket.
 *
 * @param inputs         batch inputs; the value of the single input is used as the sample id
 * @param runtimeBucket  bucket whose name is used as the upload destination
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named {@code gripss} with a custom (8, 30) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor input = inputs.get();
    final String sampleId = input.inputValue();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // resolve the remote inputs; every location requested here is included in the generated download commands
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(input.billedProject(), sampleId);
    final LocalLocations locations = new LocalLocations(remoteLocations);
    final String referenceSample = locations.getReference();
    final String gridssVcf = locations.getStructuralVariantsGridss();

    // fetch the GRIPSS jar, then the sample inputs
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommands(locations.generateDownloadCommands());

    // first pass: GRIPSS itself
    final String gripssVcf = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, referenceSample);
    final StringJoiner gripssArgs = new StringJoiner(" ")
            .add(String.format("-tumor %s", referenceSample))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-breakpoint_hotspot %s", resources.knownFusionPairBedpe()))
            .add(String.format("-breakend_pon %s", resources.gridssBreakendPon()))
            .add(String.format("-breakpoint_pon %s", resources.gridssBreakpointPon()))
            .add(String.format("-pon_distance %d", 4))
            .add(String.format("-min_qual_break_end %d", 400))
            .add(String.format("-min_qual_rescue_mobile_element_insertion %d", 400))
            .add(String.format("-min_qual_break_point %d", 250))
            .add(String.format("-input_vcf %s", gridssVcf))
            .add(String.format("-output_vcf %s", gripssVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssApplicationKt %s",
            MAX_HEAP,
            VmDirectories.TOOLS,
            GRIPSS_JAR,
            gripssArgs.toString()));

    // second pass: hard filter applied to the first pass output
    final String filteredVcf = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, referenceSample);
    final StringJoiner hardFilterArgs = new StringJoiner(" ")
            .add(String.format("-input_vcf %s", gripssVcf))
            .add(String.format("-output_vcf %s", filteredVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssHardFilterApplicationKt %s",
            MAX_HEAP,
            VmDirectories.TOOLS,
            GRIPSS_JAR,
            hardFilterArgs.toString()));

    // upload output
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "gripss");
    startupScript.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gripss")
            .startupCommand(startupScript)
            .performanceProfile(custom(8, 30))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) LocalLocations(com.hartwig.batch.api.LocalLocations) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 23 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class TeloBatch method execute.

/**
 * Builds the VM job that runs telo twice for one sample: first on the tumor BAM ("somatic"), then on the reference
 * BAM ("germline"), both against the V37 reference genome.
 *
 * <p>The tumor BAM and its index are removed from the VM before the reference BAM is downloaded. The output directory
 * is uploaded to a location named after the sample id in the runtime bucket.
 *
 * @param inputs         batch inputs; must contain {@code sampleId} and may contain {@code specificChromosome}
 * @param runtimeBucket  bucket whose name is used as the upload destination
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named {@code telo} with 500 GB working disk and a custom (16, MEMORY_GB) profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get("sampleId").inputValue();

    // the chromosome restriction is an optional input: a failed lookup simply means "not restricted"
    Optional<String> specificChromosome;
    try {
        specificChromosome = Optional.of(inputs.get("specificChromosome").inputValue());
    } catch (IllegalArgumentException ignored) {
        specificChromosome = Optional.empty();
    }

    final InputFileDescriptor descriptor = inputs.get();
    final RemoteLocationsApi locations = new RemoteLocationsApi(descriptor.billedProject(), sampleId);

    // fetch the telo jar
    commands.addCommand(downloadExperimentalVersion());

    // ref genome
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // tumor pass: download BAM + index, then run telo in "somatic" mode
    final InputDownload tumorBam = new InputDownload(locations.getTumorAlignment());
    final InputDownload tumorBamIndex = new InputDownload(locations.getTumorAlignmentIndex());
    commands.addCommand(tumorBam);
    commands.addCommand(tumorBamIndex);
    commands.addCommand(makeTeloRunCommand(sampleId,
            "somatic",
            tumorBam.getLocalTargetPath(),
            resources.refGenomeFile(),
            specificChromosome));

    // the tumor files are no longer needed; remove them to save disk space before the reference download
    commands.addCommand(() -> format("rm -f %s", tumorBam.getLocalTargetPath()));
    commands.addCommand(() -> format("rm -f %s", tumorBamIndex.getLocalTargetPath()));

    // reference pass: download BAM + index, then run telo in "germline" mode
    final InputDownload referenceBam = new InputDownload(locations.getReferenceAlignment());
    final InputDownload referenceBamIndex = new InputDownload(locations.getReferenceAlignmentIndex());
    commands.addCommand(referenceBam);
    commands.addCommand(referenceBamIndex);
    commands.addCommand(makeTeloRunCommand(sampleId,
            "germline",
            referenceBam.getLocalTargetPath(),
            resources.refGenomeFile(),
            specificChromosome));

    // Store output
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), sampleId);
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("telo")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(16, MEMORY_GB))
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 24 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class RnaIsofoxExonCounts method execute.

/**
 * Builds the VM job that runs Isofox transcript counting with exon data output for one RNA sample, restricted to the
 * supplied gene ids, using the V37 resource set.
 *
 * <p>The single batch input is a comma-separated value from which the sample id and the gene ids are read. The
 * sample's RNA BAM and index are copied from the cohort's {@code samples} directory, the Isofox jar is copied into
 * the tools directory, and the output directory is uploaded to the {@code isofox} location of the bucket.
 *
 * @param inputs         batch inputs; the value of the single input must hold at least two comma-separated items
 * @param bucket         bucket whose name is used as the upload destination
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named {@code rna-isofox}, or {@code null} when the input holds fewer than two items
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    // NOTE(review): assumes COL_SAMPLE_ID and COL_GENE_IDS index the first two items - confirm against their definitions
    if (batchItems.length < 2) {
        // the message names the columns actually read below, and ends the line so later output is not appended to it
        System.out.println(String.format("invalid input arguments(%s) - expected SampleId,GeneIds", batchInputs));
        return null;
    }
    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String geneIds = batchItems[COL_GENE_IDS];
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the Isofox JAR
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions %s", FUNC_TRANSCRIPT_COUNTS));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add("-write_exon_data");
    isofoxArgs.add(String.format("-restricted_gene_ids %s", geneIds));
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder().name("rna-isofox").startupCommand(startupScript).namespacedResults(ResultsDirectory.defaultDirectory()).workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB).build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Example 25 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class RnaIsofoxSpliceJunctions method execute.

/**
 * Builds the VM job that runs Isofox for one RNA sample, restricted to the supplied gene ids, writing its results
 * with the output id {@code gene_sj} and using the V37 resource set.
 *
 * <p>The single batch input is a comma-separated value from which the sample id and the gene ids are read. The
 * sample's RNA BAM and index are copied from the cohort's {@code samples} directory, the Isofox jar is copied into
 * the tools directory, and the output directory is uploaded to the {@code isofox} location of the bucket.
 *
 * @param inputs         batch inputs; the value of the single input must hold at least two comma-separated items
 * @param bucket         bucket whose name is used as the upload destination
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named {@code rna-isofox}, or {@code null} when the input holds fewer than two items
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    // NOTE(review): assumes COL_SAMPLE_ID and COL_GENE_IDS index the first two items - confirm against their definitions
    if (batchItems.length < 2) {
        // the message names the columns actually read below, and ends the line so later output is not appended to it
        System.out.println(String.format("invalid input arguments(%s) - expected SampleId,GeneIds", batchInputs));
        return null;
    }
    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String geneIds = batchItems[COL_GENE_IDS];
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions %s", FUNC_TRANSCRIPT_COUNTS));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add("-write_exon_data");
    isofoxArgs.add(String.format("-restricted_gene_ids %s", geneIds));
    // the joiner already supplies the separating space, so the argument itself carries none
    isofoxArgs.add("-output_id gene_sj");
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder().name("rna-isofox").startupCommand(startupScript).namespacedResults(ResultsDirectory.defaultDirectory()).workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB).build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Aggregations

ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles)33 OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload)24 InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor)23 StringJoiner (java.util.StringJoiner)13 GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation)10 RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi)8 ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles)7 CopyLogToOutput (com.hartwig.pipeline.execution.vm.CopyLogToOutput)5 SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput)5 LocalLocations (com.hartwig.batch.api.LocalLocations)4 InputDownload (com.hartwig.pipeline.execution.vm.InputDownload)4 RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion)4 BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand)3 SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand)3 OutputFile (com.hartwig.pipeline.execution.vm.OutputFile)3 AlignmentOutput (com.hartwig.pipeline.alignment.AlignmentOutput)2 SageApplication (com.hartwig.pipeline.calling.sage.SageApplication)2 SageCommandBuilder (com.hartwig.pipeline.calling.sage.SageCommandBuilder)2 GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation)2 PipelineStatus (com.hartwig.pipeline.execution.PipelineStatus)2