Search in sources :

Example 1 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class LilacBatch method addSampleCommands.

/**
 * Appends the commands for one Lilac sample run to {@code commands}: download the inputs, run the Lilac jar
 * and copy the sample's output directory to the Lilac batch bucket.
 *
 * @param runData      input descriptor; only its billed project is read here
 * @param commands     startup script that receives the generated commands
 * @param runDirectory path below the Lilac batch bucket that the output is copied to
 * @param sampleId     sample identifier used for the location lookup, file names and the output directory
 * @param hasRna       when true, the RNA HLA BAM is downloaded as well and passed to Lilac as {@code -rna_bam}
 */
private void addSampleCommands(final InputFileDescriptor runData, final BashStartupScript commands, final String runDirectory, final String sampleId, boolean hasRna) {
    final LocalLocations sampleFiles = new LocalLocations(new BamSliceDecorator(new RemoteLocationsApi(runData.billedProject(), sampleId)));
    final String purpleSomaticVcf = sampleFiles.getSomaticVariantsPurple();
    final String geneCopyNumberTsv = sampleFiles.getGeneCopyNumberTsv();
    final String tumorBam = sampleFiles.getTumorAlignment();
    final String referenceBam = sampleFiles.getReferenceAlignment();
    final String rnaBamName;
    if (hasRna) {
        rnaBamName = String.format("%s.rna.hla.bam", sampleId);
    } else {
        rnaBamName = "";
    }

    // fetch the sample inputs; the RNA BAM comes from its own bucket
    commands.addCommands(sampleFiles.generateDownloadCommands());
    if (hasRna) {
        commands.addCommand(() -> format("gsutil -m cp gs://%s/%s/%s* %s", HLA_BAMS_BUCKET, sampleId, rnaBamName, VmDirectories.INPUT));
    }

    // per-sample output directory on the VM
    final String localOutputDir = String.format("%s/%s/", VmDirectories.OUTPUT, sampleId);
    commands.addCommand(() -> format("mkdir -p %s", localOutputDir));

    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Each argument carries its own leading space in addition to the joiner's delimiter,
    // so the assembled command line contains doubled spaces between arguments.
    final StringJoiner lilacCli = new StringJoiner(" ")
            .add(String.format(" -sample %s", sampleId))
            .add(String.format(" -resource_dir %s", VmDirectories.INPUT))
            .add(String.format(" -ref_genome %s", resources.refGenomeFile()))
            .add(String.format(" -reference_bam %s", referenceBam))
            .add(String.format(" -tumor_bam %s", tumorBam));
    if (hasRna) {
        lilacCli.add(String.format(" -rna_bam %s/%s", VmDirectories.INPUT, rnaBamName));
    }
    lilacCli.add(String.format(" -output_dir %s", localOutputDir))
            .add(String.format(" -gene_copy_number_file %s", geneCopyNumberTsv))
            .add(String.format(" -somatic_variants_file %s", purpleSomaticVcf))
            .add(String.format(" -threads %s", Bash.allCpus()));
    commands.addCommand(() -> format("java -Xmx%s -jar %s/%s %s", MAX_HEAP, VmDirectories.TOOLS, LILAC_JAR, lilacCli.toString()));

    // NOTE: a second, tumor-only Lilac run used to be sketched here as commented-out code (output under
    // <output>/<sample>/tumor, the tumor BAM passed as -reference_bam together with -tumor_only). It is not executed.

    // copy the sample's output directory to the batch bucket
    final String remoteOutputDir = String.format("gs://%s/%s/", LILAC_BATCH_BUCKET, runDirectory);
    commands.addCommand(() -> format("gsutil -m cp -r %s/%s/ %s", VmDirectories.OUTPUT, sampleId, remoteOutputDir));
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) LocalLocations(com.hartwig.batch.api.LocalLocations) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 2 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class SageCompare method execute.

/**
 * Builds the VM job that runs Sage for one sample in one or more configurations so the results can be compared.
 * <p>
 * The input value has the form {@code sampleId[,runType]}; without a run type {@code RUN_BOTH} is used.
 * {@code RUN_OLD} runs the versioned Sage jar on the converted BAMs, {@code RUN_NEW} runs {@code SAGE_JAR} on the
 * converted BAMs, {@code RUN_BOTH} does both, and {@code RUN_CRAM_VS_BAM} runs {@code SAGE_JAR} on the converted
 * BAMs and once more on the original CRAMs.
 * <p>
 * When the old and the new version run in the same job, the old version writes
 * {@code <sample>.sage.somatic.old.vcf.gz}; otherwise both runs would target {@code <sample>.sage.somatic.vcf.gz}
 * and the second would overwrite the first.
 *
 * @param inputs         bundle whose default input carries the sample id and optional run type
 * @param runtimeBucket  bucket that receives the uploaded output under {@code sage}
 * @param startupScript  script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return job definition named {@code sage} using a custom (24, 64) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String[] sampleData = descriptor.inputValue().split(",", -1);
    final String sampleId = sampleData[0];
    final String runTypes = sampleData.length > 1 ? sampleData[1] : RUN_BOTH;
    final boolean runBoth = runTypes.equalsIgnoreCase(RUN_BOTH);
    final boolean cramVsBam = runTypes.equalsIgnoreCase(RUN_CRAM_VS_BAM);
    final boolean runOld = runBoth || runTypes.equalsIgnoreCase(RUN_OLD);
    final boolean runNew = runBoth || cramVsBam || runTypes.equalsIgnoreCase(RUN_NEW);
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the Sage jar under test
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    // download tumor and reference CRAMs; the trailing wildcard also matches files sharing the CRAM's name as prefix
    final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);
    final String[] tumorCramData = getCramFileData(locations.getTumorAlignment());
    final String tumorCramFile = tumorCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCramData[CRAM_FULL_PATH], VmDirectories.INPUT));
    final String referenceId = locations.getReference();
    final String[] refCramData = getCramFileData(locations.getReferenceAlignment());
    final String refCramFile = refCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", refCramData[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String localTumorCram = String.format("%s/%s", VmDirectories.INPUT, tumorCramFile);
    final String localRefCram = String.format("%s/%s", VmDirectories.INPUT, refCramFile);

    // convert both CRAMs to BAM
    startupScript.addCommands(cramToBam(localTumorCram));
    startupScript.addCommands(cramToBam(localRefCram));
    // NOTE(review): replace() rewrites every "cram" in the path, not only the extension; presumably this mirrors
    // the file name produced by cramToBam - confirm before changing.
    final String localTumorBam = localTumorCram.replace("cram", "bam");
    final String localRefBam = localRefCram.replace("cram", "bam");

    if (runOld) {
        // Keep the historical file name when only the old version runs; use a distinct name when the new version
        // runs too, because it writes <sample>.sage.somatic.vcf.gz and would overwrite this result.
        final String oldSageVcf = runNew
                ? String.format("%s/%s.sage.somatic.old.vcf.gz", VmDirectories.OUTPUT, sampleId)
                : String.format("%s/%s.sage.somatic.vcf.gz", VmDirectories.OUTPUT, sampleId);
        final StringJoiner oldSageArgs = new StringJoiner(" ");
        oldSageArgs.add(String.format("-tumor %s", sampleId));
        oldSageArgs.add(String.format("-tumor_bam %s", localTumorBam));
        oldSageArgs.add(String.format("-reference %s", referenceId));
        oldSageArgs.add(String.format("-reference_bam %s", localRefBam));
        oldSageArgs.add(String.format("-hotspots %s", resourceFiles.sageSomaticHotspots()));
        oldSageArgs.add(String.format("-panel_bed %s", resourceFiles.sageSomaticCodingPanel()));
        oldSageArgs.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
        oldSageArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
        oldSageArgs.add("-assembly hg19");
        oldSageArgs.add("-bqr_plot false");
        oldSageArgs.add(String.format("-out %s", oldSageVcf));
        oldSageArgs.add(String.format("-threads %s", Bash.allCpus()));
        final String oldSageJar = String.format("sage/%s/sage.jar", Versions.SAGE);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, oldSageJar, oldSageArgs.toString()));
    }
    if (runNew) {
        // new Sage on the converted BAMs
        final String newSageVcf = String.format("%s/%s.sage.somatic.vcf.gz", VmDirectories.OUTPUT, sampleId);
        final String newSageArgs = buildNewSageArgs(resourceFiles, sampleId, localTumorBam, referenceId, localRefBam, newSageVcf);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, newSageArgs));
    }
    if (cramVsBam) {
        // new Sage again, this time reading the CRAMs directly
        final String newCramSageVcf = String.format("%s/%s.sage.somatic.cram.vcf.gz", VmDirectories.OUTPUT, sampleId);
        final String newSageArgs = buildNewSageArgs(resourceFiles, sampleId, localTumorCram, referenceId, localRefCram, newCramSageVcf);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, newSageArgs));
    }
    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder().name("sage").startupCommand(startupScript).performanceProfile(custom(24, 64)).namespacedResults(ResultsDirectory.defaultDirectory()).build();
}

/** Builds the space-separated argument string for a run of the new Sage jar; callers vary only the alignment files and the output VCF. */
private static String buildNewSageArgs(final ResourceFiles resourceFiles, final String sampleId, final String tumorAlignment, final String referenceId, final String referenceAlignment, final String outputVcf) {
    final StringJoiner args = new StringJoiner(" ");
    args.add(String.format("-tumor %s", sampleId));
    args.add(String.format("-tumor_bam %s", tumorAlignment));
    args.add(String.format("-reference %s", referenceId));
    args.add(String.format("-reference_bam %s", referenceAlignment));
    args.add(String.format("-hotspots %s", resourceFiles.sageSomaticHotspots()));
    args.add(String.format("-panel_bed %s", resourceFiles.sageSomaticCodingPanel()));
    args.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
    args.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    args.add(String.format("-ref_genome_version %s", resourceFiles.version().toString()));
    args.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    args.add("-perf_warn_time 50");
    args.add("-log_debug");
    args.add(String.format("-out %s", outputVcf));
    args.add(String.format("-threads %s", Bash.allCpus()));
    return args.toString();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 3 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class GridssRerun method execute.

/**
 * Assembles the startup script for a GRIDSS re-run on one tumor/reference pair and returns the structural-calling
 * job definition.
 * <p>
 * The script exports the bwa and samtools paths, adds the command form of the tumor and reference alignment inputs
 * and their indexes, converts an input to BAM when its local name changes after replacing "cram" with "bam", runs
 * the GRIDSS driver followed by the annotation stage, copies the unfiltered VCF and its {@code .tbi} index to the
 * archive location and finally uploads the output under {@code gridss} in the runtime bucket.
 *
 * @param inputs         bundle providing {@code set}, {@code tumor_sample}, {@code reference_sample},
 *                       {@code tumor_cram}, {@code ref_cram} and the default input (read for its billed project)
 * @param runtimeBucket  bucket that receives the uploaded output
 * @param commands       script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // named inputs
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorInput = inputs.get("tumor_cram");
    final InputFileDescriptor referenceInput = inputs.get("ref_cram");

    // NOTE(review): the locations API client and the four InputDownload objects are never referenced again in this
    // method. They are kept because constructing them may have side effects - confirm before removing.
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(inputs.get().billedProject(), tumorName);
    final InputDownload apiTumorBam = new InputDownload(remoteLocations.getTumorAlignment());
    final InputDownload apiTumorBamIndex = new InputDownload(remoteLocations.getTumorAlignmentIndex());
    final InputDownload apiReferenceBam = new InputDownload(remoteLocations.getReferenceAlignment());
    final InputDownload apiReferenceBamIndex = new InputDownload(remoteLocations.getReferenceAlignmentIndex());

    final InputFileDescriptor tumorIndexInput = tumorInput.index();
    final InputFileDescriptor referenceIndexInput = referenceInput.index();

    // local file names, plus the names they have once "cram" is replaced by "bam"
    final String tumorLocal = localFilename(tumorInput);
    final String referenceLocal = localFilename(referenceInput);
    final String tumorBam = tumorLocal.replace("cram", "bam");
    final String referenceBam = referenceLocal.replace("cram", "bam");

    // GRIDSS driver followed by annotation
    final Driver gridssDriver = new Driver(resources, VmDirectories.outputFile(tumorName + ".assembly.bam"))
            .tumorSample(tumorName, tumorBam)
            .referenceSample(referenceName, referenceBam);
    final SubStageInputOutput gridssResult = gridssDriver
            .andThen(new GridssAnnotation(resources, false))
            .apply(SubStageInputOutput.empty(tumorName));

    final OutputFile vcf = gridssResult.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorName);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // tool paths
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // alignment inputs and their indexes
    commands.addCommand(() -> tumorInput.toCommandForm(tumorLocal));
    commands.addCommand(() -> tumorIndexInput.toCommandForm(localFilename(tumorIndexInput)));
    commands.addCommand(() -> referenceInput.toCommandForm(referenceLocal));
    commands.addCommand(() -> referenceIndexInput.toCommandForm(localFilename(referenceIndexInput)));

    // convert only when the replacement above actually changed the name
    final boolean tumorNeedsConversion = !tumorLocal.equals(tumorBam);
    if (tumorNeedsConversion) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean referenceNeedsConversion = !referenceLocal.equals(referenceBam);
    if (referenceNeedsConversion) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    // run GRIDSS, archive the VCF and its index, upload everything else
    commands.addCommands(gridssResult.bash());
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive));
    commands.addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));
    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) Driver(com.hartwig.pipeline.calling.structural.gridss.stage.Driver) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi) BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 4 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class GripssPurpleLinx method runSample.

/**
 * Appends the Gripss, Pave (somatic and germline), Purple and Linx commands for one sample to the startup script,
 * together with the download requests for the inputs those tools read.
 *
 * @param startupScript      script the commands are appended to
 * @param resourceFiles      source of the reference genome and the other resource file paths
 * @param sampleId           sample identifier passed to every tool and used in the output file names
 * @param sampleLocationsMap known locations per sample; for a sample that is not in the map the locations are
 *                           obtained through a RemoteLocationsApi created for project "hmf-crunch"
 */
private void runSample(final BashStartupScript startupScript, final ResourceFiles resourceFiles, final String sampleId, final Map<String, SampleLocationData> sampleLocationsMap) {
    final SampleLocationData locations;
    if (sampleLocationsMap.containsKey(sampleId)) {
        locations = sampleLocationsMap.get(sampleId);
    } else {
        locations = SampleLocationData.fromRemoteLocationsApi(sampleId, new RemoteLocationsApi("hmf-crunch", sampleId));
    }

    // files written by one tool and read by a later one, all in the VM output directory
    final String gripssVcfAll = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String gripssVcfFiltered = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String somaticPaveVcf = String.format("%s/%s.sage.somatic.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String germlinePaveVcf = String.format("%s/%s.sage.germline.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String purpleStructuralVcf = String.format("%s/%s.purple.sv.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // ---- Gripss ----
    final String gridssVcfLocal = locations.localFileRef(locations.GridssVcf);
    startupScript.addCommand(() -> locations.formDownloadRequest(locations.GridssVcf, false));
    final StringJoiner gripssCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-reference %s", locations.ReferenceId))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-known_hotspot_file %s", resourceFiles.knownFusionPairBedpe()))
            .add(String.format("-pon_sgl_file %s", resourceFiles.gridssBreakendPon()))
            .add(String.format("-pon_sv_file %s", resourceFiles.gridssBreakpointPon()))
            .add(String.format("-vcf %s", gridssVcfLocal))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add("-log_debug");
    // the jar is taken straight from the tools directory rather than from a versioned sub-directory
    final String gripssJarPath = String.format("%s/%s", VmDirectories.TOOLS, GRIPSS_JAR);
    startupScript.addCommand(() -> format("java -Xmx30G -jar %s %s", gripssJarPath, gripssCli.toString()));

    // ---- Pave, somatic ----
    final String sageSomaticLocal = locations.localFileRef(locations.SageSomaticVcf);
    startupScript.addCommand(() -> locations.formDownloadRequest(locations.SageSomaticVcf, false));
    final StringJoiner paveSomaticCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageSomaticLocal))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add(String.format("-output_vcf_file %s", somaticPaveVcf));
    // versioned jar location; shared by the somatic and germline runs
    final String paveJarPath = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJarPath, paveSomaticCli.toString()));

    // ---- Pave, germline ----
    final String sageGermlineLocal = locations.localFileRef(locations.SageGermlineVcf);
    startupScript.addCommand(() -> locations.formDownloadRequest(locations.SageGermlineVcf, false));
    final StringJoiner paveGermlineCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageGermlineLocal))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add(String.format("-output_vcf_file %s", germlinePaveVcf));
    startupScript.addCommand(() -> format("java -jar %s %s", paveJarPath, paveGermlineCli.toString()));

    // ---- Purple ----
    // Amber and Cobalt files are requested by wildcard and Purple is pointed at the VM input directory for both
    final String amberInputDir = VmDirectories.INPUT;
    final String amberPattern = String.format("%s/*amber*", locations.Amber);
    startupScript.addCommand(() -> locations.formDownloadRequest(amberPattern, false));
    final String cobaltInputDir = VmDirectories.INPUT;
    final String cobaltPattern = String.format("%s/*cobalt*", locations.Cobalt);
    startupScript.addCommand(() -> locations.formDownloadRequest(cobaltPattern, false));
    final StringJoiner purpleCli = new StringJoiner(" ")
            .add(String.format("-tumor %s", sampleId))
            .add(String.format("-reference %s", locations.ReferenceId))
            .add(String.format("-structural_vcf %s", gripssVcfFiltered))
            .add(String.format("-sv_recovery_vcf %s", gripssVcfAll))
            .add(String.format("-somatic_vcf %s", somaticPaveVcf))
            .add(String.format("-germline_vcf %s", germlinePaveVcf))
            .add(String.format("-amber %s", amberInputDir))
            .add(String.format("-cobalt %s", cobaltInputDir))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-gc_profile %s", resourceFiles.gcProfileFile()))
            .add(String.format("-somatic_hotspots %s", resourceFiles.sageSomaticHotspots()))
            .add(String.format("-germline_hotspots %s", resourceFiles.sageGermlineHotspots()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-run_drivers")
            .add("-no_charts")
            .add(String.format("-threads %s", Bash.allCpus()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT));
    final String purpleJarPath = String.format("%s/purple/%s/purple.jar", VmDirectories.TOOLS, Versions.PURPLE);
    startupScript.addCommand(() -> format("java -jar %s %s", purpleJarPath, purpleCli.toString()));

    // ---- Linx ----
    final StringJoiner linxCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-sv_vcf %s", purpleStructuralVcf))
            .add(String.format("-purple_dir %s", VmDirectories.OUTPUT))
            .add(String.format("-fragile_site_file %s", resourceFiles.fragileSites()))
            .add(String.format("-line_element_file %s", resourceFiles.lineElements()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-check_drivers")
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add("-check_fusions")
            .add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT));
    // like Gripss, the Linx jar is taken straight from the tools directory
    final String linxJarPath = String.format("%s/%s", VmDirectories.TOOLS, LINX_JAR);
    startupScript.addCommand(() -> format("java -jar %s %s", linxJarPath, linxCli.toString()));
}
Also used : SampleLocationData(com.hartwig.batch.utils.SampleLocationData) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 5 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class SageGermline method execute.

/**
 * Builds the VM job that runs Sage with germline settings for one sample and then runs Pave on the resulting VCF.
 * <p>
 * The input value is the sample id. The tumor and reference CRAMs are located through the remote locations API and
 * are passed to Sage as downloaded, without conversion.
 *
 * @param inputs         bundle whose default input carries the sample id
 * @param runtimeBucket  bucket that receives the uploaded output under {@code sage}
 * @param startupScript  script the commands are appended to
 * @param executionFlags runtime files handed to the output upload
 * @return job definition named {@code sage} using a custom (24, 64) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get().inputValue();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the Sage jar
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    // download the tumor and reference CRAMs
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi("hmf-crunch", sampleId);
    final String[] tumorCram = getCramFileData(remoteLocations.getTumorAlignment());
    final String tumorCramName = tumorCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCram[CRAM_FULL_PATH], VmDirectories.INPUT));
    final String referenceSampleId = remoteLocations.getReference();
    final String[] referenceCram = getCramFileData(remoteLocations.getReferenceAlignment());
    final String referenceCramName = referenceCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", referenceCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String tumorCramLocal = String.format("%s/%s", VmDirectories.INPUT, tumorCramName);
    final String referenceCramLocal = String.format("%s/%s", VmDirectories.INPUT, referenceCramName);
    final String germlineVcf = String.format("%s/%s.sage.germline.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // Sage: note the switch of samples - the reference sample is passed as -tumor and the tumor sample as -reference
    final StringJoiner sageCli = new StringJoiner(" ")
            .add(String.format("-tumor %s", referenceSampleId))
            .add(String.format("-tumor_bam %s", referenceCramLocal))
            .add(String.format("-reference %s", sampleId))
            .add(String.format("-reference_bam %s", tumorCramLocal))
            .add(String.format("-high_confidence_bed %s", resources.giabHighConfidenceBed()))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resources.version().toString()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add(String.format("-hotspots %s", resources.sageGermlineHotspots()))
            .add(String.format("-panel_bed %s", resources.sageGermlineCodingPanel()))
            .add("-panel_only")
            .add("-hotspot_min_tumor_qual 50")
            .add("-panel_min_tumor_qual 75")
            .add("-hotspot_max_germline_vaf 100")
            .add("-hotspot_max_germline_rel_raw_base_qual 100")
            .add("-panel_max_germline_vaf 100")
            .add("-panel_max_germline_rel_raw_base_qual 100")
            .add("-mnv_filter_enabled false")
            .add(String.format("-out %s", germlineVcf))
            .add(String.format("-threads %s", Bash.allCpus()));
    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageCli.toString()));

    // Pave on the Sage germline VCF
    final String paveJarPath = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    final String paveVcf = String.format("%s/%s.sage.germline.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner paveCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", germlineVcf))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resources.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add("-filter_pass")
            .add(String.format("-output_vcf_file %s", paveVcf));
    startupScript.addCommand(() -> format("java -jar %s %s", paveJarPath, paveCli.toString()));

    // upload everything in the output directory
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Aggregations

RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi): 11, InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor): 8, ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles): 8, OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload): 7, StringJoiner (java.util.StringJoiner): 6, LocalLocations (com.hartwig.batch.api.LocalLocations): 3, InputDownload (com.hartwig.pipeline.execution.vm.InputDownload): 2, SampleLocationData (com.hartwig.batch.utils.SampleLocationData): 1, BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand): 1, SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand): 1, Driver (com.hartwig.pipeline.calling.structural.gridss.stage.Driver): 1, GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation): 1, OutputFile (com.hartwig.pipeline.execution.vm.OutputFile): 1, ExportPathCommand (com.hartwig.pipeline.execution.vm.unix.ExportPathCommand): 1, SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput): 1, GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation): 1, IOException (java.io.IOException): 1, ParseException (org.apache.commons.cli.ParseException): 1