Search in sources :

Example 6 with OutputUpload

use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.

the class GridssRerun method execute.

/**
 * Assembles the startup script for a GRIDSS re-run on one tumor/reference pair and wraps it in a
 * structural-calling VM job definition.
 *
 * <p>The script exports BWA and samtools onto the path, localises both CRAMs and their indexes,
 * converts a CRAM to BAM when its local name changes under the "cram" to "bam" substitution, runs the
 * GRIDSS driver followed by the annotation stage, copies the unfiltered VCF and its ".tbi" index to
 * the archive location, and finally uploads the output to the "gridss" folder of the runtime bucket.
 *
 * @param inputs         named batch inputs: set, tumor_sample, reference_sample, tumor_cram, ref_cram
 * @param runtimeBucket  bucket whose name is the root of the output upload
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime flag files handed to the output upload
 * @return a structural-calling job definition built from {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // ---- inputs ----
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final String referenceSample = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor referenceCram = inputs.get("ref_cram");

    // NOTE(review): none of the values in this group are read again in this method. They are kept
    // because it is not visible from here whether constructing them has side effects - confirm and
    // remove if they are dead.
    final InputFileDescriptor runData = inputs.get();
    final RemoteLocationsApi locationsApi = new RemoteLocationsApi(runData.billedProject(), tumorSample);
    InputDownload tumorBamDownload = new InputDownload(locationsApi.getTumorAlignment());
    InputDownload tumorBamIndexDownload = new InputDownload(locationsApi.getTumorAlignmentIndex());
    InputDownload referenceBamDownload = new InputDownload(locationsApi.getReferenceAlignment());
    InputDownload referenceBamIndexDownload = new InputDownload(locationsApi.getReferenceAlignmentIndex());

    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final InputFileDescriptor referenceCramIndex = referenceCram.index();
    final String tumorLocalPath = localFilename(tumorCram);
    final String referenceLocalPath = localFilename(referenceCram);
    final String tumorBam = tumorLocalPath.replace("cram", "bam");
    final String referenceBam = referenceLocalPath.replace("cram", "bam");
    // A path that is unchanged by the substitution is used as-is and needs no conversion.
    final boolean convertTumor = !tumorLocalPath.equals(tumorBam);
    final boolean convertReference = !referenceLocalPath.equals(referenceBam);

    // ---- GRIDSS stages ----
    Driver gridssDriver = new Driver(resources, VmDirectories.outputFile(tumorSample + ".assembly.bam"))
            .tumorSample(tumorSample, tumorBam)
            .referenceSample(referenceSample, referenceBam);
    GridssAnnotation annotation = new GridssAnnotation(resources, false);
    SubStageInputOutput gridssOutput = gridssDriver.andThen(annotation).apply(SubStageInputOutput.empty(tumorSample));

    final OutputFile vcf = gridssOutput.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorSample);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // ---- commands ----
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // localise tumor, then reference, each followed by its index
    commands.addCommand(() -> tumorCram.toCommandForm(tumorLocalPath));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));
    commands.addCommand(() -> referenceCram.toCommandForm(referenceLocalPath));
    commands.addCommand(() -> referenceCramIndex.toCommandForm(localFilename(referenceCramIndex)));

    if (convertTumor) {
        commands.addCommands(cramToBam(tumorLocalPath));
    }
    if (convertReference) {
        commands.addCommands(cramToBam(referenceLocalPath));
    }

    commands.addCommands(gridssOutput.bash());

    // archive the unfiltered VCF and index, then upload the output
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive));
    commands.addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) Driver(com.hartwig.pipeline.calling.structural.gridss.stage.Driver) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi) BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 7 with OutputUpload

use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.

the class GripssPurpleLinx method execute.

// private static String PON_BP = "gridss_pon_breakpoint.37.sorted.bedpe";
// private static String PON_BE = "gridss_pon_single_breakend.37.sorted.bed";
/**
 * Assembles the startup script that runs the per-sample steps of {@code runSample} for every
 * requested sample and wraps it in a VM job named "gpl".
 *
 * <p>The batch input value is comma-separated: the first field is a ';'-separated list of sample ids,
 * and the optional second field is passed to {@code SampleLocationData.loadSampleLocations}. When the
 * second field is absent an empty location map is used.
 *
 * @param inputs         batch inputs; only the default descriptor is read
 * @param runtimeBucket  bucket whose name is the root of the output upload
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime flag files handed to the output upload
 * @return job definition named "gpl" using a custom(12, 32) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String[] inputArguments = descriptor.inputValue().split(",");
    final List<String> sampleIds = Arrays.stream(inputArguments[0].split(";")).collect(Collectors.toList());

    final Map<String, SampleLocationData> sampleLocations;
    if (inputArguments.length <= 1) {
        sampleLocations = Maps.newHashMap();
    } else {
        sampleLocations = SampleLocationData.loadSampleLocations(inputArguments[1], sampleIds);
    }

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the tool JARs onto the VM
    final String copyToolTemplate = "gsutil -u hmf-crunch cp %s/%s/%s %s";
    startupScript.addCommand(() -> format(copyToolTemplate, BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format(copyToolTemplate, BATCH_TOOLS_BUCKET, PURPLE_DIR, PURPLE_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format(copyToolTemplate, BATCH_TOOLS_BUCKET, LINX_DIR, LINX_JAR, VmDirectories.TOOLS));

    for (String sampleId : sampleIds) {
        runSample(startupScript, resourceFiles, sampleId, sampleLocations);
    }

    // upload the full output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gpl"), executionFlags));

    // additionally gather the key output files per tool under the combined output directory
    final String gripssCombined = COMBINED_OUTPUT_DIR + "/gripss/";
    final String linxCombined = COMBINED_OUTPUT_DIR + "/linx/";
    final String purpleCombined = COMBINED_OUTPUT_DIR + "/purple/";
    final String paveCombined = COMBINED_OUTPUT_DIR + "/pave/";

    startupScript.addCommand(() -> format("gsutil -m cp %s/*gripss*vcf* %s", VmDirectories.OUTPUT, gripssCombined));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.somatic.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveCombined));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.germline.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveCombined));

    // files selected for subsequent Linx runs and/or comparison using Compar
    startupScript.addCommand(() -> format("gsutil -m cp %s/*linx*.tsv %s", VmDirectories.OUTPUT, linxCombined));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*purple* %s", VmDirectories.OUTPUT, purpleCombined));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*driver.catalog* %s", VmDirectories.OUTPUT, purpleCombined));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gpl")
            .startupCommand(startupScript)
            .performanceProfile(custom(12, 32))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) SampleLocationData(com.hartwig.batch.utils.SampleLocationData)

Example 8 with OutputUpload

use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.

the class SageGermline method execute.

/**
 * Assembles the startup script that runs SAGE in germline mode followed by PAVE for one sample and
 * wraps it in a VM job named "sage".
 *
 * <p>The script copies the SAGE jar, copies the tumor and reference CRAMs (the trailing '*' on the
 * source path also matches any sibling files sharing the prefix), runs SAGE with the tumor and
 * reference roles swapped, runs PAVE on the SAGE VCF, and uploads the output to the "sage" folder of
 * the runtime bucket.
 *
 * @param inputs         batch inputs; the default descriptor's value is the sample id
 * @param runtimeBucket  bucket whose name is the root of the output upload
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime flag files handed to the output upload
 * @return job definition named "sage" using a custom(24, 64) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the SAGE jar
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);

    // fetch the tumor CRAM
    final String[] tumorCram = getCramFileData(locations.getTumorAlignment());
    final String tumorCramName = tumorCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    // fetch the reference CRAM
    final String referenceId = locations.getReference();
    final String[] referenceCram = getCramFileData(locations.getReferenceAlignment());
    final String referenceCramName = referenceCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", referenceCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String tumorCramOnVm = VmDirectories.INPUT + "/" + tumorCramName;
    final String referenceCramOnVm = VmDirectories.INPUT + "/" + referenceCramName;
    final String sageVcf = String.format("%s/%s.sage.germline.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // SAGE germline
    final StringJoiner sageArgs = new StringJoiner(" ")
            // note the switch on samples: the reference is passed as tumor and the tumor as reference
            .add(String.format("-tumor %s", referenceId))
            .add(String.format("-tumor_bam %s", referenceCramOnVm))
            .add(String.format("-reference %s", sampleId))
            .add(String.format("-reference_bam %s", tumorCramOnVm))
            .add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resourceFiles.version().toString()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add(String.format("-hotspots %s", resourceFiles.sageGermlineHotspots()))
            .add(String.format("-panel_bed %s", resourceFiles.sageGermlineCodingPanel()))
            .add("-panel_only")
            .add("-hotspot_min_tumor_qual 50")
            .add("-panel_min_tumor_qual 75")
            .add("-hotspot_max_germline_vaf 100")
            .add("-hotspot_max_germline_rel_raw_base_qual 100")
            .add("-panel_max_germline_vaf 100")
            .add("-panel_max_germline_rel_raw_base_qual 100")
            .add("-mnv_filter_enabled false")
            .add(String.format("-out %s", sageVcf))
            .add(String.format("-threads %s", Bash.allCpus()));
    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageArgs.toString()));

    // PAVE germline, annotating the SAGE output
    final String paveJar = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    final String paveVcf = String.format("%s/%s.sage.germline.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner paveArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageVcf))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-filter_pass")
            .add(String.format("-output_vcf_file %s", paveVcf));
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveArgs.toString()));

    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 9 with OutputUpload

use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.

the class SageRerunOld method execute.

/**
 * Assembles the startup script for a SAGE somatic re-run on one tumor/reference pair and wraps it in
 * a SAGE somatic-calling VM job definition.
 *
 * <p>The script localises the tumor and reference CRAMs with their indexes (plus an RNA BAM and index
 * when an "rna" input is present), converts a CRAM to BAM when its BAM path differs from its local
 * path, runs SAGE followed by the somatic post-processing stage, copies the filtered and unfiltered
 * VCFs, their ".tbi" indexes and the BQR png/tsv files for both samples to the archive location, and
 * uploads the output to the "sage" folder of the runtime bucket.
 *
 * @param inputs         named batch inputs: set, tumor_sample, ref_sample, tumor_cram, ref_cram and
 *                       optionally rna
 * @param runtimeBucket  bucket whose name is the root of the output upload
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime flag files handed to the output upload
 * @return a SAGE somatic-calling job definition built from {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // ---- inputs ----
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final String referenceSample = inputs.get("ref_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor referenceCram = inputs.get("ref_cram");
    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final InputFileDescriptor referenceCramIndex = referenceCram.index();

    final String tumorLocalPath = localFilename(tumorCram);
    final String referenceLocalPath = localFilename(referenceCram);
    final String tumorAlignment;
    final String referenceAlignment;
    if (CONVERT_TO_BAM) {
        tumorAlignment = tumorLocalPath.replace("cram", "bam");
        referenceAlignment = referenceLocalPath.replace("cram", "bam");
    } else {
        tumorAlignment = tumorLocalPath;
        referenceAlignment = referenceLocalPath;
    }
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // ---- downloads: tumor first, then reference, each followed by its index ----
    commands.addCommand(() -> tumorCram.toCommandForm(tumorLocalPath));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));
    commands.addCommand(() -> referenceCram.toCommandForm(referenceLocalPath));
    commands.addCommand(() -> referenceCramIndex.toCommandForm(localFilename(referenceCramIndex)));

    final SageCommandBuilder sageBuilder = new SageCommandBuilder(resources)
            .addReference(referenceSample, referenceAlignment)
            .addTumor(tumorSample, tumorAlignment);
    if (PANEL_ONLY) {
        sageBuilder.panelOnly();
    }

    if (inputs.contains("rna")) {
        final InputFileDescriptor rnaBam = inputs.get("rna");
        final InputFileDescriptor rnaBamIndex = rnaBam.index();
        final String rnaLocalPath = localFilename(rnaBam);

        // download the RNA BAM and index
        commands.addCommand(() -> rnaBam.toCommandForm(rnaLocalPath));
        commands.addCommand(() -> rnaBamIndex.toCommandForm(localFilename(rnaBamIndex)));

        // registered with SAGE as an extra reference, named by appending "NA" to the reference sample name
        sageBuilder.addReference(referenceSample + "NA", rnaLocalPath);
    }

    // ---- CRAM to BAM, only where the alignment path differs from the downloaded path ----
    final boolean convertTumor = !tumorLocalPath.equals(tumorAlignment);
    final boolean convertReference = !referenceLocalPath.equals(referenceAlignment);
    if (convertTumor) {
        commands.addCommands(cramToBam(tumorLocalPath));
    }
    if (convertReference) {
        commands.addCommands(cramToBam(referenceLocalPath));
    }

    // ---- SAGE + post-processing ----
    SageApplication sage = new SageApplication(sageBuilder);
    SageSomaticPostProcess postProcess = new SageSomaticPostProcess(tumorSample, resources);
    SubStageInputOutput sageOutput = sage.andThen(postProcess).apply(SubStageInputOutput.empty(tumorSample));
    commands.addCommands(sageOutput.bash());

    // ---- archive targeted output ----
    final GoogleStorageLocation archive = sageArchiveDirectory(setName);
    final OutputFile filteredVcf = sageOutput.outputFile();
    final OutputFile filteredVcfIndex = filteredVcf.index(".tbi");
    // the SAGE stage applied on its own gives the location of the unfiltered VCF
    final OutputFile unfilteredVcf = sage.apply(SubStageInputOutput.empty(tumorSample)).outputFile();
    final OutputFile unfilteredVcfIndex = unfilteredVcf.index(".tbi");

    for (final OutputFile archived : new OutputFile[] { filteredVcf, filteredVcfIndex, unfilteredVcf, unfilteredVcfIndex }) {
        commands.addCommand(() -> archived.copyToRemoteLocation(archive));
    }
    // BQR files: tumor png, tumor tsv, reference png, reference tsv
    for (final String sample : new String[] { tumorSample, referenceSample }) {
        for (final String extension : new String[] { "png", "tsv" }) {
            commands.addCommand(() -> bqrFile(sample, extension).copyToRemoteLocation(archive));
        }
    }

    // ---- store output ----
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));
    return VirtualMachineJobDefinition.sageSomaticCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ImmutableOutputFile(com.hartwig.pipeline.execution.vm.ImmutableOutputFile) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) SageSomaticPostProcess(com.hartwig.pipeline.calling.sage.SageSomaticPostProcess) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) SageCommandBuilder(com.hartwig.pipeline.calling.sage.SageCommandBuilder) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation) SageApplication(com.hartwig.pipeline.calling.sage.SageApplication)

Example 10 with OutputUpload

use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.

the class RnaIsofox method execute.

/**
 * Assembles the startup script that runs Isofox on one RNA sample and wraps it in a VM job named
 * "rna-isofox".
 *
 * <p>The batch input value is a comma-separated record read by column index. At least two columns
 * (sample id and read length) are required; the functions list, ref genome version and max RAM are
 * optional and fall back to defaults when the record is too short to contain them. The default
 * functions list is transcript counts, novel locations and fusions joined with ';'.
 *
 * <p>Each resource file is downloaded under the same condition that adds the Isofox argument pointing
 * at it, so a function that is part of a longer functions list still gets its input file.
 *
 * @param inputs         batch inputs; only the default descriptor is read
 * @param bucket         bucket whose name is the root of the output upload
 * @param startupScript  startup script the commands are appended to
 * @param executionFlags runtime flag files handed to the output upload
 * @return the job definition, or {@code null} (after printing a message to stdout) when fewer than
 *         two input columns are supplied
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        // kept as print-and-return-null so existing callers see the same behaviour on bad input
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
        return null;
    }
    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String readLength = batchItems[COL_READ_LENGTH];
    final String functionsStr = batchItems.length > COL_FUNCTIONS ? batchItems[COL_FUNCTIONS] : FUNC_TRANSCRIPT_COUNTS + ";" + FUNC_NOVEL_LOCATIONS + ";" + FUNC_FUSIONS;
    final RefGenomeVersion refGenomeVersion = batchItems.length > COL_REF_GENOME_VERSION ? RefGenomeVersion.valueOf(batchItems[COL_REF_GENOME_VERSION]) : V37;
    final int maxRam = batchItems.length > COL_MAX_RAM ? Integer.parseInt(batchItems[COL_MAX_RAM]) : DEFAULT_MAX_RAM;

    // Which functions were requested. Each flag drives both the download of a function's input file
    // and the argument that references it, so the two cannot drift apart.
    final boolean runTranscriptCounts = functionsStr.contains(FUNC_TRANSCRIPT_COUNTS);
    final boolean runFusions = functionsStr.contains(FUNC_FUSIONS);
    final boolean runNeoEpitopes = functionsStr.contains(FUNC_NEO_EPITOPES);
    // True only when fusions is the sole requested function; selects the narrower result copy below.
    final boolean fusionsOnly = functionsStr.equals(FUNC_FUSIONS);

    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));

    // copy down required reference files
    final String expectedCountsFile = readLength.equals(READ_LENGTH_76) ? EXP_COUNTS_READ_76 : EXP_COUNTS_READ_151;
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", getRnaResourceDirectory(refGenomeVersion, "ensembl_data_cache"), VmDirectories.INPUT));
    if (runTranscriptCounts) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), expectedCountsFile, VmDirectories.INPUT));
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), EXP_GC_COUNTS_READ_100, VmDirectories.INPUT));
    }
    // Fix: this used to require the functions list to be exactly FUSIONS, so the default list passed
    // -fusion_cohort_file for a file that had never been copied to the VM.
    if (runFusions) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), COHORT_FUSION_FILE, VmDirectories.INPUT));
    }

    final String threadCount = Bash.allCpus();
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // optional extra outputs, switched off here
    boolean writeExpData = false;
    boolean writeCatCountsData = false;

    final String neoEpitopeFile = String.format("%s.imu.neo_epitopes.csv", sampleId);
    if (runNeoEpitopes) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", NEO_EPITOPE_DIR, neoEpitopeFile, VmDirectories.INPUT));
    }

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions \"%s\"", functionsStr));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add(String.format("-long_frag_limit %d", LONG_FRAG_LENGTH_LIMIT));
    if (refGenomeVersion == RefGenomeVersion.V38) {
        isofoxArgs.add("-ref_genome_version 38");
    }
    if (runTranscriptCounts) {
        isofoxArgs.add("-apply_exp_rates");
        isofoxArgs.add("-apply_calc_frag_lengths");
        isofoxArgs.add(String.format("-exp_counts_file %s/%s", VmDirectories.INPUT, expectedCountsFile));
        isofoxArgs.add(String.format("-frag_length_min_count %d", FRAG_LENGTH_FRAG_COUNT));
        isofoxArgs.add("-apply_gc_bias_adjust");
        isofoxArgs.add(String.format("-exp_gc_ratios_file %s/%s", VmDirectories.INPUT, EXP_GC_COUNTS_READ_100));
        isofoxArgs.add("-apply_map_qual_adjust");
        isofoxArgs.add("-write_frag_lengths");
        isofoxArgs.add("-write_gc_data");
        if (writeCatCountsData) {
            isofoxArgs.add("-write_trans_combo_data");
        }
        if (writeExpData) {
            isofoxArgs.add("-write_exp_rates");
        }
    }
    // NOTE(review): splice sites are written only when novel locations is the sole function; left as
    // found because no matching download or argument shows which check was intended - confirm.
    if (functionsStr.equals(FUNC_NOVEL_LOCATIONS)) {
        isofoxArgs.add("-write_splice_sites");
    }
    if (runFusions) {
        isofoxArgs.add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()));
        isofoxArgs.add(String.format("-fusion_cohort_file %s/%s", VmDirectories.INPUT, COHORT_FUSION_FILE));
    }
    // Fix: aligned with the download above, which already ran whenever neo-epitopes was in the list.
    if (runNeoEpitopes) {
        isofoxArgs.add(String.format("-neoepitope_file %s/%s", VmDirectories.INPUT, neoEpitopeFile));
    }
    isofoxArgs.add(String.format("-threads %s", threadCount));
    startupScript.addCommand(() -> format("java -Xmx60G -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
    if (fusionsOnly) {
        // a fusions-only run copies just the fusion results into the sample's isofox directory
        startupScript.addCommand(() -> format("gsutil -m cp %s/*fusions.csv %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    } else {
        // copy results to rna-analysis location on crunch
        startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    }

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(DEFAULT_CORES, maxRam))
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Aggregations

OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload)40 InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor)35 ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles)24 StringJoiner (java.util.StringJoiner)12 GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation)9 RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi)7 CopyLogToOutput (com.hartwig.pipeline.execution.vm.CopyLogToOutput)6 VersionedToolCommand (com.hartwig.pipeline.calling.command.VersionedToolCommand)5 RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion)5 ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles)5 SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput)5 File (java.io.File)5 BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand)3 SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand)3 InputDownload (com.hartwig.pipeline.execution.vm.InputDownload)3 OutputFile (com.hartwig.pipeline.execution.vm.OutputFile)3 SageApplication (com.hartwig.pipeline.calling.sage.SageApplication)2 SageCommandBuilder (com.hartwig.pipeline.calling.sage.SageCommandBuilder)2 GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation)2 PipelineStatus (com.hartwig.pipeline.execution.PipelineStatus)2