Search in sources :

Example 1 with RefGenomeVersion

use of com.hartwig.pipeline.resource.RefGenomeVersion in project pipeline5 by hartwigmedical.

the class RnaStarMapping method execute.

/**
 * Queues the commands that download one sample's RNA FASTQ files, align them with STAR, then
 * sort, duplicate-mark and index the resulting BAM, and finally upload the output.
 *
 * <p>The batch input value must be {@code SampleId,RefGenomeVersion,FastqBucketDir}. The ref genome
 * version field may be left empty, in which case V37 is used.
 *
 * @param inputs supplies the descriptor whose input value holds the comma-separated arguments
 * @param bucket runtime bucket whose name is the destination of the "star" output upload
 * @param startupScript script the commands are appended to
 * @param executionFlags passed through to the output upload command
 * @return the job definition named "rna-star-mapping"
 * @throws IllegalArgumentException if fewer than three fields are supplied, or the ref genome
 *         version field is not the name of a {@code RefGenomeVersion} constant
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");

    // required format: SampleId,RefGenomeVersion (37 when left empty),FASTQ file bucket
    // the FASTQ bucket has no fallback location, so fail with a clear message instead of an
    // ArrayIndexOutOfBoundsException further down
    if (batchItems.length < 3) {
        throw new IllegalArgumentException(String.format(
                "invalid input arguments(%s) - expected SampleId,RefGenomeVersion,FastqFileBucketDir", batchInputs));
    }

    final String sampleId = batchItems[0];
    final RefGenomeVersion refGenomeVersion = batchItems[1].isEmpty() ? V37 : RefGenomeVersion.valueOf(batchItems[1]);
    final String sampleBucket = batchItems[2];

    // copy down the FASTQ files found directly under the sample's bucket directory
    final String sampleFastqFiles = String.format("%s/*.fastq.gz", sampleBucket);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s %s", sampleFastqFiles, VmDirectories.INPUT));

    // locate the FASTQ files for reads 1 and 2 (resolved by the shell on the VM into comma-separated lists)
    final String r1Files = format("$(ls %s/*_R1* | tr '\\n' ',')", VmDirectories.INPUT);
    final String r2Files = format("$(ls %s/*_R2* | tr '\\n' ',')", VmDirectories.INPUT);

    // copy reference files for STAR
    final String starGenomeDir = getRnaResourceDirectory(refGenomeVersion, STAR_DIR);
    final String localStarGenomeDir = String.format("%s/%s", VmDirectories.INPUT, STAR_DIR);
    startupScript.addCommand(() -> format("mkdir %s", localStarGenomeDir));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", starGenomeDir, localStarGenomeDir));

    final String threadCount = Bash.allCpus();

    // the tools below write their outputs relative to the working directory
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run the STAR mapper
    final String[] starArgs = {
            "--runThreadN", threadCount,
            "--genomeDir", localStarGenomeDir,
            "--genomeLoad", "NoSharedMemory",
            "--readFilesIn", r1Files, r2Files,
            "--readFilesCommand", "zcat",
            "--outSAMtype", "BAM", "Unsorted",
            "--outSAMunmapped", "Within",
            "--outBAMcompression", "0",
            "--outSAMattributes", "All",
            "--outFilterMultimapNmax", "10",
            "--outFilterMismatchNmax", "3",
            // every STAR option needs its leading "--"; without it the token is not parsed as an option
            "--limitOutSJcollapsed", "3000000",
            "--chimSegmentMin", "10",
            "--chimOutType", "WithinBAM", "SoftClip",
            "--chimJunctionOverhangMin", "10",
            "--chimSegmentReadGapMax", "3",
            "--chimScoreMin", "1",
            "--chimScoreDropMax", "30",
            "--chimScoreJunctionNonGTAG", "0",
            "--chimScoreSeparation", "1",
            "--outFilterScoreMinOverLread", "0.33",
            "--outFilterMatchNminOverLread", "0.33",
            "--outFilterMatchNmin", "35",
            "--alignSplicedMateMapLminOverLmate", "0.33",
            "--alignSplicedMateMapLmin", "35",
            "--alignSJstitchMismatchNmax", "5", "-1", "5", "5" };
    startupScript.addCommand(new VersionedToolCommand("star", "STAR", "2.7.3a", starArgs));

    final String bamFile = "Aligned.out.bam";

    // sort the BAM
    final String sortedBam = sampleId + ".sorted.bam";
    final String[] sortArgs = { "sort", "-@", threadCount, "-m", "2G", "-T", "tmp", "-O", "bam", bamFile, "-o", sortedBam };
    startupScript.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, sortArgs));

    // mark duplicate fragment reads within the BAM
    final String sortedDedupedBam = sampleId + ".sorted.dups.bam";
    final String[] dupArgs = { "markdup", "-t", threadCount, "--overflow-list-size=45000000", sortedBam, sortedDedupedBam };
    startupScript.addCommand(new SambambaCommand(dupArgs));

    // index the duplicate-marked BAM
    final String[] indexArgs = { "index", sortedDedupedBam };
    startupScript.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, indexArgs));

    // clean up intermediary BAMs
    startupScript.addCommand(() -> format("rm -f %s", bamFile));
    startupScript.addCommand(() -> format("rm -f %s", sortedBam));

    // prefix the STAR summary log with the sample ID
    final String starStats = "Log.final.out";
    final String statsFile = sampleId + "." + starStats;
    startupScript.addCommand(() -> format("mv %s %s", starStats, statsFile));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "star"), executionFlags));

    // copy results to crunch
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");
    startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/", VmDirectories.OUTPUT, samplesDir, sampleId));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-star-mapping")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(12, 48))
            .build();
}
Also used : OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) SambambaCommand(com.hartwig.pipeline.execution.vm.SambambaCommand) VersionedToolCommand(com.hartwig.pipeline.calling.command.VersionedToolCommand)

Example 2 with RefGenomeVersion

use of com.hartwig.pipeline.resource.RefGenomeVersion in project pipeline5 by hartwigmedical.

the class RnaIsofox method execute.

/**
 * Queues the commands that download one sample's RNA BAM, the Isofox jar and the reference
 * files needed by the requested functions, run Isofox, and upload the results.
 *
 * <p>The batch input value is comma-separated and must contain at least two fields; the sample
 * ID and read length are read from {@code COL_SAMPLE_ID} and {@code COL_READ_LENGTH}. The
 * function list, ref genome version and max RAM are optional fields read from
 * {@code COL_FUNCTIONS}, {@code COL_REF_GENOME_VERSION} and {@code COL_MAX_RAM} when present.
 *
 * @param inputs supplies the descriptor whose input value holds the comma-separated arguments
 * @param bucket runtime bucket whose name is the destination of the "isofox" output upload
 * @param startupScript script the commands are appended to
 * @param executionFlags passed through to the output upload command
 * @return the job definition named "rna-isofox", or {@code null} if fewer than two fields are supplied
 * @throws NumberFormatException if the max RAM field is present but not an integer
 * @throws IllegalArgumentException if the ref genome version field is present but is not the
 *         name of a {@code RefGenomeVersion} constant
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
        return null;
    }

    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String readLength = batchItems[COL_READ_LENGTH];

    // optional fields fall back to defaults when the input line is too short to contain them
    final String functionsStr = batchItems.length > COL_FUNCTIONS ? batchItems[COL_FUNCTIONS] : FUNC_TRANSCRIPT_COUNTS + ";" + FUNC_NOVEL_LOCATIONS + ";" + FUNC_FUSIONS;
    final RefGenomeVersion refGenomeVersion = batchItems.length > COL_REF_GENOME_VERSION ? RefGenomeVersion.valueOf(batchItems[COL_REF_GENOME_VERSION]) : V37;
    final int maxRam = batchItems.length > COL_MAX_RAM ? Integer.parseInt(batchItems[COL_MAX_RAM]) : DEFAULT_MAX_RAM;

    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));

    // copy down required reference files
    final String expectedCountsFile = readLength.equals(READ_LENGTH_76) ? EXP_COUNTS_READ_76 : EXP_COUNTS_READ_151;
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", getRnaResourceDirectory(refGenomeVersion, "ensembl_data_cache"), VmDirectories.INPUT));
    if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), expectedCountsFile, VmDirectories.INPUT));
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), EXP_GC_COUNTS_READ_100, VmDirectories.INPUT));
    }

    // must use the same test as the -fusion_cohort_file argument below, otherwise Isofox is
    // pointed at a cohort file that was never downloaded (e.g. with the default function list)
    if (functionsStr.contains(FUNC_FUSIONS)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), COHORT_FUSION_FILE, VmDirectories.INPUT));
    }

    final String threadCount = Bash.allCpus();
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // optional extra outputs, switched off here
    boolean writeExpData = false;
    boolean writeCatCountsData = false;

    final String neoEpitopeFile = String.format("%s.imu.neo_epitopes.csv", sampleId);
    if (functionsStr.contains(FUNC_NEO_EPITOPES)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", NEO_EPITOPE_DIR, neoEpitopeFile, VmDirectories.INPUT));
    }

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    // quoted because the function list is ';'-separated, which the shell would otherwise treat as a command separator
    isofoxArgs.add(String.format("-functions \"%s\"", functionsStr));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add(String.format("-long_frag_limit %d", LONG_FRAG_LENGTH_LIMIT));
    if (refGenomeVersion == RefGenomeVersion.V38) {
        isofoxArgs.add(String.format("-ref_genome_version %s", "38"));
    }
    if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
        isofoxArgs.add("-apply_exp_rates");
        isofoxArgs.add("-apply_calc_frag_lengths");
        isofoxArgs.add(String.format("-exp_counts_file %s/%s", VmDirectories.INPUT, expectedCountsFile));
        isofoxArgs.add(String.format("-frag_length_min_count %d", FRAG_LENGTH_FRAG_COUNT));
        isofoxArgs.add("-apply_gc_bias_adjust");
        isofoxArgs.add(String.format("-exp_gc_ratios_file %s/%s", VmDirectories.INPUT, EXP_GC_COUNTS_READ_100));
        isofoxArgs.add("-apply_map_qual_adjust");
        isofoxArgs.add("-write_frag_lengths");
        isofoxArgs.add("-write_gc_data");
        if (writeCatCountsData) {
            isofoxArgs.add("-write_trans_combo_data");
        }
        if (writeExpData) {
            isofoxArgs.add("-write_exp_rates");
        }
    }
    // NOTE(review): splice-site output is only requested when NOVEL_LOCATIONS is the sole
    // function - confirm the exact match (rather than contains) is intended
    if (functionsStr.equals(FUNC_NOVEL_LOCATIONS)) {
        isofoxArgs.add("-write_splice_sites");
    }
    if (functionsStr.contains(FUNC_FUSIONS)) {
        isofoxArgs.add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()));
        isofoxArgs.add(String.format("-fusion_cohort_file %s/%s", VmDirectories.INPUT, COHORT_FUSION_FILE));
    }
    // same test as the neo-epitope download above, so the downloaded file is always passed on
    if (functionsStr.contains(FUNC_NEO_EPITOPES)) {
        isofoxArgs.add(String.format("-neoepitope_file %s/%s", VmDirectories.INPUT, neoEpitopeFile));
    }
    isofoxArgs.add(String.format("-threads %s", threadCount));
    startupScript.addCommand(() -> format("java -Xmx60G -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    if (functionsStr.equals(FUNC_FUSIONS)) {
        // a fusions-only run copies just the fusion output to the sample's directory
        startupScript.addCommand(() -> format("gsutil -m cp %s/*fusions.csv %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    } else {
        // copy results to rna-analysis location on crunch
        startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    }

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(DEFAULT_CORES, maxRam))
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Example 3 with RefGenomeVersion

use of com.hartwig.pipeline.resource.RefGenomeVersion in project pipeline5 by hartwigmedical.

the class RnaIsofoxUnmapped method execute.

/**
 * Queues the commands that download one sample's RNA BAM and the Isofox jar, run Isofox with the
 * UNMAPPED_READS function against the V37 resources, and upload the results.
 *
 * @param inputs supplies the descriptor whose input value holds the comma-separated arguments
 * @param bucket runtime bucket whose name is the destination of the "isofox" output upload
 * @param startupScript script the commands are appended to
 * @param executionFlags passed through to the output upload command
 * @return the job definition named "rna-isofox", or {@code null} if fewer than two fields are supplied
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    final String rawInput = inputs.get().inputValue();
    final String[] fields = rawInput.split(",");

    // reject input lines that do not carry at least two fields
    if (fields.length < 2) {
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", rawInput));
        return null;
    }

    final String sampleId = fields[COL_SAMPLE_ID];
    final RefGenomeVersion genomeVersion = V37;
    final ResourceFiles resources = buildResourceFiles(genomeVersion);
    final String cohortSamplesDir = String.format("%s/%s", getRnaCohortDirectory(genomeVersion), "samples");

    // fetch the sample's BAM followed by its index
    final String bamName = sampleId + RNA_BAM_FILE_ID;
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", cohortSamplesDir, sampleId, bamName, VmDirectories.INPUT));
    final String bamIndexName = sampleId + RNA_BAM_INDEX_FILE_ID;
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", cohortSamplesDir, sampleId, bamIndexName, VmDirectories.INPUT));

    // fetch the Isofox jar, then switch to the output directory
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // assemble the Isofox command line
    final StringJoiner isofoxCli = new StringJoiner(" ")
            .add("-sample " + sampleId)
            .add("-functions UNMAPPED_READS")
            .add("-output_dir " + VmDirectories.OUTPUT + "/")
            .add("-bam_file " + VmDirectories.INPUT + "/" + bamName)
            .add("-ref_genome " + resources.refGenomeFile())
            .add("-ensembl_data_dir " + resources.ensemblDataCache());
    final String cpuCount = Bash.allCpus();
    isofoxCli.add("-threads " + cpuCount);
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxCli.toString()));

    // push everything in the output directory to the run bucket
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Example 4 with RefGenomeVersion

use of com.hartwig.pipeline.resource.RefGenomeVersion in project pipeline5 by hartwigmedical.

the class RnaIsofoxExonCounts method execute.

/**
 * Queues the commands that download one sample's RNA BAM and the Isofox jar, run Isofox's
 * transcript-count function with exon data output restricted to the supplied gene IDs (V37
 * resources), and upload the results.
 *
 * <p>The batch input value is comma-separated and must contain at least two fields; the sample
 * ID and gene IDs are read from {@code COL_SAMPLE_ID} and {@code COL_GENE_IDS}.
 *
 * @param inputs supplies the descriptor whose input value holds the comma-separated arguments
 * @param bucket runtime bucket whose name is the destination of the "isofox" output upload
 * @param startupScript script the commands are appended to
 * @param executionFlags passed through to the output upload command
 * @return the job definition named "rna-isofox", or {@code null} if fewer than two fields are supplied
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        // name the fields this operation actually reads
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,GeneIds", batchInputs));
        return null;
    }

    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String geneIds = batchItems[COL_GENE_IDS];
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the Isofox JAR
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions %s", FUNC_TRANSCRIPT_COUNTS));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add("-write_exon_data");
    // quoted so that a gene list containing shell metacharacters (e.g. a ';' separator) reaches
    // Isofox as a single argument instead of being split by the shell
    isofoxArgs.add(String.format("-restricted_gene_ids \"%s\"", geneIds));
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Example 5 with RefGenomeVersion

use of com.hartwig.pipeline.resource.RefGenomeVersion in project pipeline5 by hartwigmedical.

the class RnaIsofoxSpliceJunctions method execute.

/**
 * Queues the commands that download one sample's RNA BAM and the Isofox jar, run Isofox's
 * transcript-count function restricted to the supplied gene IDs with output ID "gene_sj"
 * (V37 resources), and upload the results.
 *
 * <p>The batch input value is comma-separated and must contain at least two fields; the sample
 * ID and gene IDs are read from {@code COL_SAMPLE_ID} and {@code COL_GENE_IDS}.
 *
 * @param inputs supplies the descriptor whose input value holds the comma-separated arguments
 * @param bucket runtime bucket whose name is the destination of the "isofox" output upload
 * @param startupScript script the commands are appended to
 * @param executionFlags passed through to the output upload command
 * @return the job definition named "rna-isofox", or {@code null} if fewer than two fields are supplied
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        // name the fields this operation actually reads
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,GeneIds", batchInputs));
        return null;
    }

    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String geneIds = batchItems[COL_GENE_IDS];
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run Isofox
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions %s", FUNC_TRANSCRIPT_COUNTS));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add("-write_exon_data");
    // quoted so that a gene list containing shell metacharacters (e.g. a ';' separator) reaches
    // Isofox as a single argument instead of being split by the shell
    isofoxArgs.add(String.format("-restricted_gene_ids \"%s\"", geneIds));
    isofoxArgs.add(" -output_id gene_sj");
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Aggregations

InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor)5 OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload)5 RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion)5 ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles)4 ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles)4 StringJoiner (java.util.StringJoiner)4 VersionedToolCommand (com.hartwig.pipeline.calling.command.VersionedToolCommand)1 SambambaCommand (com.hartwig.pipeline.execution.vm.SambambaCommand)1