Use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
The class RnaRsem, method execute.
/**
 * Builds the VM job that aligns one sample's FASTQ files with STAR (transcriptome mode)
 * and then quantifies expression with RSEM.
 *
 * <p>The batch input value must have the form {@code SampleId,PathToFastqFiles}. All work is
 * queued as commands on {@code startupScript}; nothing is run by this method itself.
 *
 * @param inputs         bundle whose single input descriptor carries the batch input value
 * @param bucket         runtime bucket whose name is used as the upload destination
 * @param startupScript  script to which the download, STAR, RSEM and upload commands are added
 * @param executionFlags runtime files passed through to the output upload command
 * @return the job definition named {@code rna-rsem}, or {@code null} when the input value is
 *         malformed or no FASTQ files are found for the sample
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length != 2) {
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,PathToFastqFiles", batchInputs));
        return null;
    }
    final String sampleId = batchItems[0];
    final String fastqFilelist = batchItems[1];
    final List<String> sampleFastqFiles = getSampleFastqFileList(sampleId, fastqFilelist);
    if (sampleFastqFiles.isEmpty()) {
        System.out.print(String.format("sampleId(%s) fastq files not found", sampleId));
        return null;
    }

    // copy down FASTQ files for this sample
    for (final String fastqFile : sampleFastqFiles) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s %s", fastqFile, VmDirectories.INPUT));
    }

    // download the executables
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp -r %s/%s %s", RSEM_RESOURCES, RSEM_TOOL, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("chmod a+x %s/%s/*", VmDirectories.TOOLS, RSEM_TOOL));

    // locate the FASTQ files for reads 1 and 2; evaluated by the shell on the VM, not here
    // NOTE(review): tr turns every newline into a comma, so each list ends with a trailing
    // comma - confirm STAR tolerates that in --readFilesIn
    final String r1Files = format("$(ls %s/*_R1* | tr '\\n' ',')", VmDirectories.INPUT);
    final String r2Files = format("$(ls %s/*_R2* | tr '\\n' ',')", VmDirectories.INPUT);

    // download reference files
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp -r %s/%s %s", RNA_RESOURCES, REF_GENCODE_37_DIR, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp -r %s/%s %s", RSEM_RESOURCES, RSEM_GENE_INDEX_DIR, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run STAR with transcriptome mapping
    final String refGenomeDir = String.format("%s/%s", VmDirectories.INPUT, REF_GENCODE_37_DIR);
    final String threadCount = Bash.allCpus();
    final String[] starArgs = {
            "--runThreadN", threadCount,
            "--genomeDir", refGenomeDir,
            "--genomeLoad", "NoSharedMemory",
            "--readFilesIn", r1Files, r2Files,
            "--readFilesCommand", "zcat",
            "--outSAMtype", "BAM", "Unsorted",
            "--outSAMunmapped", "Within",
            "--outBAMcompression", "0",
            "--outSAMattributes", "All",
            "--outFilterMultimapNmax", "10",
            "--outFilterMismatchNmax", "3",
            // every STAR option needs the leading "--"; without it the name and its value are
            // consumed as extra values of the preceding option
            "--limitOutSJcollapsed", "3000000",
            "--chimSegmentMin", "10",
            "--chimOutType", "WithinBAM", "SoftClip",
            "--chimJunctionOverhangMin", "10",
            "--chimSegmentReadGapMax", "3",
            "--chimScoreMin", "1",
            "--chimScoreDropMax", "30",
            "--chimScoreJunctionNonGTAG", "0",
            "--chimScoreSeparation", "1",
            "--outFilterScoreMinOverLread", "0.33",
            "--outFilterMatchNminOverLread", "0.33",
            "--outFilterMatchNmin", "35",
            "--alignSplicedMateMapLminOverLmate", "0.33",
            "--alignSplicedMateMapLmin", "35",
            // this option takes four values
            "--alignSJstitchMismatchNmax", "5", "-1", "5", "5",
            // key line for RSEM: makes STAR emit the transcriptome-aligned BAM
            "--quantMode", "TranscriptomeSAM" };
    startupScript.addCommand(new VersionedToolCommand("star", "STAR", "2.7.3a", starArgs));

    // key output file is 'Aligned.toTranscriptome.out.bam'
    // example of the RSEM invocation being assembled below:
    // ./tools/RSEM-1.3.3/rsem-calculate-expression --alignments --paired-end
    // ./runs/CPCT02020378T/Aligned.toTranscriptome.out.bam
    // ./ref/rsem_gene_index/human_gencode
    // CPCT02020378T.rsem -p 6 &
    final String transcriptomeBam = "Aligned.toTranscriptome.out.bam";

    // TMP: copy transcriptome BAM to the bucket
    // startupScript.addCommand(() -> format("gsutil -m cp %s/%s gs://rna-cohort/%s/rsem/", VmDirectories.OUTPUT, transcriptomeBam, sampleId));

    final String rsemGeneIndex = String.format("%s/%s/%s", VmDirectories.INPUT, RSEM_GENE_INDEX_DIR, RSEM_GENE_INDEX);
    final String outputPrefix = String.format("%s.rsem", sampleId);

    // assemble the RSEM arguments: mode flags, then BAM, gene index, output prefix, threads
    StringBuilder rsemArgs = new StringBuilder();
    rsemArgs.append(" --alignments");
    rsemArgs.append(" --paired-end");
    rsemArgs.append(String.format(" %s", transcriptomeBam));
    rsemArgs.append(String.format(" %s", rsemGeneIndex));
    rsemArgs.append(String.format(" %s", outputPrefix));
    rsemArgs.append(String.format(" -p %s", threadCount));

    // run RSEM transcript expression calcs
    startupScript.addCommand(() -> format("%s/%s/%s %s", VmDirectories.TOOLS, RSEM_TOOL, RSEM_EXPRESSION_CMD, rsemArgs.toString()));

    // rename the RSEM result files to .tsv so the '*tsv' copy below picks them up
    startupScript.addCommand(() -> format("mv %s.rsem.genes.results %s.rsem.gene_data.tsv", sampleId, sampleId));
    startupScript.addCommand(() -> format("mv %s.rsem.isoforms.results %s.rsem.trans_data.tsv", sampleId, sampleId));

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "rsem"), executionFlags));

    // copy results to rna-analysis location on crunch
    startupScript.addCommand(() -> format("gsutil -m cp %s/*tsv %s/%s/rsem/", VmDirectories.OUTPUT, RNA_COHORT_LOCATION_V37, sampleId));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-rsem")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(12, 36))
            .build();
}
Use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
The class SambambaCramaBam, method execute.
/**
 * Builds the VM job that converts a single input BAM to CRAM with sambamba.
 *
 * <p>Queues three commands on {@code startupScript}: fetch the input into the VM input
 * directory, run {@code sambamba view --format=cram} against the reference genome, and
 * upload the output to the {@code cram} location of the runtime bucket.
 *
 * @param inputs         bundle whose single input descriptor identifies the BAM
 * @param bucket         runtime bucket whose name is used as the upload destination
 * @param startupScript  script to which the commands are added
 * @param executionFlags runtime files passed through to the output upload command
 * @return the job definition named {@code cram}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor bamDescriptor = inputs.get();

    // Only the file name (no directories) is used for both the local copy and the output.
    final String bamFileName = new File(bamDescriptor.inputValue()).getName();
    final String cramFileName = bamFileName.replaceAll("\\.bam$", ".cram");
    final String cramPath = VmDirectories.outputFile(cramFileName);
    final String localBamPath = String.format("%s/%s", VmDirectories.INPUT, bamFileName);

    // Fetch the input onto the VM.
    startupScript.addCommand(() -> bamDescriptor.toCommandForm(localBamPath));

    // Convert to CRAM using every available CPU and the reference genome from the resource files.
    final RefGenome37ResourceFiles refGenomeResources = new RefGenome37ResourceFiles();
    startupScript.addCommand(new VersionedToolCommand(
            "sambamba",
            "sambamba",
            Versions.SAMBAMBA,
            "view",
            localBamPath,
            "-o",
            cramPath,
            "-t",
            Bash.allCpus(),
            "--format=cram",
            "-T",
            refGenomeResources.refGenomeFile()));

    // Upload everything in the output directory to the bucket's "cram" location.
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(bucket.name(), "cram");
    startupScript.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.builder()
            .name("cram")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .performanceProfile(VirtualMachinePerformanceProfile.custom(4, 6))
            .build();
}
Aggregations