use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
the class RnaStarMapping method execute.
/**
 * Builds the VM job that aligns one sample's RNA FASTQ files with STAR and post-processes the result.
 *
 * The input value must be a comma-separated string of the form
 * {@code SampleId,RefGenomeVersion,FastqFileBucketDir}. The generated startup script:
 * copies the sample's {@code *.fastq.gz} files and the STAR genome resources into the VM input directory,
 * runs STAR, sorts the BAM with samtools, marks duplicates with sambamba, indexes the final BAM,
 * removes the intermediate BAMs, renames the STAR stats file, uploads the output and finally copies
 * the output into the per-sample directory under the RNA cohort directory.
 *
 * @param inputs         bundle whose single descriptor carries the comma-separated batch input
 * @param bucket         runtime bucket that receives the uploaded output
 * @param startupScript  script to which all commands are appended
 * @param executionFlags flags passed through to the output upload
 * @return the job definition named "rna-star-mapping"
 * @throws IllegalArgumentException if the input has fewer than three comma-separated fields,
 *                                  or the second field is not a valid {@code RefGenomeVersion} name
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");

    // required format: SampleId,RefGenomeVersion,FASTQ file bucket
    // All three fields are read below, so reject short input up front instead of failing
    // later with an uninformative ArrayIndexOutOfBoundsException.
    if (batchItems.length < 3) {
        throw new IllegalArgumentException(String.format(
                "invalid input arguments(%s) - expected SampleId,RefGenomeVersion,FastqFileBucketDir", batchInputs));
    }

    final String sampleId = batchItems[0];
    final RefGenomeVersion refGenomeVersion = RefGenomeVersion.valueOf(batchItems[1]);
    final String sampleBucket = batchItems[2];

    // copy down every FASTQ file found directly under the sample's bucket directory
    final String sampleFastqFiles = String.format("%s/*.fastq.gz", sampleBucket);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s %s", sampleFastqFiles, VmDirectories.INPUT));

    // locate the FASTQ files for reads 1 and 2; the shell expands these into comma-separated lists at run time
    final String r1Files = format("$(ls %s/*_R1* | tr '\\n' ',')", VmDirectories.INPUT);
    final String r2Files = format("$(ls %s/*_R2* | tr '\\n' ',')", VmDirectories.INPUT);

    // copy reference files for STAR
    final String starGenomeDir = getRnaResourceDirectory(refGenomeVersion, STAR_DIR);
    final String localStarGenomeDir = String.format("%s/%s", VmDirectories.INPUT, STAR_DIR);
    startupScript.addCommand(() -> format("mkdir %s", localStarGenomeDir));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", starGenomeDir, localStarGenomeDir));

    final String threadCount = Bash.allCpus();

    // STAR and the later tools use relative output names, so run everything from the output directory
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // run the STAR mapper
    final String[] starArgs = {
            "--runThreadN", threadCount,
            "--genomeDir", localStarGenomeDir,
            "--genomeLoad", "NoSharedMemory",
            "--readFilesIn", r1Files, r2Files,
            "--readFilesCommand", "zcat",
            "--outSAMtype", "BAM", "Unsorted",
            "--outSAMunmapped", "Within",
            "--outBAMcompression", "0",
            "--outSAMattributes", "All",
            "--outFilterMultimapNmax", "10",
            "--outFilterMismatchNmax", "3",
            // the leading "--" is required for STAR to treat this as a parameter name
            "--limitOutSJcollapsed", "3000000",
            "--chimSegmentMin", "10",
            "--chimOutType", "WithinBAM", "SoftClip",
            "--chimJunctionOverhangMin", "10",
            "--chimSegmentReadGapMax", "3",
            "--chimScoreMin", "1",
            "--chimScoreDropMax", "30",
            "--chimScoreJunctionNonGTAG", "0",
            "--chimScoreSeparation", "1",
            "--outFilterScoreMinOverLread", "0.33",
            "--outFilterMatchNminOverLread", "0.33",
            "--outFilterMatchNmin", "35",
            "--alignSplicedMateMapLminOverLmate", "0.33",
            "--alignSplicedMateMapLmin", "35",
            "--alignSJstitchMismatchNmax", "5", "-1", "5", "5"
    };
    startupScript.addCommand(new VersionedToolCommand("star", "STAR", "2.7.3a", starArgs));

    final String bamFile = "Aligned.out.bam";

    // sort the BAM
    final String sortedBam = sampleId + ".sorted.bam";
    final String[] sortArgs = { "sort", "-@", threadCount, "-m", "2G", "-T", "tmp", "-O", "bam", bamFile, "-o", sortedBam };
    startupScript.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, sortArgs));

    // mark duplicate fragment reads within the BAM
    final String sortedDedupedBam = sampleId + ".sorted.dups.bam";
    final String[] dupArgs = { "markdup", "-t", threadCount, "--overflow-list-size=45000000", sortedBam, sortedDedupedBam };
    startupScript.addCommand(new SambambaCommand(dupArgs));

    final String[] indexArgs = { "index", sortedDedupedBam };
    startupScript.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, indexArgs));

    // clean up intermediary BAMs
    startupScript.addCommand(() -> format("rm -f %s", bamFile));
    startupScript.addCommand(() -> format("rm -f %s", sortedBam));

    // prefix the STAR stats file with the sample ID
    final String starStats = "Log.final.out";
    final String statsFile = sampleId + "." + starStats;
    startupScript.addCommand(() -> format("mv %s %s", starStats, statsFile));

    // NOTE: QC stats (FastQC) on the input FASTQs are not currently run by this job

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "star"), executionFlags));

    // copy results to crunch
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");
    startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/", VmDirectories.OUTPUT, samplesDir, sampleId));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-star-mapping")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(12, 48))
            .build();
}
use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
the class SageCreatePonData method cramToBam.
/**
 * Builds the samtools commands that convert a CRAM file to BAM and then index the resulting BAM.
 *
 * @param cram path of the CRAM file to convert
 * @return a list holding the {@code samtools view} conversion command followed by the {@code samtools index} command
 */
private List<BashCommand> cramToBam(String cram) {
    final String cramSuffix = ".cram";
    // Swap only the file extension: String.replace would also rewrite any "cram" appearing in a
    // directory or sample name and point the output at a different path. Paths that do not end
    // in ".cram" keep the previous replace-everything behaviour.
    final String output = cram.endsWith(cramSuffix)
            ? cram.substring(0, cram.length() - cramSuffix.length()) + ".bam"
            : cram.replace("cram", "bam");
    final BashCommand toBam = new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, "view", "-o", output, "-O", "bam", "-@", Bash.allCpus(), cram);
    final BashCommand index = new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, "index", "-@", Bash.allCpus(), output);
    return Lists.newArrayList(toBam, index);
}
use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
the class SageRerunOld method cramToBam.
/**
 * Builds the samtools commands that convert a CRAM file to BAM and then index the resulting BAM.
 *
 * @param cram path of the CRAM file to convert
 * @return a list holding the {@code samtools view} conversion command followed by the {@code samtools index} command
 */
static List<BashCommand> cramToBam(String cram) {
    final String cramSuffix = ".cram";
    // Swap only the file extension: String.replace would also rewrite any "cram" appearing in a
    // directory or sample name and point the output at a different path. Paths that do not end
    // in ".cram" keep the previous replace-everything behaviour.
    final String output = cram.endsWith(cramSuffix)
            ? cram.substring(0, cram.length() - cramSuffix.length()) + ".bam"
            : cram.replace("cram", "bam");
    final BashCommand toBam = new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, "view", "-o", output, "-O", "bam", "-@", Bash.allCpus(), cram);
    final BashCommand index = new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, "index", "-@", Bash.allCpus(), output);
    return Lists.newArrayList(toBam, index);
}
use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
the class FlagstatGenerator method execute.
/**
 * Assembles the VM job that recomputes flagstat for a BAM and compares it with the flagstat file
 * stored next to that BAM.
 *
 * The script fetches the BAM and its existing {@code .flagstat} file, runs {@code sambamba flagstat}
 * (teeing the result into a {@code .batch.flagstat} output file), diffs the two files and uploads the output.
 *
 * @param inputs         bundle whose single descriptor points at the BAM
 * @param bucket         runtime bucket that receives the uploaded output
 * @param startupScript  script to which all commands are appended
 * @param executionFlags flags passed through to the output upload
 * @return the job definition named "flagstat"
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor bam = inputs.get();
    final String bamFileName = new File(bam.inputValue()).getName();

    // the existing flagstat is expected beside the BAM, with ".bam" swapped for ".flagstat"
    final InputFileDescriptor priorFlagstat =
            InputFileDescriptor.from(bam).withInputValue(bam.inputValue().replaceAll("\\.bam$", ".flagstat"));
    final String priorFlagstatFileName = new File(priorFlagstat.inputValue()).getName();

    final String localBam = format("%s/%s", VmDirectories.INPUT, bamFileName);
    final String localPriorFlagstat = format("%s/%s", VmDirectories.OUTPUT, priorFlagstatFileName);
    final String freshFlagstat = VmDirectories.outputFile(bamFileName.replaceAll("\\.bam$", ".batch.flagstat"));

    // fetch the BAM and the flagstat that was generated for it earlier
    startupScript.addCommand(() -> bam.toCommandForm(localBam));
    startupScript.addCommand(() -> priorFlagstat.toCommandForm(localPriorFlagstat));

    // regenerate flagstat, keeping a copy of the tool's output in the output directory
    final VersionedToolCommand flagstat =
            new VersionedToolCommand("sambamba", "sambamba", Versions.SAMBAMBA, "flagstat", "-t", Bash.allCpus(), localBam);
    startupScript.addCommand(new PipeCommands(flagstat, () -> "tee " + freshFlagstat));

    // compare the earlier flagstat with the regenerated one
    startupScript.addCommand(() -> format("diff %s %s", localPriorFlagstat, freshFlagstat));

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "flagstat"), executionFlags));

    return VirtualMachineJobDefinition.builder()
            .name("flagstat")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .performanceProfile(VirtualMachinePerformanceProfile.custom(4, 6))
            .build();
}
use of com.hartwig.pipeline.calling.command.VersionedToolCommand in project pipeline5 by hartwigmedical.
the class HlaBamSlicer method execute.
/**
 * Builds the VM job that slices a sample's RNA BAM down to the regions in the HLA bed file,
 * sorts and indexes the slice, and copies it to the sample's HLA BAM directory.
 *
 * Only the first comma-separated field of the input value (the sample ID) is read.
 *
 * @param inputs         bundle whose single descriptor carries the comma-separated batch input
 * @param runtimeBucket  runtime bucket that receives the uploaded output
 * @param commands       script to which all commands are appended
 * @param executionFlags flags passed through to the output upload
 * @return the job definition named "lilac"
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Inputs: SampleId,ExpectedAlleles - only the sample ID is used here
    final String sampleId = inputs.get().inputValue().split(",")[0];

    // file names derived from the sample ID
    final String rnaBam = sampleId + ".sorted.dups.bam";
    final String hlaSlice = sampleId + ".hla.bam";
    final String sortedHlaSlice = sampleId + ".rna.hla.bam";

    // sambamba is invoked by its path relative to the tools directory
    final String sambambaPath = "sambamba/0.6.8/sambamba";

    // fetch the RNA BAM; the trailing wildcard matches every object sharing the BAM's name as a prefix
    final String remoteRnaBam = format("%s/%s/%s", RNA_COHORT_LOCATION_V37, sampleId, rnaBam);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s* %s", remoteRnaBam, VmDirectories.INPUT));

    // fetch the HLA bed file used for slicing
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_RESOURCE_BUCKET, LILAC_DIR, HLA_BED_FILE, VmDirectories.INPUT));

    // slice the BAM to the bed regions
    commands.addCommand(() -> format("%s/%s slice %s/%s -L %s/%s -o %s/%s",
            VmDirectories.TOOLS, sambambaPath,
            VmDirectories.INPUT, rnaBam,
            VmDirectories.INPUT, HLA_BED_FILE,
            VmDirectories.OUTPUT, hlaSlice));

    // sort the slice
    final String slicePath = VmDirectories.OUTPUT + "/" + hlaSlice;
    final String sortedSlicePath = VmDirectories.OUTPUT + "/" + sortedHlaSlice;
    final String[] sortArgs = { "sort", "-@", "8", "-m", "2G", "-T", "tmp", "-O", "bam", slicePath, "-o", sortedSlicePath };
    commands.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, sortArgs));

    // index the sorted slice
    commands.addCommand(() -> format("%s/%s index %s/%s", VmDirectories.TOOLS, sambambaPath, VmDirectories.OUTPUT, sortedHlaSlice));

    // copy the sorted slice, plus anything sharing its name as a prefix, to the sample's HLA BAM directory
    final String hlaBamDir = "gs://" + HLA_BAMS_BUCKET + "/" + sampleId;
    commands.addCommand(() -> format("gsutil -m cp %s/%s* %s", VmDirectories.OUTPUT, sortedHlaSlice, hlaBamDir));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Aggregations