Usage of com.hartwig.batch.input.InputFileDescriptor in the hartwigmedical project pipeline5:
the execute method of the class HlaBamSlicer.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket,
        final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Batch input format: "SampleId[,ExpectedAlleles]" - only the sample ID is used here.
    final InputFileDescriptor runData = inputs.get();
    final String sampleId = runData.inputValue().split(",")[0];

    // Copy the sample's sorted RNA BAM (the trailing wildcard also picks up its index) into the VM.
    final String sampleBam = String.format("%s.sorted.dups.bam", sampleId);
    final String bamLocation = format("%s/%s/%s", RNA_COHORT_LOCATION_V37, sampleId, sampleBam);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s* %s", bamLocation, VmDirectories.INPUT));

    // Fetch the HLA-region BED file used for slicing.
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s",
            BATCH_RESOURCE_BUCKET, LILAC_DIR, HLA_BED_FILE, VmDirectories.INPUT));

    // Slice the BAM down to the HLA regions with sambamba.
    final String sambamba = "sambamba/0.6.8/sambamba";
    final String slicedBam = String.format("%s.hla.bam", sampleId);
    commands.addCommand(() -> format("%s/%s slice %s/%s -L %s/%s -o %s/%s",
            VmDirectories.TOOLS, sambamba, VmDirectories.INPUT, sampleBam,
            VmDirectories.INPUT, HLA_BED_FILE, VmDirectories.OUTPUT, slicedBam));

    // Coordinate-sort the sliced BAM with samtools (8 threads, 2G per thread).
    final String slicedSortedBam = String.format("%s.rna.hla.bam", sampleId);
    final String[] sortArgs = { "sort", "-@", "8", "-m", "2G", "-T", "tmp", "-O", "bam",
            String.format("%s/%s", VmDirectories.OUTPUT, slicedBam),
            "-o", String.format("%s/%s", VmDirectories.OUTPUT, slicedSortedBam) };
    commands.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, sortArgs));

    // Create an index for the sorted, sliced BAM.
    commands.addCommand(() -> format("%s/%s index %s/%s",
            VmDirectories.TOOLS, sambamba, VmDirectories.OUTPUT, slicedSortedBam));

    // Copy the sliced RNA BAM (and its index, via the wildcard) back to the per-sample HLA BAM bucket.
    final String sampleHlaDir = String.format("gs://%s/%s", HLA_BAMS_BUCKET, sampleId);
    commands.addCommand(() -> format("gsutil -m cp %s/%s* %s", VmDirectories.OUTPUT, slicedSortedBam, sampleHlaDir));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Usage of com.hartwig.batch.input.InputFileDescriptor in the hartwigmedical project pipeline5:
the execute method of the class SamtoolsBamToCram.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket,
        final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    // Convert the input BAM to a validated CRAM, keeping only CRAM artefacts for upload.
    final InputFileDescriptor input = inputs.get();
    final String bamName = new File(input.inputValue()).getName();
    final String localInput = format("%s/%s", VmDirectories.INPUT, bamName);
    final String outputFile = VmDirectories.outputFile(bamName.replaceAll("\\.bam$", ".cram"));

    // Download the source BAM into the VM input directory.
    startupScript.addCommand(() -> input.toCommandForm(localInput));
    // Run the CRAM conversion plus validation command sequence against GRCh37.
    startupScript.addCommands(new CramAndValidateCommands(localInput, outputFile, new RefGenome37ResourceFiles()).commands());
    // Move intermediate BAM artefacts out of the output directory so they are not uploaded.
    startupScript.addCommand(new MvCommand("/data/output/*.bam", "/data/tmp"));
    startupScript.addCommand(new MvCommand("/data/output/*.bam.flagstat", "/data/tmp"));
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "samtools"), executionFlags));

    return VirtualMachineJobDefinition.builder()
            .name("samtoolscram")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(650)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(6, 6))
            .build();
}
Usage of com.hartwig.batch.input.InputFileDescriptor in the hartwigmedical project pipeline5:
the execute method of the class TeloBatch.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket,
        final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get("sampleId").inputValue();

    // "specificChromosome" is an optional batch input; inputs.get(key) throws
    // IllegalArgumentException when the key is absent, so absence is treated as "all chromosomes".
    Optional<String> specificChromosome = Optional.empty();
    try {
        specificChromosome = Optional.of(inputs.get("specificChromosome").inputValue());
    } catch (IllegalArgumentException ignored) {
        // no specific chromosome requested - run over the whole genome
    }

    final InputFileDescriptor runData = inputs.get();
    final RemoteLocationsApi locationsApi = new RemoteLocationsApi(runData.billedProject(), sampleId);

    // Download the experimental telo jar into the VM tools directory.
    commands.addCommand(downloadExperimentalVersion());

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Tumor: download BAM + index, run telo in somatic mode, then delete the BAMs to free disk space
    // before the reference BAM is downloaded.
    InputDownload tumorBamDownload = new InputDownload(locationsApi.getTumorAlignment());
    InputDownload tumorBamIndexDownload = new InputDownload(locationsApi.getTumorAlignmentIndex());
    commands.addCommand(tumorBamDownload);
    commands.addCommand(tumorBamIndexDownload);
    commands.addCommand(makeTeloRunCommand(sampleId, "somatic", tumorBamDownload.getLocalTargetPath(),
            resourceFiles.refGenomeFile(), specificChromosome));
    commands.addCommand(() -> format("rm -f %s", tumorBamDownload.getLocalTargetPath()));
    commands.addCommand(() -> format("rm -f %s", tumorBamIndexDownload.getLocalTargetPath()));

    // Reference (germline): same sequence, run telo in germline mode.
    InputDownload referenceBamDownload = new InputDownload(locationsApi.getReferenceAlignment());
    InputDownload referenceBamIndexDownload = new InputDownload(locationsApi.getReferenceAlignmentIndex());
    commands.addCommand(referenceBamDownload);
    commands.addCommand(referenceBamIndexDownload);
    commands.addCommand(makeTeloRunCommand(sampleId, "germline", referenceBamDownload.getLocalTargetPath(),
            resourceFiles.refGenomeFile(), specificChromosome));

    // Upload all outputs under the sample ID in the runtime bucket.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), sampleId), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("telo")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(16, MEMORY_GB))
            .build();
}
Usage of com.hartwig.batch.input.InputFileDescriptor in the hartwigmedical project pipeline5:
the execute method of the class RnaArriba.
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    // Batch input is just the sample ID.
    final String sampleId = inputs.get().inputValue();

    // Copy down the sample's RNA BAM and its index.
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s",
            RNA_COHORT_LOCATION_V37, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s",
            RNA_COHORT_LOCATION_V37, sampleId, bamIndexFile, VmDirectories.INPUT));

    // Copy down the Arriba executable and make it runnable.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ARRIBA_RESOURCES, ARRIBA_TOOL, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("chmod a+x %s/%s", VmDirectories.TOOLS, ARRIBA_TOOL));

    // Copy down the required reference files: genome, gene definitions and fusion blacklist.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp -r %s/%s %s", ARRIBA_RESOURCES, REF_GENOME, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ARRIBA_RESOURCES, GENE_DEFINITIONS, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ARRIBA_RESOURCES, BLACKLIST, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // Assemble the Arriba arguments: input BAM, fusion outputs, reference data,
    // plus -T (transcript sequence) and -P (peptide sequence) output columns.
    StringBuilder arribaArgs = new StringBuilder();
    arribaArgs.append(String.format(" -x %s/%s", VmDirectories.INPUT, bamFile));
    arribaArgs.append(String.format(" -o %s/%s.fusions.tsv", VmDirectories.OUTPUT, sampleId));
    arribaArgs.append(String.format(" -O %s/%s.fusions.discarded.tsv", VmDirectories.OUTPUT, sampleId));
    arribaArgs.append(String.format(" -a %s/%s", VmDirectories.INPUT, REF_GENOME));
    arribaArgs.append(String.format(" -g %s/%s", VmDirectories.INPUT, GENE_DEFINITIONS));
    arribaArgs.append(String.format(" -b %s/%s", VmDirectories.INPUT, BLACKLIST));
    arribaArgs.append(" -T -P");
    startupScript.addCommand(() -> format("%s/%s %s", VmDirectories.TOOLS, ARRIBA_TOOL, arribaArgs.toString()));

    // Upload the results to the runtime bucket, then mirror them to the RNA cohort location.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "arriba"), executionFlags));
    startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/arriba/", VmDirectories.OUTPUT, RNA_COHORT_LOCATION_V37, sampleId));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-arriba")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(100)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(12, 64))
            .build();
}
Usage of com.hartwig.batch.input.InputFileDescriptor in the hartwigmedical project pipeline5:
the execute method of the class RnaIsofoxExonCounts.
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    // Batch input format: "SampleId,GeneIds".
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        // Fixed: message previously said "SampleId,ReadLength" but the second field consumed
        // below (COL_GENE_IDS) is the restricted gene ID list.
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,GeneIds", batchInputs));
        return null;
    }
    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String geneIds = batchItems[COL_GENE_IDS];

    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // Copy down the sample's RNA BAM and its index.
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // Copy down the Isofox JAR.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // Run Isofox transcript counts with per-exon output, restricted to the requested genes.
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions %s", FUNC_TRANSCRIPT_COUNTS));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add("-write_exon_data");
    isofoxArgs.add(String.format("-restricted_gene_ids %s", geneIds));
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // Upload the results.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Aggregations