use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.
the class GridssPanelTumor method execute.
/**
 * Assembles the startup script that runs GRIDSS on a single panel tumor sample and wraps it in a
 * structural-calling VM job definition.
 *
 * <p>The script, in order: copies the sample's {@code .non_umi_dedup.bam} objects from
 * {@code PANEL_BAM_BUCKET} into the VM input directory, adds the BWA and samtools
 * {@code ExportPathCommand}s, makes the GRIDSS jar executable, runs the {@code gridss} driver,
 * annotates the driver VCF with {@code gridss_annotate_vcf_repeatmasker}, runs
 * {@code gridss.AnnotateInsertedSequence} against the virus reference genome, and finally adds an
 * {@code OutputUpload} targeting the {@code gridss} path of the runtime bucket.
 *
 * @param inputs bundle whose default input value is used as the sample id
 * @param runtimeBucket bucket whose name is used as the upload destination
 * @param commands startup script the commands are appended to
 * @param executionFlags runtime files handed to the {@code OutputUpload} command
 * @return the structural-calling job definition built from {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();

    // Download the tumor BAM. The trailing '*' in the copy pattern matches every object whose name
    // starts with the BAM name, so a separate name for the index is not needed here
    // (presumably this is how the ".bai" index is fetched - confirm against the bucket layout).
    final String tumorBam = String.format("%s.non_umi_dedup.bam", sampleId);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s* %s", PANEL_BAM_BUCKET, tumorBam, VmDirectories.INPUT));

    // Inputs
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V38);
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // Run GRIDSS variant calling. Example of the resulting command line:
    //   /data/tools/gridss/2.13.2/gridss
    //     --output ./FR16648841.gridss.driver.vcf.gz
    //     --assembly ./FR16648841.gridss.assembly.vcf.gz
    //     --workingdir ./gridss_working
    //     --reference /data/resources/bucket/reference_genome/38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
    //     --jar /data/tools/gridss/2.13.2/gridss.jar
    //     --blacklist /data/resources/public/gridss_repeatmasker_db/38/ENCFF001TDO.38.bed
    //     --configuration /data/resources/public/gridss_config/gridss.properties
    //     --labels FR16648841 --threads 4 --jvmheap 31G --externalaligner
    //     FR16648841.non_umi_dedup.bam
    final String gridssToolDir = String.format("%s/%s/%s", VmDirectories.TOOLS, GRIDSS_TOOL_DIR, Versions.GRIDSS);
    final String gridssJar = String.format("%s/gridss.jar", gridssToolDir);
    commands.addCommand(() -> format("chmod a+x %s", gridssJar));

    final String gridssOutputVcf = String.format("%s/%s.gridss.driver.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner gridssArgs = new StringJoiner(" ");
    gridssArgs.add(String.format("--output %s", gridssOutputVcf));
    gridssArgs.add(String.format("--assembly %s/%s.gridss.assembly.vcf.gz", VmDirectories.OUTPUT, sampleId));
    gridssArgs.add(String.format("--workingdir %s/gridss_working", VmDirectories.OUTPUT));
    gridssArgs.add(String.format("--reference %s", resourceFiles.refGenomeFile()));
    gridssArgs.add(String.format("--jar %s", gridssJar));
    gridssArgs.add(String.format("--blacklist %s", resourceFiles.gridssBlacklistBed()));
    gridssArgs.add(String.format("--configuration %s", resourceFiles.gridssPropertiesFile()));
    gridssArgs.add(String.format("--labels %s", sampleId));
    gridssArgs.add(String.format("--threads %s", Bash.allCpus()));
    gridssArgs.add("--jvmheap 31G");
    gridssArgs.add("--externalaligner");
    // The input BAM is the positional argument and therefore goes last.
    gridssArgs.add(String.format("%s/%s", VmDirectories.INPUT, tumorBam));
    commands.addCommand(() -> format("%s/gridss %s", gridssToolDir, gridssArgs.toString()));

    // Annotate the driver VCF with RepeatMasker. Example of the resulting command line:
    //   /opt/tools/gridss/2.13.2/gridss_annotate_vcf_repeatmasker
    //     --output /data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    //     --jar /opt/tools/gridss/2.13.2/gridss.jar
    //     -w /data/output
    //     --rm /opt/tools/repeatmasker/4.1.1/RepeatMasker
    //     /data/output/CPCT12345678T.gridss.driver.vcf.gz
    final String rmOutputVcf = String.format("%s/%s.gridss.repeatmasker.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner rmArgs = new StringJoiner(" ");
    rmArgs.add(String.format("--output %s", rmOutputVcf));
    rmArgs.add(String.format("--jar %s", gridssJar));
    rmArgs.add(String.format("-w %s", VmDirectories.OUTPUT));
    rmArgs.add(String.format("--rm %s", REPEAT_MASKER_TOOL));
    rmArgs.add(gridssOutputVcf);
    commands.addCommand(() -> format("%s/gridss_annotate_vcf_repeatmasker %s", gridssToolDir, rmArgs.toString()));

    // Run AnnotateInsertedSequence on the RepeatMasker output. Example of the resulting command line:
    //   java -Xmx8G -Dsamjdk.create_index=true
    //     -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true
    //     -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304
    //     -cp /opt/tools/gridss/2.13.2/gridss.jar gridss.AnnotateInsertedSequence
    //     REFERENCE_SEQUENCE=/opt/resources/virus_reference_genome/human_virus.fa
    //     INPUT=/data/output/CPCT12345678T.gridss.repeatmasker.vcf.gz
    //     OUTPUT=/data/output/CPCT12345678T.gridss.unfiltered.vcf.gz
    //     ALIGNMENT=APPEND WORKER_THREADS=12
    final String finalOutputVcf = String.format("%s/%s.gridss.unfiltered.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner vmArgs = new StringJoiner(" ");
    GridssCommand.JVM_ARGUMENTS.forEach(vmArgs::add);
    final StringJoiner annInsSeqArgs = new StringJoiner(" ");
    annInsSeqArgs.add(String.format("REFERENCE_SEQUENCE=%s", resourceFiles.gridssVirusRefGenomeFile()));
    annInsSeqArgs.add(String.format("INPUT=%s", rmOutputVcf));
    annInsSeqArgs.add(String.format("OUTPUT=%s", finalOutputVcf));
    annInsSeqArgs.add(String.format("ALIGNMENT=APPEND WORKER_THREADS=%s", Bash.allCpus()));
    commands.addCommand(() -> format("java -Xmx8G -Dsamjdk.create_index=true %s -cp %s gridss.AnnotateInsertedSequence %s", vmArgs.toString(), gridssJar, annInsSeqArgs.toString()));

    // Upload everything in the output directory to the "gridss" path of the runtime bucket.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));
    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.
the class GripssGermline method execute.
/**
 * Builds the "gripss" VM job: downloads the GRIPSS jar and the sample inputs, runs GRIPSS on the
 * GRIDSS structural variant VCF using the reference sample id, hard-filters the result and
 * uploads the output directory to the {@code gripss} path of the runtime bucket.
 *
 * @param inputs bundle whose default input value is used as the sample id
 * @param runtimeBucket bucket whose name is used as the upload destination
 * @param startupScript startup script the commands are appended to
 * @param executionFlags runtime files handed to the {@code OutputUpload} command
 * @return job definition named {@code gripss} with a custom (8, 30) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(descriptor.billedProject(), sampleId);
    final LocalLocations localLocations = new LocalLocations(remoteLocations);
    final String referenceId = localLocations.getReference();
    final String gridssVcf = localLocations.getStructuralVariantsGridss();

    // Fetch the GRIPSS jar first, then whatever inputs were requested from the local locations above.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommands(localLocations.generateDownloadCommands());

    // Step 1: GRIPSS itself. The reference sample id is passed as "-tumor".
    final String unfilteredVcf = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, referenceId);
    final String gripssArguments = String.join(" ",
            String.format("-tumor %s", referenceId),
            String.format("-ref_genome %s", resourceFiles.refGenomeFile()),
            String.format("-breakpoint_hotspot %s", resourceFiles.knownFusionPairBedpe()),
            String.format("-breakend_pon %s", resourceFiles.gridssBreakendPon()),
            String.format("-breakpoint_pon %s", resourceFiles.gridssBreakpointPon()),
            String.format("-pon_distance %d", 4),
            String.format("-min_qual_break_end %d", 400),
            String.format("-min_qual_rescue_mobile_element_insertion %d", 400),
            String.format("-min_qual_break_point %d", 250),
            String.format("-input_vcf %s", gridssVcf),
            String.format("-output_vcf %s", unfilteredVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssApplicationKt %s",
            MAX_HEAP,
            VmDirectories.TOOLS,
            GRIPSS_JAR,
            gripssArguments));

    // Step 2: hard filter, reading the VCF written by step 1.
    final String filteredVcf = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, referenceId);
    final String hardFilterArguments = String.join(" ",
            String.format("-input_vcf %s", unfilteredVcf),
            String.format("-output_vcf %s", filteredVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssHardFilterApplicationKt %s",
            MAX_HEAP,
            VmDirectories.TOOLS,
            GRIPSS_JAR,
            hardFilterArguments));

    // Step 3: upload the output directory.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gripss"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gripss")
            .startupCommand(startupScript)
            .performanceProfile(custom(8, 30))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.
the class HlaBamSlicer method execute.
/**
 * Builds the "lilac" VM job that slices an RNA BAM down to the HLA regions.
 *
 * <p>The script copies the sample's {@code .sorted.dups.bam} objects and the HLA bed file to the
 * VM input directory, slices the BAM with sambamba, sorts the slice with samtools, indexes the
 * sorted slice with sambamba, copies the sorted slice (and anything sharing its name prefix) to
 * the sample's folder in {@code HLA_BAMS_BUCKET}, and uploads the output directory to the
 * {@code lilac} path of the runtime bucket.
 *
 * @param inputs bundle whose default input value is a comma-separated string; only the first
 *               item (the sample id) is used
 * @param runtimeBucket bucket whose name is used as the upload destination
 * @param commands startup script the commands are appended to
 * @param executionFlags runtime files handed to the {@code OutputUpload} command
 * @return job definition named {@code lilac}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Inputs: SampleId,ExpectedAlleles - only the sample id is consumed here.
    final InputFileDescriptor runData = inputs.get();
    final String[] fields = runData.inputValue().split(",");
    final String sampleId = fields[0];

    // Copy the RNA BAM; the trailing '*' matches every object sharing the BAM name prefix.
    final String bamName = String.format("%s.sorted.dups.bam", sampleId);
    final String remoteBam = format("%s/%s/%s", RNA_COHORT_LOCATION_V37, sampleId, bamName);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s* %s", remoteBam, VmDirectories.INPUT));

    // Copy the HLA bed file that defines the regions to slice.
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_RESOURCE_BUCKET, LILAC_DIR, HLA_BED_FILE, VmDirectories.INPUT));

    // Slice with sambamba: <tools>/sambamba/0.6.8/sambamba slice <bam> -L <bed> -o <sliced bam>
    final String sambambaExecutable = String.format("%s/%s", VmDirectories.TOOLS, "sambamba/0.6.8/sambamba");
    final String localBam = String.format("%s/%s", VmDirectories.INPUT, bamName);
    final String localBed = String.format("%s/%s", VmDirectories.INPUT, HLA_BED_FILE);
    final String slicedBamPath = String.format("%s/%s.hla.bam", VmDirectories.OUTPUT, sampleId);
    commands.addCommand(() -> format("%s slice %s -L %s -o %s", sambambaExecutable, localBam, localBed, slicedBamPath));

    // Sort with samtools: samtools sort -@ 8 -m 2G -T tmp -O bam <sliced bam> -o <sorted bam>
    final String sortedBamName = String.format("%s.rna.hla.bam", sampleId);
    final String sortedBamPath = String.format("%s/%s", VmDirectories.OUTPUT, sortedBamName);
    final String[] sortArguments = {
            "sort",
            "-@", "8",
            "-m", "2G",
            "-T", "tmp",
            "-O", "bam",
            slicedBamPath,
            "-o", sortedBamPath
    };
    commands.addCommand(new VersionedToolCommand("samtools", "samtools", Versions.SAMTOOLS, sortArguments));

    // Index the sorted slice.
    commands.addCommand(() -> format("%s index %s", sambambaExecutable, sortedBamPath));

    // Copy the sorted slice (plus anything sharing its name prefix) to the sample's HLA BAM folder.
    final String hlaBamFolder = String.format("gs://%s/%s", HLA_BAMS_BUCKET, sampleId);
    commands.addCommand(() -> format("gsutil -m cp %s* %s", sortedBamPath, hlaBamFolder));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.
the class SamtoolsBamToCram method execute.
/**
 * Builds the "samtoolscram" VM job that converts the input BAM to CRAM.
 *
 * <p>The script places the input at {@code <input dir>/<file name>}, appends the commands from
 * {@code CramAndValidateCommands} (built with the 37 reference genome resource files), moves any
 * {@code *.bam} and {@code *.bam.flagstat} files from {@code /data/output} to {@code /data/tmp},
 * and uploads the output directory to the {@code samtools} path of the bucket.
 *
 * @param inputs bundle whose default input value is the path of the BAM to convert
 * @param bucket bucket whose name is used as the upload destination
 * @param startupScript startup script the commands are appended to
 * @param executionFlags runtime files handed to the {@code OutputUpload} command
 * @return job definition named {@code samtoolscram} with 650 GB working disk and a custom (6, 6)
 *         performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor input = inputs.get();

    // Only the file name of the input is kept; the CRAM takes the same name with ".bam" swapped for ".cram".
    final String bamFileName = new File(input.inputValue()).getName();
    final String cramFileName = bamFileName.replaceAll("\\.bam$", ".cram");
    final String localInput = format("%s/%s", VmDirectories.INPUT, bamFileName);
    final String outputFile = VmDirectories.outputFile(cramFileName);

    startupScript.addCommand(() -> input.toCommandForm(localInput));
    startupScript.addCommands(new CramAndValidateCommands(localInput, outputFile, new RefGenome37ResourceFiles()).commands());

    // Move BAM and flagstat files out of the output directory before the upload command runs.
    startupScript.addCommand(new MvCommand("/data/output/*.bam", "/data/tmp"));
    startupScript.addCommand(new MvCommand("/data/output/*.bam.flagstat", "/data/tmp"));

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "samtools"), executionFlags));

    return VirtualMachineJobDefinition.builder()
            .name("samtoolscram")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(650)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(6, 6))
            .build();
}
use of com.hartwig.pipeline.execution.vm.OutputUpload in project pipeline5 by hartwigmedical.
the class TeloBatch method execute.
/**
 * Builds the "telo" VM job that runs the telo tool on a sample's tumor and reference BAMs.
 *
 * <p>The script downloads the tool via {@code downloadExperimentalVersion()}, downloads the tumor
 * BAM and index, runs the "somatic" telo command, removes the tumor BAM and index, downloads the
 * reference BAM and index, runs the "germline" telo command, and uploads the output directory to
 * a path named after the sample id in the runtime bucket.
 *
 * @param inputs bundle providing the {@code sampleId} input and, optionally, a
 *               {@code specificChromosome} input
 * @param runtimeBucket bucket whose name is used as the upload destination
 * @param commands startup script the commands are appended to
 * @param executionFlags runtime files handed to the {@code OutputUpload} command
 * @return job definition named {@code telo} with 500 GB working disk and a custom
 *         (16, {@code MEMORY_GB}) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get("sampleId").inputValue();

    Optional<String> specificChromosome = Optional.empty();
    try {
        specificChromosome = Optional.of(inputs.get("specificChromosome").inputValue());
    } catch (IllegalArgumentException ignored) {
        // Deliberately ignored: the chromosome restriction stays empty.
        // NOTE(review): presumably the bundle throws this when the input key is absent - confirm.
    }

    final InputFileDescriptor runData = inputs.get();
    final RemoteLocationsApi locations = new RemoteLocationsApi(runData.billedProject(), sampleId);

    // Tool download.
    commands.addCommand(downloadExperimentalVersion());

    // Reference genome resources.
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Somatic run on the tumor BAM.
    final InputDownload tumorBam = new InputDownload(locations.getTumorAlignment());
    final InputDownload tumorBamIndex = new InputDownload(locations.getTumorAlignmentIndex());
    commands.addCommand(tumorBam);
    commands.addCommand(tumorBamIndex);
    commands.addCommand(makeTeloRunCommand(sampleId,
            "somatic",
            tumorBam.getLocalTargetPath(),
            resourceFiles.refGenomeFile(),
            specificChromosome));

    // The tumor BAM and index are no longer needed; remove them to save disk space.
    commands.addCommand(() -> format("rm -f %s", tumorBam.getLocalTargetPath()));
    commands.addCommand(() -> format("rm -f %s", tumorBamIndex.getLocalTargetPath()));

    // Germline run on the reference BAM.
    final InputDownload referenceBam = new InputDownload(locations.getReferenceAlignment());
    final InputDownload referenceBamIndex = new InputDownload(locations.getReferenceAlignmentIndex());
    commands.addCommand(referenceBam);
    commands.addCommand(referenceBamIndex);
    commands.addCommand(makeTeloRunCommand(sampleId,
            "germline",
            referenceBam.getLocalTargetPath(),
            resourceFiles.refGenomeFile(),
            specificChromosome));

    // Store output under a path named after the sample id.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), sampleId), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("telo")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(16, MEMORY_GB))
            .build();
}
Aggregations