use of com.hartwig.batch.utils.SampleLocationData in project pipeline5 by hartwigmedical.
the class GripssPurpleLinx method runSample.
/**
 * Appends to the startup script every command needed to process a single sample, in execution order:
 * Gridss VCF download and Gripss, Sage somatic VCF download and Pave, Sage germline VCF download and Pave,
 * Amber and Cobalt downloads and Purple, and finally Linx. Every tool is given VmDirectories.OUTPUT as its
 * output directory.
 *
 * @param startupScript      script that receives the commands
 * @param resourceFiles      source of the reference-data paths passed to the tools
 * @param sampleId           sample identifier used in tool arguments and in output file names
 * @param sampleLocationsMap pre-loaded input locations keyed by sample ID; when it holds no usable entry for
 *                           {@code sampleId}, the locations are resolved through RemoteLocationsApi instead
 */
private void runSample(final BashStartupScript startupScript, final ResourceFiles resourceFiles, final String sampleId, final Map<String, SampleLocationData> sampleLocationsMap) {
    // single lookup; a missing entry (or a key mapped to null) falls back to the remote locations API
    final SampleLocationData preloadedLocations = sampleLocationsMap.get(sampleId);
    final SampleLocationData sampleLocations = preloadedLocations != null
            ? preloadedLocations
            : SampleLocationData.fromRemoteLocationsApi(sampleId, new RemoteLocationsApi("hmf-crunch", sampleId));

    // ---- Gripss: download the Gridss VCF, then filter it ----
    final String gridssVcf = sampleLocations.localFileRef(sampleLocations.GridssVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.GridssVcf, false));

    final String gripssArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-reference %s", sampleLocations.ReferenceId))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-known_hotspot_file %s", resourceFiles.knownFusionPairBedpe()))
            .add(String.format("-pon_sgl_file %s", resourceFiles.gridssBreakendPon()))
            .add(String.format("-pon_sv_file %s", resourceFiles.gridssBreakpointPon()))
            .add(String.format("-vcf %s", gridssVcf))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add("-log_debug")
            .toString();

    // Gripss and Linx run from JARs located directly under the tools directory,
    // whereas Pave and Purple run from the versioned tool directories
    final String gripssJar = String.format("%s/%s", VmDirectories.TOOLS, GRIPSS_JAR);
    startupScript.addCommand(() -> format("java -Xmx30G -jar %s %s", gripssJar, gripssArgs));

    // Gripss output paths consumed by Purple below
    final String gripssUnfilteredVcf = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String gripssFilteredVcf = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // ---- Pave somatic ----
    final String paveJar = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);

    final String paveSomaticVcf = String.format("%s/%s.sage.somatic.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String sageSomaticVcf = sampleLocations.localFileRef(sampleLocations.SageSomaticVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.SageSomaticVcf, false));
    final String paveSomaticArgs = buildPaveArgs(resourceFiles, sampleId, sageSomaticVcf, paveSomaticVcf);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveSomaticArgs));

    // ---- Pave germline ----
    final String paveGermlineVcf = String.format("%s/%s.sage.germline.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String sageGermlineVcf = sampleLocations.localFileRef(sampleLocations.SageGermlineVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.SageGermlineVcf, false));
    final String paveGermlineArgs = buildPaveArgs(resourceFiles, sampleId, sageGermlineVcf, paveGermlineVcf);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveGermlineArgs));

    // ---- Purple ----
    // Amber and Cobalt inputs are requested by wildcard rather than as whole directories,
    // and Purple is pointed at VmDirectories.INPUT for both
    final String amberDir = VmDirectories.INPUT;
    final String amberFiles = String.format("%s/*amber*", sampleLocations.Amber);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(amberFiles, false));

    final String cobaltDir = VmDirectories.INPUT;
    final String cobaltFiles = String.format("%s/*cobalt*", sampleLocations.Cobalt);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(cobaltFiles, false));

    final String purpleArgs = new StringJoiner(" ")
            .add(String.format("-tumor %s", sampleId))
            .add(String.format("-reference %s", sampleLocations.ReferenceId))
            .add(String.format("-structural_vcf %s", gripssFilteredVcf))
            .add(String.format("-sv_recovery_vcf %s", gripssUnfilteredVcf))
            .add(String.format("-somatic_vcf %s", paveSomaticVcf))
            .add(String.format("-germline_vcf %s", paveGermlineVcf))
            .add(String.format("-amber %s", amberDir))
            .add(String.format("-cobalt %s", cobaltDir))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-gc_profile %s", resourceFiles.gcProfileFile()))
            .add(String.format("-somatic_hotspots %s", resourceFiles.sageSomaticHotspots()))
            .add(String.format("-germline_hotspots %s", resourceFiles.sageGermlineHotspots()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-run_drivers")
            .add("-no_charts")
            .add(String.format("-threads %s", Bash.allCpus()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .toString();

    final String purpleJar = String.format("%s/purple/%s/purple.jar", VmDirectories.TOOLS, Versions.PURPLE);
    startupScript.addCommand(() -> format("java -jar %s %s", purpleJar, purpleArgs));

    // ---- Linx: reads the SV VCF path under the output directory that is also passed as -purple_dir ----
    final String purpleSvVcf = String.format("%s/%s.purple.sv.vcf.gz", VmDirectories.OUTPUT, sampleId);

    final String linxArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-sv_vcf %s", purpleSvVcf))
            .add(String.format("-purple_dir %s", VmDirectories.OUTPUT))
            .add(String.format("-fragile_site_file %s", resourceFiles.fragileSites()))
            .add(String.format("-line_element_file %s", resourceFiles.lineElements()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-check_drivers")
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add("-check_fusions")
            .add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .toString();

    final String linxJar = String.format("%s/%s", VmDirectories.TOOLS, LINX_JAR);
    startupScript.addCommand(() -> format("java -jar %s %s", linxJar, linxArgs));
}

/**
 * Builds the space-separated Pave argument string shared by the somatic and germline runs.
 *
 * @param resourceFiles source of the reference genome, driver gene panel and Ensembl cache paths
 * @param sampleId      value passed as {@code -sample}
 * @param inputVcf      local path of the Sage VCF to annotate
 * @param outputVcf     path Pave is told to write its annotated VCF to
 * @return the joined argument string
 */
private static String buildPaveArgs(final ResourceFiles resourceFiles, final String sampleId, final String inputVcf, final String outputVcf) {
    return new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", inputVcf))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add(String.format("-output_vcf_file %s", outputVcf))
            .toString();
}
use of com.hartwig.batch.utils.SampleLocationData in project pipeline5 by hartwigmedical.
the class GripssPurpleLinx method execute.
// private static String PON_BP = "gridss_pon_breakpoint.37.sorted.bedpe";
// private static String PON_BE = "gridss_pon_single_breakend.37.sorted.bed";
/**
 * Assembles the "gpl" virtual machine job: copies the Gripss, Purple and Linx JARs into the tools directory,
 * queues the per-sample commands for every requested sample, uploads the output directory and finally copies
 * selected result files into per-tool sub-directories of COMBINED_OUTPUT_DIR.
 *
 * The input value is split on ',': the first field is a ';'-separated list of sample IDs, and a second field,
 * when present, is handed to SampleLocationData.loadSampleLocations together with those IDs.
 *
 * @param inputs         bundle whose single descriptor carries the input value described above
 * @param runtimeBucket  bucket whose name is the destination of the output upload
 * @param startupScript  script the commands are appended to; also used as the job's startup command
 * @param executionFlags runtime files passed on to the output upload
 * @return the job definition named "gpl"
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final String[] inputArguments = inputs.get().inputValue().split(",");
    final String[] rawSampleIds = inputArguments[0].split(";");
    final List<String> sampleIds = Arrays.stream(rawSampleIds).collect(Collectors.toList());

    // without a second input field every sample's locations are resolved later, per sample
    final Map<String, SampleLocationData> sampleLocations = inputArguments.length > 1
            ? SampleLocationData.loadSampleLocations(inputArguments[1], sampleIds)
            : Maps.newHashMap();

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the tool JARs from the batch tools bucket into the VM tools directory
    final String jarCopyTemplate = "gsutil -u hmf-crunch cp %s/%s/%s %s";
    startupScript.addCommand(() -> format(jarCopyTemplate, BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format(jarCopyTemplate, BATCH_TOOLS_BUCKET, PURPLE_DIR, PURPLE_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format(jarCopyTemplate, BATCH_TOOLS_BUCKET, LINX_DIR, LINX_JAR, VmDirectories.TOOLS));

    // queue the full tool chain once per sample, in input order
    sampleIds.forEach(sampleId -> runSample(startupScript, resourceFiles, sampleId, sampleLocations));

    // upload everything in the output directory to the runtime bucket
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "gpl");
    startupScript.addCommand(new OutputUpload(uploadTarget, executionFlags));

    // additionally gather the key result files of all samples under one combined location per tool
    final String combinedGripssDir = String.format("%s/gripss/", COMBINED_OUTPUT_DIR);
    final String combinedPaveDir = String.format("%s/pave/", COMBINED_OUTPUT_DIR);
    final String combinedLinxDir = String.format("%s/linx/", COMBINED_OUTPUT_DIR);
    final String combinedPurpleDir = String.format("%s/purple/", COMBINED_OUTPUT_DIR);

    startupScript.addCommand(() -> format("gsutil -m cp %s/*gripss*vcf* %s", VmDirectories.OUTPUT, combinedGripssDir));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.somatic.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, combinedPaveDir));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.germline.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, combinedPaveDir));

    // files selected for subsequent Linx runs and/or comparison using Compar
    startupScript.addCommand(() -> format("gsutil -m cp %s/*linx*.tsv %s", VmDirectories.OUTPUT, combinedLinxDir));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*purple* %s", VmDirectories.OUTPUT, combinedPurpleDir));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*driver.catalog* %s", VmDirectories.OUTPUT, combinedPurpleDir));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gpl")
            .startupCommand(startupScript)
            .performanceProfile(custom(12, 32))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Aggregations