use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
the class LilacBatch method addSampleCommands.
/**
 * Adds the commands that download one sample's inputs, run Lilac on them and copy the
 * sample's output directory to the Lilac batch bucket.
 *
 * @param runData      input descriptor; only its billed project is read here
 * @param commands     startup script the generated commands are appended to
 * @param runDirectory path below the Lilac batch bucket that receives the output
 * @param sampleId     sample whose files are located, processed and uploaded
 * @param hasRna       whether an RNA HLA BAM is also downloaded and passed to Lilac
 */
private void addSampleCommands(final InputFileDescriptor runData, final BashStartupScript commands, final String runDirectory, final String sampleId, boolean hasRna) {
    final LocalLocations inputFiles = new LocalLocations(new BamSliceDecorator(new RemoteLocationsApi(runData.billedProject(), sampleId)));

    // The local paths are requested before the download commands are generated further down.
    final String purpleSomaticVcf = inputFiles.getSomaticVariantsPurple();
    final String geneCopyNumberTsv = inputFiles.getGeneCopyNumberTsv();
    final String tumorBam = inputFiles.getTumorAlignment();
    final String referenceBam = inputFiles.getReferenceAlignment();

    final String rnaBamName;
    if (hasRna) {
        rnaBamName = String.format("%s.rna.hla.bam", sampleId);
    } else {
        rnaBamName = "";
    }

    // Download the sample input files.
    commands.addCommands(inputFiles.generateDownloadCommands());
    if (hasRna) {
        // The trailing wildcard copies every object whose name starts with the RNA BAM name.
        commands.addCommand(() -> format("gsutil -m cp gs://%s/%s/%s* %s", HLA_BAMS_BUCKET, sampleId, rnaBamName, VmDirectories.INPUT));
    }

    // Per-sample output directory on the VM.
    final String localOutputDir = String.format("%s/%s/", VmDirectories.OUTPUT, sampleId);
    commands.addCommand(() -> format("mkdir -p %s", localOutputDir));

    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Build the Lilac argument list; the RNA BAM argument is only present for RNA samples.
    final StringJoiner lilacArgs = new StringJoiner(" ")
            .add(String.format(" -sample %s", sampleId))
            .add(String.format(" -resource_dir %s", VmDirectories.INPUT))
            .add(String.format(" -ref_genome %s", resources.refGenomeFile()))
            .add(String.format(" -reference_bam %s", referenceBam))
            .add(String.format(" -tumor_bam %s", tumorBam));
    if (hasRna) {
        lilacArgs.add(String.format(" -rna_bam %s/%s", VmDirectories.INPUT, rnaBamName));
    }
    lilacArgs.add(String.format(" -output_dir %s", localOutputDir))
            .add(String.format(" -gene_copy_number_file %s", geneCopyNumberTsv))
            .add(String.format(" -somatic_variants_file %s", purpleSomaticVcf))
            .add(String.format(" -threads %s", Bash.allCpus()));

    commands.addCommand(() -> format("java -Xmx%s -jar %s/%s %s", MAX_HEAP, VmDirectories.TOOLS, LILAC_JAR, lilacArgs.toString()));

    // A separate tumor-only Lilac run was sketched here at one point; it is not executed.

    // Copy the sample's output directory to the batch bucket.
    final String remoteOutputDir = String.format("gs://%s/%s/", LILAC_BATCH_BUCKET, runDirectory);
    commands.addCommand(() -> format("gsutil -m cp -r %s/%s/ %s", VmDirectories.OUTPUT, sampleId, remoteOutputDir));
}
use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
the class SageCompare method execute.
/**
 * Builds the VM job that runs Sage on one sample so that the results of different runs can be compared.
 *
 * <p>The single input value has the form {@code sampleId[,runType]}. The run type selects the old Sage jar
 * (the versioned jar under the tools directory), the new Sage jar (copied from the batch tools bucket),
 * both of them (the default when no run type is given), or the new jar on BAM as well as on CRAM input.
 *
 * @param inputs         bundle holding the single input descriptor
 * @param runtimeBucket  bucket whose name is used as the upload destination for the output
 * @param startupScript  script the generated commands are appended to
 * @param executionFlags runtime files handed to the output upload command
 * @return job definition named "sage" using a custom performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String[] sampleData = descriptor.inputValue().split(",", -1);
    final String sampleId = sampleData[0];
    String runTypes = sampleData.length > 1 ? sampleData[1] : RUN_BOTH;
    boolean runBoth = runTypes.equalsIgnoreCase(RUN_BOTH);
    boolean cramVsBam = runTypes.equalsIgnoreCase(RUN_CRAM_VS_BAM);
    boolean runOld = runBoth || runTypes.equalsIgnoreCase(RUN_OLD);
    boolean runNew = runBoth || cramVsBam || runTypes.equalsIgnoreCase(RUN_NEW);

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the new Sage jar
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);

    // download tumor CRAM (the wildcard also copies objects sharing the CRAM's path as a prefix)
    String[] tumorCramData = getCramFileData(locations.getTumorAlignment());
    String tumorCramFile = tumorCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCramData[CRAM_FULL_PATH], VmDirectories.INPUT));

    // download reference CRAM
    String referenceId = locations.getReference();
    String[] refCramData = getCramFileData(locations.getReferenceAlignment());
    String refCramFile = refCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", refCramData[CRAM_FULL_PATH], VmDirectories.INPUT));

    String localTumorCram = String.format("%s/%s", VmDirectories.INPUT, tumorCramFile);
    String localRefCram = String.format("%s/%s", VmDirectories.INPUT, refCramFile);

    // and convert to BAM
    startupScript.addCommands(cramToBam(localTumorCram));
    startupScript.addCommands(cramToBam(localRefCram));

    // NOTE(review): replaces every "cram" occurrence in the path, not only the extension;
    // presumably this mirrors the naming used by cramToBam - confirm before changing.
    String localTumorBam = localTumorCram.replace("cram", "bam");
    String localRefBam = localRefCram.replace("cram", "bam");

    // NOTE(review): the old and the new BAM-based runs both write to this path, so when both are enabled
    // the later (new) run overwrites the earlier (old) result - confirm whether that is intended.
    final String sageSomaticVcf = String.format("%s/%s.sage.somatic.vcf.gz", VmDirectories.OUTPUT, sampleId);

    if (runOld) {
        // run old Sage
        final StringJoiner oldSageArgs = new StringJoiner(" ");
        oldSageArgs.add(String.format("-tumor %s", sampleId));
        oldSageArgs.add(String.format("-tumor_bam %s", localTumorBam));
        oldSageArgs.add(String.format("-reference %s", referenceId));
        oldSageArgs.add(String.format("-reference_bam %s", localRefBam));
        oldSageArgs.add(String.format("-hotspots %s", resourceFiles.sageSomaticHotspots()));
        oldSageArgs.add(String.format("-panel_bed %s", resourceFiles.sageSomaticCodingPanel()));
        oldSageArgs.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
        oldSageArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
        oldSageArgs.add("-assembly hg19");
        oldSageArgs.add("-bqr_plot false");
        oldSageArgs.add(String.format("-out %s", sageSomaticVcf));
        oldSageArgs.add(String.format("-threads %s", Bash.allCpus()));

        String oldSageJar = String.format("sage/%s/sage.jar", Versions.SAGE);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, oldSageJar, oldSageArgs.toString()));
    }

    if (runNew) {
        // run new Sage on the converted BAMs
        final StringJoiner newBamSageArgs = buildNewSageArgs(sampleId, localTumorBam, referenceId, localRefBam, resourceFiles, sageSomaticVcf);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, newBamSageArgs.toString()));
    }

    if (cramVsBam) {
        // run new Sage directly on the CRAMs, writing to a separate VCF
        final String newCramSageVcf = String.format("%s/%s.sage.somatic.cram.vcf.gz", VmDirectories.OUTPUT, sampleId);
        final StringJoiner newCramSageArgs = buildNewSageArgs(sampleId, localTumorCram, referenceId, localRefCram, resourceFiles, newCramSageVcf);
        startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, newCramSageArgs.toString()));
    }

    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder().name("sage").startupCommand(startupScript).performanceProfile(custom(24, 64)).namespacedResults(ResultsDirectory.defaultDirectory()).build();
}

/**
 * Assembles the argument list shared by every run of the new Sage jar; only the two alignment
 * files and the output VCF differ between the BAM-based and the CRAM-based run.
 *
 * @param sampleId           tumor sample name passed as {@code -tumor}
 * @param tumorAlignment     local tumor BAM or CRAM path
 * @param referenceId        reference sample name passed as {@code -reference}
 * @param referenceAlignment local reference BAM or CRAM path
 * @param resourceFiles      resource paths for hotspots, panels, reference genome and Ensembl cache
 * @param outputVcf          VCF path passed as {@code -out}
 * @return joiner holding the space-separated Sage arguments
 */
private static StringJoiner buildNewSageArgs(final String sampleId, final String tumorAlignment, final String referenceId, final String referenceAlignment, final ResourceFiles resourceFiles, final String outputVcf) {
    final StringJoiner sageArgs = new StringJoiner(" ");
    sageArgs.add(String.format("-tumor %s", sampleId));
    sageArgs.add(String.format("-tumor_bam %s", tumorAlignment));
    sageArgs.add(String.format("-reference %s", referenceId));
    sageArgs.add(String.format("-reference_bam %s", referenceAlignment));
    sageArgs.add(String.format("-hotspots %s", resourceFiles.sageSomaticHotspots()));
    sageArgs.add(String.format("-panel_bed %s", resourceFiles.sageSomaticCodingPanel()));
    sageArgs.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
    sageArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    sageArgs.add(String.format("-ref_genome_version %s", resourceFiles.version().toString()));
    sageArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    sageArgs.add("-perf_warn_time 50");
    sageArgs.add("-log_debug");
    sageArgs.add(String.format("-out %s", outputVcf));
    sageArgs.add(String.format("-threads %s", Bash.allCpus()));
    return sageArgs;
}
use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
the class GridssRerun method execute.
/**
 * Builds the structural-calling VM job that re-runs the Gridss driver and annotation for one
 * tumor/reference pair and copies the unfiltered VCF and its index to their archive location.
 *
 * <p>The alignment files come from the {@code tumor_cram} and {@code ref_cram} inputs; a file is converted
 * to BAM only when replacing "cram" with "bam" in its local name yields a different name.
 *
 * @param inputs         bundle providing the set, sample names and alignment file descriptors
 * @param runtimeBucket  bucket whose name is used as the upload destination for the output
 * @param commands       script the generated commands are appended to
 * @param executionFlags runtime files handed to the output upload command
 * @return structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // ---- inputs ----
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final String referenceSample = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorAlignment = inputs.get("tumor_cram");
    final InputFileDescriptor referenceAlignment = inputs.get("ref_cram");

    // NOTE(review): the locations API lookups and the four downloads built from them are never
    // added to the script in this method; they are kept because constructing them may have
    // effects that cannot be seen from here - confirm before removing.
    final InputFileDescriptor runData = inputs.get();
    final RemoteLocationsApi locationsApi = new RemoteLocationsApi(runData.billedProject(), tumorSample);
    InputDownload apiTumorBam = new InputDownload(locationsApi.getTumorAlignment());
    InputDownload apiTumorBamIndex = new InputDownload(locationsApi.getTumorAlignmentIndex());
    InputDownload apiReferenceBam = new InputDownload(locationsApi.getReferenceAlignment());
    InputDownload apiReferenceBamIndex = new InputDownload(locationsApi.getReferenceAlignmentIndex());

    final InputFileDescriptor tumorAlignmentIndex = tumorAlignment.index();
    final InputFileDescriptor referenceAlignmentIndex = referenceAlignment.index();

    final String localTumorPath = localFilename(tumorAlignment);
    final String localReferencePath = localFilename(referenceAlignment);

    // Gridss is always pointed at the BAM-named paths.
    final String localTumorBam = localTumorPath.replace("cram", "bam");
    final String localReferenceBam = localReferencePath.replace("cram", "bam");

    final Driver driver = new Driver(resources, VmDirectories.outputFile(tumorSample + ".assembly.bam"))
            .tumorSample(tumorSample, localTumorBam)
            .referenceSample(referenceSample, localReferenceBam);
    final GridssAnnotation annotation = new GridssAnnotation(resources, false);
    final SubStageInputOutput gridssStages = driver.andThen(annotation).apply(SubStageInputOutput.empty(tumorSample));

    final OutputFile vcf = gridssStages.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorSample);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // ---- commands ----
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));

    // download both alignment files together with their indexes
    commands.addCommand(() -> tumorAlignment.toCommandForm(localTumorPath));
    commands.addCommand(() -> tumorAlignmentIndex.toCommandForm(localFilename(tumorAlignmentIndex)));
    commands.addCommand(() -> referenceAlignment.toCommandForm(localReferencePath));
    commands.addCommand(() -> referenceAlignmentIndex.toCommandForm(localFilename(referenceAlignmentIndex)));

    // convert only when the BAM-named path differs from the downloaded file's path
    final boolean tumorNeedsConversion = !localTumorPath.equals(localTumorBam);
    if (tumorNeedsConversion) {
        commands.addCommands(cramToBam(localTumorPath));
    }
    final boolean referenceNeedsConversion = !localReferencePath.equals(localReferenceBam);
    if (referenceNeedsConversion) {
        commands.addCommands(cramToBam(localReferencePath));
    }

    // run Gridss, then archive the unfiltered VCF and its index
    commands.addCommands(gridssStages.bash());
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive));
    commands.addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
the class GripssPurpleLinx method runSample.
/**
 * Appends the commands that run Gripss, Pave (somatic and germline), Purple and Linx for one sample,
 * in that order, each preceded by the download of the inputs it needs.
 *
 * @param startupScript      script the generated commands are appended to
 * @param resourceFiles      resource paths (reference genome, panels, PON files, Ensembl cache, ...)
 * @param sampleId           tumor sample to process
 * @param sampleLocationsMap known file locations per sample; when the sample has no entry the
 *                           locations are obtained through the remote locations API instead
 */
private void runSample(final BashStartupScript startupScript, final ResourceFiles resourceFiles, final String sampleId, final Map<String, SampleLocationData> sampleLocationsMap) {
    // The API-backed fallback is only constructed when the map has no entry for the sample.
    final SampleLocationData sampleLocations = sampleLocationsMap.containsKey(sampleId)
            ? sampleLocationsMap.get(sampleId)
            : SampleLocationData.fromRemoteLocationsApi(sampleId, new RemoteLocationsApi("hmf-crunch", sampleId));

    // download required input files
    String gridssVcf = sampleLocations.localFileRef(sampleLocations.GridssVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.GridssVcf, false));

    // run Gripss
    final StringJoiner gripssArgs = new StringJoiner(" ");
    gripssArgs.add(String.format("-sample %s", sampleId));
    gripssArgs.add(String.format("-reference %s", sampleLocations.ReferenceId));
    gripssArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    gripssArgs.add(String.format("-known_hotspot_file %s", resourceFiles.knownFusionPairBedpe()));
    gripssArgs.add(String.format("-pon_sgl_file %s", resourceFiles.gridssBreakendPon()));
    gripssArgs.add(String.format("-pon_sv_file %s", resourceFiles.gridssBreakpointPon()));
    gripssArgs.add(String.format("-vcf %s", gridssVcf));
    gripssArgs.add(String.format("-output_dir %s", VmDirectories.OUTPUT));
    gripssArgs.add("-log_debug");

    // Gripss and Linx use the jar placed directly in the tools directory; Pave and Purple use versioned paths.
    String gripssJar = String.format("%s/%s", VmDirectories.TOOLS, GRIPSS_JAR);
    startupScript.addCommand(() -> format("java -Xmx30G -jar %s %s", gripssJar, gripssArgs.toString()));

    // VCF paths that Purple reads further down
    final String gripssUnfilteredVcf = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final String gripssFilteredVcf = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // Pave somatic
    final String paveSomaticVcf = String.format("%s/%s.sage.somatic.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    String sageSomaticVcf = sampleLocations.localFileRef(sampleLocations.SageSomaticVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.SageSomaticVcf, false));

    final StringJoiner paveSomaticArgs = buildPaveArgs(sampleId, sageSomaticVcf, paveSomaticVcf, resourceFiles);
    String paveJar = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveSomaticArgs.toString()));

    // Pave germline
    final String paveGermlineVcf = String.format("%s/%s.sage.germline.filtered.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    String sageGermlineVcf = sampleLocations.localFileRef(sampleLocations.SageGermlineVcf);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(sampleLocations.SageGermlineVcf, false));

    final StringJoiner paveGermlineArgs = buildPaveArgs(sampleId, sageGermlineVcf, paveGermlineVcf, resourceFiles);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveGermlineArgs.toString()));

    // Purple: Amber and Cobalt files are fetched by wildcard and Purple is pointed at the VM input directory
    // for both (presumably where formDownloadRequest places them - confirm).
    String amberDir = VmDirectories.INPUT;
    String amberFiles = String.format("%s/*amber*", sampleLocations.Amber);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(amberFiles, false));

    String cobaltDir = VmDirectories.INPUT;
    String cobaltFiles = String.format("%s/*cobalt*", sampleLocations.Cobalt);
    startupScript.addCommand(() -> sampleLocations.formDownloadRequest(cobaltFiles, false));

    final StringJoiner purpleArgs = new StringJoiner(" ");
    purpleArgs.add(String.format("-tumor %s", sampleId));
    purpleArgs.add(String.format("-reference %s", sampleLocations.ReferenceId));
    purpleArgs.add(String.format("-structural_vcf %s", gripssFilteredVcf));
    purpleArgs.add(String.format("-sv_recovery_vcf %s", gripssUnfilteredVcf));
    purpleArgs.add(String.format("-somatic_vcf %s", paveSomaticVcf));
    purpleArgs.add(String.format("-germline_vcf %s", paveGermlineVcf));
    purpleArgs.add(String.format("-amber %s", amberDir));
    purpleArgs.add(String.format("-cobalt %s", cobaltDir));
    purpleArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    purpleArgs.add(String.format("-gc_profile %s", resourceFiles.gcProfileFile()));
    purpleArgs.add(String.format("-somatic_hotspots %s", resourceFiles.sageSomaticHotspots()));
    purpleArgs.add(String.format("-germline_hotspots %s", resourceFiles.sageGermlineHotspots()));
    purpleArgs.add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()));
    purpleArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    purpleArgs.add("-run_drivers");
    purpleArgs.add("-no_charts");
    purpleArgs.add(String.format("-threads %s", Bash.allCpus()));
    purpleArgs.add(String.format("-output_dir %s", VmDirectories.OUTPUT));

    String purpleJar = String.format("%s/purple/%s/purple.jar", VmDirectories.TOOLS, Versions.PURPLE);
    startupScript.addCommand(() -> format("java -jar %s %s", purpleJar, purpleArgs.toString()));

    // Linx reads the structural variant VCF from the Purple output directory
    final String purpleSvVcf = String.format("%s/%s.purple.sv.vcf.gz", VmDirectories.OUTPUT, sampleId);

    final StringJoiner linxArgs = new StringJoiner(" ");
    linxArgs.add(String.format("-sample %s", sampleId));
    linxArgs.add(String.format("-sv_vcf %s", purpleSvVcf));
    linxArgs.add(String.format("-purple_dir %s", VmDirectories.OUTPUT));
    linxArgs.add(String.format("-fragile_site_file %s", resourceFiles.fragileSites()));
    linxArgs.add(String.format("-line_element_file %s", resourceFiles.lineElements()));
    linxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    linxArgs.add("-check_drivers");
    linxArgs.add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()));
    linxArgs.add("-check_fusions");
    linxArgs.add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()));
    linxArgs.add(String.format("-output_dir %s", VmDirectories.OUTPUT));

    String linxJar = String.format("%s/%s", VmDirectories.TOOLS, LINX_JAR);
    startupScript.addCommand(() -> format("java -jar %s %s", linxJar, linxArgs.toString()));
}

/**
 * Assembles the Pave argument list, which is identical for the somatic and the germline run
 * apart from the input and output VCF.
 *
 * @param sampleId      sample name passed as {@code -sample}
 * @param inputVcf      local Sage VCF to annotate
 * @param outputVcf     path of the annotated VCF Pave should write
 * @param resourceFiles resource paths for reference genome, driver gene panel and Ensembl cache
 * @return joiner holding the space-separated Pave arguments
 */
private static StringJoiner buildPaveArgs(final String sampleId, final String inputVcf, final String outputVcf, final ResourceFiles resourceFiles) {
    final StringJoiner paveArgs = new StringJoiner(" ");
    paveArgs.add(String.format("-sample %s", sampleId));
    paveArgs.add(String.format("-vcf_file %s", inputVcf));
    paveArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    paveArgs.add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()));
    paveArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    paveArgs.add(String.format("-output_dir %s", VmDirectories.OUTPUT));
    paveArgs.add(String.format("-output_vcf_file %s", outputVcf));
    return paveArgs;
}
use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
the class SageGermline method execute.
/**
 * Builds the VM job that runs Sage in germline mode on one sample and then annotates the
 * resulting VCF with Pave.
 *
 * @param inputs         bundle whose single input value is the sample id
 * @param runtimeBucket  bucket whose name is used as the upload destination for the output
 * @param startupScript  script the generated commands are appended to
 * @param executionFlags runtime files handed to the output upload command
 * @return job definition named "sage" using a custom performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get().inputValue();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // fetch the Sage jar
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);

    // download tumor CRAM
    final String[] tumorCram = getCramFileData(locations.getTumorAlignment());
    final String tumorCramName = tumorCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    // download reference CRAM
    final String referenceId = locations.getReference();
    final String[] referenceCram = getCramFileData(locations.getReferenceAlignment());
    final String referenceCramName = referenceCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", referenceCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String localTumorCram = String.format("%s/%s", VmDirectories.INPUT, tumorCramName);
    final String localReferenceCram = String.format("%s/%s", VmDirectories.INPUT, referenceCramName);

    final String sageVcf = String.format("%s/%s.sage.germline.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // note the switch on samples: the reference sample is passed as Sage's tumor and vice versa
    final StringJoiner sageArgs = new StringJoiner(" ")
            .add(String.format("-tumor %s", referenceId))
            .add(String.format("-tumor_bam %s", localReferenceCram))
            .add(String.format("-reference %s", sampleId))
            .add(String.format("-reference_bam %s", localTumorCram))
            .add(String.format("-high_confidence_bed %s", resources.giabHighConfidenceBed()))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resources.version().toString()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add(String.format("-hotspots %s", resources.sageGermlineHotspots()))
            .add(String.format("-panel_bed %s", resources.sageGermlineCodingPanel()))
            .add("-panel_only")
            .add("-hotspot_min_tumor_qual 50")
            .add("-panel_min_tumor_qual 75")
            .add("-hotspot_max_germline_vaf 100")
            .add("-hotspot_max_germline_rel_raw_base_qual 100")
            .add("-panel_max_germline_vaf 100")
            .add("-panel_max_germline_rel_raw_base_qual 100")
            .add("-mnv_filter_enabled false")
            .add(String.format("-out %s", sageVcf))
            .add(String.format("-threads %s", Bash.allCpus()));

    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageArgs.toString()));

    // Pave germline, run on the VCF Sage has just written
    final String paveJar = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    final String paveVcf = String.format("%s/%s.sage.germline.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);

    final StringJoiner paveArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageVcf))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resources.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add("-filter_pass")
            .add(String.format("-output_vcf_file %s", paveVcf));

    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveArgs.toString()));

    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Aggregations