Use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
The execute method of the GripssPurpleLinx class.
// private static String PON_BP = "gridss_pon_breakpoint.37.sorted.bedpe";
// private static String PON_BE = "gridss_pon_single_breakend.37.sorted.bed";
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
InputFileDescriptor descriptor = inputs.get();
final String[] inputArguments = descriptor.inputValue().split(",");
final List<String> sampleIds = Arrays.stream(inputArguments[0].split(";")).collect(Collectors.toList());
Map<String, SampleLocationData> sampleLocations = null;
if (inputArguments.length > 1) {
sampleLocations = SampleLocationData.loadSampleLocations(inputArguments[1], sampleIds);
} else {
sampleLocations = Maps.newHashMap();
}
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
// download required JARs and resources
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, PURPLE_DIR, PURPLE_JAR, VmDirectories.TOOLS));
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, LINX_DIR, LINX_JAR, VmDirectories.TOOLS));
for (String sampleId : sampleIds) {
runSample(startupScript, resourceFiles, sampleId, sampleLocations);
}
// upload output
startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gpl"), executionFlags));
// and copy the key output files to a single directory for convenience
String gripssCombined = String.format("%s/gripss/", COMBINED_OUTPUT_DIR);
String linxCombined = String.format("%s/linx/", COMBINED_OUTPUT_DIR);
String purpleCombined = String.format("%s/purple/", COMBINED_OUTPUT_DIR);
String paveCombined = String.format("%s/pave/", COMBINED_OUTPUT_DIR);
startupScript.addCommand(() -> format("gsutil -m cp %s/*gripss*vcf* %s", VmDirectories.OUTPUT, gripssCombined));
startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.somatic.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveCombined));
startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.germline.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveCombined));
// select files for subsequent Linx runs and/or comparison using Compar
startupScript.addCommand(() -> format("gsutil -m cp %s/*linx*.tsv %s", VmDirectories.OUTPUT, linxCombined));
startupScript.addCommand(() -> format("gsutil -m cp %s/*purple* %s", VmDirectories.OUTPUT, purpleCombined));
startupScript.addCommand(() -> format("gsutil -m cp %s/*driver.catalog* %s", VmDirectories.OUTPUT, purpleCombined));
return ImmutableVirtualMachineJobDefinition.builder().name("gpl").startupCommand(startupScript).performanceProfile(custom(12, 32)).namespacedResults(ResultsDirectory.defaultDirectory()).build();
}
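The single descriptor value is comma-separated: a semicolon-delimited list of sample IDs, optionally followed by a sample-locations file. A hedged illustration of how such a value could be assembled (the sample IDs and bucket path are made up, not taken from the pipeline):
// Illustrative only - not taken from the pipeline.
String sampleIdList = String.join(";", "SAMPLE01T", "SAMPLE02T");
String locationsFile = "gs://example-bucket/sample_locations.csv"; // assumed path, for illustration
String batchInputValue = sampleIdList + "," + locationsFile;
// descriptor.inputValue() would return this string, which the method above splits back
// into the sample ID list and the optional locations file.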
Use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
The execute method of the SageGermline class.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
InputFileDescriptor descriptor = inputs.get();
final String sampleId = descriptor.inputValue();
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));
final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);
String[] tumorCramData = getCramFileData(locations.getTumorAlignment());
String tumorCramFile = tumorCramData[CRAM_FILENAME];
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCramData[CRAM_FULL_PATH], VmDirectories.INPUT));
String referenceId = locations.getReference();
String[] refCramData = getCramFileData(locations.getReferenceAlignment());
String refCramFile = refCramData[CRAM_FILENAME];
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", refCramData[CRAM_FULL_PATH], VmDirectories.INPUT));
// local paths of the downloaded CRAMs
String localTumorCram = String.format("%s/%s", VmDirectories.INPUT, tumorCramFile);
String localRefCram = String.format("%s/%s", VmDirectories.INPUT, refCramFile);
final String sageVcf = String.format("%s/%s.sage.germline.vcf.gz", VmDirectories.OUTPUT, sampleId);
final StringJoiner sageArgs = new StringJoiner(" ");
// note the switch of samples: for germline calling the reference sample is passed to SAGE as the tumor
sageArgs.add(String.format("-tumor %s", referenceId));
sageArgs.add(String.format("-tumor_bam %s", localRefCram));
sageArgs.add(String.format("-reference %s", sampleId));
sageArgs.add(String.format("-reference_bam %s", localTumorCram));
sageArgs.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
sageArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
sageArgs.add(String.format("-ref_genome_version %s", resourceFiles.version().toString()));
sageArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
sageArgs.add(String.format("-hotspots %s", resourceFiles.sageGermlineHotspots()));
sageArgs.add(String.format("-panel_bed %s", resourceFiles.sageGermlineCodingPanel()));
sageArgs.add("-panel_only");
sageArgs.add("-hotspot_min_tumor_qual 50");
sageArgs.add("-panel_min_tumor_qual 75");
sageArgs.add("-hotspot_max_germline_vaf 100");
sageArgs.add("-hotspot_max_germline_rel_raw_base_qual 100");
sageArgs.add("-panel_max_germline_vaf 100");
sageArgs.add("-panel_max_germline_rel_raw_base_qual 100");
sageArgs.add("-mnv_filter_enabled false");
sageArgs.add(String.format("-out %s", sageVcf));
sageArgs.add(String.format("-threads %s", Bash.allCpus()));
startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageArgs.toString()));
// Pave germline
String paveJar = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
final String paveGermlineVcf = String.format("%s/%s.sage.germline.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
StringJoiner paveGermlineArgs = new StringJoiner(" ");
paveGermlineArgs.add(String.format("-sample %s", sampleId));
paveGermlineArgs.add(String.format("-vcf_file %s", sageVcf));
paveGermlineArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
paveGermlineArgs.add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()));
paveGermlineArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
paveGermlineArgs.add("-filter_pass");
paveGermlineArgs.add(String.format("-output_vcf_file %s", paveGermlineVcf));
startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveGermlineArgs.toString()));
// upload output
startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));
return ImmutableVirtualMachineJobDefinition.builder().name("sage").startupCommand(startupScript).performanceProfile(custom(24, 64)).namespacedResults(ResultsDirectory.defaultDirectory()).build();
}
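The getCramFileData helper is not shown in this example. Judging from how its result is used above (the CRAM_FULL_PATH entry feeds the gsutil copy and CRAM_FILENAME the local path), a minimal sketch could look like the following, assuming the alignment is exposed as a GoogleStorageLocation with bucket() and path() accessors and that the index constants are 0 and 1; none of this is taken from the original source:
private static final int CRAM_FULL_PATH = 0; // assumed value
private static final int CRAM_FILENAME = 1;  // assumed value

// Sketch only: splits a remote alignment location into its full bucket path and bare file name.
private static String[] getCramFileData(final GoogleStorageLocation alignment) {
    String fullPath = alignment.bucket() + "/" + alignment.path();
    String fileName = fullPath.substring(fullPath.lastIndexOf('/') + 1);
    return new String[] { fullPath, fileName };
}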
Use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
The execute method of the SageGermlineOld class.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
// Inputs
final InputFileDescriptor biopsy = inputs.get("biopsy");
final LocalLocations localInput = new LocalLocations(new RemoteLocationsApi(biopsy));
final String tumorSampleName = localInput.getTumor();
final String referenceSampleName = localInput.getReference();
final String tumorAlignment = localInput.getTumorAlignment();
final String referenceAlignment = localInput.getReferenceAlignment();
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
// Download Inputs
commands.addCommands(localInput.generateDownloadCommands());
return VirtualMachineJobDefinition.sageGermlineCalling(commands, ResultsDirectory.defaultDirectory());
}
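The method above returns right after scheduling the input downloads. For context, a hedged sketch (an assumption, mirroring the SageRerunOld example below rather than the original body) of how the downloaded alignments could be handed to SAGE:
// Sketch only - not part of the original method body.
SageCommandBuilder sageCommandBuilder = new SageCommandBuilder(resourceFiles)
        .addReference(referenceSampleName, referenceAlignment)
        .addTumor(tumorSampleName, tumorAlignment);
SageApplication sageApplication = new SageApplication(sageCommandBuilder);
commands.addCommands(sageApplication.apply(SubStageInputOutput.empty(tumorSampleName)).bash());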
Use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
The execute method of the SageRerunOld class.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
// Inputs
final String set = inputs.get("set").inputValue();
final String tumorSampleName = inputs.get("tumor_sample").inputValue();
final String referenceSampleName = inputs.get("ref_sample").inputValue();
final InputFileDescriptor remoteTumorFile = inputs.get("tumor_cram");
final InputFileDescriptor remoteReferenceFile = inputs.get("ref_cram");
final InputFileDescriptor remoteTumorIndex = remoteTumorFile.index();
final InputFileDescriptor remoteReferenceIndex = remoteReferenceFile.index();
final String localTumorFile = localFilename(remoteTumorFile);
final String localReferenceFile = localFilename(remoteReferenceFile);
final String localTumorBam = CONVERT_TO_BAM ? localTumorFile.replace("cram", "bam") : localTumorFile;
final String localReferenceBam = CONVERT_TO_BAM ? localReferenceFile.replace("cram", "bam") : localReferenceFile;
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
// Download tumor
commands.addCommand(() -> remoteTumorFile.toCommandForm(localTumorFile));
commands.addCommand(() -> remoteTumorIndex.toCommandForm(localFilename(remoteTumorIndex)));
// Download normal
commands.addCommand(() -> remoteReferenceFile.toCommandForm(localReferenceFile));
commands.addCommand(() -> remoteReferenceIndex.toCommandForm(localFilename(remoteReferenceIndex)));
final SageCommandBuilder sageCommandBuilder = new SageCommandBuilder(resourceFiles).addReference(referenceSampleName, localReferenceBam).addTumor(tumorSampleName, localTumorBam);
if (PANEL_ONLY) {
sageCommandBuilder.panelOnly();
}
if (inputs.contains("rna")) {
final InputFileDescriptor remoteRnaBam = inputs.get("rna");
final InputFileDescriptor remoteRnaBamIndex = remoteRnaBam.index();
final String localRnaBam = localFilename(remoteRnaBam);
// Download rna
commands.addCommand(() -> remoteRnaBam.toCommandForm(localRnaBam));
commands.addCommand(() -> remoteRnaBamIndex.toCommandForm(localFilename(remoteRnaBamIndex)));
// Add to sage application
sageCommandBuilder.addReference(referenceSampleName + "NA", localRnaBam);
}
// Convert to bam if necessary
if (!localTumorFile.equals(localTumorBam)) {
commands.addCommands(cramToBam(localTumorFile));
}
if (!localReferenceFile.equals(localReferenceBam)) {
commands.addCommands(cramToBam(localReferenceFile));
}
SageApplication sageApplication = new SageApplication(sageCommandBuilder);
SageSomaticPostProcess sagePostProcess = new SageSomaticPostProcess(tumorSampleName, resourceFiles);
SubStageInputOutput sageOutput = sageApplication.andThen(sagePostProcess).apply(SubStageInputOutput.empty(tumorSampleName));
commands.addCommands(sageOutput.bash());
// 8. Archive targeted output
final GoogleStorageLocation archiveStorageLocation = sageArchiveDirectory(set);
final OutputFile filteredOutputFile = sageOutput.outputFile();
final OutputFile filteredOutputFileIndex = filteredOutputFile.index(".tbi");
final OutputFile unfilteredOutputFile = sageApplication.apply(SubStageInputOutput.empty(tumorSampleName)).outputFile();
final OutputFile unfilteredOutputFileIndex = unfilteredOutputFile.index(".tbi");
commands.addCommand(() -> filteredOutputFile.copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> filteredOutputFileIndex.copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> unfilteredOutputFile.copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> unfilteredOutputFileIndex.copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> bqrFile(tumorSampleName, "png").copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> bqrFile(tumorSampleName, "tsv").copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> bqrFile(referenceSampleName, "png").copyToRemoteLocation(archiveStorageLocation));
commands.addCommand(() -> bqrFile(referenceSampleName, "tsv").copyToRemoteLocation(archiveStorageLocation));
// Store output
commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));
return VirtualMachineJobDefinition.sageSomaticCalling(commands, ResultsDirectory.defaultDirectory());
}
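The cramToBam helper is not shown in this example. A minimal sketch of the conversion step, assuming a samtools binary is available on the VM (the original helper may differ, and in practice the reference genome may need to be supplied via samtools view -T):
// Sketch only: commands to convert a local CRAM to BAM and index the result.
private static List<BashCommand> cramToBam(final String cramFile) {
    String bamFile = cramFile.replace("cram", "bam");
    List<BashCommand> commands = new ArrayList<>();
    commands.add(() -> format("samtools view -b -o %s %s", bamFile, cramFile));
    commands.add(() -> format("samtools index %s", bamFile));
    return commands;
}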
Use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
The execute method of the RnaIsofox class.
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
InputFileDescriptor descriptor = inputs.get();
final String batchInputs = descriptor.inputValue();
final String[] batchItems = batchInputs.split(",");
if (batchItems.length < 2) {
System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
return null;
}
final String sampleId = batchItems[COL_SAMPLE_ID];
final String readLength = batchItems[COL_READ_LENGTH];
final String functionsStr = batchItems.length > COL_FUNCTIONS ? batchItems[COL_FUNCTIONS] : FUNC_TRANSCRIPT_COUNTS + ";" + FUNC_NOVEL_LOCATIONS + ";" + FUNC_FUSIONS;
final RefGenomeVersion refGenomeVersion = batchItems.length > COL_REF_GENOME_VERSION ? RefGenomeVersion.valueOf(batchItems[COL_REF_GENOME_VERSION]) : V37;
final int maxRam = batchItems.length > COL_MAX_RAM ? Integer.parseInt(batchItems[COL_MAX_RAM]) : DEFAULT_MAX_RAM;
final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
// final String rnaCohortDirectory = getRnaCohortDirectory(refGenomeVersion);
final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");
// copy down BAM and index file for this sample
final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));
// copy down the executable
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
// startupScript.addCommand(() -> format("chmod a+x %s/%s", VmDirectories.TOOLS, ISOFOX_JAR));
// copy down required reference files
// startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s",
// getRnaResourceDirectory(refGenomeVersion, ENSEMBL_DATA_CACHE), VmDirectories.INPUT));
final String expectedCountsFile = readLength.equals(READ_LENGTH_76) ? EXP_COUNTS_READ_76 : EXP_COUNTS_READ_151;
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", getRnaResourceDirectory(refGenomeVersion, "ensembl_data_cache"), VmDirectories.INPUT));
if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), expectedCountsFile, VmDirectories.INPUT));
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), EXP_GC_COUNTS_READ_100, VmDirectories.INPUT));
}
if (functionsStr.equals(FUNC_FUSIONS)) {
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), COHORT_FUSION_FILE, VmDirectories.INPUT));
}
final String threadCount = Bash.allCpus();
startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));
boolean writeExpData = false;
boolean writeCatCountsData = false;
final String neoEpitopeFile = String.format("%s.imu.neo_epitopes.csv", sampleId);
if (functionsStr.contains(FUNC_NEO_EPITOPES)) {
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", NEO_EPITOPE_DIR, neoEpitopeFile, VmDirectories.INPUT));
}
// run Isofox
StringJoiner isofoxArgs = new StringJoiner(" ");
isofoxArgs.add(String.format("-sample %s", sampleId));
isofoxArgs.add(String.format("-functions \"%s\"", functionsStr));
isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
// isofoxArgs.add(String.format("-ensembl_data_dir %s", VmDirectories.INPUT));
isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
isofoxArgs.add(String.format("-long_frag_limit %d", LONG_FRAG_LENGTH_LIMIT));
if (refGenomeVersion == RefGenomeVersion.V38) {
isofoxArgs.add(String.format("-ref_genome_version %s", "38"));
}
if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
isofoxArgs.add(String.format("-apply_exp_rates"));
isofoxArgs.add(String.format("-apply_calc_frag_lengths"));
isofoxArgs.add(String.format("-exp_counts_file %s/%s", VmDirectories.INPUT, expectedCountsFile));
isofoxArgs.add(String.format("-frag_length_min_count %d", FRAG_LENGTH_FRAG_COUNT));
isofoxArgs.add(String.format("-apply_gc_bias_adjust"));
isofoxArgs.add(String.format("-exp_gc_ratios_file %s/%s", VmDirectories.INPUT, EXP_GC_COUNTS_READ_100));
isofoxArgs.add(String.format("-apply_map_qual_adjust"));
isofoxArgs.add(String.format("-write_frag_lengths"));
isofoxArgs.add(String.format("-write_gc_data"));
if (writeCatCountsData)
isofoxArgs.add(String.format("-write_trans_combo_data"));
if (writeExpData)
isofoxArgs.add(String.format("-write_exp_rates"));
}
if (functionsStr.equals(FUNC_NOVEL_LOCATIONS)) {
isofoxArgs.add(String.format("-write_splice_sites"));
}
if (functionsStr.contains(FUNC_FUSIONS)) {
isofoxArgs.add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()));
isofoxArgs.add(String.format("-fusion_cohort_file %s/%s", VmDirectories.INPUT, COHORT_FUSION_FILE));
}
if (functionsStr.equals(FUNC_NEO_EPITOPES)) {
isofoxArgs.add(String.format("-neoepitope_file %s/%s", VmDirectories.INPUT, neoEpitopeFile));
}
isofoxArgs.add(String.format("-threads %s", threadCount));
startupScript.addCommand(() -> format("java -Xmx60G -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));
// upload the results
startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
if (functionsStr.equals(FUNC_FUSIONS)) {
startupScript.addCommand(() -> format("gsutil -m cp %s/*fusions.csv %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
} else {
// copy results to rna-analysis location on crunch
startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
}
return ImmutableVirtualMachineJobDefinition.builder().name("rna-isofox").startupCommand(startupScript).namespacedResults(ResultsDirectory.defaultDirectory()).workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB).performanceProfile(VirtualMachinePerformanceProfile.custom(DEFAULT_CORES, maxRam)).build();
}
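The batch input here is a single comma-separated value read positionally: sample ID, read length, then optional functions, reference genome version, and maximum RAM. A hedged illustration of such a value (the sample ID is made up, and the assumption is that the COL_* constants index the columns in this order):
// Illustrative only - not taken from the pipeline.
String batchInputValue = String.join(",",
        "EXAMPLE_SAMPLE",                            // COL_SAMPLE_ID
        "151",                                       // COL_READ_LENGTH
        FUNC_TRANSCRIPT_COUNTS + ";" + FUNC_FUSIONS, // COL_FUNCTIONS (semicolon-delimited)
        "V38",                                       // COL_REF_GENOME_VERSION (RefGenomeVersion enum name)
        "64");                                       // COL_MAX_RAM (assumed to be GB)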