Use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical:
the execute method of the RnaIsofox class.
/**
 * Builds the VM job that runs Isofox for a single RNA sample.
 *
 * <p>The batch input is a comma-separated string: SampleId,ReadLength[,Functions[,RefGenomeVersion[,MaxRam]]].
 * Optional columns fall back to all three core functions, V37 and {@code DEFAULT_MAX_RAM}.
 *
 * @return the job definition, or {@code null} when the batch input is malformed
 *         (callers treat a null job as "skip this entry").
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");

    if (batchItems.length < 2) {
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
        return null;
    }

    final String sampleId = batchItems[COL_SAMPLE_ID];
    final String readLength = batchItems[COL_READ_LENGTH];

    final String functionsStr = batchItems.length > COL_FUNCTIONS
            ? batchItems[COL_FUNCTIONS]
            : FUNC_TRANSCRIPT_COUNTS + ";" + FUNC_NOVEL_LOCATIONS + ";" + FUNC_FUSIONS;

    final RefGenomeVersion refGenomeVersion = batchItems.length > COL_REF_GENOME_VERSION
            ? RefGenomeVersion.valueOf(batchItems[COL_REF_GENOME_VERSION])
            : V37;

    final int maxRam = batchItems.length > COL_MAX_RAM ? Integer.parseInt(batchItems[COL_MAX_RAM]) : DEFAULT_MAX_RAM;

    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));

    // copy down required reference files; expected-counts file is keyed on read length
    final String expectedCountsFile = readLength.equals(READ_LENGTH_76) ? EXP_COUNTS_READ_76 : EXP_COUNTS_READ_151;
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/* %s", getRnaResourceDirectory(refGenomeVersion, "ensembl_data_cache"), VmDirectories.INPUT));

    if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), expectedCountsFile, VmDirectories.INPUT));
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), EXP_GC_COUNTS_READ_100, VmDirectories.INPUT));
    }

    // BUG FIX: this condition previously used equals(), so when fusions ran alongside other
    // functions the cohort fusion file was never downloaded even though -fusion_cohort_file
    // (added on a contains() check below) still referenced it. Align download with usage.
    if (functionsStr.contains(FUNC_FUSIONS)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", getRnaResourceDirectory(refGenomeVersion, ISOFOX), COHORT_FUSION_FILE, VmDirectories.INPUT));
    }

    final String threadCount = Bash.allCpus();
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // diagnostic outputs, deliberately disabled by default
    boolean writeExpData = false;
    boolean writeCatCountsData = false;

    final String neoEpitopeFile = String.format("%s.imu.neo_epitopes.csv", sampleId);
    if (functionsStr.contains(FUNC_NEO_EPITOPES)) {
        startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", NEO_EPITOPE_DIR, neoEpitopeFile, VmDirectories.INPUT));
    }

    // assemble and run the Isofox command line
    StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add(String.format("-functions \"%s\"", functionsStr));
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    isofoxArgs.add(String.format("-long_frag_limit %d", LONG_FRAG_LENGTH_LIMIT));

    if (refGenomeVersion == RefGenomeVersion.V38) {
        isofoxArgs.add(String.format("-ref_genome_version %s", "38"));
    }

    if (functionsStr.contains(FUNC_TRANSCRIPT_COUNTS)) {
        isofoxArgs.add("-apply_exp_rates");
        isofoxArgs.add("-apply_calc_frag_lengths");
        isofoxArgs.add(String.format("-exp_counts_file %s/%s", VmDirectories.INPUT, expectedCountsFile));
        isofoxArgs.add(String.format("-frag_length_min_count %d", FRAG_LENGTH_FRAG_COUNT));
        isofoxArgs.add("-apply_gc_bias_adjust");
        isofoxArgs.add(String.format("-exp_gc_ratios_file %s/%s", VmDirectories.INPUT, EXP_GC_COUNTS_READ_100));
        isofoxArgs.add("-apply_map_qual_adjust");
        isofoxArgs.add("-write_frag_lengths");
        isofoxArgs.add("-write_gc_data");
        if (writeCatCountsData) {
            isofoxArgs.add("-write_trans_combo_data");
        }
        if (writeExpData) {
            isofoxArgs.add("-write_exp_rates");
        }
    }

    // NOTE(review): these two use equals() (exact-match, single-function runs only) while most
    // other checks use contains() — presumably intentional, but worth confirming.
    if (functionsStr.equals(FUNC_NOVEL_LOCATIONS)) {
        isofoxArgs.add("-write_splice_sites");
    }

    if (functionsStr.contains(FUNC_FUSIONS)) {
        isofoxArgs.add(String.format("-known_fusion_file %s", resourceFiles.knownFusionData()));
        isofoxArgs.add(String.format("-fusion_cohort_file %s/%s", VmDirectories.INPUT, COHORT_FUSION_FILE));
    }

    if (functionsStr.equals(FUNC_NEO_EPITOPES)) {
        isofoxArgs.add(String.format("-neoepitope_file %s/%s", VmDirectories.INPUT, neoEpitopeFile));
    }

    isofoxArgs.add(String.format("-threads %s", threadCount));

    startupScript.addCommand(() -> format("java -Xmx60G -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));

    // upload the results to the runtime bucket, then mirror them to the RNA cohort location
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    if (functionsStr.equals(FUNC_FUSIONS)) {
        // fusion-only reruns copy just the fusion output
        startupScript.addCommand(() -> format("gsutil -m cp %s/*fusions.csv %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    } else {
        // copy results to rna-analysis location on crunch
        startupScript.addCommand(() -> format("gsutil -m cp %s/* %s/%s/isofox/", VmDirectories.OUTPUT, samplesDir, sampleId));
    }

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(DEFAULT_CORES, maxRam))
            .build();
}
Use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical:
the execute method of the RnaIsofoxUnmapped class.
/**
 * Builds the VM job that runs Isofox in UNMAPPED_READS mode for a single RNA sample.
 *
 * <p>The batch input is a comma-separated string starting with SampleId,ReadLength
 * (only the sample id is used here; the reference genome is fixed to V37).
 *
 * @return the job definition, or {@code null} when the batch input is malformed
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String rawInput = descriptor.inputValue();
    final String[] fields = rawInput.split(",");

    if (fields.length < 2) {
        System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", rawInput));
        return null;
    }

    final String sampleId = fields[COL_SAMPLE_ID];
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");

    // stage the sample's BAM and its index onto the VM
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));

    // stage the Isofox jar and switch to the output directory before running
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));

    // assemble the Isofox invocation
    final StringJoiner args = new StringJoiner(" ");
    args.add(String.format("-sample %s", sampleId));
    args.add("-functions UNMAPPED_READS");
    args.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    args.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    args.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    args.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    args.add(String.format("-threads %s", Bash.allCpus()));

    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, args.toString()));

    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical:
the execute method of the AmberRerunTumorOnly class.
/**
 * Builds the VM job that re-runs AMBER in tumor-only mode for one sample set.
 *
 * <p>Downloads the tumor CRAM and index named in the inputs, runs AMBER against the
 * V37 reference resources, and archives the log plus all outputs for the set.
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String set = inputs.get("set").inputValue();
    final String tumorSampleName = inputs.get("tumor_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final String localTumorFile = localFilename(tumorCram);

    // pull the tumor CRAM and its index onto the VM
    commands.addCommand(() -> tumorCram.toCommandForm(localTumorFile));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));

    // run AMBER (tumor only, no reference sample)
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    commands.addCommand(() -> AmberCommandBuilder.newBuilder(resourceFiles).tumor(tumorSampleName, localTumorFile).build().asBash());

    // archive the run log and every output for this set
    commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
    commands.addCommand(new OutputUpload(amberArchiveDirectory(set)));

    return VirtualMachineJobDefinition.amber(commands, ResultsDirectory.defaultDirectory());
}
Use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical:
the execute method of the Bam2Fastq class.
/**
 * Builds the VM job that converts a BAM back into per-read-group FASTQ files.
 *
 * <p>Downloads the input BAM, counts lane-tagged read groups in the header as a
 * sanity check, runs Picard SamToFastq, renames the FASTQs into the expected
 * lane/read-number layout, compresses them, and uploads everything.
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    final InputFileDescriptor bamDescriptor = inputs.get();
    final String localBam = format("%s/%s", VmDirectories.INPUT, new File(bamDescriptor.inputValue()).getName());

    // download the BAM, then count @RG header lines that carry a lane marker
    startupScript.addCommand(() -> bamDescriptor.toCommandForm(localBam));
    startupScript.addCommand(new PipeCommands(new SambambaCommand("view", "-H", localBam), () -> "grep ^@RG", () -> "grep -cP \"_L00[1-8]_\""));

    // split the BAM into one FASTQ pair per read group
    final List<String> picardArgs = ImmutableList.of("SamToFastq", "ODIR=" + VmDirectories.OUTPUT, "OPRG=true", "RGT=ID", "NON_PF=true", "RC=true", "I=" + localBam);
    startupScript.addCommand(new JavaJarCommand("picard", "2.18.27", "picard.jar", "16G", picardArgs));

    // normalise FASTQ file names, then compress
    startupScript.addCommand(() -> format("rename 's/(.+)_(.+)_(.+)_(.+)_(.+)__(.+)\\.fastq/$1_$2_$3_$4_R$6_$5.fastq/' %s/*.fastq", VmDirectories.OUTPUT));
    startupScript.addCommand(() -> format("pigz %s/*.fastq", VmDirectories.OUTPUT));

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "bam2fastq"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("bam2fastq")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(1800)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(4, 20))
            .build();
}
Use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical:
the execute method of the CobaltRerun class.
/**
 * Builds the VM job that re-runs COBALT for one tumor/reference pair.
 *
 * <p>Downloads both CRAMs and their indexes, runs COBALT against the V37 reference
 * resources, and archives the log plus all outputs for the set.
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Inputs
    final String set = inputs.get("set").inputValue();
    final String tumorSampleName = inputs.get("tumor_sample").inputValue();
    final String referenceSampleName = inputs.get("ref_sample").inputValue();
    final InputFileDescriptor remoteTumorFile = inputs.get("tumor_cram");
    final InputFileDescriptor remoteReferenceFile = inputs.get("ref_cram");
    final InputFileDescriptor remoteTumorIndex = remoteTumorFile.index();
    final InputFileDescriptor remoteReferenceIndex = remoteReferenceFile.index();
    final String localTumorFile = localFilename(remoteTumorFile);
    final String localReferenceFile = localFilename(remoteReferenceFile);

    // Download tumor
    commands.addCommand(() -> remoteTumorFile.toCommandForm(localTumorFile));
    commands.addCommand(() -> remoteTumorIndex.toCommandForm(localFilename(remoteTumorIndex)));

    // Download normal
    commands.addCommand(() -> remoteReferenceFile.toCommandForm(localReferenceFile));
    commands.addCommand(() -> remoteReferenceIndex.toCommandForm(localFilename(remoteReferenceIndex)));

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    commands.addCommand(() -> CobaltCommandBuilder.newBuilder(resourceFiles).reference(referenceSampleName, localReferenceFile).tumor(tumorSampleName, localTumorFile).build().asBash());

    // Store output
    final GoogleStorageLocation archiveStorageLocation = cobaltArchiveDirectory(set);
    commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
    commands.addCommand(new OutputUpload(archiveStorageLocation));

    // BUG FIX: previously returned VirtualMachineJobDefinition.amber(...) — a copy-paste from
    // the AMBER rerun operation. This is the COBALT rerun, so use the cobalt job definition
    // (name/VM sizing differ between the two factories).
    return VirtualMachineJobDefinition.cobalt(commands, ResultsDirectory.defaultDirectory());
}
Aggregations