use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.
the class SambambaCramaBam method execute.
/**
 * Builds the VM job that re-encodes one input BAM as CRAM with sambamba.
 *
 * <p>The startup script is populated with three steps, in order: fetch the input to the VM's
 * input directory, run {@code sambamba view --format=cram} against the 37 reference genome,
 * and upload the output directory to the {@code cram} path of the runtime bucket.
 *
 * @param inputs         bundle whose default entry holds the location of the BAM to convert
 * @param bucket         runtime bucket that receives the uploaded results
 * @param startupScript  script to which the commands are appended
 * @param executionFlags runtime files handed to the output upload step
 * @return a job definition named {@code cram} using a custom performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor bamDescriptor = inputs.get();

    // Only the file name (no directories) of the remote path is used for local paths.
    final String bamFileName = new File(bamDescriptor.inputValue()).getName();
    // The regex is anchored, so only a trailing ".bam" extension is swapped for ".cram".
    final String cramPath = VmDirectories.outputFile(bamFileName.replaceAll("\\.bam$", ".cram"));
    final String localBamPath = String.format("%s/%s", VmDirectories.INPUT, bamFileName);

    // NOTE(review): toCommandForm presumably renders the copy-to-VM command - confirm.
    startupScript.addCommand(() -> bamDescriptor.toCommandForm(localBamPath));

    final RefGenome37ResourceFiles refGenome37 = new RefGenome37ResourceFiles();
    final VersionedToolCommand convertToCram = new VersionedToolCommand("sambamba",
            "sambamba",
            Versions.SAMBAMBA,
            "view",
            localBamPath,
            "-o",
            cramPath,
            "-t",
            Bash.allCpus(),
            "--format=cram",
            "-T",
            refGenome37.refGenomeFile());
    startupScript.addCommand(convertToCram);

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "cram"), executionFlags));

    // NOTE(review): custom(4, 6) is presumably (cores, memory in GB) - confirm against VirtualMachinePerformanceProfile.
    return VirtualMachineJobDefinition.builder()
            .name("cram")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .performanceProfile(VirtualMachinePerformanceProfile.custom(4, 6))
            .build();
}
use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.
the class LilacCtpacBatch method execute.
/**
 * Builds the VM job that runs Lilac for one sample of the {@code run_cptac_02} batch.
 *
 * <p>The input value is a comma-separated record; only its first field, the sample id, is read
 * here. The script downloads the Lilac jar, adds the per-sample commands, uploads the output to
 * the {@code lilac} path of the runtime bucket, and finally copies the run's {@code *.log} files
 * to a shared logs directory for the batch.
 *
 * @param inputs         bundle whose default entry carries the comma-separated batch record
 * @param runtimeBucket  runtime bucket that receives the uploaded results
 * @param commands       startup script to which the commands are appended
 * @param executionFlags runtime files handed to the output upload step
 * @return a job definition named {@code lilac}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue().split(",")[0];
    final String runDirectory = "run_cptac_02";

    // Fetch the pilot Lilac jar, then queue the commands for this sample.
    addLilacDownloadCommands(commands);
    addSampleCommands(descriptor, commands, runDirectory, sampleId);

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    // Additionally gather every run's log files under one common directory for convenience.
    final String sharedLogLocation = String.format("gs://%s/%s/logs/", LILAC_BATCH_BUCKET, runDirectory);
    commands.addCommand(() -> format("gsutil -m cp /data/output/*.log %s", sharedLogLocation));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.
the class LilacPanelBatch method execute.
/**
 * Builds the VM job that runs Lilac on a panel sample's non-UMI-deduplicated BAM.
 *
 * <p>The input value is a comma-separated record documented as {@code SampleId,ExpectedAlleles};
 * only the sample id (first field) is read here. The script downloads the Lilac jar, copies the
 * sample's BAM (and anything else sharing its name prefix) into the VM input directory, runs
 * Lilac against the V38 reference genome, and uploads the output to the {@code lilac} path of
 * the runtime bucket.
 *
 * @param inputs         bundle whose default entry carries the comma-separated batch record
 * @param runtimeBucket  runtime bucket that receives the uploaded results
 * @param commands       startup script to which the commands are appended
 * @param executionFlags runtime files handed to the output upload step
 * @return a job definition named {@code lilac} using a custom performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue().split(",")[0];

    // Fetch the pilot Lilac jar.
    addLilacDownloadCommands(commands);

    // The trailing wildcard in the copy picks up every object whose name starts with the BAM name.
    final String tumorBam = String.format("%s.non_umi_dedup.bam", sampleId);
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s* %s", PANEL_BAM_BUCKET, tumorBam, VmDirectories.INPUT));

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V38);

    // Assemble the Lilac command line; the tumor BAM is passed as the reference BAM and
    // results are written straight into the VM output directory.
    final StringJoiner lilacArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-reference_bam %s/%s", VmDirectories.INPUT, tumorBam))
            .add(String.format("-resource_dir %s/", VmDirectories.INPUT))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resourceFiles.version().toString()))
            .add(String.format("-output_dir %s", VmDirectories.OUTPUT))
            .add("-write_all_files")
            .add(String.format("-threads %s", Bash.allCpus()));

    // The jar sits directly in the tools directory rather than under a versioned lilac path.
    final String lilacJar = String.format("%s/%s", VmDirectories.TOOLS, LILAC_JAR);
    commands.addCommand(() -> format("java -Xmx%s -jar %s %s", MAX_HEAP, lilacJar, lilacArgs.toString()));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    // NOTE(review): custom(12, 32) is presumably (cores, memory in GB) - confirm.
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .performanceProfile(custom(12, 32))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.
the class LilacPcawgBatch method execute.
/**
 * Builds the VM job that runs Lilac for one sample of the {@code run_pcawg_02} batch.
 *
 * <p>The input value is a comma-separated record documented as {@code SampleId,ExpectedAlleles};
 * only the sample id (first field) is read here. After the Lilac jar download and the per-sample
 * commands, the output is uploaded to the {@code lilac} path of the runtime bucket and the
 * run's {@code *.log} files are copied to a shared logs directory for the batch.
 *
 * @param inputs         bundle whose default entry carries the comma-separated batch record
 * @param runtimeBucket  runtime bucket that receives the uploaded results
 * @param commands       startup script to which the commands are appended
 * @param executionFlags runtime files handed to the output upload step
 * @return a job definition named {@code lilac}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor batchRecord = inputs.get();
    final String[] fields = batchRecord.inputValue().split(",");
    final String sampleId = fields[0];
    final String runDirectory = "run_pcawg_02";

    // Step 1: fetch the pilot Lilac jar.
    addLilacDownloadCommands(commands);

    // Step 2: queue the commands for this sample.
    addSampleCommands(batchRecord, commands, runDirectory, sampleId);

    // Step 3: upload the output directory to the runtime bucket.
    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "lilac");
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    // Step 4: also copy the run log files to a single directory for convenience.
    final String commonLogDir = String.format("gs://%s/%s/logs/", LILAC_BATCH_BUCKET, runDirectory);
    commands.addCommand(() -> format("gsutil -m cp /data/output/*.log %s", commonLogDir));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.
the class PurpleRerun method execute.
/**
 * Builds the VM job that re-runs Purple for the biopsy named by the {@code biopsy} input.
 *
 * <p>Remote file locations are looked up through a {@code RemoteLocationsApi} created from that
 * input; the commands derived from those locations are appended to the script, followed by an
 * upload of the output to the {@code purple} path of the runtime bucket.
 *
 * @param inputs         bundle that must contain an entry keyed {@code biopsy}
 * @param runtimeBucket  runtime bucket that receives the uploaded results
 * @param commands       startup script to which the commands are appended
 * @param executionFlags runtime files handed to the output upload step
 * @return the Purple job definition built from the script and the default results directory
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final RemoteLocationsApi biopsyLocations = new RemoteLocationsApi(inputs.get("biopsy"));
    commands.addCommands(bashCommands(biopsyLocations));

    final GoogleStorageLocation uploadTarget = GoogleStorageLocation.of(runtimeBucket.name(), "purple");
    commands.addCommand(new OutputUpload(uploadTarget, executionFlags));

    return VirtualMachineJobDefinition.purple(commands, ResultsDirectory.defaultDirectory());
}
Aggregations