Search in sources :

Example 31 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

The execute method of the class SambambaCramaBam.

/**
 * Assembles the VM job that turns the single input BAM into a CRAM with sambamba.
 *
 * Three commands are appended to the startup script, in order: the command returned by the
 * input descriptor's {@code toCommandForm} for the local input path (presumably the download
 * of the BAM -- confirm against InputFileDescriptor), a {@code sambamba view} call writing
 * CRAM output against the 37 reference genome, and an upload of the output to the "cram"
 * location of the runtime bucket.
 *
 * @param inputs         bundle from which the single input descriptor is read
 * @param bucket         runtime bucket whose name is used as the upload target
 * @param startupScript  script that receives the commands
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named "cram" with a custom (4, 6) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket bucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor bamDescriptor = inputs.get();

    // Only the file name of the input value matters for the paths on the VM.
    final String bamFileName = new File(bamDescriptor.inputValue()).getName();
    final String cramPath = VmDirectories.outputFile(bamFileName.replaceAll("\\.bam$", ".cram"));
    final String localBamPath = String.format("%s/%s", VmDirectories.INPUT, bamFileName);

    startupScript.addCommand(() -> bamDescriptor.toCommandForm(localBamPath));

    final RefGenome37ResourceFiles refGenome = new RefGenome37ResourceFiles();
    startupScript.addCommand(new VersionedToolCommand(
            "sambamba",
            "sambamba",
            Versions.SAMBAMBA,
            "view",
            localBamPath,
            "-o",
            cramPath,
            "-t",
            Bash.allCpus(),
            "--format=cram",
            "-T",
            refGenome.refGenomeFile()));

    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "cram"), executionFlags));

    return VirtualMachineJobDefinition.builder()
            .name("cram")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .performanceProfile(VirtualMachinePerformanceProfile.custom(4, 6))
            .build();
}
Also used : OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenome37ResourceFiles(com.hartwig.pipeline.resource.RefGenome37ResourceFiles) File(java.io.File) VersionedToolCommand(com.hartwig.pipeline.calling.command.VersionedToolCommand)

Example 32 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

The execute method of the class LilacCtpacBatch.

/**
 * Assembles the Lilac VM job for one sample of the "run_cptac_02" run.
 *
 * The sample id is the first comma-separated field of the input value. After the download and
 * per-sample commands (both added by helpers defined elsewhere in the class) the output is
 * uploaded to the "lilac" location of the runtime bucket, and the log files under
 * /data/output are additionally copied to a shared logs directory for the run.
 *
 * @param inputs         bundle from which the single input descriptor is read
 * @param runtimeBucket  runtime bucket whose name is used as the upload target
 * @param commands       startup script that receives the commands
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named "lilac"
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();

    // Only the leading field of the comma-separated input value is used here.
    final String[] fields = descriptor.inputValue().split(",");
    final String sample = fields[0];
    final String runDir = "run_cptac_02";

    addLilacDownloadCommands(commands);
    addSampleCommands(descriptor, commands, runDir, sample);

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    // Gather the run's log files in one shared directory for convenience.
    final String sharedLogDir = String.format("gs://%s/%s/logs/", LILAC_BATCH_BUCKET, runDir);
    commands.addCommand(() -> format("gsutil -m cp /data/output/*.log %s", sharedLogDir));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor)

Example 33 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

The execute method of the class LilacPanelBatch.

/**
 * Assembles the Lilac VM job for one panel sample.
 *
 * The input value is documented as "SampleId,ExpectedAlleles"; only the sample id (the first
 * field) is read here. The job copies every object matching the sample's
 * "non_umi_dedup.bam" prefix from the panel BAM bucket into the VM input directory, runs the
 * Lilac jar from the tools directory against the V38 resource files, and uploads the output to
 * the "lilac" location of the runtime bucket.
 *
 * @param inputs         bundle from which the single input descriptor is read
 * @param runtimeBucket  runtime bucket whose name is used as the upload target
 * @param commands       startup script that receives the commands
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named "lilac" with a custom (12, 32) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();
    final String[] fields = descriptor.inputValue().split(",");
    final String sample = fields[0];

    // Lilac download commands come from a helper defined elsewhere in the class.
    addLilacDownloadCommands(commands);

    // The trailing '*' in the copy command matches the BAM plus anything sharing its prefix.
    final String bamName = sample + ".non_umi_dedup.bam";
    commands.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s* %s", PANEL_BAM_BUCKET, bamName, VmDirectories.INPUT));

    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V38);

    // Space-separated Lilac command-line arguments, in the order they are passed to the jar.
    final String lilacArguments = String.join(" ",
            "-sample " + sample,
            "-reference_bam " + VmDirectories.INPUT + "/" + bamName,
            "-resource_dir " + VmDirectories.INPUT + "/",
            "-ref_genome " + resources.refGenomeFile(),
            "-ref_genome_version " + resources.version().toString(),
            "-output_dir " + VmDirectories.OUTPUT,
            "-write_all_files",
            "-threads " + Bash.allCpus());

    final String jarPath = String.format("%s/%s", VmDirectories.TOOLS, LILAC_JAR);
    commands.addCommand(() -> format("java -Xmx%s -jar %s %s", MAX_HEAP, jarPath, lilacArguments));

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .performanceProfile(custom(12, 32))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner)

Example 34 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

The execute method of the class LilacPcawgBatch.

/**
 * Assembles the Lilac VM job for one sample of the "run_pcawg_02" run.
 *
 * The input value is documented as "SampleId,ExpectedAlleles"; only the sample id (the first
 * field) is read here. After the download and per-sample commands (both added by helpers
 * defined elsewhere in the class) the output is uploaded to the "lilac" location of the
 * runtime bucket, and the log files under /data/output are additionally copied to a shared
 * logs directory for the run.
 *
 * @param inputs         bundle from which the single input descriptor is read
 * @param runtimeBucket  runtime bucket whose name is used as the upload target
 * @param commands       startup script that receives the commands
 * @param executionFlags runtime files handed to the output upload
 * @return a job definition named "lilac"
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor descriptor = inputs.get();

    final String[] fields = descriptor.inputValue().split(",");
    final String sample = fields[0];
    final String runDir = "run_pcawg_02";

    addLilacDownloadCommands(commands);
    addSampleCommands(descriptor, commands, runDir, sample);

    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "lilac"), executionFlags));

    // Gather the run's log files in one shared directory for convenience.
    final String sharedLogDir = String.format("gs://%s/%s/logs/", LILAC_BATCH_BUCKET, runDir);
    commands.addCommand(() -> format("gsutil -m cp /data/output/*.log %s", sharedLogDir));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("lilac")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor)

Example 35 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

The execute method of the class PurpleRerun.

/**
 * Assembles the Purple rerun VM job for the input registered under the key "biopsy".
 *
 * The bash commands come from {@code bashCommands} (defined elsewhere in the class), which is
 * given a {@link RemoteLocationsApi} built from the biopsy descriptor. The output is then
 * uploaded to the "purple" location of the runtime bucket.
 *
 * @param inputs         bundle from which the "biopsy" descriptor is read
 * @param runtimeBucket  runtime bucket whose name is used as the upload target
 * @param commands       startup script that receives the commands
 * @param executionFlags runtime files handed to the output upload
 * @return the job definition produced by {@code VirtualMachineJobDefinition.purple}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final InputFileDescriptor biopsyDescriptor = inputs.get("biopsy");
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(biopsyDescriptor);

    commands.addCommands(bashCommands(remoteLocations));
    commands.addCommand(
            new OutputUpload(
                    GoogleStorageLocation.of(runtimeBucket.name(), "purple"),
                    executionFlags));

    return VirtualMachineJobDefinition.purple(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Aggregations

InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor): 36, OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload): 35, ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles): 23, StringJoiner (java.util.StringJoiner): 12, RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi): 8, GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation): 7, VersionedToolCommand (com.hartwig.pipeline.calling.command.VersionedToolCommand): 5, RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion): 5, CopyLogToOutput (com.hartwig.pipeline.execution.vm.CopyLogToOutput): 4, ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles): 4, SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput): 4, File (java.io.File): 4, BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand): 3, SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand): 3, OutputFile (com.hartwig.pipeline.execution.vm.OutputFile): 3, ExportPathCommand (com.hartwig.pipeline.execution.vm.unix.ExportPathCommand): 3, LocalLocations (com.hartwig.batch.api.LocalLocations): 2, SageApplication (com.hartwig.pipeline.calling.sage.SageApplication): 2, SageCommandBuilder (com.hartwig.pipeline.calling.sage.SageCommandBuilder): 2, GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation): 2