Search in sources :

Example 6 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

the class GridssRerun method execute.

/**
 * Assembles the startup script for a GRIDSS run over a tumor/reference CRAM pair.
 *
 * <p>The script exports the bwa and samtools paths, fetches both alignment files and their
 * indexes, converts them with {@code cramToBam} when the local name contains "cram", runs the
 * GRIDSS driver followed by annotation, copies the unfiltered VCF and its index to the
 * archive location and finally uploads the output to the "gridss" folder of the runtime bucket.
 *
 * @param inputs         named inputs: "set", "tumor_sample", "reference_sample", "tumor_cram",
 *                       "ref_cram", plus the default input used for the billed project
 * @param runtimeBucket  bucket whose name is the destination of the final output upload
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime files passed to the output upload
 * @return the structural-calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Resolve the reference resources and every named input.
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("reference_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor referenceCram = inputs.get("ref_cram");

    // NOTE(review): the four downloads below are constructed but never added to the script in
    // this method; they are kept because the lookups may have side effects - confirm.
    final InputFileDescriptor defaultInput = inputs.get();
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(defaultInput.billedProject(), tumorName);
    InputDownload tumorAlignmentDownload = new InputDownload(remoteLocations.getTumorAlignment());
    InputDownload tumorAlignmentIndexDownload = new InputDownload(remoteLocations.getTumorAlignmentIndex());
    InputDownload referenceAlignmentDownload = new InputDownload(remoteLocations.getReferenceAlignment());
    InputDownload referenceAlignmentIndexDownload = new InputDownload(remoteLocations.getReferenceAlignmentIndex());

    // Local file names; the BAM path is the local name with "cram" replaced by "bam".
    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final InputFileDescriptor referenceCramIndex = referenceCram.index();
    final String tumorLocal = localFilename(tumorCram);
    final String referenceLocal = localFilename(referenceCram);
    final String tumorBam = tumorLocal.replace("cram", "bam");
    final String referenceBam = referenceLocal.replace("cram", "bam");

    // GRIDSS driver followed by the annotation stage.
    final Driver gridssDriver = new Driver(resources, VmDirectories.outputFile(tumorName + ".assembly.bam"))
            .tumorSample(tumorName, tumorBam)
            .referenceSample(referenceName, referenceBam);
    final SubStageInputOutput gridssOutput =
            gridssDriver.andThen(new GridssAnnotation(resources, false)).apply(SubStageInputOutput.empty(tumorName));

    // Output files and where they are archived.
    final OutputFile vcf = gridssOutput.outputFile();
    final OutputFile vcfIndex = vcf.index(".tbi");
    final GoogleStorageLocation vcfArchive = remoteUnfilteredVcfArchivePath(setName, tumorName);
    final GoogleStorageLocation vcfIndexArchive = index(vcfArchive, ".tbi");

    // Tool paths first, then the downloads.
    commands.addCommand(new ExportPathCommand(new BwaCommand()));
    commands.addCommand(new ExportPathCommand(new SamtoolsCommand()));
    commands.addCommand(() -> tumorCram.toCommandForm(tumorLocal));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));
    commands.addCommand(() -> referenceCram.toCommandForm(referenceLocal));
    commands.addCommand(() -> referenceCramIndex.toCommandForm(localFilename(referenceCramIndex)));

    // Conversion is only scheduled when the replacement actually changed the file name.
    final boolean convertTumor = !tumorLocal.equals(tumorBam);
    if (convertTumor) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean convertReference = !referenceLocal.equals(referenceBam);
    if (convertReference) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    // Run GRIDSS, archive the VCF and its index, then upload everything else.
    commands.addCommands(gridssOutput.bash());
    commands.addCommand(() -> vcf.copyToRemoteLocation(vcfArchive));
    commands.addCommand(() -> vcfIndex.copyToRemoteLocation(vcfIndexArchive));
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gridss"), executionFlags));

    return VirtualMachineJobDefinition.structuralCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ExportPathCommand(com.hartwig.pipeline.execution.vm.unix.ExportPathCommand) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) Driver(com.hartwig.pipeline.calling.structural.gridss.stage.Driver) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) GridssAnnotation(com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi) BwaCommand(com.hartwig.pipeline.calling.command.BwaCommand) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) SamtoolsCommand(com.hartwig.pipeline.calling.command.SamtoolsCommand) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 7 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

the class GripssPurpleLinx method execute.

// private static String PON_BP = "gridss_pon_breakpoint.37.sorted.bedpe";
// private static String PON_BE = "gridss_pon_single_breakend.37.sorted.bed";
/**
 * Builds the "gpl" job: downloads the Gripss, Purple and Linx jars, schedules
 * {@code runSample} for every sample id, uploads the output and copies selected result
 * files into per-tool folders under {@code COMBINED_OUTPUT_DIR}.
 *
 * <p>The default input value is split on ",": the first field is a ";"-separated list of
 * sample ids, the optional second field is handed to
 * {@code SampleLocationData.loadSampleLocations}.
 *
 * @param inputs         bundle whose default input carries the sample list
 * @param runtimeBucket  bucket whose name is the destination of the output upload
 * @param startupScript  script the commands are appended to
 * @param executionFlags runtime files passed to the output upload
 * @return a job definition named "gpl" using a custom(12, 32) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor batchInput = inputs.get();
    final String[] fields = batchInput.inputValue().split(",");
    final List<String> sampleIds = Arrays.stream(fields[0].split(";")).collect(Collectors.toList());

    // Sample locations are optional; an empty map is used when no second field is given.
    final Map<String, SampleLocationData> sampleLocations = fields.length > 1
            ? SampleLocationData.loadSampleLocations(fields[1], sampleIds)
            : Maps.newHashMap();

    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Fetch the tool jars into the VM tools directory.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, PURPLE_DIR, PURPLE_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, LINX_DIR, LINX_JAR, VmDirectories.TOOLS));

    // One runSample call per requested sample.
    for (final String id : sampleIds) {
        runSample(startupScript, resourceFiles, id, sampleLocations);
    }

    // Upload the full output directory.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gpl"), executionFlags));

    // Per-tool destinations for the key output files.
    final String gripssTarget = String.format("%s/gripss/", COMBINED_OUTPUT_DIR);
    final String linxTarget = String.format("%s/linx/", COMBINED_OUTPUT_DIR);
    final String purpleTarget = String.format("%s/purple/", COMBINED_OUTPUT_DIR);
    final String paveTarget = String.format("%s/pave/", COMBINED_OUTPUT_DIR);

    // Gripss and Pave VCFs.
    startupScript.addCommand(() -> format("gsutil -m cp %s/*gripss*vcf* %s", VmDirectories.OUTPUT, gripssTarget));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.somatic.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveTarget));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*sage.germline.filtered.pave.vcf.gz* %s", VmDirectories.OUTPUT, paveTarget));

    // Files selected for subsequent Linx runs and/or comparison using Compar.
    startupScript.addCommand(() -> format("gsutil -m cp %s/*linx*.tsv %s", VmDirectories.OUTPUT, linxTarget));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*purple* %s", VmDirectories.OUTPUT, purpleTarget));
    startupScript.addCommand(() -> format("gsutil -m cp %s/*driver.catalog* %s", VmDirectories.OUTPUT, purpleTarget));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gpl")
            .startupCommand(startupScript)
            .performanceProfile(custom(12, 32))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) SampleLocationData(com.hartwig.batch.utils.SampleLocationData)

Example 8 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

the class SageGermline method execute.

/**
 * Builds the "sage" job for a germline SAGE run followed by Pave annotation.
 *
 * <p>The default input value is used as the sample id. The script downloads the SAGE jar and
 * both CRAM files (tumor and reference, looked up through {@code RemoteLocationsApi}), runs
 * SAGE in panel-only mode, runs Pave on the resulting VCF and uploads the output to the
 * "sage" folder of the runtime bucket.
 *
 * @param inputs         bundle whose default input value is the sample id
 * @param runtimeBucket  bucket whose name is the destination of the output upload
 * @param startupScript  script the commands are appended to
 * @param executionFlags runtime files passed to the output upload
 * @return a job definition named "sage" using a custom(24, 64) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor batchInput = inputs.get();
    final String sampleId = batchInput.inputValue();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Fetch the SAGE jar into the VM tools directory.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));

    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi("hmf-crunch", sampleId);

    // Tumor CRAM: the trailing wildcard copies every object sharing the CRAM path prefix.
    final String[] tumorCram = getCramFileData(remoteLocations.getTumorAlignment());
    final String tumorCramName = tumorCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    // Reference CRAM, fetched the same way.
    final String referenceId = remoteLocations.getReference();
    final String[] referenceCram = getCramFileData(remoteLocations.getReferenceAlignment());
    final String referenceCramName = referenceCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", referenceCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    // Local paths of the downloaded CRAMs and of the SAGE output.
    final String tumorCramPath = String.format("%s/%s", VmDirectories.INPUT, tumorCramName);
    final String referenceCramPath = String.format("%s/%s", VmDirectories.INPUT, referenceCramName);
    final String sageVcf = String.format("%s/%s.sage.germline.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // Note the switch on samples: the reference is passed as -tumor and the tumor as -reference.
    final StringJoiner sageArgs = new StringJoiner(" ")
            .add(String.format("-tumor %s", referenceId))
            .add(String.format("-tumor_bam %s", referenceCramPath))
            .add(String.format("-reference %s", sampleId))
            .add(String.format("-reference_bam %s", tumorCramPath))
            .add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resourceFiles.version().toString()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add(String.format("-hotspots %s", resourceFiles.sageGermlineHotspots()))
            .add(String.format("-panel_bed %s", resourceFiles.sageGermlineCodingPanel()))
            .add("-panel_only")
            .add("-hotspot_min_tumor_qual 50")
            .add("-panel_min_tumor_qual 75")
            .add("-hotspot_max_germline_vaf 100")
            .add("-hotspot_max_germline_rel_raw_base_qual 100")
            .add("-panel_max_germline_vaf 100")
            .add("-panel_max_germline_rel_raw_base_qual 100")
            .add("-mnv_filter_enabled false")
            .add(String.format("-out %s", sageVcf))
            .add(String.format("-threads %s", Bash.allCpus()));
    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageArgs.toString()));

    // Pave germline annotation of the SAGE VCF.
    final String paveJarPath = String.format("%s/pave/%s/pave.jar", VmDirectories.TOOLS, Versions.PAVE);
    final String paveVcf = String.format("%s/%s.sage.germline.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner paveArgs = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageVcf))
            .add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()))
            .add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()))
            .add("-filter_pass")
            .add(String.format("-output_vcf_file %s", paveVcf));
    startupScript.addCommand(() -> format("java -jar %s %s", paveJarPath, paveArgs.toString()));

    // Upload the output directory.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 9 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

the class SageGermlineOld method execute.

/**
 * Builds the job definition for the SageGermlineOld batch operation.
 *
 * <p>As written, the method only resolves the "biopsy" input through
 * {@code RemoteLocationsApi} / {@code LocalLocations}, appends the resulting download
 * commands to the script and returns a SAGE germline calling job definition. No SAGE command
 * is added here, and {@code runtimeBucket} and {@code executionFlags} are not used.
 *
 * @param inputs         bundle that must contain a "biopsy" input
 * @param runtimeBucket  unused in this method
 * @param commands       startup script the download commands are appended to
 * @param executionFlags unused in this method
 * @return the SAGE germline calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Inputs
    final InputFileDescriptor biopsy = inputs.get("biopsy");
    final LocalLocations localInput = new LocalLocations(new RemoteLocationsApi(biopsy));
    // NOTE(review): the four values below are never read afterwards. The getter calls may exist
    // to register these files with localInput before generateDownloadCommands() is invoked -
    // confirm against LocalLocations before removing or reordering them.
    final String tumorSampleName = localInput.getTumor();
    final String referenceSampleName = localInput.getReference();
    final String tumorAlignment = localInput.getTumorAlignment();
    final String referenceAlignment = localInput.getReferenceAlignment();
    // NOTE(review): resourceFiles is also unused in this method.
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    // Download Inputs
    commands.addCommands(localInput.generateDownloadCommands());
    return VirtualMachineJobDefinition.sageGermlineCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) LocalLocations(com.hartwig.batch.api.LocalLocations) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 10 with InputFileDescriptor

use of com.hartwig.batch.input.InputFileDescriptor in project pipeline5 by hartwigmedical.

the class SageRerunOld method execute.

/**
 * Assembles the startup script for a somatic SAGE re-run on a tumor/reference CRAM pair.
 *
 * <p>The script downloads both alignment files with their indexes (plus an optional "rna"
 * BAM), converts CRAM to BAM when {@code CONVERT_TO_BAM} changes the file name, runs SAGE
 * followed by somatic post-processing, copies the filtered and unfiltered VCFs, their indexes
 * and the BQR files to the archive directory of the set, and finally uploads the output to
 * the "sage" folder of the runtime bucket.
 *
 * @param inputs         named inputs: "set", "tumor_sample", "ref_sample", "tumor_cram",
 *                       "ref_cram" and optionally "rna"
 * @param runtimeBucket  bucket whose name is the destination of the final output upload
 * @param commands       startup script the commands are appended to
 * @param executionFlags runtime files passed to the output upload
 * @return the SAGE somatic calling job definition wrapping {@code commands}
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Named inputs.
    final String setName = inputs.get("set").inputValue();
    final String tumorName = inputs.get("tumor_sample").inputValue();
    final String referenceName = inputs.get("ref_sample").inputValue();
    final InputFileDescriptor tumorCram = inputs.get("tumor_cram");
    final InputFileDescriptor referenceCram = inputs.get("ref_cram");
    final InputFileDescriptor tumorCramIndex = tumorCram.index();
    final InputFileDescriptor referenceCramIndex = referenceCram.index();

    // Local file names; the BAM name differs from the download only when conversion is enabled.
    final String tumorLocal = localFilename(tumorCram);
    final String referenceLocal = localFilename(referenceCram);
    final String tumorBam = CONVERT_TO_BAM ? tumorLocal.replace("cram", "bam") : tumorLocal;
    final String referenceBam = CONVERT_TO_BAM ? referenceLocal.replace("cram", "bam") : referenceLocal;
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Tumor alignment and index.
    commands.addCommand(() -> tumorCram.toCommandForm(tumorLocal));
    commands.addCommand(() -> tumorCramIndex.toCommandForm(localFilename(tumorCramIndex)));

    // Reference alignment and index.
    commands.addCommand(() -> referenceCram.toCommandForm(referenceLocal));
    commands.addCommand(() -> referenceCramIndex.toCommandForm(localFilename(referenceCramIndex)));

    final SageCommandBuilder sageBuilder = new SageCommandBuilder(resources)
            .addReference(referenceName, referenceBam)
            .addTumor(tumorName, tumorBam);
    if (PANEL_ONLY) {
        sageBuilder.panelOnly();
    }

    // Optional RNA BAM: downloaded and registered as an additional reference.
    if (inputs.contains("rna")) {
        final InputFileDescriptor rnaBam = inputs.get("rna");
        final InputFileDescriptor rnaBamIndex = rnaBam.index();
        final String rnaLocal = localFilename(rnaBam);

        commands.addCommand(() -> rnaBam.toCommandForm(rnaLocal));
        commands.addCommand(() -> rnaBamIndex.toCommandForm(localFilename(rnaBamIndex)));

        sageBuilder.addReference(referenceName + "NA", rnaLocal);
    }

    // Conversion is only scheduled when the BAM name differs from the downloaded file.
    final boolean convertTumor = !tumorLocal.equals(tumorBam);
    if (convertTumor) {
        commands.addCommands(cramToBam(tumorLocal));
    }
    final boolean convertReference = !referenceLocal.equals(referenceBam);
    if (convertReference) {
        commands.addCommands(cramToBam(referenceLocal));
    }

    // SAGE followed by somatic post-processing.
    final SageApplication sage = new SageApplication(sageBuilder);
    final SageSomaticPostProcess postProcess = new SageSomaticPostProcess(tumorName, resources);
    final SubStageInputOutput sageResult = sage.andThen(postProcess).apply(SubStageInputOutput.empty(tumorName));
    commands.addCommands(sageResult.bash());

    // Archive the filtered output, the unfiltered output (SAGE stage alone) and their indexes.
    final GoogleStorageLocation archive = sageArchiveDirectory(setName);
    final OutputFile filteredVcf = sageResult.outputFile();
    final OutputFile filteredVcfIndex = filteredVcf.index(".tbi");
    final OutputFile unfilteredVcf = sage.apply(SubStageInputOutput.empty(tumorName)).outputFile();
    final OutputFile unfilteredVcfIndex = unfilteredVcf.index(".tbi");
    commands.addCommand(() -> filteredVcf.copyToRemoteLocation(archive));
    commands.addCommand(() -> filteredVcfIndex.copyToRemoteLocation(archive));
    commands.addCommand(() -> unfilteredVcf.copyToRemoteLocation(archive));
    commands.addCommand(() -> unfilteredVcfIndex.copyToRemoteLocation(archive));

    // BQR files for both samples.
    commands.addCommand(() -> bqrFile(tumorName, "png").copyToRemoteLocation(archive));
    commands.addCommand(() -> bqrFile(tumorName, "tsv").copyToRemoteLocation(archive));
    commands.addCommand(() -> bqrFile(referenceName, "png").copyToRemoteLocation(archive));
    commands.addCommand(() -> bqrFile(referenceName, "tsv").copyToRemoteLocation(archive));

    // Upload the output directory.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return VirtualMachineJobDefinition.sageSomaticCalling(commands, ResultsDirectory.defaultDirectory());
}
Also used : OutputFile(com.hartwig.pipeline.execution.vm.OutputFile) ImmutableOutputFile(com.hartwig.pipeline.execution.vm.ImmutableOutputFile) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) SageSomaticPostProcess(com.hartwig.pipeline.calling.sage.SageSomaticPostProcess) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) SageCommandBuilder(com.hartwig.pipeline.calling.sage.SageCommandBuilder) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation) SageApplication(com.hartwig.pipeline.calling.sage.SageApplication)

Aggregations

InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor)36 OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload)35 ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles)23 StringJoiner (java.util.StringJoiner)12 RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi)8 GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation)7 VersionedToolCommand (com.hartwig.pipeline.calling.command.VersionedToolCommand)5 RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion)5 CopyLogToOutput (com.hartwig.pipeline.execution.vm.CopyLogToOutput)4 ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles)4 SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput)4 File (java.io.File)4 BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand)3 SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand)3 OutputFile (com.hartwig.pipeline.execution.vm.OutputFile)3 ExportPathCommand (com.hartwig.pipeline.execution.vm.unix.ExportPathCommand)3 LocalLocations (com.hartwig.batch.api.LocalLocations)2 SageApplication (com.hartwig.pipeline.calling.sage.SageApplication)2 SageCommandBuilder (com.hartwig.pipeline.calling.sage.SageCommandBuilder)2 GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation)2