Search in sources :

Example 6 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class SageGermlineOld method execute.

/**
 * Builds the VM job definition for the legacy Sage germline batch operation.
 *
 * <p>The "biopsy" input is wrapped in a {@link RemoteLocationsApi} / {@link LocalLocations} pair,
 * the download commands produced by the local locations are appended to {@code commands}, and the
 * result of {@code VirtualMachineJobDefinition.sageGermlineCalling} is returned.
 *
 * @param inputs         bundle from which the "biopsy" descriptor is read
 * @param runtimeBucket  not used by this method
 * @param commands       startup script that receives the download commands
 * @param executionFlags not used by this method
 * @return the job definition created from {@code commands} and the default results directory
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Resolve the "biopsy" input into a local view of its remote file locations.
    final InputFileDescriptor biopsyDescriptor = inputs.get("biopsy");
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(biopsyDescriptor);
    final LocalLocations locations = new LocalLocations(remoteLocations);

    // None of the values below is read again in this method.
    // NOTE(review): presumably these getters register the files that generateDownloadCommands()
    // later emits — confirm against LocalLocations before removing any of them.
    final String tumorName = locations.getTumor();
    final String referenceName = locations.getReference();
    final String tumorBam = locations.getTumorAlignment();
    final String referenceBam = locations.getReferenceAlignment();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Queue the input downloads on the startup script.
    commands.addCommands(locations.generateDownloadCommands());

    final ResultsDirectory results = ResultsDirectory.defaultDirectory();
    return VirtualMachineJobDefinition.sageGermlineCalling(commands, results);
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) LocalLocations(com.hartwig.batch.api.LocalLocations) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 7 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class GcpSampleDataExtractor method extractSampleLocations.

/**
 * Looks up the remote locations of one sample in the "hmf-crunch" project and writes the
 * resulting {@code csvData()} text, followed by a line separator, to {@code mWriter}.
 *
 * <p>Any exception raised along the way is logged at severe level and not rethrown.
 *
 * @param sampleId identifier of the sample to look up
 */
private void extractSampleLocations(final String sampleId) {
    try {
        final RemoteLocationsApi remoteLocations = new RemoteLocationsApi("hmf-crunch", sampleId);
        final String csvLine = SampleLocationData.fromRemoteLocationsApi(sampleId, remoteLocations).csvData();
        mWriter.write(csvLine);
        mWriter.newLine();
    } catch (Exception e) {
        // Best-effort: report the failure for this sample and carry on.
        final String failure = String.format("failed to write to sample(%s) GCP locations data: %s", sampleId, e.toString());
        LOGGER.severe(failure);
    }
}
Also used : IOException(java.io.IOException) ParseException(org.apache.commons.cli.ParseException) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 8 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class GripssGermline method execute.

/**
 * Assembles the startup script and VM definition for a germline GRIPSS run.
 *
 * <p>The script copies the GRIPSS jar into the tools directory, downloads the inputs resolved via
 * {@link LocalLocations}, runs {@code GripssApplicationKt} followed by
 * {@code GripssHardFilterApplicationKt}, and finally uploads the output under "gripss" in the
 * runtime bucket.
 *
 * @param inputs         bundle whose default descriptor supplies the sample id and billed project
 * @param runtimeBucket  bucket whose name is used for the output upload location
 * @param startupScript  script to which all commands are appended
 * @param executionFlags runtime files passed through to the output upload
 * @return a job definition named "gripss" using a custom(8, 30) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor sampleDescriptor = inputs.get();
    final String sampleId = sampleDescriptor.inputValue();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(sampleDescriptor.billedProject(), sampleId);
    final LocalLocations localLocations = new LocalLocations(remoteLocations);
    final String referenceId = localLocations.getReference();
    final String gridssVcf = localLocations.getStructuralVariantsGridss();

    // Fetch the tool jar first, then the sample inputs.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommands(localLocations.generateDownloadCommands());

    // First pass: GRIPSS itself. The reference sample id is what gets passed as -tumor here.
    final String unfilteredVcf = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, referenceId);
    final StringJoiner callerArgs = new StringJoiner(" ")
            .add(String.format("-tumor %s", referenceId))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-breakpoint_hotspot %s", resources.knownFusionPairBedpe()))
            .add(String.format("-breakend_pon %s", resources.gridssBreakendPon()))
            .add(String.format("-breakpoint_pon %s", resources.gridssBreakpointPon()))
            .add(String.format("-pon_distance %d", 4))
            .add(String.format("-min_qual_break_end %d", 400))
            .add(String.format("-min_qual_rescue_mobile_element_insertion %d", 400))
            .add(String.format("-min_qual_break_point %d", 250))
            .add(String.format("-input_vcf %s", gridssVcf))
            .add(String.format("-output_vcf %s", unfilteredVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssApplicationKt %s", MAX_HEAP, VmDirectories.TOOLS, GRIPSS_JAR, callerArgs.toString()));

    // Second pass: hard filter, reading the VCF written by the first pass.
    final String filteredVcf = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, referenceId);
    final StringJoiner filterArgs = new StringJoiner(" ")
            .add(String.format("-input_vcf %s", unfilteredVcf))
            .add(String.format("-output_vcf %s", filteredVcf));
    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssHardFilterApplicationKt %s", MAX_HEAP, VmDirectories.TOOLS, GRIPSS_JAR, filterArgs.toString()));

    // Upload everything produced on the VM.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gripss"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gripss")
            .startupCommand(startupScript)
            .performanceProfile(custom(8, 30))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) LocalLocations(com.hartwig.batch.api.LocalLocations) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 9 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class TeloBatch method execute.

/**
 * Assembles the startup script and VM definition for a telo run over one sample.
 *
 * <p>The script downloads the tool via {@code downloadExperimentalVersion()}, then processes the
 * tumor alignment ("somatic") and the reference alignment ("germline") one after the other, and
 * finally uploads the output to the runtime bucket under the sample id.
 *
 * @param inputs         bundle providing "sampleId", an optional "specificChromosome", and the
 *                       default descriptor that supplies the billed project
 * @param runtimeBucket  bucket whose name is used for the output upload location
 * @param commands       script to which all commands are appended
 * @param executionFlags runtime files passed through to the output upload
 * @return a job definition named "telo" with 500 GB working disk and a custom(16, MEMORY_GB) profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get("sampleId").inputValue();

    // "specificChromosome" is optional: an IllegalArgumentException from the lookup is treated as "not supplied".
    Optional<String> chromosome;
    try {
        chromosome = Optional.of(inputs.get("specificChromosome").inputValue());
    } catch (IllegalArgumentException ignored) {
        chromosome = Optional.empty();
    }

    final InputFileDescriptor runDescriptor = inputs.get();
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi(runDescriptor.billedProject(), sampleId);

    // Fetch the telo tool (experimental build).
    commands.addCommand(downloadExperimentalVersion());

    // Reference genome resources.
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Tumor: download alignment + index, then run in "somatic" mode.
    final InputDownload tumorAlignment = new InputDownload(remoteLocations.getTumorAlignment());
    final InputDownload tumorAlignmentIndex = new InputDownload(remoteLocations.getTumorAlignmentIndex());
    commands.addCommand(tumorAlignment);
    commands.addCommand(tumorAlignmentIndex);
    commands.addCommand(makeTeloRunCommand(sampleId, "somatic", tumorAlignment.getLocalTargetPath(), resources.refGenomeFile(), chromosome));

    // Remove the tumor files again to free disk space before the reference files arrive.
    commands.addCommand(() -> format("rm -f %s", tumorAlignment.getLocalTargetPath()));
    commands.addCommand(() -> format("rm -f %s", tumorAlignmentIndex.getLocalTargetPath()));

    // Reference: download alignment + index, then run in "germline" mode.
    final InputDownload referenceAlignment = new InputDownload(remoteLocations.getReferenceAlignment());
    final InputDownload referenceAlignmentIndex = new InputDownload(remoteLocations.getReferenceAlignmentIndex());
    commands.addCommand(referenceAlignment);
    commands.addCommand(referenceAlignmentIndex);
    commands.addCommand(makeTeloRunCommand(sampleId, "germline", referenceAlignment.getLocalTargetPath(), resources.refGenomeFile(), chromosome));

    // Store output.
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), sampleId), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("telo")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(16, MEMORY_GB))
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Example 10 with RemoteLocationsApi

use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.

the class SageRerun method execute.

/**
 * Assembles the startup script and VM definition for re-running Sage (somatic) followed by Pave.
 *
 * <p>The script copies the Sage and Pave jars and the germline PON file onto the VM, copies the
 * tumor and reference alignment files resolved via {@link RemoteLocationsApi}, runs Sage, runs
 * Pave on the Sage VCF, and uploads the output under "sage" in the runtime bucket.
 *
 * @param inputs         bundle whose default descriptor supplies the sample id
 * @param runtimeBucket  bucket whose name is used for the output upload location
 * @param startupScript  script to which all commands are appended
 * @param executionFlags runtime files passed through to the output upload
 * @return a job definition named "sage" using a custom(24, 64) performance profile
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    final InputFileDescriptor sampleDescriptor = inputs.get();
    final String sampleId = sampleDescriptor.inputValue();
    final ResourceFiles resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Tool jars go to the tools directory, the PON file to the input directory.
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, PAVE_DIR, PAVE_JAR, VmDirectories.TOOLS));
    final String germlinePonFile = "SageGermlinePon.1000x.37.tsv.gz";
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_RESOURCE_BUCKET, SAGE_DIR, germlinePonFile, VmDirectories.INPUT));

    // Tumor and reference alignments; the trailing '*' on the copied path is a wildcard suffix match.
    final RemoteLocationsApi remoteLocations = new RemoteLocationsApi("hmf-crunch", sampleId);

    final String[] tumorCram = getCramFileData(remoteLocations.getTumorAlignment());
    final String tumorCramName = tumorCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String referenceId = remoteLocations.getReference();
    final String[] referenceCram = getCramFileData(remoteLocations.getReferenceAlignment());
    final String referenceCramName = referenceCram[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", referenceCram[CRAM_FULL_PATH], VmDirectories.INPUT));

    // Sage invocation.
    final String sageOutputVcf = String.format("%s/%s.sage.somatic.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner sageCli = new StringJoiner(" ")
            .add(String.format("-tumor %s", sampleId))
            .add(String.format("-tumor_bam %s/%s", VmDirectories.INPUT, tumorCramName))
            .add(String.format("-reference %s", referenceId))
            .add(String.format("-reference_bam %s/%s", VmDirectories.INPUT, referenceCramName))
            .add(String.format("-hotspots %s", resources.sageSomaticHotspots()))
            .add(String.format("-panel_bed %s", resources.sageSomaticCodingPanel()))
            .add(String.format("-high_confidence_bed %s", resources.giabHighConfidenceBed()))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resources.version().toString()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add(String.format("-out %s", sageOutputVcf))
            .add(String.format("-perf_warn_time 50"))
            // "-log_debug" is intentionally not passed.
            .add(String.format("-threads %s", Bash.allCpus()));
    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageCli.toString()));

    // Pave invocation: PON filtering and gene impact annotation on the Sage VCF.
    final String ponFilterSpec = "HOTSPOT:5:5;PANEL:2:5;UNKNOWN:2:0";
    final String paveOutputVcf = String.format("%s/%s.sage.somatic.pon.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);
    final StringJoiner paveCli = new StringJoiner(" ")
            .add(String.format("-sample %s", sampleId))
            .add(String.format("-vcf_file %s", sageOutputVcf))
            .add(String.format("-ref_genome %s", resources.refGenomeFile()))
            .add(String.format("-ref_genome_version %s", resources.version().toString()))
            .add(String.format("-driver_gene_panel %s", resources.driverGenePanel()))
            .add(String.format("-ensembl_data_dir %s", resources.ensemblDataCache()))
            .add(String.format("-pon_file %s/%s", VmDirectories.INPUT, germlinePonFile))
            .add(String.format("-pon_filters \"%s\"", ponFilterSpec))
            .add(String.format("-output_vcf_file %s", paveOutputVcf));
    final String paveJarPath = String.format("%s/%s", VmDirectories.TOOLS, PAVE_JAR);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJarPath, paveCli.toString()));

    // Upload everything produced on the VM.
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) StringJoiner(java.util.StringJoiner) RemoteLocationsApi(com.hartwig.batch.api.RemoteLocationsApi)

Aggregations

RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi): 11, InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor): 8, ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles): 8, OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload): 7, StringJoiner (java.util.StringJoiner): 6, LocalLocations (com.hartwig.batch.api.LocalLocations): 3, InputDownload (com.hartwig.pipeline.execution.vm.InputDownload): 2, SampleLocationData (com.hartwig.batch.utils.SampleLocationData): 1, BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand): 1, SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand): 1, Driver (com.hartwig.pipeline.calling.structural.gridss.stage.Driver): 1, GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation): 1, OutputFile (com.hartwig.pipeline.execution.vm.OutputFile): 1, ExportPathCommand (com.hartwig.pipeline.execution.vm.unix.ExportPathCommand): 1, SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput): 1, GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation): 1, IOException (java.io.IOException): 1, ParseException (org.apache.commons.cli.ParseException): 1