Search in sources :

Example 11 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class RnaIsofoxUnmapped method execute.

/**
 * Builds the VM job that runs Isofox with the {@code UNMAPPED_READS} function for one RNA sample.
 *
 * <p>The batch input value is split on commas and must contain at least two fields; the sample ID is
 * taken from column {@code COL_SAMPLE_ID}. The startup script copies the sample's BAM and BAM index
 * from the V37 RNA cohort "samples" directory, copies the Isofox jar, runs Isofox from the VM output
 * directory and finally uploads the output to the "isofox" location of the runtime bucket.
 *
 * @param inputs batch inputs; the single input value is expected in the form {@code SampleId,ReadLength}
 * @param bucket runtime bucket whose name is used as the upload target
 * @param startupScript script that the generated commands are appended to
 * @param executionFlags runtime files passed on to the output upload
 * @return the job definition, or {@code null} when the input value has fewer than two comma-separated fields
 */
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String batchInputs = descriptor.inputValue();
    final String[] batchItems = batchInputs.split(",");
    if (batchItems.length < 2) {
        // println so the message is newline-terminated and does not run into subsequent console output
        System.out.println(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
        return null;
    }
    final String sampleId = batchItems[COL_SAMPLE_ID];
    // the reference genome version is fixed to V37 for this job
    final RefGenomeVersion refGenomeVersion = V37;
    final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
    final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");
    // copy down BAM and index file for this sample
    final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
    final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));
    // copy down the executable
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));
    // assemble the space-separated Isofox argument list
    final StringJoiner isofoxArgs = new StringJoiner(" ");
    isofoxArgs.add(String.format("-sample %s", sampleId));
    isofoxArgs.add("-functions UNMAPPED_READS");
    isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
    isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
    isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    final String threadCount = Bash.allCpus();
    isofoxArgs.add(String.format("-threads %s", threadCount));
    // run Isofox
    startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs));
    // upload the results
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
    return ImmutableVirtualMachineJobDefinition.builder()
            .name("rna-isofox")
            .startupCommand(startupScript)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB)
            .build();
}
Also used : ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) RefGenomeVersion(com.hartwig.pipeline.resource.RefGenomeVersion) StringJoiner(java.util.StringJoiner)

Example 12 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class AmberRerunTumorOnly method execute.

/**
 * Queues the startup commands for an AMBER rerun that is given only a tumor sample.
 *
 * <p>Commands are added in this order: download of the "tumor_cram" input and of its index, the
 * AMBER command built against the V37 resource files, a copy of the run log to "run.log", and an
 * upload of the output to the AMBER archive directory of the given set.
 *
 * @param inputs batch inputs; the keys "set", "tumor_sample" and "tumor_cram" are read
 * @param runtimeBucket not used by this method
 * @param commands startup script that the commands are appended to
 * @param executionFlags supplies the log location that is copied to the output
 * @return the AMBER job definition using the default results directory
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Resolve the batch inputs
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final InputFileDescriptor tumorRemote = inputs.get("tumor_cram");
    final InputFileDescriptor tumorIndexRemote = tumorRemote.index();
    final String tumorLocal = localFilename(tumorRemote);

    // Stage the tumor file and its index on the VM
    commands.addCommand(() -> tumorRemote.toCommandForm(tumorLocal));
    commands.addCommand(() -> tumorIndexRemote.toCommandForm(localFilename(tumorIndexRemote)));

    // Run AMBER against the V37 resources
    final ResourceFiles v37Resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    commands.addCommand(() -> AmberCommandBuilder.newBuilder(v37Resources)
            .tumor(tumorSample, tumorLocal)
            .build()
            .asBash());

    // Keep the run log with the results, then upload everything to the archive
    final GoogleStorageLocation archiveTarget = amberArchiveDirectory(setName);
    commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
    commands.addCommand(new OutputUpload(archiveTarget));

    return VirtualMachineJobDefinition.amber(commands, ResultsDirectory.defaultDirectory());
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) InputFileDescriptor(com.hartwig.batch.input.InputFileDescriptor) CopyLogToOutput(com.hartwig.pipeline.execution.vm.CopyLogToOutput) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 13 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class CobaltMigration method execute.

/**
 * Queues the startup commands for a COBALT migration of one set.
 *
 * <p>Commands are added in this order: copy of the set's existing COBALT archive input directory,
 * the {@link CobaltMigrationCommand} built against the V37 resource files, a copy of the run log to
 * "run.log", and an upload of the output to the set's COBALT archive output directory.
 *
 * @param inputs batch inputs; the keys "set", "tumor_sample" and "ref_sample" are read
 * @param runtimeBucket not used by this method
 * @param commands startup script that the commands are appended to
 * @param executionFlags supplies the log location that is copied to the output
 * @return the COBALT job definition using the default results directory
 */
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Resolve the batch inputs
    final String setName = inputs.get("set").inputValue();
    final String tumorSample = inputs.get("tumor_sample").inputValue();
    final String referenceSample = inputs.get("ref_sample").inputValue();
    final GoogleStorageLocation archiveSource = cobaltArchiveDirectoryInput(setName);

    // Fetch the previously archived files
    commands.addCommand(() -> copyInputCommand(archiveSource));

    // Run the migration against the V37 resources
    final ResourceFiles v37Resources = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
    commands.addCommand(() -> new CobaltMigrationCommand(v37Resources, referenceSample, tumorSample).asBash());

    // Keep the run log with the results, then upload everything to the archive
    final GoogleStorageLocation archiveTarget = cobaltArchiveDirectoryOutput(setName);
    commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
    commands.addCommand(new OutputUpload(archiveTarget));

    return VirtualMachineJobDefinition.cobalt(commands, ResultsDirectory.defaultDirectory());
}
Also used : ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) CobaltMigrationCommand(com.hartwig.pipeline.tertiary.cobalt.CobaltMigrationCommand) CopyLogToOutput(com.hartwig.pipeline.execution.vm.CopyLogToOutput) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 14 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class BwaAligner method run.

/**
 * Produces the alignment output for a single sample.
 *
 * <p>If the sample already carries a BAM location, that location is split into bucket and path and
 * returned with status {@link PipelineStatus#PROVIDED}; no VM jobs are run. Otherwise the sample is
 * uploaded, one alignment job per lane is submitted to {@code executorService}, and, when all lanes
 * complete successfully, a merge/mark-duplicates job is run on the per-lane BAMs. If any lane does
 * not complete successfully the output has status {@link PipelineStatus#FAILED}.
 *
 * <p>The executor service is shut down once lane processing has been attempted, whether it finished
 * normally or threw, so pool threads are not left running after a failure.
 *
 * @param metadata run metadata of the sample to align
 * @return the alignment output describing status, BAM location and report components
 * @throws Exception if any step of the upload, lane alignment or merge fails with an exception
 */
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
    StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
    RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
    Sample sample = sampleSource.sample(metadata);
    if (sample.bam().isPresent()) {
        // A BAM was provided with the sample: split "gs://bucket/path" into bucket and path and skip alignment.
        // NOTE(review): this path returns without trace.stop() or executorService.shutdown() - confirm that is intended.
        String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
        int firstSlash = noPrefix.indexOf("/");
        String bucket = noPrefix.substring(0, firstSlash);
        String path = noPrefix.substring(firstSlash + 1);
        return AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.PROVIDED).maybeAlignments(GoogleStorageLocation.of(bucket, path)).build();
    }
    try {
        final ResourceFiles resourceFiles = buildResourceFiles(arguments);
        sampleUpload.run(sample, rootBucket);
        List<Future<PipelineStatus>> futures = new ArrayList<>();
        List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
        List<ReportComponent> laneLogComponents = new ArrayList<>();
        List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
        // Submit one alignment job per lane, each with its own runtime bucket and startup script
        for (Lane lane : sample.lanes()) {
            RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
            BashStartupScript bash = BashStartupScript.of(laneBucket.name());
            InputDownload first = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
            InputDownload second = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
            bash.addCommand(first).addCommand(second);
            bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
            SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(),
                    resourceFiles.refGenomeFile(),
                    first.getLocalTargetPath(),
                    second.getLocalTargetPath(),
                    metadata.sampleName(),
                    lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
            perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
            bash.addCommands(alignment.bash()).addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
            futures.add(executorService.submit(() -> runWithRetries(metadata, laneBucket, VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
            laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
            laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
        }
        AlignmentOutput output;
        if (lanesSuccessfullyComplete(futures)) {
            // Download every per-lane BAM, then merge and mark duplicates in a single job on the root bucket
            List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
            BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
            laneBams.forEach(mergeMarkdupsBash::addCommand);
            SubStageInputOutput merged = new MergeMarkDups(laneBams.stream()
                    .map(InputDownload::getLocalTargetPath)
                    .filter(path -> path.endsWith("bam"))
                    .collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
            mergeMarkdupsBash.addCommands(merged.bash());
            mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
            PipelineStatus status = runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
            ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder()
                    .sample(metadata.sampleName())
                    .status(status)
                    .maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName())))
                    .addAllReportComponents(laneLogComponents)
                    .addAllFailedLogLocations(laneFailedLogs)
                    .addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE))
                    .addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
            if (!arguments.outputCram()) {
                // The BAM and its index are only added as report components and datatypes when CRAM output is disabled.
                // NOTE(review): the index report component uses bai(bam(sampleName)) but the index archive path
                // uses bai(sampleName) - confirm whether these are meant to name the same file.
                outputBuilder.addReportComponents(
                        new SingleFileComponent(rootBucket,
                                Aligner.NAMESPACE,
                                Folder.from(metadata),
                                bam(metadata.sampleName()),
                                bam(metadata.sampleName()),
                                resultsDirectory),
                        new SingleFileComponent(rootBucket,
                                Aligner.NAMESPACE,
                                Folder.from(metadata),
                                bai(bam(metadata.sampleName())),
                                bai(bam(metadata.sampleName())),
                                resultsDirectory))
                        .addDatatypes(
                                new AddDatatype(DataType.ALIGNED_READS,
                                        metadata.barcode(),
                                        new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))),
                                new AddDatatype(DataType.ALIGNED_READS_INDEX,
                                        metadata.barcode(),
                                        new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(metadata.sampleName()))));
            }
            output = outputBuilder.build();
        } else {
            output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
        }
        trace.stop();
        return output;
    } finally {
        // Always release the pool, including when an upload, lane job or merge throws
        executorService.shutdown();
    }
}
Also used : Arguments(com.hartwig.pipeline.Arguments) StageTrace(com.hartwig.pipeline.trace.StageTrace) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) Aligner(com.hartwig.pipeline.alignment.Aligner) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) ArrayList(java.util.ArrayList) VirtualMachineJobDefinition(com.hartwig.pipeline.execution.vm.VirtualMachineJobDefinition) Future(java.util.concurrent.Future) RuntimeBucket(com.hartwig.pipeline.storage.RuntimeBucket) PipelineStatus(com.hartwig.pipeline.execution.PipelineStatus) ExecutorService(java.util.concurrent.ExecutorService) BashStartupScript(com.hartwig.pipeline.execution.vm.BashStartupScript) DataType(com.hartwig.pipeline.datatypes.DataType) FileTypes.bai(com.hartwig.pipeline.datatypes.FileTypes.bai) ImmutableAlignmentOutput(com.hartwig.pipeline.alignment.ImmutableAlignmentOutput) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation) Lane(com.hartwig.patient.Lane) ArchivePath(com.hartwig.pipeline.metadata.ArchivePath) SampleUpload(com.hartwig.pipeline.storage.SampleUpload) Folder(com.hartwig.pipeline.report.Folder) ResultsDirectory(com.hartwig.pipeline.ResultsDirectory) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) DefaultBackoffPolicy(com.hartwig.pipeline.failsafe.DefaultBackoffPolicy) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) File(java.io.File) SingleFileComponent(com.hartwig.pipeline.report.SingleFileComponent) Failsafe(net.jodah.failsafe.Failsafe) ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ExecutionException(java.util.concurrent.ExecutionException) List(java.util.List) Sample(com.hartwig.patient.Sample) AddDatatype(com.hartwig.pipeline.metadata.AddDatatype) AlignmentOutput(com.hartwig.pipeline.alignment.AlignmentOutput) OverrideReferenceGenomeCommand(com.hartwig.pipeline.resource.OverrideReferenceGenomeCommand) 
RuntimeFiles(com.hartwig.pipeline.execution.vm.RuntimeFiles) SingleSampleRunMetadata(com.hartwig.pipeline.metadata.SingleSampleRunMetadata) Storage(com.google.cloud.storage.Storage) Labels(com.hartwig.pipeline.labels.Labels) FileTypes.bam(com.hartwig.pipeline.datatypes.FileTypes.bam) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) ComputeEngine(com.hartwig.pipeline.execution.vm.ComputeEngine) ReportComponent(com.hartwig.pipeline.report.ReportComponent) RunLogComponent(com.hartwig.pipeline.report.RunLogComponent) SampleSource(com.hartwig.pipeline.alignment.sample.SampleSource) RunLogComponent(com.hartwig.pipeline.report.RunLogComponent) PipelineStatus(com.hartwig.pipeline.execution.PipelineStatus) ArrayList(java.util.ArrayList) ReportComponent(com.hartwig.pipeline.report.ReportComponent) AddDatatype(com.hartwig.pipeline.metadata.AddDatatype) ArchivePath(com.hartwig.pipeline.metadata.ArchivePath) BashStartupScript(com.hartwig.pipeline.execution.vm.BashStartupScript) InputDownload(com.hartwig.pipeline.execution.vm.InputDownload) SingleFileComponent(com.hartwig.pipeline.report.SingleFileComponent) Sample(com.hartwig.patient.Sample) Lane(com.hartwig.patient.Lane) SubStageInputOutput(com.hartwig.pipeline.stages.SubStageInputOutput) ImmutableAlignmentOutput(com.hartwig.pipeline.alignment.ImmutableAlignmentOutput) StageTrace(com.hartwig.pipeline.trace.StageTrace) ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) OutputUpload(com.hartwig.pipeline.execution.vm.OutputUpload) ImmutableAlignmentOutput(com.hartwig.pipeline.alignment.ImmutableAlignmentOutput) AlignmentOutput(com.hartwig.pipeline.alignment.AlignmentOutput) RuntimeBucket(com.hartwig.pipeline.storage.RuntimeBucket) Future(java.util.concurrent.Future) GoogleStorageLocation(com.hartwig.pipeline.storage.GoogleStorageLocation)

Example 15 with ResourceFiles

use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.

the class SingleSamplePipeline method run.

/**
 * Runs the single-sample pipeline for one sample.
 *
 * <p>After logging the start and obtaining the alignment output, the event listener is told that
 * alignment is complete. If the state says the run should not proceed, it is returned as is.
 * Otherwise the BAM metrics, SNP genotype, flagstat and CRAM conversion stages are submitted to
 * {@code executorService}; for samples of type {@code REFERENCE} the germline caller is submitted as
 * well and its result is awaited first. Stage outputs are added to the state and the report, the BAM
 * metrics, flagstat and germline outputs are also put on their queues, the report is composed and
 * the event listener is told the run is complete.
 *
 * @param metadata run metadata of the sample to process
 * @return the pipeline state holding the outputs that were added
 * @throws Exception if alignment, a stage result or a queue operation fails with an exception
 */
public PipelineState run(final SingleSampleRunMetadata metadata) throws Exception {
    // Empty suffix when no run id was supplied
    final String runTagSuffix = arguments.runId().map(runId -> String.format("using run tag [%s]", runId)).orElse("");
    LOGGER.info("Pipeline5 single sample pipeline starting for sample name [{}] with id [{}] {}",
            metadata.sampleName(),
            metadata.barcode(),
            runTagSuffix);

    final PipelineState pipelineState = new PipelineState();
    final ResourceFiles resources = buildResourceFiles(arguments);
    final AlignmentOutput aligned = convertCramsIfNecessary(arguments, metadata, pipelineState);
    eventListener.alignmentComplete(aligned);

    // Nothing further to run when the state says not to proceed
    if (!pipelineState.shouldProceed()) {
        return pipelineState;
    }

    // Submit the per-sample stages before waiting on any of them
    final Future<BamMetricsOutput> pendingBamMetrics =
            executorService.submit(() -> stageRunner.run(metadata, new BamMetrics(resources, aligned, persistedDataset, arguments)));
    final Future<SnpGenotypeOutput> pendingSnpGenotype =
            executorService.submit(() -> stageRunner.run(metadata, new SnpGenotype(resources, aligned)));
    final Future<FlagstatOutput> pendingFlagstat =
            executorService.submit(() -> stageRunner.run(metadata, new Flagstat(aligned, persistedDataset)));
    final Future<CramOutput> pendingCram =
            executorService.submit(() -> stageRunner.run(metadata, new CramConversion(aligned, metadata.type(), resources)));

    // Germline calling is only run for reference samples
    if (metadata.type().equals(SingleSampleRunMetadata.SampleType.REFERENCE)) {
        final Future<GermlineCallerOutput> pendingGermline =
                executorService.submit(() -> stageRunner.run(metadata, new GermlineCaller(aligned, resources, persistedDataset)));
        final GermlineCallerOutput germline = futurePayload(pendingGermline);
        germlineCallerOutputQueue.put(germline);
        report.add(pipelineState.add(germline));
    }

    // Publish metrics and flagstat on their queues before recording the stage outputs
    final BamMetricsOutput bamMetrics = futurePayload(pendingBamMetrics);
    metricsOutputQueue.put(bamMetrics);
    final FlagstatOutput flagstat = futurePayload(pendingFlagstat);
    flagstatOutputQueue.put(flagstat);

    report.add(pipelineState.add(bamMetrics));
    report.add(pipelineState.add(futurePayload(pendingSnpGenotype)));
    report.add(pipelineState.add(flagstat));
    report.add(pipelineState.add(futurePayload(pendingCram)));
    report.compose(metadata, "SingleSample");
    eventListener.complete(pipelineState);
    return pipelineState;
}
Also used : GermlineCallerOutput(com.hartwig.pipeline.calling.germline.GermlineCallerOutput) GermlineCaller(com.hartwig.pipeline.calling.germline.GermlineCaller) SnpGenotypeOutput(com.hartwig.pipeline.snpgenotype.SnpGenotypeOutput) ResourceFilesFactory.buildResourceFiles(com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles) ResourceFiles(com.hartwig.pipeline.resource.ResourceFiles) AlignmentOutput(com.hartwig.pipeline.alignment.AlignmentOutput) Flagstat(com.hartwig.pipeline.flagstat.Flagstat) BamMetrics(com.hartwig.pipeline.metrics.BamMetrics) SnpGenotype(com.hartwig.pipeline.snpgenotype.SnpGenotype) FlagstatOutput(com.hartwig.pipeline.flagstat.FlagstatOutput) BamMetricsOutput(com.hartwig.pipeline.metrics.BamMetricsOutput) CramOutput(com.hartwig.pipeline.cram.CramOutput) CramConversion(com.hartwig.pipeline.cram.CramConversion)

Aggregations

ResourceFiles (com.hartwig.pipeline.resource.ResourceFiles): 33; OutputUpload (com.hartwig.pipeline.execution.vm.OutputUpload): 24; InputFileDescriptor (com.hartwig.batch.input.InputFileDescriptor): 23; StringJoiner (java.util.StringJoiner): 13; GoogleStorageLocation (com.hartwig.pipeline.storage.GoogleStorageLocation): 10; RemoteLocationsApi (com.hartwig.batch.api.RemoteLocationsApi): 8; ResourceFilesFactory.buildResourceFiles (com.hartwig.pipeline.resource.ResourceFilesFactory.buildResourceFiles): 7; CopyLogToOutput (com.hartwig.pipeline.execution.vm.CopyLogToOutput): 5; SubStageInputOutput (com.hartwig.pipeline.stages.SubStageInputOutput): 5; LocalLocations (com.hartwig.batch.api.LocalLocations): 4; InputDownload (com.hartwig.pipeline.execution.vm.InputDownload): 4; RefGenomeVersion (com.hartwig.pipeline.resource.RefGenomeVersion): 4; BwaCommand (com.hartwig.pipeline.calling.command.BwaCommand): 3; SamtoolsCommand (com.hartwig.pipeline.calling.command.SamtoolsCommand): 3; OutputFile (com.hartwig.pipeline.execution.vm.OutputFile): 3; AlignmentOutput (com.hartwig.pipeline.alignment.AlignmentOutput): 2; SageApplication (com.hartwig.pipeline.calling.sage.SageApplication): 2; SageCommandBuilder (com.hartwig.pipeline.calling.sage.SageCommandBuilder): 2; GridssAnnotation (com.hartwig.pipeline.calling.structural.gridss.stage.GridssAnnotation): 2; PipelineStatus (com.hartwig.pipeline.execution.PipelineStatus): 2