use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
the class RnaIsofoxUnmapped method execute.
@Override
public VirtualMachineJobDefinition execute(InputBundle inputs, RuntimeBucket bucket, BashStartupScript startupScript, RuntimeFiles executionFlags) {
InputFileDescriptor descriptor = inputs.get();
final String batchInputs = descriptor.inputValue();
final String[] batchItems = batchInputs.split(",");
if (batchItems.length < 2) {
System.out.print(String.format("invalid input arguments(%s) - expected SampleId,ReadLength", batchInputs));
return null;
}
final String sampleId = batchItems[COL_SAMPLE_ID];
final RefGenomeVersion refGenomeVersion = V37;
final ResourceFiles resourceFiles = buildResourceFiles(refGenomeVersion);
final String samplesDir = String.format("%s/%s", getRnaCohortDirectory(refGenomeVersion), "samples");
// copy down BAM and index file for this sample
final String bamFile = String.format("%s%s", sampleId, RNA_BAM_FILE_ID);
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamFile, VmDirectories.INPUT));
final String bamIndexFile = String.format("%s%s", sampleId, RNA_BAM_INDEX_FILE_ID);
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", samplesDir, sampleId, bamIndexFile, VmDirectories.INPUT));
// copy down the executable
startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s %s", ISOFOX_LOCATION, ISOFOX_JAR, VmDirectories.TOOLS));
startupScript.addCommand(() -> format("cd %s", VmDirectories.OUTPUT));
// run Isofox
StringJoiner isofoxArgs = new StringJoiner(" ");
isofoxArgs.add(String.format("-sample %s", sampleId));
isofoxArgs.add(String.format("-functions UNMAPPED_READS"));
isofoxArgs.add(String.format("-output_dir %s/", VmDirectories.OUTPUT));
isofoxArgs.add(String.format("-bam_file %s/%s", VmDirectories.INPUT, bamFile));
isofoxArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
isofoxArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
final String threadCount = Bash.allCpus();
isofoxArgs.add(String.format("-threads %s", threadCount));
startupScript.addCommand(() -> format("java -jar %s/%s %s", VmDirectories.TOOLS, ISOFOX_JAR, isofoxArgs.toString()));
// upload the results
startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(bucket.name(), "isofox"), executionFlags));
return ImmutableVirtualMachineJobDefinition.builder().name("rna-isofox").startupCommand(startupScript).namespacedResults(ResultsDirectory.defaultDirectory()).workingDiskSpaceGb(MAX_EXPECTED_BAM_SIZE_GB).build();
}
use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
the class AmberRerunTumorOnly method execute.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
// Inputs
final String set = inputs.get("set").inputValue();
final String tumorSampleName = inputs.get("tumor_sample").inputValue();
final InputFileDescriptor remoteTumorFile = inputs.get("tumor_cram");
final InputFileDescriptor remoteTumorIndex = remoteTumorFile.index();
final String localTumorFile = localFilename(remoteTumorFile);
// Download tumor
commands.addCommand(() -> remoteTumorFile.toCommandForm(localTumorFile));
commands.addCommand(() -> remoteTumorIndex.toCommandForm(localFilename(remoteTumorIndex)));
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
commands.addCommand(() -> AmberCommandBuilder.newBuilder(resourceFiles).tumor(tumorSampleName, localTumorFile).build().asBash());
// Store output
final GoogleStorageLocation archiveStorageLocation = amberArchiveDirectory(set);
commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
commands.addCommand(new OutputUpload(archiveStorageLocation));
return VirtualMachineJobDefinition.amber(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
the class CobaltMigration method execute.
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
// Inputs
final String set = inputs.get("set").inputValue();
final String tumorSampleName = inputs.get("tumor_sample").inputValue();
final String referenceSampleName = inputs.get("ref_sample").inputValue();
final GoogleStorageLocation remoteInputDirectory = cobaltArchiveDirectoryInput(set);
// Download old files
commands.addCommand(() -> copyInputCommand(remoteInputDirectory));
final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);
commands.addCommand(() -> new CobaltMigrationCommand(resourceFiles, referenceSampleName, tumorSampleName).asBash());
// Store output
final GoogleStorageLocation archiveStorageLocation = cobaltArchiveDirectoryOutput(set);
commands.addCommand(new CopyLogToOutput(executionFlags.log(), "run.log"));
commands.addCommand(new OutputUpload(archiveStorageLocation));
return VirtualMachineJobDefinition.cobalt(commands, ResultsDirectory.defaultDirectory());
}
use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
the class BwaAligner method run.
public AlignmentOutput run(final SingleSampleRunMetadata metadata) throws Exception {
StageTrace trace = new StageTrace(NAMESPACE, metadata.sampleName(), StageTrace.ExecutorType.COMPUTE_ENGINE).start();
RuntimeBucket rootBucket = RuntimeBucket.from(storage, NAMESPACE, metadata, arguments, labels);
Sample sample = sampleSource.sample(metadata);
if (sample.bam().isPresent()) {
String noPrefix = sample.bam().orElseThrow().replace("gs://", "");
int firstSlash = noPrefix.indexOf("/");
String bucket = noPrefix.substring(0, firstSlash);
String path = noPrefix.substring(firstSlash + 1);
return AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.PROVIDED).maybeAlignments(GoogleStorageLocation.of(bucket, path)).build();
}
final ResourceFiles resourceFiles = buildResourceFiles(arguments);
sampleUpload.run(sample, rootBucket);
List<Future<PipelineStatus>> futures = new ArrayList<>();
List<GoogleStorageLocation> perLaneBams = new ArrayList<>();
List<ReportComponent> laneLogComponents = new ArrayList<>();
List<GoogleStorageLocation> laneFailedLogs = new ArrayList<>();
for (Lane lane : sample.lanes()) {
RuntimeBucket laneBucket = RuntimeBucket.from(storage, laneNamespace(lane), metadata, arguments, labels);
BashStartupScript bash = BashStartupScript.of(laneBucket.name());
InputDownload first = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.firstOfPairPath())));
InputDownload second = new InputDownload(GoogleStorageLocation.of(rootBucket.name(), fastQFileName(sample.name(), lane.secondOfPairPath())));
bash.addCommand(first).addCommand(second);
bash.addCommands(OverrideReferenceGenomeCommand.overrides(arguments));
SubStageInputOutput alignment = new LaneAlignment(arguments.sbpApiRunId().isPresent(), resourceFiles.refGenomeFile(), first.getLocalTargetPath(), second.getLocalTargetPath(), metadata.sampleName(), lane).apply(SubStageInputOutput.empty(metadata.sampleName()));
perLaneBams.add(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path(alignment.outputFile().fileName())));
bash.addCommands(alignment.bash()).addCommand(new OutputUpload(GoogleStorageLocation.of(laneBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
futures.add(executorService.submit(() -> runWithRetries(metadata, laneBucket, VirtualMachineJobDefinition.alignment(laneId(lane).toLowerCase(), bash, resultsDirectory))));
laneLogComponents.add(new RunLogComponent(laneBucket, laneNamespace(lane), Folder.from(metadata), resultsDirectory));
laneFailedLogs.add(GoogleStorageLocation.of(laneBucket.name(), RunLogComponent.LOG_FILE));
}
AlignmentOutput output;
if (lanesSuccessfullyComplete(futures)) {
List<InputDownload> laneBams = perLaneBams.stream().map(InputDownload::new).collect(Collectors.toList());
BashStartupScript mergeMarkdupsBash = BashStartupScript.of(rootBucket.name());
laneBams.forEach(mergeMarkdupsBash::addCommand);
SubStageInputOutput merged = new MergeMarkDups(laneBams.stream().map(InputDownload::getLocalTargetPath).filter(path -> path.endsWith("bam")).collect(Collectors.toList())).apply(SubStageInputOutput.empty(metadata.sampleName()));
mergeMarkdupsBash.addCommands(merged.bash());
mergeMarkdupsBash.addCommand(new OutputUpload(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path()), RuntimeFiles.typical()));
PipelineStatus status = runWithRetries(metadata, rootBucket, VirtualMachineJobDefinition.mergeMarkdups(mergeMarkdupsBash, resultsDirectory));
ImmutableAlignmentOutput.Builder outputBuilder = AlignmentOutput.builder().sample(metadata.sampleName()).status(status).maybeAlignments(GoogleStorageLocation.of(rootBucket.name(), resultsDirectory.path(merged.outputFile().fileName()))).addAllReportComponents(laneLogComponents).addAllFailedLogLocations(laneFailedLogs).addFailedLogLocations(GoogleStorageLocation.of(rootBucket.name(), RunLogComponent.LOG_FILE)).addReportComponents(new RunLogComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), resultsDirectory));
if (!arguments.outputCram()) {
outputBuilder.addReportComponents(new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), bam(metadata.sampleName()), bam(metadata.sampleName()), resultsDirectory), new SingleFileComponent(rootBucket, Aligner.NAMESPACE, Folder.from(metadata), bai(bam(metadata.sampleName())), bai(bam(metadata.sampleName())), resultsDirectory)).addDatatypes(new AddDatatype(DataType.ALIGNED_READS, metadata.barcode(), new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bam(metadata.sampleName()))), new AddDatatype(DataType.ALIGNED_READS_INDEX, metadata.barcode(), new ArchivePath(Folder.from(metadata), BwaAligner.NAMESPACE, bai(metadata.sampleName()))));
}
output = outputBuilder.build();
} else {
output = AlignmentOutput.builder().sample(metadata.sampleName()).status(PipelineStatus.FAILED).build();
}
trace.stop();
executorService.shutdown();
return output;
}
use of com.hartwig.pipeline.resource.ResourceFiles in project pipeline5 by hartwigmedical.
the class SingleSamplePipeline method run.
public PipelineState run(final SingleSampleRunMetadata metadata) throws Exception {
LOGGER.info("Pipeline5 single sample pipeline starting for sample name [{}] with id [{}] {}", metadata.sampleName(), metadata.barcode(), arguments.runId().map(runId -> String.format("using run tag [%s]", runId)).orElse(""));
PipelineState state = new PipelineState();
final ResourceFiles resourceFiles = buildResourceFiles(arguments);
AlignmentOutput alignmentOutput = convertCramsIfNecessary(arguments, metadata, state);
eventListener.alignmentComplete(alignmentOutput);
if (state.shouldProceed()) {
Future<BamMetricsOutput> bamMetricsFuture = executorService.submit(() -> stageRunner.run(metadata, new BamMetrics(resourceFiles, alignmentOutput, persistedDataset, arguments)));
Future<SnpGenotypeOutput> unifiedGenotyperFuture = executorService.submit(() -> stageRunner.run(metadata, new SnpGenotype(resourceFiles, alignmentOutput)));
Future<FlagstatOutput> flagstatOutputFuture = executorService.submit(() -> stageRunner.run(metadata, new Flagstat(alignmentOutput, persistedDataset)));
Future<CramOutput> cramOutputFuture = executorService.submit(() -> stageRunner.run(metadata, new CramConversion(alignmentOutput, metadata.type(), resourceFiles)));
if (metadata.type().equals(SingleSampleRunMetadata.SampleType.REFERENCE)) {
Future<GermlineCallerOutput> germlineCallerFuture = executorService.submit(() -> stageRunner.run(metadata, new GermlineCaller(alignmentOutput, resourceFiles, persistedDataset)));
GermlineCallerOutput germlineCallerOutput = futurePayload(germlineCallerFuture);
germlineCallerOutputQueue.put(germlineCallerOutput);
report.add(state.add(germlineCallerOutput));
}
BamMetricsOutput bamMetricsOutput = futurePayload(bamMetricsFuture);
metricsOutputQueue.put(bamMetricsOutput);
FlagstatOutput flagstatOutput = futurePayload(flagstatOutputFuture);
flagstatOutputQueue.put(flagstatOutput);
report.add(state.add(bamMetricsOutput));
report.add(state.add(futurePayload(unifiedGenotyperFuture)));
report.add(state.add(flagstatOutput));
report.add(state.add(futurePayload(cramOutputFuture)));
report.compose(metadata, "SingleSample");
eventListener.complete(state);
}
return state;
}
Aggregations