Use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
The class SageGermlineOld, method execute:
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    // Inputs
    final InputFileDescriptor biopsy = inputs.get("biopsy");
    final LocalLocations localInput = new LocalLocations(new RemoteLocationsApi(biopsy));
    final String tumorSampleName = localInput.getTumor();
    final String referenceSampleName = localInput.getReference();
    final String tumorAlignment = localInput.getTumorAlignment();
    final String referenceAlignment = localInput.getReferenceAlignment();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    // Download Inputs
    commands.addCommands(localInput.generateDownloadCommands());

    return VirtualMachineJobDefinition.sageGermlineCalling(commands, ResultsDirectory.defaultDirectory());
}
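The snippet above resolves sample names and local alignment paths, but the rest of the SAGE germline setup is not shown. As a hedged illustration only, the resolution-and-download step could be collected into a helper like the hypothetical resolveAndDownload below (imports of the pipeline5 classes are omitted, as in the snippets on this page):

// Hypothetical helper, not part of pipeline5; it uses only the accessors shown in the snippet above.
private static String[] resolveAndDownload(final InputFileDescriptor biopsy, final BashStartupScript commands) {
    final LocalLocations localInput = new LocalLocations(new RemoteLocationsApi(biopsy));
    // Resolve names and local paths first, mirroring the order used in the snippet above.
    final String[] resolved = new String[] {
            localInput.getTumor(),              // tumor sample name
            localInput.getReference(),          // reference sample name
            localInput.getTumorAlignment(),     // local path the tumor alignment will be downloaded to
            localInput.getReferenceAlignment()  // local path the reference alignment will be downloaded to
    };
    // Queue the download commands for the files resolved above, as in the snippet.
    commands.addCommands(localInput.generateDownloadCommands());
    return resolved;
}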
Use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
The class GcpSampleDataExtractor, method extractSampleLocations:
private void extractSampleLocations(final String sampleId) {
    try {
        final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);
        SampleLocationData sampleLocations = SampleLocationData.fromRemoteLocationsApi(sampleId, locations);

        mWriter.write(sampleLocations.csvData());
        mWriter.newLine();
    } catch (Exception e) {
        LOGGER.severe(String.format("failed to write GCP locations data for sample(%s): %s", sampleId, e.toString()));
    }
}
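The enclosing class is not shown. A minimal sketch of how this method might be driven, assuming mWriter is a java.io.BufferedWriter over the output CSV and the sample ids arrive as a plain list (both are assumptions, not taken from the source):

// Hypothetical driver; assumes java.util.List and java.io.IOException are imported
// and that mWriter is a java.io.BufferedWriter field of the same class.
private void extractAllSampleLocations(final List<String> sampleIds) throws IOException {
    for (final String sampleId : sampleIds) {
        // each sample becomes one CSV row; failures are logged and skipped inside extractSampleLocations()
        extractSampleLocations(sampleId);
    }
    mWriter.flush();
    mWriter.close();
}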
Use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
The class GripssGermline, method execute:
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    final LocalLocations inputFileFactory = new LocalLocations(new RemoteLocationsApi(descriptor.billedProject(), sampleId));
    final String referenceId = inputFileFactory.getReference();
    final String inputVcf = inputFileFactory.getStructuralVariantsGridss();

    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, GRIPSS_DIR, GRIPSS_JAR, VmDirectories.TOOLS));
    startupScript.addCommands(inputFileFactory.generateDownloadCommands());

    // run GRIPSS
    final String outputVcf1 = String.format("%s/%s.gripss.vcf.gz", VmDirectories.OUTPUT, referenceId);

    final StringJoiner gripssArgs = new StringJoiner(" ");
    gripssArgs.add(String.format("-tumor %s", referenceId)); // germline run: the reference sample id is passed as the -tumor argument
    gripssArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    gripssArgs.add(String.format("-breakpoint_hotspot %s", resourceFiles.knownFusionPairBedpe()));
    gripssArgs.add(String.format("-breakend_pon %s", resourceFiles.gridssBreakendPon()));
    gripssArgs.add(String.format("-breakpoint_pon %s", resourceFiles.gridssBreakpointPon()));
    gripssArgs.add(String.format("-pon_distance %d", 4));
    gripssArgs.add(String.format("-min_qual_break_end %d", 400));
    gripssArgs.add(String.format("-min_qual_rescue_mobile_element_insertion %d", 400));
    gripssArgs.add(String.format("-min_qual_break_point %d", 250));
    gripssArgs.add(String.format("-input_vcf %s", inputVcf));
    gripssArgs.add(String.format("-output_vcf %s", outputVcf1));

    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssApplicationKt %s", MAX_HEAP, VmDirectories.TOOLS, GRIPSS_JAR, gripssArgs.toString()));

    final String outputVcf2 = String.format("%s/%s.gripss.filtered.vcf.gz", VmDirectories.OUTPUT, referenceId);

    final StringJoiner gripss2Args = new StringJoiner(" ");
    gripss2Args.add(String.format("-input_vcf %s", outputVcf1));
    gripss2Args.add(String.format("-output_vcf %s", outputVcf2));

    startupScript.addCommand(() -> format("java -Xmx%s -cp %s/%s com.hartwig.hmftools.gripsskt.GripssHardFilterApplicationKt %s", MAX_HEAP, VmDirectories.TOOLS, GRIPSS_JAR, gripss2Args.toString()));

    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "gripss"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("gripss")
            .startupCommand(startupScript)
            .performanceProfile(custom(8, 30))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
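The two java invocations above differ only in the main class and the argument joiner. As an illustration of one possible refactor, a hypothetical helper that produces the same command strings from the constants already shown:

// Hypothetical helper; uses only constants and flags that appear in the method above.
private static String gripssCommand(final String mainClass, final StringJoiner args) {
    return String.format("java -Xmx%s -cp %s/%s %s %s", MAX_HEAP, VmDirectories.TOOLS, GRIPSS_JAR, mainClass, args.toString());
}

// Usage, e.g.:
//   startupScript.addCommand(() -> gripssCommand("com.hartwig.hmftools.gripsskt.GripssApplicationKt", gripssArgs));
//   startupScript.addCommand(() -> gripssCommand("com.hartwig.hmftools.gripsskt.GripssHardFilterApplicationKt", gripss2Args));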
Use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
The class TeloBatch, method execute:
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript commands, final RuntimeFiles executionFlags) {
    final String sampleId = inputs.get("sampleId").inputValue();

    Optional<String> specificChromosome = Optional.empty();
    try {
        specificChromosome = Optional.of(inputs.get("specificChromosome").inputValue());
    } catch (IllegalArgumentException ignored) {
    }

    final InputFileDescriptor runData = inputs.get();
    final RemoteLocationsApi locationsApi = new RemoteLocationsApi(runData.billedProject(), sampleId);

    // download the telo.jar
    // InputDownload teloJarDownload = new InputDownload(GoogleStorageLocation.of(teloToolsBucket, teloToolsPath + "/telo.jar"), VmDirectories.TOOLS);
    // InputDownload teloJarDownload = downloadExperimentalVersion();
    // commands.addCommand(teloJarDownload);
    commands.addCommand(downloadExperimentalVersion());
    /*() -> format("gsutil -u hmf-crunch cp gs://%s/%s/%s %s",
        COMMON_RESOURCES, TELO_DIR, TELO_JAR, VmDirectories.TOOLS));*/

    // ref genome
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    InputDownload tumorBamDownload = new InputDownload(locationsApi.getTumorAlignment());
    InputDownload tumorBamIndexDownload = new InputDownload(locationsApi.getTumorAlignmentIndex());

    // download the tumour and reference bam / index files
    commands.addCommand(tumorBamDownload);
    commands.addCommand(tumorBamIndexDownload);

    commands.addCommand(makeTeloRunCommand(sampleId, "somatic", tumorBamDownload.getLocalTargetPath(), resourceFiles.refGenomeFile(), specificChromosome));

    // delete the tumor bam file to save disk space
    commands.addCommand(() -> format("rm -f %s", tumorBamDownload.getLocalTargetPath()));
    commands.addCommand(() -> format("rm -f %s", tumorBamIndexDownload.getLocalTargetPath()));

    InputDownload referenceBamDownload = new InputDownload(locationsApi.getReferenceAlignment());
    InputDownload referenceBamIndexDownload = new InputDownload(locationsApi.getReferenceAlignmentIndex());
    commands.addCommand(referenceBamDownload);
    commands.addCommand(referenceBamIndexDownload);

    commands.addCommand(makeTeloRunCommand(sampleId, "germline", referenceBamDownload.getLocalTargetPath(), resourceFiles.refGenomeFile(), specificChromosome));

    // JavaJarCommand jarCommand = new JavaJarCommand("telo", TELO_VERSION, "telo.jar", "16G", teloArgs);
    // commands.addCommand(jarCommand);

    // Store output
    commands.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), sampleId), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("telo")
            .startupCommand(commands)
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .workingDiskSpaceGb(500)
            .performanceProfile(VirtualMachinePerformanceProfile.custom(16, MEMORY_GB))
            .build();
}
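The try/catch lookup of specificChromosome is a recurring pattern; a hypothetical optionalInput helper (not part of the source) that relies only on the behaviour shown above, where a missing key makes inputs.get(key) throw IllegalArgumentException:

// Hypothetical convenience wrapper around the optional-input lookup used above.
private static Optional<String> optionalInput(final InputBundle inputs, final String key) {
    try {
        return Optional.of(inputs.get(key).inputValue());
    } catch (IllegalArgumentException ignored) {
        // key not present in the input bundle
        return Optional.empty();
    }
}

// Usage, e.g.:
//   Optional<String> specificChromosome = optionalInput(inputs, "specificChromosome");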
Use of com.hartwig.batch.api.RemoteLocationsApi in project pipeline5 by hartwigmedical.
The class SageRerun, method execute:
@Override
public VirtualMachineJobDefinition execute(final InputBundle inputs, final RuntimeBucket runtimeBucket, final BashStartupScript startupScript, final RuntimeFiles executionFlags) {
    InputFileDescriptor descriptor = inputs.get();
    final String sampleId = descriptor.inputValue();
    final ResourceFiles resourceFiles = ResourceFilesFactory.buildResourceFiles(RefGenomeVersion.V37);

    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, SAGE_DIR, SAGE_JAR, VmDirectories.TOOLS));
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_TOOLS_BUCKET, PAVE_DIR, PAVE_JAR, VmDirectories.TOOLS));

    String ponFile = "SageGermlinePon.1000x.37.tsv.gz";
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp %s/%s/%s %s", BATCH_RESOURCE_BUCKET, SAGE_DIR, ponFile, VmDirectories.INPUT));

    // download tumor and ref CRAM
    final RemoteLocationsApi locations = new RemoteLocationsApi("hmf-crunch", sampleId);

    String[] tumorCramData = getCramFileData(locations.getTumorAlignment());
    String tumorCramFile = tumorCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", tumorCramData[CRAM_FULL_PATH], VmDirectories.INPUT));

    String referenceId = locations.getReference();
    String[] refCramData = getCramFileData(locations.getReferenceAlignment());
    String refCramFile = refCramData[CRAM_FILENAME];
    startupScript.addCommand(() -> format("gsutil -u hmf-crunch cp gs://%s* %s", refCramData[CRAM_FULL_PATH], VmDirectories.INPUT));

    final String sageVcf = String.format("%s/%s.sage.somatic.vcf.gz", VmDirectories.OUTPUT, sampleId);

    // run Sage
    final StringJoiner sageArgs = new StringJoiner(" ");
    sageArgs.add(String.format("-tumor %s", sampleId));
    sageArgs.add(String.format("-tumor_bam %s/%s", VmDirectories.INPUT, tumorCramFile));
    sageArgs.add(String.format("-reference %s", referenceId));
    sageArgs.add(String.format("-reference_bam %s/%s", VmDirectories.INPUT, refCramFile));
    sageArgs.add(String.format("-hotspots %s", resourceFiles.sageSomaticHotspots()));
    sageArgs.add(String.format("-panel_bed %s", resourceFiles.sageSomaticCodingPanel()));
    sageArgs.add(String.format("-high_confidence_bed %s", resourceFiles.giabHighConfidenceBed()));
    sageArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    sageArgs.add(String.format("-ref_genome_version %s", resourceFiles.version().toString()));
    sageArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    sageArgs.add(String.format("-out %s", sageVcf));
    sageArgs.add("-perf_warn_time 50");
    // sageArgs.add(String.format("-log_debug"));
    sageArgs.add(String.format("-threads %s", Bash.allCpus()));

    startupScript.addCommand(() -> format("java -Xmx48G -jar %s/%s %s", VmDirectories.TOOLS, SAGE_JAR, sageArgs.toString()));

    // annotate with Pave - PON and gene impacts
    final StringJoiner paveArgs = new StringJoiner(" ");
    String ponFilters = "HOTSPOT:5:5;PANEL:2:5;UNKNOWN:2:0";
    final String paveVcf = String.format("%s/%s.sage.somatic.pon.pave.vcf.gz", VmDirectories.OUTPUT, sampleId);

    paveArgs.add(String.format("-sample %s", sampleId));
    // ponFilterVcf from BCF Tools
    paveArgs.add(String.format("-vcf_file %s", sageVcf));
    paveArgs.add(String.format("-ref_genome %s", resourceFiles.refGenomeFile()));
    paveArgs.add(String.format("-ref_genome_version %s", resourceFiles.version().toString()));
    paveArgs.add(String.format("-driver_gene_panel %s", resourceFiles.driverGenePanel()));
    paveArgs.add(String.format("-ensembl_data_dir %s", resourceFiles.ensemblDataCache()));
    paveArgs.add(String.format("-pon_file %s/%s", VmDirectories.INPUT, ponFile));
    paveArgs.add(String.format("-pon_filters \"%s\"", ponFilters));
    paveArgs.add(String.format("-output_vcf_file %s", paveVcf));

    String paveJar = String.format("%s/%s", VmDirectories.TOOLS, PAVE_JAR);
    startupScript.addCommand(() -> format("java -jar %s %s", paveJar, paveArgs.toString()));

    // upload output
    startupScript.addCommand(new OutputUpload(GoogleStorageLocation.of(runtimeBucket.name(), "sage"), executionFlags));

    return ImmutableVirtualMachineJobDefinition.builder()
            .name("sage")
            .startupCommand(startupScript)
            .performanceProfile(custom(24, 64))
            .namespacedResults(ResultsDirectory.defaultDirectory())
            .build();
}
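getCramFileData and the CRAM_FULL_PATH / CRAM_FILENAME indices are referenced but not shown. Judging only from how the return values are used above (the full path is prefixed with gs:// in the copy command, the bare file name is joined onto VmDirectories.INPUT), one plausible sketch, assuming GoogleStorageLocation exposes bucket() and path() accessors, would be:

// Sketch only - not the project's actual implementation.
private static final int CRAM_FULL_PATH = 0; // "<bucket>/<path>" form, used after a gs:// prefix above
private static final int CRAM_FILENAME = 1;  // bare file name, joined onto VmDirectories.INPUT above

private static String[] getCramFileData(final GoogleStorageLocation location) {
    // assumption: bucket() and path() accessors exist on GoogleStorageLocation
    final String fullPath = location.bucket() + "/" + location.path();
    final String fileName = location.path().substring(location.path().lastIndexOf('/') + 1);
    return new String[] { fullPath, fileName };
}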