Example usage of com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis in the hmftools project by hartwigmedical.
Source: class PatientReporter, method run().
@NotNull
public SequencedPatientReport run(@NotNull final String runDirectory, @Nullable final String comments) throws IOException {
    final RunContext run = ProductionRunContextFactory.fromRunDirectory(runDirectory);
    // Full genome analysis (somatic variants, purple copy numbers, structural variants) for the tumor sample.
    final GenomeAnalysis genomeAnalysis = analyseGenomeData(run.tumorSample(), runDirectory);
    assert run.isSomaticRun() && run.tumorSample().equals(genomeAnalysis.sample());

    final String tumorSample = genomeAnalysis.sample();
    final VariantAnalysis variantAnalysis = genomeAnalysis.variantAnalysis();
    final PurpleAnalysis purpleAnalysis = genomeAnalysis.purpleAnalysis();
    final StructuralVariantAnalysis structuralVariantAnalysis = genomeAnalysis.structuralVariantAnalysis();

    // Sort fusions/disruptions into reporting order before converting to report data objects.
    final List<GeneFusionData> reportableFusions = structuralVariantAnalysis.reportableFusions()
            .stream()
            .sorted(fusionComparator())
            .map(GeneFusionData::from)
            .collect(Collectors.toList());
    final List<GeneDisruptionData> reportableDisruptions = structuralVariantAnalysis.reportableDisruptions()
            .stream()
            .sorted(disruptionComparator(reporterData().panelGeneModel().transcriptMap()))
            .map(GeneDisruptionData::from)
            .collect(Collectors.toList());

    final int passedVariantCount = variantAnalysis.passedVariants().size();
    final int mutationalLoad = variantAnalysis.mutationalLoad();
    final int consequentialVariantCount = variantAnalysis.consequentialVariants().size();
    final int structuralVariantCount = structuralVariantAnalysis.annotations().size();

    final String cancerType = PatientReporterHelper.extractCancerType(baseReporterData().patientsCancerTypes(), tumorSample);
    final TumorLocationDoidMapping doidMapping = TumorLocationDoidMapping.fromResource("/tumor_location_doid_mapping.csv");
    // CIViC evidence matching across all reportable findings, restricted to DOIDs for this tumor type.
    final List<Alteration> alterations = civicAnalyzer().run(variantAnalysis.findings(),
            purpleAnalysis.reportableGeneCopyNumbers(),
            reportableDisruptions,
            reportableFusions,
            reporterData().panelGeneModel(),
            doidMapping.doidsForTumorType(cancerType));

    // Parameterized logging avoids eager string concatenation and Integer.toString boilerplate.
    LOGGER.info(" Printing analysis results:");
    LOGGER.info("  Number of passed variants : {}", passedVariantCount);
    LOGGER.info("  Number of missense variants (mutational load) : {}", mutationalLoad);
    LOGGER.info("  Number of consequential variants to report : {}", consequentialVariantCount);
    LOGGER.info(" Determined copy number stats for {} genes which led to {} copy numbers.",
            purpleAnalysis.genePanelSize(),
            purpleAnalysis.reportableGeneCopyNumbers().size());
    LOGGER.info("  Number of unreported structural variants : {}", structuralVariantCount);
    LOGGER.info("  Number of gene fusions to report : {}", reportableFusions.size());
    LOGGER.info("  Number of gene disruptions to report : {}", reportableDisruptions.size());
    LOGGER.info("  Number of CIViC alterations to report : {}", alterations.size());
    LOGGER.info("  Microsatellite analysis results: {} indels per MB", variantAnalysis.indelsPerMb());

    final Lims lims = baseReporterData().limsModel();
    final Double tumorPercentage = lims.tumorPercentageForSample(tumorSample);
    // Enrich the somatic variants with purple purity/ploidy information for the report.
    final List<VariantReport> purpleEnrichedVariants = purpleAnalysis.enrichSomaticVariants(variantAnalysis.findings());
    final String sampleRecipient = baseReporterData().centerModel().getAddresseeStringForSample(tumorSample);
    final SampleReport sampleReport = ImmutableSampleReport.of(tumorSample,
            cancerType,
            tumorPercentage,
            lims.arrivalDateForSample(tumorSample),
            lims.arrivalDateForSample(run.refSample()),
            lims.labProceduresForSample(tumorSample),
            sampleRecipient);

    return ImmutableSequencedPatientReport.of(sampleReport,
            purpleEnrichedVariants,
            mutationalLoad,
            variantAnalysis.indelsPerMb(),
            purpleAnalysis.reportableGeneCopyNumbers(),
            reportableDisruptions,
            reportableFusions,
            purpleAnalysis.purityString(),
            alterations,
            PatientReporterHelper.findCircosPlotPath(runDirectory, tumorSample),
            Optional.ofNullable(comments),
            baseReporterData().signaturePath());
}
Example usage of com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis in the hmftools project by hartwigmedical.
Source: class LoadStructuralVariants, method main().
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    // DB access is optional on the command line; paths that need it must check for null.
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;

    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }

    // Stand-alone mode: write filtered SVs from VCFs and exit.
    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }

    // Stand-alone mode: re-annotate SVs from VCFs and exit.
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }

    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }

    // NOTE: a second, identical createFilteredPON block used to live here; it was
    // unreachable (the first copy above returns) and has been removed.

    if (!loadFromDB) {
        // Loading from VCF requires a database to persist variants and annotations into.
        Objects.requireNonNull(dbAccess, "Database access required when not loading from DB; please supply " + DB_URL);

        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // NEVA: We read after we write to populate the primaryId field
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer =
                new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);

        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }

        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // KODU: The old 'assert runClustering' here was risky (a no-op without -ea,
        // followed by a guaranteed NPE); fail fast with an explicit check instead.
        if (!runClustering) {
            throw new IllegalStateException("Loading from DB (" + LOAD_FROM_DB + ") requires clustering to be enabled (" + CLUSTER_SVS + ")");
        }
        Objects.requireNonNull(dbAccess, "Database access required when loading from DB; please supply " + DB_URL);

        // Resolve the sample selection: all samples ('*' or empty), a comma-separated list, or a single sample.
        List<String> samplesList = Lists.newArrayList();
        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }

        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }

    // Clusterer is only created when clustering was requested; guard against NPE.
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
Example usage of com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis in the hmftools project by hartwigmedical.
Source: class PatientReporter, method analyseGenomeData().
@NotNull
private GenomeAnalysis analyseGenomeData(@NotNull final String sample, @NotNull final String runDirectory) throws IOException {
    LOGGER.info(" Loading somatic snv and indels...");
    final List<SomaticVariant> variants = PatientReporterHelper.loadPassedSomaticVariants(sample, runDirectory);
    // Parameterized logging avoids eager string concatenation on every call.
    LOGGER.info(" {} somatic passed snps, mnps and indels loaded for sample {}", variants.size(), sample);
    LOGGER.info(" Analyzing somatic snp/mnp and indels....");
    final VariantAnalysis variantAnalysis = variantAnalyzer().run(variants);

    LOGGER.info(" Loading purity numbers...");
    final PurityContext context = PatientReporterHelper.loadPurity(runDirectory, sample);
    if (context.status().equals(FittedPurityStatus.NO_TUMOR)) {
        LOGGER.warn("PURPLE DID NOT DETECT A TUMOR. Proceed with utmost caution!");
    }
    final FittedPurity purity = context.bestFit();
    final FittedPurityScore purityScore = context.score();

    final List<PurpleCopyNumber> purpleCopyNumbers = PatientReporterHelper.loadPurpleCopyNumbers(runDirectory, sample);
    // Restrict gene copy numbers to the genes on the reporting panel.
    final List<GeneCopyNumber> panelGeneCopyNumbers = PatientReporterHelper.loadPurpleGeneCopyNumbers(runDirectory, sample)
            .stream()
            .filter(x -> reporterData().panelGeneModel().panel().contains(x.gene()))
            .collect(Collectors.toList());
    LOGGER.info(" {} purple copy number regions loaded for sample {}", purpleCopyNumbers.size(), sample);
    LOGGER.info(" Analyzing purple somatic copy numbers...");
    final PurpleAnalysis purpleAnalysis = ImmutablePurpleAnalysis.builder()
            .gender(context.gender())
            .status(context.status())
            .fittedPurity(purity)
            .fittedScorePurity(purityScore)
            .copyNumbers(purpleCopyNumbers)
            .panelGeneCopyNumbers(panelGeneCopyNumbers)
            .build();

    final Path structuralVariantVCF = PatientReporterHelper.findStructuralVariantVCF(runDirectory);
    LOGGER.info(" Loading structural variants...");
    final List<StructuralVariant> structuralVariants = StructuralVariantFileLoader.fromFile(structuralVariantVCF.toString(), true);
    LOGGER.info(" Enriching structural variants with purple data.");
    final List<EnrichedStructuralVariant> enrichedStructuralVariants = purpleAnalysis.enrichStructuralVariants(structuralVariants);
    LOGGER.info(" Analysing structural variants...");
    final StructuralVariantAnalysis structuralVariantAnalysis = structuralVariantAnalyzer().run(enrichedStructuralVariants, false);

    return ImmutableGenomeAnalysis.of(sample, variantAnalysis, purpleAnalysis, structuralVariantAnalysis);
}
Aggregations