Search in sources :

Example 1 with StructuralVariantAnnotationDAO

Use of com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO in project hmftools by hartwigmedical.

The method main of the class LoadStructuralVariants.

/**
 * Command-line entry point for loading, annotating and clustering structural variants.
 *
 * <p>Depending on the supplied options the run takes one of these paths:
 * <ul>
 *   <li>{@code WRITE_FILTERED_SVS}: process the VCF files with a {@code FilteredSVWriter} and exit;</li>
 *   <li>{@code REANNOTATE_FROM_VCFS}: process the VCF files with a {@code SvVCFAnnotator} and exit;</li>
 *   <li>without {@code LOAD_FROM_DB}: read the VCF, enrich and persist the variants, analyse them,
 *       optionally cluster them, and persist the resulting annotations;</li>
 *   <li>with {@code LOAD_FROM_DB}: run clustering on variants queried from the database for one
 *       sample, a comma-separated list of samples, or all samples (sample missing, empty or "*").</li>
 * </ul>
 *
 * @param args raw command-line arguments
 * @throws ParseException if the command line cannot be parsed
 * @throws IOException declared for the file-reading helpers invoked here
 * @throws SQLException declared for the database helpers invoked here
 * @throws IllegalArgumentException if a database connection is required but {@code DB_URL} is absent,
 *         or if {@code LOAD_FROM_DB} is given without {@code CLUSTER_SVS}
 */
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    final boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    final boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    final boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;
    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }
    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }
    // Every remaining path talks to the database, so fail early with a clear message
    // instead of a NullPointerException part-way through the run.
    if (dbAccess == null) {
        throw new IllegalArgumentException("a database connection (DB_URL option) is required for this mode");
    }
    // The DB-loading path only performs clustering; an assert is not enough here because
    // assertions are disabled by default at runtime.
    if (loadFromDB && !runClustering) {
        throw new IllegalArgumentException("loading from the database requires clustering to be enabled");
    }
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        // null-safe: the sample option may be absent
        clusteringConfig.setUseCombinedOutputFile("*".equals(tumorSample));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // NEVA: We read after we write to populate the primaryId field
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);
        if (svClusterer != null) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }
        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // A missing sample option is treated like an empty one: cluster every sample in the database.
        final boolean allSamples = tumorSample == null || tumorSample.isEmpty() || tumorSample.equals("*");
        List<String> samplesList = Lists.newArrayList();
        if (allSamples) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }
        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }
    // The clusterer only exists when clustering was requested.
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
Also used : Options(org.apache.commons.cli.Options) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnalysis(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis) StructuralVariantClustering(com.hartwig.hmftools.svannotation.analysis.StructuralVariantClustering) StructuralVariantAnalyzer(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnnotationDAO(com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO) CommandLine(org.apache.commons.cli.CommandLine) CosmicFusionModel(com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) SvClusterData(com.hartwig.hmftools.svannotation.analysis.SvClusterData) SvClusteringConfig(com.hartwig.hmftools.svannotation.analysis.SvClusteringConfig)

Aggregations

CosmicFusionModel (com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel)1 EnrichedStructuralVariant (com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant)1 StructuralVariant (com.hartwig.hmftools.common.variant.structural.StructuralVariant)1 DatabaseAccess (com.hartwig.hmftools.patientdb.dao.DatabaseAccess)1 StructuralVariantAnalysis (com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis)1 StructuralVariantAnalyzer (com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer)1 StructuralVariantClustering (com.hartwig.hmftools.svannotation.analysis.StructuralVariantClustering)1 SvClusterData (com.hartwig.hmftools.svannotation.analysis.SvClusterData)1 SvClusteringConfig (com.hartwig.hmftools.svannotation.analysis.SvClusteringConfig)1 StructuralVariantAnnotationDAO (com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO)1 CommandLine (org.apache.commons.cli.CommandLine)1 Options (org.apache.commons.cli.Options)1