Search in sources:

Example 1 with DatabaseAccess

use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.

The class LoadStructuralVariants, method main.

public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    // Null when no DB URL is supplied; only the two VCF-file modes below can run without a database.
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;
    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }
    if (createFilteredPON) {
        // PON mode: read the VCF files (including filtered SVs), write them out, then exit.
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }
    // Both remaining modes (load from VCF, reload from DB) require database access;
    // previously a missing DB URL surfaced only as an NPE further down.
    if (dbAccess == null) {
        LOGGER.error("database access not configured - the DB URL option is required for this mode");
        return;
    }
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        // A sample of '*' means all samples, whose results go into one combined output file.
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // Re-read after the write so the DB-assigned primaryId field is populated.
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);
        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }
        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // DB-reload mode only runs clustering, so clustering must have been requested.
        // This used to be a bare 'assert runClustering' - a no-op unless -ea is enabled.
        if (!runClustering) {
            LOGGER.error("loading from the DB requires the clustering option to be set");
            return;
        }
        // Resolve sample selection: empty or '*' = every sample in the DB,
        // otherwise a comma-separated list or a single sample name.
        List<String> samplesList = Lists.newArrayList();
        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }
        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }
    // Clusterer is null when clustering was not requested; guard against an NPE on shutdown.
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
Also used : Options(org.apache.commons.cli.Options) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnalysis(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis) StructuralVariantClustering(com.hartwig.hmftools.svannotation.analysis.StructuralVariantClustering) StructuralVariantAnalyzer(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnnotationDAO(com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO) CommandLine(org.apache.commons.cli.CommandLine) CosmicFusionModel(com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) SvClusterData(com.hartwig.hmftools.svannotation.analysis.SvClusterData) SvClusteringConfig(com.hartwig.hmftools.svannotation.analysis.SvClusteringConfig)

Example 2 with DatabaseAccess

use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.

The class LoadClinicalData, method main.

public static void main(@NotNull final String[] args) throws ParseException, IOException, XMLStreamException, SQLException {
    LOGGER.info("Running patient-db v{}", VERSION);
    // Assemble the combined option set from the three option groups.
    final Options basicOptions = createBasicOptions();
    final Options limsOptions = createLimsOptions();
    final Options ecrfOptions = createEcrfOptions();
    final Options allOptions = mergeOptions(basicOptions, limsOptions, ecrfOptions);
    final CommandLine cmd = createCommandLine(args, allOptions);
    final String runsFolder = cmd.getOptionValue(RUNS_DIR);
    final String user = cmd.getOptionValue(DB_USER);
    final String pass = cmd.getOptionValue(DB_PASS);
    final String dbUrl = cmd.getOptionValue(DB_URL);
    final boolean loadRawEcrf = cmd.hasOption(DO_LOAD_RAW_ECRF);
    // Any missing mandatory argument: print full usage and stop.
    if (Utils.anyNull(runsFolder, user, pass, dbUrl)) {
        new HelpFormatter().printHelp("patient-db", allOptions);
        return;
    }
    final File runDirectory = new File(runsFolder);
    if (!runDirectory.isDirectory()) {
        if (!runDirectory.exists()) {
            LOGGER.warn("dir " + runDirectory + " does not exist.");
        }
        new HelpFormatter().printHelp("patient-db", basicOptions);
        return;
    }
    LOGGER.info("Running clinical data import.");
    final List<RunContext> runContexts = RunsFolderReader.getRunContexts(runDirectory);
    final DatabaseAccess dbWriter = new DatabaseAccess(user, pass, "jdbc:" + dbUrl);
    // Raw ecrf is optional; clinical data is always written.
    if (loadRawEcrf) {
        writeRawEcrf(ecrfOptions, cmd, runContexts, dbWriter);
    }
    writeClinicalData(limsOptions, cmd, runContexts, dbWriter);
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) RunContext(com.hartwig.hmftools.common.context.RunContext) File(java.io.File)

Example 3 with DatabaseAccess

use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.

The class LoadDrupEcrfData, method main.

public static void main(@NotNull final String[] args) throws ParseException, IOException, XMLStreamException, SQLException {
    final Options options = createOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final String user = cmd.getOptionValue(DB_USER);
    final String pass = cmd.getOptionValue(DB_PASS);
    final String dbUrl = cmd.getOptionValue(DB_URL);
    final String ecrfFilePath = cmd.getOptionValue(ECRF_FILE);
    final String runsFolder = cmd.getOptionValue(RUNS_DIR);
    // Any missing mandatory argument: print usage and stop.
    if (Utils.anyNull(user, pass, dbUrl, ecrfFilePath, runsFolder)) {
        new HelpFormatter().printHelp("patient-db - load DRUP ecrf", options);
        return;
    }
    final File runsDirectory = new File(runsFolder);
    if (!runsDirectory.isDirectory()) {
        if (!runsDirectory.exists()) {
            LOGGER.warn("dir " + runsDirectory + " does not exist.");
        }
        new HelpFormatter().printHelp("patient-db - load DRUP ecrf", options);
        return;
    }
    // Wipe existing DRUP ecrf data before importing the fresh XML dump.
    final DatabaseAccess dbWriter = new DatabaseAccess(user, pass, "jdbc:" + dbUrl);
    dbWriter.clearDrupEcrf();
    LOGGER.info("Importing DRUP ecrf data from: {}", ecrfFilePath);
    final CpctEcrfModel model = CpctEcrfModel.loadFromXML(ecrfFilePath, new ImmutableFormStatusModel(Maps.newHashMap()));
    final List<RunContext> runContexts = RunsFolderReader.getRunContexts(runsDirectory);
    final Set<String> sequencedPatients = Utils.sequencedPatientIds(runContexts);
    LOGGER.info("Writing raw ecrf data for " + model.patientCount() + " patients.");
    dbWriter.writeDrupEcrf(model, sequencedPatients);
    LOGGER.info("Done writing raw ecrf data for " + model.patientCount() + " patients!");
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) RunContext(com.hartwig.hmftools.common.context.RunContext) File(java.io.File) ImmutableFormStatusModel(com.hartwig.hmftools.common.ecrf.formstatus.ImmutableFormStatusModel) CpctEcrfModel(com.hartwig.hmftools.common.ecrf.CpctEcrfModel)

Example 4 with DatabaseAccess

use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.

The class LoadMetricsData, method main.

public static void main(@NotNull final String[] args) throws ParseException, SQLException, IOException {
    final Options options = createOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final String userName = cmd.getOptionValue(DB_USER);
    final String password = cmd.getOptionValue(DB_PASS);
    final String databaseUrl = cmd.getOptionValue(DB_URL);
    final String runDirectoryPath = cmd.getOptionValue(RUN_DIR);
    if (Utils.anyNull(userName, password, databaseUrl, runDirectoryPath)) {
        // Missing mandatory argument(s): print usage.
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("patient-db - load metrics data", options);
    } else {
        final File runDirectory = new File(runDirectoryPath);
        if (runDirectory.isDirectory()) {
            final String jdbcUrl = "jdbc:" + databaseUrl;
            final DatabaseAccess dbWriter = new DatabaseAccess(userName, password, jdbcUrl);
            RunContext runContext = ProductionRunContextFactory.fromRunDirectory(runDirectory.toPath().toString());
            LOGGER.info(String.format("Extracting and writing metrics for %s", runContext.runDirectory()));
            try {
                WGSMetrics metrics = generateMetricsForRun(runContext);
                dbWriter.writeMetrics(runContext.tumorSample(), metrics);
            } catch (IOException e) {
                // Pass the exception to the logger rather than silently dropping the cause.
                LOGGER.warn(String.format("Cannot extract metrics for %s.", runContext.runDirectory()), e);
            }
        } else {
            if (!runDirectory.exists()) {
                LOGGER.warn("dir " + runDirectory + " does not exist.");
            }
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("patient-db - load metrics data", options);
        }
    }
}
Also used : HelpFormatter(org.apache.commons.cli.HelpFormatter) Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) WGSMetrics(com.hartwig.hmftools.common.metrics.WGSMetrics) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) RunContext(com.hartwig.hmftools.common.context.RunContext) IOException(java.io.IOException) File(java.io.File) WGSMetricsFile(com.hartwig.hmftools.common.metrics.WGSMetricsFile)

Example 5 with DatabaseAccess

use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.

The class LoadPurpleData, method main.

public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final DatabaseAccess db = databaseAccess(cmd);
    final String sample = cmd.getOptionValue(SAMPLE);
    final String purpleDirectory = cmd.getOptionValue(PURPLE_DIR);
    // Purity context, QC metrics and the best fit per purity range.
    LOGGER.info("Persisting purity data");
    final PurpleQC qcMetrics = PurpleQCFile.read(PurpleQCFile.generateFilename(purpleDirectory, sample));
    final PurityContext context = FittedPurityFile.read(purpleDirectory, sample);
    final List<FittedPurity> bestFits = FittedPurityRangeFile.read(purpleDirectory, sample);
    db.writePurity(sample, context, qcMetrics);
    db.writeBestFitPerPurity(sample, bestFits);
    // Somatic copy number regions.
    LOGGER.info("Persisting copy numbers");
    final List<PurpleCopyNumber> regions = PurpleCopyNumberFile.read(purpleDirectory, sample);
    db.writeCopynumbers(sample, regions);
    // Per-gene copy number summary.
    LOGGER.info("Persisting gene copy numbers");
    final List<GeneCopyNumber> geneRegions = GeneCopyNumberFile.read(GeneCopyNumberFile.generateFilename(purpleDirectory, sample));
    db.writeGeneCopynumberRegions(sample, geneRegions);
    LOGGER.info("Complete");
}
Also used : Options(org.apache.commons.cli.Options) CommandLine(org.apache.commons.cli.CommandLine) PurpleQC(com.hartwig.hmftools.common.purple.qc.PurpleQC) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) PurityContext(com.hartwig.hmftools.common.purple.purity.PurityContext) FittedPurity(com.hartwig.hmftools.common.purple.purity.FittedPurity) PurpleCopyNumber(com.hartwig.hmftools.common.purple.copynumber.PurpleCopyNumber) GeneCopyNumber(com.hartwig.hmftools.common.gene.GeneCopyNumber)

Aggregations

DatabaseAccess (com.hartwig.hmftools.patientdb.dao.DatabaseAccess)7 CommandLine (org.apache.commons.cli.CommandLine)7 Options (org.apache.commons.cli.Options)7 File (java.io.File)4 RunContext (com.hartwig.hmftools.common.context.RunContext)3 HelpFormatter (org.apache.commons.cli.HelpFormatter)3 PurpleCopyNumber (com.hartwig.hmftools.common.purple.copynumber.PurpleCopyNumber)2 PurityContext (com.hartwig.hmftools.common.purple.purity.PurityContext)2 CosmicFusionModel (com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel)1 CpctEcrfModel (com.hartwig.hmftools.common.ecrf.CpctEcrfModel)1 ImmutableFormStatusModel (com.hartwig.hmftools.common.ecrf.formstatus.ImmutableFormStatusModel)1 CanonicalTranscript (com.hartwig.hmftools.common.gene.CanonicalTranscript)1 GeneCopyNumber (com.hartwig.hmftools.common.gene.GeneCopyNumber)1 WGSMetrics (com.hartwig.hmftools.common.metrics.WGSMetrics)1 WGSMetricsFile (com.hartwig.hmftools.common.metrics.WGSMetricsFile)1 PurityAdjuster (com.hartwig.hmftools.common.purple.PurityAdjuster)1 FittedPurity (com.hartwig.hmftools.common.purple.purity.FittedPurity)1 PurpleQC (com.hartwig.hmftools.common.purple.qc.PurpleQC)1 FittedRegion (com.hartwig.hmftools.common.purple.region.FittedRegion)1 GenomeRegion (com.hartwig.hmftools.common.region.GenomeRegion)1