use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.
the class LoadStructuralVariants method main.
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;

    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }

    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }

    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into an SQL update once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }

    StructuralVariantClustering svClusterer = null;

    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);

        LOGGER.info("reading VCF file");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);

        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);

        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);

        // NEVA: read the variants back after writing so that the primaryId field is populated
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);

        LOGGER.info("initialising MySQL annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));

        LOGGER.info("loading Cosmic fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));

        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);

        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);

        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }

        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // KODU: Below assert feels somewhat risky!?
        assert runClustering;

        List<String> samplesList = Lists.newArrayList();

        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }

        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);

            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }

    // guard against the paths above where clustering was never enabled
    if (svClusterer != null) {
        svClusterer.close();
    }

    LOGGER.info("run complete");
}
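The databaseAccess(cmd) helper used above is not shown on this page. A minimal sketch of what it presumably does, inferred from the DB_USER/DB_PASS/DB_URL options and the new DatabaseAccess(userName, password, jdbcUrl) construction used by the other loaders below; this is an assumption, not the verified hmftools source:

// Hypothetical reconstruction of the databaseAccess helper (assumed, for illustration):
// builds a DatabaseAccess from the standard database command-line options.
@NotNull
private static DatabaseAccess databaseAccess(@NotNull final CommandLine cmd) throws SQLException {
    final String userName = cmd.getOptionValue(DB_USER);
    final String password = cmd.getOptionValue(DB_PASS);
    final String jdbcUrl = "jdbc:" + cmd.getOptionValue(DB_URL);
    return new DatabaseAccess(userName, password, jdbcUrl);
}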
use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.
the class LoadClinicalData method main.
public static void main(@NotNull final String[] args) throws ParseException, IOException, XMLStreamException, SQLException {
    LOGGER.info("Running patient-db v{}", VERSION);
    final Options basicOptions = createBasicOptions();
    final Options clinicalOptions = createLimsOptions();
    final Options ecrfOptions = createEcrfOptions();
    final Options options = mergeOptions(basicOptions, clinicalOptions, ecrfOptions);
    final CommandLine cmd = createCommandLine(args, options);

    final String runsFolderPath = cmd.getOptionValue(RUNS_DIR);
    final String userName = cmd.getOptionValue(DB_USER);
    final String password = cmd.getOptionValue(DB_PASS);
    final String databaseUrl = cmd.getOptionValue(DB_URL);
    final boolean loadRawEcrf = cmd.hasOption(DO_LOAD_RAW_ECRF);

    if (Utils.anyNull(runsFolderPath, userName, password, databaseUrl)) {
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("patient-db", options);
    } else {
        final File runDirectory = new File(runsFolderPath);
        if (runDirectory.isDirectory()) {
            LOGGER.info("Running clinical data import.");
            final List<RunContext> runContexts = RunsFolderReader.getRunContexts(runDirectory);
            final String jdbcUrl = "jdbc:" + databaseUrl;
            final DatabaseAccess dbWriter = new DatabaseAccess(userName, password, jdbcUrl);
            if (loadRawEcrf) {
                writeRawEcrf(ecrfOptions, cmd, runContexts, dbWriter);
            }
            writeClinicalData(clinicalOptions, cmd, runContexts, dbWriter);
        } else {
            if (!runDirectory.exists()) {
                LOGGER.warn("Dir {} does not exist.", runDirectory);
            }
            final HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("patient-db", basicOptions);
        }
    }
}
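The mergeOptions helper referenced above is also not defined on this page. A plausible sketch, assuming it simply folds the individual commons-cli Options objects into one so they can be parsed together:

// Assumed helper, not verified against the hmftools source: merges several
// commons-cli Options objects into a single parseable set.
@NotNull
private static Options mergeOptions(@NotNull final Options... allOptions) {
    final Options merged = new Options();
    for (final Options options : allOptions) {
        options.getOptions().forEach(merged::addOption);
    }
    return merged;
}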
use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.
the class LoadDrupEcrfData method main.
public static void main(@NotNull final String[] args) throws ParseException, IOException, XMLStreamException, SQLException {
    final Options options = createOptions();
    final CommandLine cmd = createCommandLine(args, options);

    final String userName = cmd.getOptionValue(DB_USER);
    final String password = cmd.getOptionValue(DB_PASS);
    final String databaseUrl = cmd.getOptionValue(DB_URL);
    final String ecrfFile = cmd.getOptionValue(ECRF_FILE);
    final String runsFolderPath = cmd.getOptionValue(RUNS_DIR);

    if (Utils.anyNull(userName, password, databaseUrl, ecrfFile, runsFolderPath)) {
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("patient-db - load DRUP ecrf", options);
    } else {
        final File runsDirectory = new File(runsFolderPath);
        if (runsDirectory.isDirectory()) {
            final String jdbcUrl = "jdbc:" + databaseUrl;
            final DatabaseAccess dbWriter = new DatabaseAccess(userName, password, jdbcUrl);
            dbWriter.clearDrupEcrf();
            LOGGER.info("Importing DRUP ecrf data from: {}", ecrfFile);
            final CpctEcrfModel model = CpctEcrfModel.loadFromXML(ecrfFile, new ImmutableFormStatusModel(Maps.newHashMap()));
            final List<RunContext> runContexts = RunsFolderReader.getRunContexts(runsDirectory);
            final Set<String> sequencedPatients = Utils.sequencedPatientIds(runContexts);
            LOGGER.info("Writing raw ecrf data for {} patients.", model.patientCount());
            dbWriter.writeDrupEcrf(model, sequencedPatients);
            LOGGER.info("Done writing raw ecrf data for {} patients!", model.patientCount());
        } else {
            if (!runsDirectory.exists()) {
                LOGGER.warn("Dir {} does not exist.", runsDirectory);
            }
            final HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("patient-db - load DRUP ecrf", options);
        }
    }
}
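Each loader on this page guards its required arguments with Utils.anyNull before connecting to the database. A minimal sketch of what such a varargs null-check looks like (assumed; the actual hmftools implementation is not shown here):

// Assumed varargs null-check, matching how the loaders on this page use it:
// returns true if any of the given arguments is null.
public static boolean anyNull(@Nullable final Object... arguments) {
    for (final Object argument : arguments) {
        if (argument == null) {
            return true;
        }
    }
    return false;
}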
use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.
the class LoadMetricsData method main.
public static void main(@NotNull final String[] args) throws ParseException, SQLException, IOException {
    final Options options = createOptions();
    final CommandLine cmd = createCommandLine(args, options);

    final String userName = cmd.getOptionValue(DB_USER);
    final String password = cmd.getOptionValue(DB_PASS);
    final String databaseUrl = cmd.getOptionValue(DB_URL);
    final String runDirectoryPath = cmd.getOptionValue(RUN_DIR);

    if (Utils.anyNull(userName, password, databaseUrl, runDirectoryPath)) {
        final HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("patient-db - load metrics data", options);
    } else {
        final File runDirectory = new File(runDirectoryPath);
        if (runDirectory.isDirectory()) {
            final String jdbcUrl = "jdbc:" + databaseUrl;
            final DatabaseAccess dbWriter = new DatabaseAccess(userName, password, jdbcUrl);
            final RunContext runContext = ProductionRunContextFactory.fromRunDirectory(runDirectory.toPath().toString());
            LOGGER.info("Extracting and writing metrics for {}", runContext.runDirectory());
            try {
                final WGSMetrics metrics = generateMetricsForRun(runContext);
                dbWriter.writeMetrics(runContext.tumorSample(), metrics);
            } catch (IOException e) {
                LOGGER.warn("Cannot extract metrics for {}.", runContext.runDirectory());
            }
        } else {
            if (!runDirectory.exists()) {
                LOGGER.warn("Dir {} does not exist.", runDirectory);
            }
            final HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("patient-db - load metrics data", options);
        }
    }
}
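Every loader on this page funnels its arguments through a createCommandLine helper. Presumably this is a thin wrapper around a commons-cli parser; a sketch under that assumption:

// Assumed wrapper around commons-cli; DefaultParser is the standard parser since commons-cli 1.3.
@NotNull
private static CommandLine createCommandLine(@NotNull final String[] args, @NotNull final Options options) throws ParseException {
    return new DefaultParser().parse(options, args);
}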
use of com.hartwig.hmftools.patientdb.dao.DatabaseAccess in project hmftools by hartwigmedical.
the class LoadPurpleData method main.
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final DatabaseAccess dbAccess = databaseAccess(cmd);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    final String purplePath = cmd.getOptionValue(PURPLE_DIR);

    LOGGER.info("Persisting purity data");
    final PurpleQC purpleQC = PurpleQCFile.read(PurpleQCFile.generateFilename(purplePath, tumorSample));
    final PurityContext purityContext = FittedPurityFile.read(purplePath, tumorSample);
    final List<FittedPurity> bestFitPerPurity = FittedPurityRangeFile.read(purplePath, tumorSample);
    dbAccess.writePurity(tumorSample, purityContext, purpleQC);
    dbAccess.writeBestFitPerPurity(tumorSample, bestFitPerPurity);

    LOGGER.info("Persisting copy numbers");
    final List<PurpleCopyNumber> copyNumbers = PurpleCopyNumberFile.read(purplePath, tumorSample);
    dbAccess.writeCopynumbers(tumorSample, copyNumbers);

    LOGGER.info("Persisting gene copy numbers");
    final List<GeneCopyNumber> geneCopyNumbers = GeneCopyNumberFile.read(GeneCopyNumberFile.generateFilename(purplePath, tumorSample));
    dbAccess.writeGeneCopynumberRegions(tumorSample, geneCopyNumbers);

    LOGGER.info("Complete");
}
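LoadPurpleData reads all of its inputs from a handful of options. A hedged sketch of what its createBasicOptions could look like; the option constants come from the getOptionValue calls above, while the descriptions are illustrative and not taken from the source:

// Illustrative option definitions inferred from the usage above (assumed, not verified).
@NotNull
private static Options createBasicOptions() {
    final Options options = new Options();
    options.addOption(SAMPLE, true, "Tumor sample for which to load the purple data.");
    options.addOption(PURPLE_DIR, true, "Directory holding the purple output files.");
    options.addOption(DB_USER, true, "Database user name.");
    options.addOption(DB_PASS, true, "Database password.");
    options.addOption(DB_URL, true, "Database URL (prefixed with 'jdbc:' by the loader).");
    return options;
}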