Use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.
The class LoadStructuralVariants, method main:
/**
 * Entry point for loading structural variants: either reads SVs from VCF, enriches and
 * persists them (optionally clustering), or re-clusters SVs already stored in the database.
 *
 * @throws ParseException if the command line cannot be parsed
 * @throws IOException    on VCF / file read failures
 * @throws SQLException   on database access failures
 */
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);

    boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);

    // DB access is optional on the command line, so dbAccess may legitimately be null;
    // the branches below that need it check for null before dereferencing.
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;

    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }

    // Stand-alone mode: scan the VCFs and write out filtered SVs, then exit.
    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }

    // Stand-alone mode: re-read the VCFs and write fresh annotations to file, then exit.
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }

    // Only created when clustering is requested; remains null otherwise.
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        // "*" means all samples, which routes output into a single combined file.
        clusteringConfig.setUseCombinedOutputFile(tumorSample.equals("*"));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }

    // NOTE: a second, identical createFilteredPON block used to live here; it was
    // unreachable (the check above already returns) and has been removed.

    if (!loadFromDB) {
        // Full pipeline: read from VCF, enrich, persist, annotate, optionally cluster.
        if (dbAccess == null) {
            LOGGER.error("database connection required when not loading from DB; supply -{}", DB_URL);
            return;
        }

        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);

        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);

        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);

        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);

        // Re-read after writing so each variant carries its database primary id.
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);

        LOGGER.info("initialising MySQL annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));

        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));

        final StructuralVariantAnalyzer analyzer =
                new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);

        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);

        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }

        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // Load-from-DB mode only makes sense with clustering enabled. The previous
        // `assert runClustering` was a no-op with assertions disabled and would have
        // caused an NPE on svClusterer below; fail fast explicitly instead.
        if (!runClustering) {
            LOGGER.error("load-from-DB mode requires the clustering option -{}", CLUSTER_SVS);
            return;
        }
        if (dbAccess == null) {
            LOGGER.error("database connection required when loading from DB; supply -{}", DB_URL);
            return;
        }

        // Resolve the sample set: all samples ("" or "*"), a comma-separated list, or a single sample.
        List<String> samplesList = Lists.newArrayList();
        if (tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            String[] tumorList = tumorSample.split(",");
            samplesList = Arrays.stream(tumorList).collect(Collectors.toList());
        } else {
            samplesList.add(tumorSample);
        }

        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);

            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }

    // svClusterer is null unless clustering was requested; guard against NPE on close.
    if (svClusterer != null) {
        svClusterer.close();
    }

    LOGGER.info("run complete");
}
Use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.
The class MySQLAnnotator, method annotateBreakend:
/**
 * Annotates a single breakend of a structural variant with the genes overlapping its position.
 * For each gene found on the chromosome at the given position, collects identifiers, synonyms,
 * entrez ids and karyotype band, then builds per-transcript annotations; genes with no
 * surviving transcript annotation are dropped from the result.
 *
 * @param variant    the structural variant whose breakend is being annotated
 * @param isStart    true when annotating the start leg, false for the end leg
 * @param chromosome chromosome of the breakend
 * @param position   1-based position of the breakend on the chromosome
 * @return list of gene annotations (possibly empty), each carrying its transcript annotations
 */
@NotNull
private List<GeneAnnotation> annotateBreakend(@NotNull EnrichedStructuralVariant variant, final boolean isStart, @NotNull String chromosome, final long position) {
final List<GeneAnnotation> result = Lists.newArrayList();
// All genes overlapping this chromosome/position (query defined elsewhere in this class).
final Result<?> genes = queryGenesOnChromosomeAndPosition(chromosome, position);
for (final Record gene : genes) {
final UInteger geneId = gene.get(GENE.GENE_ID);
final String geneName = gene.get(XREF.DISPLAY_LABEL);
final String geneStableId = gene.get(GENE.STABLE_ID);
final UInteger canonicalTranscriptId = gene.get(GENE.CANONICAL_TRANSCRIPT_ID);
final int geneStrand = gene.get(GENE.SEQ_REGION_STRAND);
// ENTREZ_IDS is stored as a comma-separated string; parse into individual integer ids.
final List<Integer> entrezIds = Arrays.stream(gene.get(ENTREZ_IDS, String.class).split(",")).map(Integer::parseInt).collect(Collectors.toList());
final String karyotypeBand = gene.get(KARYOTYPE_BAND, String.class);
// Secondary query: gather all xref synonyms attached to this gene object.
final List<String> synonyms = context.select(XREF.DBPRIMARY_ACC).from(XREF).innerJoin(OBJECT_XREF).on(OBJECT_XREF.XREF_ID.eq(XREF.XREF_ID)).and(OBJECT_XREF.ENSEMBL_ID.eq(geneId)).and(OBJECT_XREF.ENSEMBL_OBJECT_TYPE.eq(ObjectXrefEnsemblObjectType.Gene)).fetch().stream().map(r -> r.get(XREF.DBPRIMARY_ACC)).collect(Collectors.toList());
final GeneAnnotation geneAnnotation = new GeneAnnotation(variant, isStart, geneName, geneStableId, geneStrand, synonyms, entrezIds, karyotypeBand);
// Fetch all transcripts of the gene and attempt to build an annotation for each.
final Result<?> transcripts = context.select(TRANSCRIPT.TRANSCRIPT_ID, TRANSCRIPT.STABLE_ID).from(TRANSCRIPT).where(TRANSCRIPT.GENE_ID.eq(geneId)).fetch();
for (final Record transcriptRecord : transcripts) {
// geneStrand > 0 presumably encodes the forward strand — TODO confirm against ensembl schema.
Transcript transcript = buildTranscript(geneAnnotation, transcriptRecord, position, canonicalTranscriptId, geneStrand > 0);
if (transcript != null) {
geneAnnotation.addTranscript(transcript);
}
}
// Only keep genes for which at least one transcript annotation was produced.
if (!geneAnnotation.transcripts().isEmpty()) {
result.add(geneAnnotation);
}
}
return result;
}
Use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.
The class StructuralVariantClustering, method loadFromEnrichedSVs:
/**
 * Resets clustering state and loads the given enriched structural variants for the sample,
 * converting each into the clustering data type. An empty input list leaves existing state untouched.
 *
 * @param sampleId sample the variants belong to
 * @param variants enriched structural variants to load
 */
public void loadFromEnrichedSVs(final String sampleId, final List<EnrichedStructuralVariant> variants) {
    // Nothing to load — keep whatever state is currently held.
    if (variants.isEmpty()) {
        return;
    }

    clearState();
    mSampleId = sampleId;

    // Wrap each enriched SV in the clustering data type and collect it.
    variants.forEach(enrichedSV -> mAllVariants.add(SvClusterData.from(enrichedSV)));
}
Use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.
The class StructuralVariantDAO, method readEnrichedData:
/**
 * Reads all structural variants stored for the given sample and reconstructs them as
 * enriched structural variants (start leg, end leg, and top-level fields).
 *
 * @param sample sample id to query
 * @return list of enriched structural variants for the sample (empty if none stored)
 */
@NotNull
List<EnrichedStructuralVariant> readEnrichedData(@NotNull final String sample) {
final List<EnrichedStructuralVariant> variants = Lists.newArrayList();

final Result<Record> records = context.select()
        .from(STRUCTURALVARIANT)
        .where(STRUCTURALVARIANT.SAMPLEID.eq(sample))
        .fetch();

for (Record svRecord : records) {
    // Rebuild the start breakend leg from its column set.
    final EnrichedStructuralVariantLeg startLeg = ImmutableEnrichedStructuralVariantLeg.builder()
            .chromosome(svRecord.getValue(STRUCTURALVARIANT.STARTCHROMOSOME))
            .position(svRecord.getValue(STRUCTURALVARIANT.STARTPOSITION))
            .orientation(svRecord.getValue(STRUCTURALVARIANT.STARTORIENTATION))
            .homology(svRecord.getValue(STRUCTURALVARIANT.STARTHOMOLOGYSEQUENCE))
            .alleleFrequency(svRecord.getValue(STRUCTURALVARIANT.STARTAF))
            .adjustedAlleleFrequency(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTAF))
            .adjustedCopyNumber(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBER))
            .adjustedCopyNumberChange(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBERCHANGE))
            .build();

    // Rebuild the end breakend leg symmetrically.
    final EnrichedStructuralVariantLeg endLeg = ImmutableEnrichedStructuralVariantLeg.builder()
            .chromosome(svRecord.getValue(STRUCTURALVARIANT.ENDCHROMOSOME))
            .position(svRecord.getValue(STRUCTURALVARIANT.ENDPOSITION))
            .orientation(svRecord.getValue(STRUCTURALVARIANT.ENDORIENTATION))
            .homology(svRecord.getValue(STRUCTURALVARIANT.ENDHOMOLOGYSEQUENCE))
            .alleleFrequency(svRecord.getValue(STRUCTURALVARIANT.ENDAF))
            .adjustedAlleleFrequency(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDENDAF))
            .adjustedCopyNumber(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBER))
            .adjustedCopyNumberChange(svRecord.getValue(STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBERCHANGE))
            .build();

    // The database primary key doubles as the external id (string form).
    final EnrichedStructuralVariant enriched = ImmutableEnrichedStructuralVariant.builder()
            .primaryKey(svRecord.getValue(STRUCTURALVARIANT.ID))
            .id(svRecord.getValue(STRUCTURALVARIANT.ID).toString())
            .start(startLeg)
            .end(endLeg)
            .insertSequence(svRecord.getValue(STRUCTURALVARIANT.INSERTSEQUENCE))
            .type(StructuralVariantType.fromAttribute(svRecord.getValue(STRUCTURALVARIANT.TYPE)))
            .ploidy(svRecord.getValue(STRUCTURALVARIANT.PLOIDY))
            .build();

    variants.add(enriched);
}

return variants;
}
Use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.
The class StructuralVariantDAO, method write:
/**
 * Replaces all stored structural variants for a sample: deletes the sample's existing
 * variants together with their dependent annotation rows, then batch-inserts the new set.
 *
 * @param sample   sample id whose variants are being replaced
 * @param variants new enriched structural variants to persist
 */
void write(@NotNull final String sample, @NotNull final List<EnrichedStructuralVariant> variants) {
// Single timestamp so every inserted row shares the same MODIFIED value.
Timestamp timestamp = new Timestamp(new Date().getTime());
// Collect the breakend ids belonging to this sample's variants up front, since the
// disruption/fusion rows are keyed by breakend id rather than by sample.
final Result<Record1<UInteger>> breakendsToDelete = context.select(STRUCTURALVARIANTBREAKEND.ID).from(STRUCTURALVARIANTBREAKEND).innerJoin(STRUCTURALVARIANT).on(STRUCTURALVARIANT.ID.eq(STRUCTURALVARIANTBREAKEND.STRUCTURALVARIANTID)).where(STRUCTURALVARIANT.SAMPLEID.eq(sample)).fetch();
// first delete annotations
// NOTE(review): deletion order looks deliberate — child rows (disruptions, fusions)
// before breakends, breakends before variants — presumably to satisfy FK constraints; confirm.
context.delete(STRUCTURALVARIANTDISRUPTION).where(STRUCTURALVARIANTDISRUPTION.BREAKENDID.in(breakendsToDelete)).execute();
context.delete(STRUCTURALVARIANTFUSION).where(STRUCTURALVARIANTFUSION.FIVEPRIMEBREAKENDID.in(breakendsToDelete)).execute();
context.delete(STRUCTURALVARIANTBREAKEND).where(STRUCTURALVARIANTBREAKEND.ID.in(breakendsToDelete)).execute();
// and then the structural variants
context.delete(STRUCTURALVARIANT).where(STRUCTURALVARIANT.SAMPLEID.eq(sample)).execute();
// Insert in batches of DB_BATCH_INSERT_SIZE to bound statement size.
// NOTE(review): InsertValuesStep21 is used as a raw type here; parameterizing it would
// need all 21 column type arguments.
for (List<EnrichedStructuralVariant> batch : Iterables.partition(variants, DB_BATCH_INSERT_SIZE)) {
InsertValuesStep21 inserter = context.insertInto(STRUCTURALVARIANT, STRUCTURALVARIANT.SAMPLEID, STRUCTURALVARIANT.STARTCHROMOSOME, STRUCTURALVARIANT.ENDCHROMOSOME, STRUCTURALVARIANT.STARTPOSITION, STRUCTURALVARIANT.ENDPOSITION, STRUCTURALVARIANT.STARTORIENTATION, STRUCTURALVARIANT.ENDORIENTATION, STRUCTURALVARIANT.STARTHOMOLOGYSEQUENCE, STRUCTURALVARIANT.ENDHOMOLOGYSEQUENCE, STRUCTURALVARIANT.INSERTSEQUENCE, STRUCTURALVARIANT.TYPE, STRUCTURALVARIANT.STARTAF, STRUCTURALVARIANT.ADJUSTEDSTARTAF, STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBER, STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBERCHANGE, STRUCTURALVARIANT.ENDAF, STRUCTURALVARIANT.ADJUSTEDENDAF, STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBER, STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBERCHANGE, STRUCTURALVARIANT.PLOIDY, STRUCTURALVARIANT.MODIFIED);
batch.forEach(entry -> addRecord(timestamp, inserter, sample, entry));
inserter.execute();
}
}
Aggregations