Search in sources :

Example 1 with EnrichedStructuralVariant

use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.

the class LoadStructuralVariants method main.

/**
 * Command-line entry point for loading, annotating and clustering structural variants.
 *
 * <p>Depending on the options supplied, exactly one of these paths runs:
 * <ul>
 *   <li>{@code WRITE_FILTERED_SVS}: the VCF files are processed by a {@code FilteredSVWriter}, then the method returns;</li>
 *   <li>{@code REANNOTATE_FROM_VCFS}: the VCF files are processed by an {@code SvVCFAnnotator}, then the method returns;</li>
 *   <li>without {@code LOAD_FROM_DB}: the VCF is read, enriched, written to the database, read back, analysed,
 *       optionally clustered, and the resulting annotations are persisted;</li>
 *   <li>with {@code LOAD_FROM_DB}: variants already stored in the database are clustered sample by sample.</li>
 * </ul>
 *
 * <p>The last two paths need a database connection, and the {@code LOAD_FROM_DB} path additionally needs
 * {@code CLUSTER_SVS}; if either is missing an error is logged and the method returns without doing any work.
 *
 * @param args raw command-line arguments
 * @throws ParseException declared for the helpers called here (presumably command-line parsing)
 * @throws IOException declared for the helpers called here (presumably file access)
 * @throws SQLException declared for the helpers called here (presumably database access)
 */
public static void main(@NotNull final String[] args) throws ParseException, IOException, SQLException {
    final Options options = createBasicOptions();
    final CommandLine cmd = createCommandLine(args, options);
    final boolean loadFromDB = cmd.hasOption(LOAD_FROM_DB);
    final String tumorSample = cmd.getOptionValue(SAMPLE);
    final boolean runClustering = cmd.hasOption(CLUSTER_SVS);
    final boolean createFilteredPON = cmd.hasOption(WRITE_FILTERED_SVS);
    final boolean reannotateFromVCFs = cmd.hasOption(REANNOTATE_FROM_VCFS);
    final DatabaseAccess dbAccess = cmd.hasOption(DB_URL) ? databaseAccess(cmd) : null;
    if (cmd.hasOption(LOG_DEBUG)) {
        Configurator.setRootLevel(Level.DEBUG);
    }
    if (createFilteredPON) {
        LOGGER.info("reading VCF files including filtered SVs");
        FilteredSVWriter filteredSvWriter = new FilteredSVWriter(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        filteredSvWriter.processVcfFiles();
        LOGGER.info("reads complete");
        return;
    }
    if (reannotateFromVCFs) {
        LOGGER.info("reading VCF files to re-annotate");
        // for now just re-read the VCFs and write out new annotations to file
        // may later on turn into update SQL once clustering does the same
        SvVCFAnnotator vcfAnnotator = new SvVCFAnnotator(cmd.getOptionValue(VCF_FILE), cmd.getOptionValue(DATA_OUTPUT_PATH));
        vcfAnnotator.processVcfFiles();
        return;
    }
    // Everything below talks to the database; fail with a clear message instead of a NullPointerException.
    if (dbAccess == null) {
        LOGGER.error("database connection options are required to load or cluster structural variants");
        return;
    }
    // Clustering is the only thing the load-from-DB path does, so it cannot run without it.
    // This replaces an assert, which is silently skipped unless the JVM runs with -ea.
    if (loadFromDB && !runClustering) {
        LOGGER.error("loading from the database requires clustering to be enabled");
        return;
    }
    StructuralVariantClustering svClusterer = null;
    if (runClustering) {
        LOGGER.info("will run clustering logic");
        SvClusteringConfig clusteringConfig = new SvClusteringConfig();
        clusteringConfig.setOutputCsvPath(cmd.getOptionValue(DATA_OUTPUT_PATH));
        clusteringConfig.setBaseDistance(Integer.parseInt(cmd.getOptionValue(CLUSTER_BASE_DISTANCE, "0")));
        // constant-first comparison: getOptionValue returns null when the sample option is absent
        clusteringConfig.setUseCombinedOutputFile("*".equals(tumorSample));
        clusteringConfig.setSvPONFile(cmd.getOptionValue(SV_PON_FILE, ""));
        clusteringConfig.setFragileSiteFile(cmd.getOptionValue(FRAGILE_SITE_FILE, ""));
        clusteringConfig.setLineElementFile(cmd.getOptionValue(LINE_ELEMENT_FILE, ""));
        clusteringConfig.setExternalAnnotationsFile(cmd.getOptionValue(EXTERNAL_ANNOTATIONS, ""));
        svClusterer = new StructuralVariantClustering(clusteringConfig);
    }
    if (!loadFromDB) {
        boolean skipAnnotations = cmd.hasOption(SKIP_ANNOTATIONS);
        LOGGER.info("reading VCF File");
        final List<StructuralVariant> variants = readFromVcf(cmd.getOptionValue(VCF_FILE), true);
        LOGGER.info("enriching structural variants based on purple data");
        final List<EnrichedStructuralVariant> enrichedVariantWithoutPrimaryId = enrichStructuralVariants(variants, dbAccess, tumorSample);
        LOGGER.info("persisting variants to database");
        dbAccess.writeStructuralVariants(tumorSample, enrichedVariantWithoutPrimaryId);
        // NEVA: We read after we write to populate the primaryId field
        final List<EnrichedStructuralVariant> enrichedVariants = dbAccess.readStructuralVariants(tumorSample);
        LOGGER.info("initialising MqSql annotator");
        final VariantAnnotator annotator = MySQLAnnotator.make("jdbc:" + cmd.getOptionValue(ENSEMBL_DB));
        LOGGER.info("loading Cosmic Fusion data");
        final CosmicFusionModel cosmicGeneFusions = CosmicFusions.readFromCSV(cmd.getOptionValue(FUSION_CSV));
        final StructuralVariantAnalyzer analyzer = new StructuralVariantAnalyzer(annotator, HmfGenePanelSupplier.hmfPanelGeneList(), cosmicGeneFusions);
        LOGGER.info("analyzing structural variants for impact via disruptions and fusions");
        final StructuralVariantAnalysis analysis = analyzer.run(enrichedVariants, skipAnnotations);
        if (runClustering) {
            svClusterer.loadFromEnrichedSVs(tumorSample, enrichedVariants);
            svClusterer.runClustering();
        }
        LOGGER.info("persisting annotations to database");
        final StructuralVariantAnnotationDAO annotationDAO = new StructuralVariantAnnotationDAO(dbAccess.context());
        annotationDAO.write(analysis);
    } else {
        // A missing, empty or "*" sample means every sample in the database;
        // a comma-separated value is an explicit list; anything else is a single sample.
        final List<String> samplesList;
        if (tumorSample == null || tumorSample.isEmpty() || tumorSample.equals("*")) {
            samplesList = getStructuralVariantSamplesList(dbAccess);
        } else if (tumorSample.contains(",")) {
            samplesList = Arrays.stream(tumorSample.split(",")).collect(Collectors.toList());
        } else {
            samplesList = Lists.newArrayList(tumorSample);
        }
        int count = 0;
        for (final String sample : samplesList) {
            ++count;
            LOGGER.info("clustering for sample({}), total({})", sample, count);
            List<SvClusterData> svClusterData = queryStructuralVariantData(dbAccess, sample);
            svClusterer.loadFromDatabase(sample, svClusterData);
            svClusterer.runClustering();
        }
    }
    // The clusterer only exists when clustering was requested.
    if (svClusterer != null) {
        svClusterer.close();
    }
    LOGGER.info("run complete");
}
Also used : Options(org.apache.commons.cli.Options) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnalysis(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis) StructuralVariantClustering(com.hartwig.hmftools.svannotation.analysis.StructuralVariantClustering) StructuralVariantAnalyzer(com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) StructuralVariantAnnotationDAO(com.hartwig.hmftools.svannotation.dao.StructuralVariantAnnotationDAO) CommandLine(org.apache.commons.cli.CommandLine) CosmicFusionModel(com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel) DatabaseAccess(com.hartwig.hmftools.patientdb.dao.DatabaseAccess) SvClusterData(com.hartwig.hmftools.svannotation.analysis.SvClusterData) SvClusteringConfig(com.hartwig.hmftools.svannotation.analysis.SvClusteringConfig)

Example 2 with EnrichedStructuralVariant

use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.

the class MySQLAnnotator method annotateBreakend.

/**
 * Builds the gene annotations for one breakend of a structural variant.
 *
 * <p>For every gene returned by {@code queryGenesOnChromosomeAndPosition}, the gene's synonyms and
 * transcripts are looked up. A gene is included in the result only if {@code buildTranscript}
 * produced at least one non-null transcript for it.
 *
 * @param variant    the variant the breakend belongs to; passed through to each {@link GeneAnnotation}
 * @param isStart    passed through to each {@link GeneAnnotation}
 * @param chromosome chromosome used for the gene lookup
 * @param position   position used for the gene lookup and for building transcripts
 * @return the annotations of all genes that ended up with at least one transcript; possibly empty
 */
@NotNull
private List<GeneAnnotation> annotateBreakend(@NotNull EnrichedStructuralVariant variant, final boolean isStart, @NotNull String chromosome, final long position) {
    final List<GeneAnnotation> annotations = Lists.newArrayList();
    final Result<?> geneRecords = queryGenesOnChromosomeAndPosition(chromosome, position);

    for (final Record geneRecord : geneRecords) {
        final UInteger geneId = geneRecord.get(GENE.GENE_ID);
        final String geneName = geneRecord.get(XREF.DISPLAY_LABEL);
        final String geneStableId = geneRecord.get(GENE.STABLE_ID);
        final UInteger canonicalTranscriptId = geneRecord.get(GENE.CANONICAL_TRANSCRIPT_ID);
        final int geneStrand = geneRecord.get(GENE.SEQ_REGION_STRAND);

        // NOTE(review): every comma-separated token goes through Integer.parseInt - confirm the
        // query never yields a null or empty ENTREZ_IDS value.
        final String entrezIdsCsv = geneRecord.get(ENTREZ_IDS, String.class);
        final List<Integer> entrezIds = Arrays.stream(entrezIdsCsv.split(","))
                .map(Integer::parseInt)
                .collect(Collectors.toList());

        final String karyotypeBand = geneRecord.get(KARYOTYPE_BAND, String.class);

        // Primary accessions of all xrefs attached to this gene.
        final List<String> synonyms = context.select(XREF.DBPRIMARY_ACC)
                .from(XREF)
                .innerJoin(OBJECT_XREF)
                .on(OBJECT_XREF.XREF_ID.eq(XREF.XREF_ID))
                .and(OBJECT_XREF.ENSEMBL_ID.eq(geneId))
                .and(OBJECT_XREF.ENSEMBL_OBJECT_TYPE.eq(ObjectXrefEnsemblObjectType.Gene))
                .fetch()
                .stream()
                .map(r -> r.get(XREF.DBPRIMARY_ACC))
                .collect(Collectors.toList());

        final GeneAnnotation annotation =
                new GeneAnnotation(variant, isStart, geneName, geneStableId, geneStrand, synonyms, entrezIds, karyotypeBand);

        final Result<?> transcriptRecords = context.select(TRANSCRIPT.TRANSCRIPT_ID, TRANSCRIPT.STABLE_ID)
                .from(TRANSCRIPT)
                .where(TRANSCRIPT.GENE_ID.eq(geneId))
                .fetch();

        final boolean isPositiveStrand = geneStrand > 0;
        for (final Record transcriptRecord : transcriptRecords) {
            final Transcript built = buildTranscript(annotation, transcriptRecord, position, canonicalTranscriptId, isPositiveStrand);
            if (built != null) {
                annotation.addTranscript(built);
            }
        }

        // Genes without any usable transcript are dropped from the result.
        if (annotation.transcripts().isEmpty()) {
            continue;
        }
        annotations.add(annotation);
    }
    return annotations;
}
Also used : UInteger(org.jooq.types.UInteger) EXON_TRANSCRIPT(org.ensembl.database.homo_sapiens_core.Tables.EXON_TRANSCRIPT) Arrays(java.util.Arrays) Connection(java.sql.Connection) DSL(org.jooq.impl.DSL) Xref(org.ensembl.database.homo_sapiens_core.tables.Xref) StructuralVariantAnnotation(com.hartwig.hmftools.svannotation.annotations.StructuralVariantAnnotation) SEQ_REGION(org.ensembl.database.homo_sapiens_core.Tables.SEQ_REGION) DSL.decode(org.jooq.impl.DSL.decode) ObjectXrefEnsemblObjectType(org.ensembl.database.homo_sapiens_core.enums.ObjectXrefEnsemblObjectType) TRANSCRIPT(org.ensembl.database.homo_sapiens_core.Tables.TRANSCRIPT) Condition(org.jooq.Condition) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) KARYOTYPE(org.ensembl.database.homo_sapiens_core.Tables.KARYOTYPE) UInteger(org.jooq.types.UInteger) GeneStatus(org.ensembl.database.homo_sapiens_core.enums.GeneStatus) DSLContext(org.jooq.DSLContext) SQLDialect(org.jooq.SQLDialect) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) Record(org.jooq.Record) GeneAnnotation(com.hartwig.hmftools.svannotation.annotations.GeneAnnotation) OBJECT_XREF(org.ensembl.database.homo_sapiens_core.Tables.OBJECT_XREF) Result(org.jooq.Result) Collectors(java.util.stream.Collectors) Transcript(com.hartwig.hmftools.svannotation.annotations.Transcript) XREF(org.ensembl.database.homo_sapiens_core.Tables.XREF) COORD_SYSTEM(org.ensembl.database.homo_sapiens_core.Tables.COORD_SYSTEM) Nullable(org.jetbrains.annotations.Nullable) EXON(org.ensembl.database.homo_sapiens_core.Tables.EXON) List(java.util.List) DSL.groupConcatDistinct(org.jooq.impl.DSL.groupConcatDistinct) GENE(org.ensembl.database.homo_sapiens_core.Tables.GENE) NotNull(org.jetbrains.annotations.NotNull) DriverManager(java.sql.DriverManager) GeneAnnotation(com.hartwig.hmftools.svannotation.annotations.GeneAnnotation) Transcript(com.hartwig.hmftools.svannotation.annotations.Transcript) 
UInteger(org.jooq.types.UInteger) Record(org.jooq.Record) NotNull(org.jetbrains.annotations.NotNull)

Example 3 with EnrichedStructuralVariant

use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.

the class StructuralVariantClustering method loadFromEnrichedSVs.

/**
 * Replaces the currently loaded variants with cluster data derived from the given enriched variants.
 *
 * <p>An empty list is a no-op: the existing state and sample id are left untouched.
 *
 * @param sampleId id of the sample the variants belong to
 * @param variants enriched structural variants to convert via {@code SvClusterData.from}
 */
public void loadFromEnrichedSVs(final String sampleId, final List<EnrichedStructuralVariant> variants) {
    if (!variants.isEmpty()) {
        clearState();
        mSampleId = sampleId;
        variants.forEach(variant -> mAllVariants.add(SvClusterData.from(variant)));
    }
}
Also used : EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant)

Example 4 with EnrichedStructuralVariant

use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.

the class StructuralVariantDAO method readEnrichedData.

/**
 * Reads all rows of the STRUCTURALVARIANT table for one sample and maps each to an
 * {@link EnrichedStructuralVariant}.
 *
 * <p>The row id is used both as the variant's primary key and, in string form, as its id.
 *
 * @param sample sample id to select rows for
 * @return one enriched variant per matching row, in the order the query returned them
 */
@NotNull
List<EnrichedStructuralVariant> readEnrichedData(@NotNull final String sample) {
    final List<EnrichedStructuralVariant> variants = Lists.newArrayList();

    for (final Record row : context.select().from(STRUCTURALVARIANT).where(STRUCTURALVARIANT.SAMPLEID.eq(sample)).fetch()) {
        final EnrichedStructuralVariantLeg startLeg = ImmutableEnrichedStructuralVariantLeg.builder()
                .chromosome(row.getValue(STRUCTURALVARIANT.STARTCHROMOSOME))
                .position(row.getValue(STRUCTURALVARIANT.STARTPOSITION))
                .orientation(row.getValue(STRUCTURALVARIANT.STARTORIENTATION))
                .homology(row.getValue(STRUCTURALVARIANT.STARTHOMOLOGYSEQUENCE))
                .alleleFrequency(row.getValue(STRUCTURALVARIANT.STARTAF))
                .adjustedAlleleFrequency(row.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTAF))
                .adjustedCopyNumber(row.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBER))
                .adjustedCopyNumberChange(row.getValue(STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBERCHANGE))
                .build();

        final EnrichedStructuralVariantLeg endLeg = ImmutableEnrichedStructuralVariantLeg.builder()
                .chromosome(row.getValue(STRUCTURALVARIANT.ENDCHROMOSOME))
                .position(row.getValue(STRUCTURALVARIANT.ENDPOSITION))
                .orientation(row.getValue(STRUCTURALVARIANT.ENDORIENTATION))
                .homology(row.getValue(STRUCTURALVARIANT.ENDHOMOLOGYSEQUENCE))
                .alleleFrequency(row.getValue(STRUCTURALVARIANT.ENDAF))
                .adjustedAlleleFrequency(row.getValue(STRUCTURALVARIANT.ADJUSTEDENDAF))
                .adjustedCopyNumber(row.getValue(STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBER))
                .adjustedCopyNumberChange(row.getValue(STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBERCHANGE))
                .build();

        variants.add(ImmutableEnrichedStructuralVariant.builder()
                .primaryKey(row.getValue(STRUCTURALVARIANT.ID))
                .id(row.getValue(STRUCTURALVARIANT.ID).toString())
                .start(startLeg)
                .end(endLeg)
                .insertSequence(row.getValue(STRUCTURALVARIANT.INSERTSEQUENCE))
                .type(StructuralVariantType.fromAttribute(row.getValue(STRUCTURALVARIANT.TYPE)))
                .ploidy(row.getValue(STRUCTURALVARIANT.PLOIDY))
                .build());
    }
    return variants;
}
Also used : ImmutableEnrichedStructuralVariantLeg(com.hartwig.hmftools.common.variant.structural.ImmutableEnrichedStructuralVariantLeg) EnrichedStructuralVariantLeg(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariantLeg) EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) ImmutableEnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.ImmutableEnrichedStructuralVariant) Record(org.jooq.Record) NotNull(org.jetbrains.annotations.NotNull)

Example 5 with EnrichedStructuralVariant

use of com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant in project hmftools by hartwigmedical.

the class StructuralVariantDAO method write.

/**
 * Replaces all stored structural variants of a sample with the given ones.
 *
 * <p>Existing data is removed first: disruptions and fusions referencing the sample's breakends,
 * then the breakends themselves, then the sample's structural variants. The new variants are
 * inserted in batches of {@code DB_BATCH_INSERT_SIZE}, all carrying the same modification timestamp.
 *
 * @param sample   sample id whose variants are replaced
 * @param variants the variants to store for the sample
 */
void write(@NotNull final String sample, @NotNull final List<EnrichedStructuralVariant> variants) {
    // One timestamp, taken up front, is shared by every inserted row.
    final Timestamp modified = new Timestamp(System.currentTimeMillis());

    final Result<Record1<UInteger>> staleBreakendIds = context.select(STRUCTURALVARIANTBREAKEND.ID)
            .from(STRUCTURALVARIANTBREAKEND)
            .innerJoin(STRUCTURALVARIANT)
            .on(STRUCTURALVARIANT.ID.eq(STRUCTURALVARIANTBREAKEND.STRUCTURALVARIANTID))
            .where(STRUCTURALVARIANT.SAMPLEID.eq(sample))
            .fetch();

    // Annotations go first, then the breakends they reference.
    context.delete(STRUCTURALVARIANTDISRUPTION)
            .where(STRUCTURALVARIANTDISRUPTION.BREAKENDID.in(staleBreakendIds))
            .execute();
    context.delete(STRUCTURALVARIANTFUSION)
            .where(STRUCTURALVARIANTFUSION.FIVEPRIMEBREAKENDID.in(staleBreakendIds))
            .execute();
    context.delete(STRUCTURALVARIANTBREAKEND)
            .where(STRUCTURALVARIANTBREAKEND.ID.in(staleBreakendIds))
            .execute();

    // Finally the structural variants of the sample themselves.
    context.delete(STRUCTURALVARIANT)
            .where(STRUCTURALVARIANT.SAMPLEID.eq(sample))
            .execute();

    for (List<EnrichedStructuralVariant> chunk : Iterables.partition(variants, DB_BATCH_INSERT_SIZE)) {
        InsertValuesStep21 insert = context.insertInto(STRUCTURALVARIANT,
                STRUCTURALVARIANT.SAMPLEID,
                STRUCTURALVARIANT.STARTCHROMOSOME,
                STRUCTURALVARIANT.ENDCHROMOSOME,
                STRUCTURALVARIANT.STARTPOSITION,
                STRUCTURALVARIANT.ENDPOSITION,
                STRUCTURALVARIANT.STARTORIENTATION,
                STRUCTURALVARIANT.ENDORIENTATION,
                STRUCTURALVARIANT.STARTHOMOLOGYSEQUENCE,
                STRUCTURALVARIANT.ENDHOMOLOGYSEQUENCE,
                STRUCTURALVARIANT.INSERTSEQUENCE,
                STRUCTURALVARIANT.TYPE,
                STRUCTURALVARIANT.STARTAF,
                STRUCTURALVARIANT.ADJUSTEDSTARTAF,
                STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBER,
                STRUCTURALVARIANT.ADJUSTEDSTARTCOPYNUMBERCHANGE,
                STRUCTURALVARIANT.ENDAF,
                STRUCTURALVARIANT.ADJUSTEDENDAF,
                STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBER,
                STRUCTURALVARIANT.ADJUSTEDENDCOPYNUMBERCHANGE,
                STRUCTURALVARIANT.PLOIDY,
                STRUCTURALVARIANT.MODIFIED);
        for (EnrichedStructuralVariant variant : chunk) {
            addRecord(modified, insert, sample, variant);
        }
        insert.execute();
    }
}
Also used : EnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant) ImmutableEnrichedStructuralVariant(com.hartwig.hmftools.common.variant.structural.ImmutableEnrichedStructuralVariant) InsertValuesStep21(org.jooq.InsertValuesStep21) Timestamp(java.sql.Timestamp) Date(java.util.Date) Record1(org.jooq.Record1)

Aggregations

EnrichedStructuralVariant (com.hartwig.hmftools.common.variant.structural.EnrichedStructuralVariant)6 ImmutableEnrichedStructuralVariant (com.hartwig.hmftools.common.variant.structural.ImmutableEnrichedStructuralVariant)2 StructuralVariant (com.hartwig.hmftools.common.variant.structural.StructuralVariant)2 StructuralVariantAnalysis (com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalysis)2 StructuralVariantAnalyzer (com.hartwig.hmftools.svannotation.analysis.StructuralVariantAnalyzer)2 Transcript (com.hartwig.hmftools.svannotation.annotations.Transcript)2 List (java.util.List)2 NotNull (org.jetbrains.annotations.NotNull)2 Record (org.jooq.Record)2 Lists (com.google.common.collect.Lists)1 ProductionRunContextFactory (com.hartwig.hmftools.common.context.ProductionRunContextFactory)1 RunContext (com.hartwig.hmftools.common.context.RunContext)1 CosmicFusionModel (com.hartwig.hmftools.common.cosmic.fusions.CosmicFusionModel)1 TumorLocationDoidMapping (com.hartwig.hmftools.common.ecrf.doid.TumorLocationDoidMapping)1 GeneCopyNumber (com.hartwig.hmftools.common.gene.GeneCopyNumber)1 Lims (com.hartwig.hmftools.common.lims.Lims)1 PurpleCopyNumber (com.hartwig.hmftools.common.purple.copynumber.PurpleCopyNumber)1 FittedPurity (com.hartwig.hmftools.common.purple.purity.FittedPurity)1 FittedPurityScore (com.hartwig.hmftools.common.purple.purity.FittedPurityScore)1 FittedPurityStatus (com.hartwig.hmftools.common.purple.purity.FittedPurityStatus)1