Search in sources :

Example 1 with BioSequence2GeneProduct

use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.

the class ArrayDesignAnnotationServiceImpl method generateAnnotationFile.

/**
 * Writes one annotation line per composite sequence in {@code genesWithSpecificity}, then closes the writer.
 * <p>
 * Three cases are handled for each composite sequence:
 * <ul>
 * <li>no associations: a line with blank gene fields and {@code null} GO terms;</li>
 * <li>exactly one association: the fields of that single gene;</li>
 * <li>several associations: the fields of all genes, de-duplicated in encounter order and joined with
 * {@code |}.</li>
 * </ul>
 *
 * @param writer               destination for the annotation lines; it is closed before this method returns, also
 *                             when writing fails part-way
 * @param genesWithSpecificity composite sequences mapped to their sequence-to-gene-product associations
 * @param ty                   output type, passed through to {@code getGoTerms}
 * @return the number of composite sequences processed
 * @throws IOException declared for the underlying write and close operations
 */
@Override
public int generateAnnotationFile(Writer writer, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> genesWithSpecificity, OutputType ty) throws IOException {
    int compositeSequencesProcessed = 0;
    int simple = 0;
    int empty = 0;
    int complex = 0;
    // We use LinkedHashSets to keep everything in a predictable order - this is important for the gene symbols,
    // descriptions and NCBI ids (but not important for GO terms). When a probe maps to multiple genes, we list those
    // three items for the genes in the same order. There is a feature request to make the order deterministic
    // (i.e., lexicographic sort); this could be done by using little gene objects or similar.
    Collection<OntologyTerm> goTerms = new LinkedHashSet<>();
    Set<String> genes = new LinkedHashSet<>();
    Set<String> geneDescriptions = new LinkedHashSet<>();
    Set<String> geneIds = new LinkedHashSet<>();
    Set<String> ncbiIds = new LinkedHashSet<>();
    // try-with-resources so the writer is released even if a lookup or a write throws part-way through.
    try (Writer out = writer) {
        Map<Gene, Collection<VocabCharacteristic>> goMappings = this.getGOMappings(genesWithSpecificity);
        for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : genesWithSpecificity.entrySet()) {
            CompositeSequence cs = entry.getKey();
            Collection<BioSequence2GeneProduct> geneclusters = entry.getValue();
            if (++compositeSequencesProcessed % 2000 == 0 && ArrayDesignAnnotationServiceImpl.log.isInfoEnabled()) {
                ArrayDesignAnnotationServiceImpl.log.info("Processed " + compositeSequencesProcessed + "/" + genesWithSpecificity.size() + " compositeSequences " + empty + " empty; " + simple + " simple; " + complex + " complex;");
            }
            if (geneclusters.isEmpty()) {
                this.writeAnnotationLine(out, cs.getName(), "", "", null, "", "");
                empty++;
                continue;
            }
            if (geneclusters.size() == 1) {
                // common case, do it quickly.
                BioSequence2GeneProduct b2g = geneclusters.iterator().next();
                Gene g = b2g.getGeneProduct().getGene();
                // Held in its own local: rebinding the shared accumulator here would make the clear()/addAll()
                // calls of a later multi-gene probe operate on a collection handed out by getGoTerms.
                Collection<OntologyTerm> singleGeneGoTerms = this.getGoTerms(goMappings.get(g), ty);
                String gemmaId = g.getId() == null ? "" : g.getId().toString();
                String ncbiId = g.getNcbiGeneId() == null ? "" : g.getNcbiGeneId().toString();
                this.writeAnnotationLine(out, cs.getName(), g.getOfficialSymbol(), g.getOfficialName(), singleGeneGoTerms, gemmaId, ncbiId);
                simple++;
                continue;
            }
            // Multi-gene probe: reset the reusable accumulators, then collect the fields of every gene.
            goTerms.clear();
            genes.clear();
            geneDescriptions.clear();
            geneIds.clear();
            ncbiIds.clear();
            for (BioSequence2GeneProduct bioSequence2GeneProduct : geneclusters) {
                Gene g = bioSequence2GeneProduct.getGeneProduct().getGene();
                genes.add(g.getOfficialSymbol());
                geneDescriptions.add(g.getOfficialName());
                // The single-gene branch tolerates a missing id, so do the same here instead of failing mid-file.
                Long geneId = g.getId();
                if (geneId != null) {
                    geneIds.add(geneId.toString());
                }
                Integer ncbiGeneId = g.getNcbiGeneId();
                if (ncbiGeneId != null) {
                    ncbiIds.add(ncbiGeneId.toString());
                }
                goTerms.addAll(this.getGoTerms(goMappings.get(g), ty));
            }
            String geneString = StringUtils.join(genes, "|");
            String geneDescriptionString = StringUtils.join(geneDescriptions, "|");
            String geneIdsString = StringUtils.join(geneIds, "|");
            String ncbiIdsString = StringUtils.join(ncbiIds, "|");
            this.writeAnnotationLine(out, cs.getName(), geneString, geneDescriptionString, goTerms, geneIdsString, ncbiIdsString);
            complex++;
        }
    }
    return compositeSequencesProcessed;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) OntologyTerm(ubic.basecode.ontology.model.OntologyTerm) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Gene(ubic.gemma.model.genome.Gene)

Example 2 with BioSequence2GeneProduct

use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.

the class GoldenPathSequenceAnalysis method findAssociations.

/**
 * Finds the genes (or predicted genes) that overlap a physical location and describes, for each, where the query
 * region falls within it.
 *
 * @param chromosome The chromosome name (the organism is set by the constructor)
 * @param queryStart First base of the query region (the start of the alignment to the genome)
 * @param queryEnd   Last base of the query region (the end of the alignment to the genome); must not be less than
 *                   {@code queryStart}
 * @param starts     Alignment block starts in the target (comma-delimited, from blat)
 * @param sizes      Alignment block sizes (comma-delimited, from blat)
 * @param strand     Either + or - to restrict the search to that strand, or null to search both strands
 * @param method     The constant representing the method used to locate the 3' distance
 * @param config     Configuration; selects which gene tables are consulted and supplies the minimum exon overlap
 *                   fraction
 * @return A collection of associations, one per overlapping gene product that was not rejected for having zero
 *         exon overlap. The stored three-prime distance will be 0 if the sequence overhangs the found gene (rather
 *         than a negative distance). If no gene products are found at all, the result is null. These are transient
 *         instances, not from Gemma's database.
 * @throws IllegalArgumentException if {@code queryEnd} is less than {@code queryStart}
 */
public Collection<BlatAssociation> findAssociations(String chromosome, Long queryStart, Long queryEnd, String starts, String sizes, String strand, ThreePrimeDistanceMethod method, ProbeMapperConfig config) {
    if (GoldenPath.log.isDebugEnabled()) {
        GoldenPath.log.debug("Seeking gene overlaps with: chrom=" + chromosome + " start=" + queryStart + " end=" + queryEnd + " strand=" + strand);
    }
    if (queryEnd < queryStart) {
        throw new IllegalArgumentException("End must not be less than start");
    }

    // Candidate gene products; these are transient instances only.
    Collection<GeneProduct> candidates = new HashSet<>();
    if (config.isUseRefGene()) {
        // starting with refgene means we can get the correct transcript name etc.
        candidates.addAll(this.findRefGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }
    if (config.isUseKnownGene()) {
        // get known genes as well, in case all we got was an intron. Currently does not work with rat (rn6)
        candidates.addAll(this.findKnownGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }
    if (candidates.isEmpty()) {
        return null;
    }

    Collection<BlatAssociation> associations = new HashSet<>();
    for (GeneProduct candidate : candidates) {
        if (GoldenPath.log.isDebugEnabled()) {
            GoldenPath.log.debug(candidate);
        }
        BlatAssociation association = this.computeLocationInGene(chromosome, queryStart, queryEnd, starts, sizes, candidate, method, config);
        // Only a zero overlap is rejected here. The actual threshold is checked later, because not all the slots
        // are populated yet.
        boolean lacksRequiredExonOverlap = config.getMinimumExonOverlapFraction() > 0.0 && association.getOverlap() == 0;
        if (lacksRequiredExonOverlap) {
            GoldenPath.log.debug("Result failed to meet exon overlap threshold (0)");
        } else {
            associations.add(association);
        }
    }
    return associations;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 3 with BioSequence2GeneProduct

use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.

the class CompositeSequenceDaoImpl method thaw.

/**
 * Forces initialization of a composite sequence's associations via {@code Hibernate.initialize}: its biological
 * characteristic (with taxon, parent taxon, external databases, sequence database entry and gene product
 * associations down to each gene's aliases and accessions) and its array design.
 *
 * @param compositeSequence the composite sequence to initialize; may be null
 * @return the same instance that was passed in, or null if null was passed in
 */
@Override
public CompositeSequence thaw(CompositeSequence compositeSequence) {
    if (compositeSequence == null) {
        return null;
    }

    Hibernate.initialize(compositeSequence.getBiologicalCharacteristic());
    if (compositeSequence.getBiologicalCharacteristic() == null) {
        // No sequence to walk; only the array design is left to initialize.
        Hibernate.initialize(compositeSequence.getArrayDesign());
        return compositeSequence;
    }

    // Taxon, its external database, and the parent taxon (with its external database) when present.
    Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getTaxon());
    if (compositeSequence.getBiologicalCharacteristic().getTaxon() != null) {
        Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getTaxon().getExternalDatabase());
        Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getTaxon().getParentTaxon());
        if (compositeSequence.getBiologicalCharacteristic().getTaxon().getParentTaxon() != null) {
            Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getTaxon().getParentTaxon().getExternalDatabase());
        }
    }

    // Sequence database entry and the external database it refers to.
    Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry());
    if (compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry() != null) {
        Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry().getExternalDatabase());
    }

    // Gene product associations: association -> gene product -> gene -> aliases and accessions.
    Hibernate.initialize(compositeSequence.getBiologicalCharacteristic().getBioSequence2GeneProduct());
    for (BioSequence2GeneProduct association : compositeSequence.getBiologicalCharacteristic().getBioSequence2GeneProduct()) {
        if (association == null) {
            continue;
        }
        Hibernate.initialize(association);
        if (association.getGeneProduct() == null) {
            continue;
        }
        Hibernate.initialize(association.getGeneProduct());
        Hibernate.initialize(association.getGeneProduct().getGene());
        if (association.getGeneProduct().getGene() == null) {
            continue;
        }
        Hibernate.initialize(association.getGeneProduct().getGene().getAliases());
        Hibernate.initialize(association.getGeneProduct().getGene().getAccessions());
    }

    Hibernate.initialize(compositeSequence.getArrayDesign());
    return compositeSequence;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct)

Example 4 with BioSequence2GeneProduct

use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.

the class CompositeSequenceDaoImpl method thaw.

/**
 * Initializes a batch of composite sequences inside a single native Hibernate session.
 * <p>
 * For each composite sequence the array design, the biological characteristic (with its taxon and sequence
 * database entry) and the gene aliases reachable through the gene product associations are touched. The objects
 * visited along the way are evicted from the session as it goes, and the session is cleared at the end.
 * <p>
 * Composite sequences without a biological characteristic, or whose association collection is null, are skipped
 * early; they are therefore not counted in the progress message.
 *
 * @param compositeSequences the composite sequences to initialize
 */
@Override
public void thaw(final Collection<CompositeSequence> compositeSequences) {
    HibernateTemplate templ = this.getHibernateTemplate();
    templ.executeWithNativeSession(new org.springframework.orm.hibernate3.HibernateCallback<Object>() {

        @Override
        public Object doInHibernate(org.hibernate.Session session) throws org.hibernate.HibernateException {
            int i = 0;
            int numToDo = compositeSequences.size();
            for (CompositeSequence cs : compositeSequences) {
                // Associate the instance with this session (no lock mode) before touching its associations.
                session.buildLockRequest(LockOptions.NONE).lock(cs);
                Hibernate.initialize(cs.getArrayDesign());
                BioSequence bs = cs.getBiologicalCharacteristic();
                if (bs == null) {
                    continue;
                }
                session.buildLockRequest(LockOptions.NONE).lock(bs);
                Hibernate.initialize(bs);
                Hibernate.initialize(bs.getTaxon());
                DatabaseEntry dbEntry = bs.getSequenceDatabaseEntry();
                if (dbEntry != null) {
                    Hibernate.initialize(dbEntry);
                    Hibernate.initialize(dbEntry.getExternalDatabase());
                    session.evict(dbEntry);
                    session.evict(dbEntry.getExternalDatabase());
                }
                if (bs.getBioSequence2GeneProduct() == null) {
                    continue;
                }
                for (BioSequence2GeneProduct bs2gp : bs.getBioSequence2GeneProduct()) {
                    if (bs2gp == null) {
                        continue;
                    }
                    GeneProduct geneProduct = bs2gp.getGeneProduct();
                    if (geneProduct != null && geneProduct.getGene() != null) {
                        Gene g = geneProduct.getGene();
                        // size() is called only to force the alias collection to load.
                        g.getAliases().size();
                        session.evict(g);
                        session.evict(geneProduct);
                    }
                }
                if (++i % 2000 == 0) {
                    AbstractDao.log.info("Progress: " + i + "/" + numToDo + "...");
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException e) {
                        // Keep going with the batch, but restore the interrupt flag so the caller can still
                        // observe that this thread was asked to stop.
                        Thread.currentThread().interrupt();
                    }
                }
                session.evict(bs);
            }
            session.clear();
            return null;
        }
    });
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) HibernateTemplate(org.springframework.orm.hibernate3.HibernateTemplate) BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) org.hibernate(org.hibernate) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)

Example 5 with BioSequence2GeneProduct

use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.

the class CompositeSequenceDaoImpl method batchGetGenesWithSpecificity.

/**
 * Looks up the sequence-to-gene-product associations for one batch of composite sequences and adds them to
 * {@code results}, keyed by composite sequence. Every member of the batch ends up with an entry, which is an empty
 * collection when no association was found for it.
 *
 * @param batch   of composite sequences to process; nothing happens if it is empty
 * @param results map that is added to (existing entries are kept and extended)
 */
private void batchGetGenesWithSpecificity(Collection<CompositeSequence> batch, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> results) {
    if (batch.isEmpty()) {
        return;
    }

    // language=HQL
    final String queryString = "select cs,bas from CompositeSequence cs, BioSequence2GeneProduct bas inner join cs.biologicalCharacteristic bs " + "inner join fetch bas.geneProduct gp inner join fetch gp.gene gene " + "where bas.bioSequence=bs and cs in (:cs)";
    List<?> rows = this.getHibernateTemplate().findByNamedParam(queryString, "cs", batch);

    for (Object row : rows) {
        // Each row is a pair: the composite sequence and one of its associations.
        Object[] columns = (Object[]) row;
        CompositeSequence probe = (CompositeSequence) columns[0];
        BioSequence2GeneProduct association = (BioSequence2GeneProduct) columns[1];

        if (association instanceof BlatAssociation) {
            BlatResult blatResult = ((BlatAssociation) association).getBlatResult();
            PhysicalLocation alignedRegion = blatResult.getTargetAlignedRegion();
            if (alignedRegion == null) {
                // We didn't always used to fill in the targetAlignedRegion ... this is just in case.
                // NOTE(review): the location built here is not stored anywhere within this method.
                alignedRegion = PhysicalLocation.Factory.newInstance();
                alignedRegion.setChromosome(blatResult.getTargetChromosome());
                alignedRegion.setNucleotide(blatResult.getTargetStart());
                int targetEnd = blatResult.getTargetEnd().intValue();
                int targetStart = blatResult.getTargetStart().intValue();
                alignedRegion.setNucleotideLength(targetEnd - targetStart);
                alignedRegion.setStrand(blatResult.getStrand());
                // Note: not bothering to fill in the bin.
            }
        }

        if (!results.containsKey(probe)) {
            results.put(probe, new HashSet<BioSequence2GeneProduct>());
        }
        results.get(probe).add(association);
    }

    // This is kind of important: probes that do not have a mapping still get an (empty) collection in the results.
    for (CompositeSequence unmappedCandidate : batch) {
        if (!results.containsKey(unmappedCandidate)) {
            results.put(unmappedCandidate, new HashSet<BioSequence2GeneProduct>());
        }
    }
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Aggregations

BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)11 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)6 Gene (ubic.gemma.model.genome.Gene)5 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)4 HashSet (java.util.HashSet)2 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)2 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)2 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)2 BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult)2 org.hibernate (org.hibernate)1 HibernateTemplate (org.springframework.orm.hibernate3.HibernateTemplate)1 OntologyTerm (ubic.basecode.ontology.model.OntologyTerm)1 GeneMappingSummary (ubic.gemma.core.analysis.sequence.GeneMappingSummary)1 DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry)1 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)1 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)1 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)1 GeneProductValueObject (ubic.gemma.model.genome.gene.GeneProductValueObject)1 GeneValueObject (ubic.gemma.model.genome.gene.GeneValueObject)1 AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation)1