Search in sources :

Example 16 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class GenomePersister method handleGeneProductChangedGIs.

/**
 * Check for deletions or changed GIs among the products of an existing gene. A product whose NCBI GI is non-blank
 * and is not a key of {@code usedGIs} is either re-pointed at the GI of a same-named entry of {@code usedGIs}, or
 * it is scheduled for removal and detached from its gene.
 * <p>
 * Side effects visible in this method: GIs of existing products may be overwritten, products scheduled for removal
 * that belong to {@code existingGene} have their gene set to {@code null}, another gene that currently owns a
 * conflicting product is updated through {@code geneDao}, and all scheduled products are removed from
 * {@code existingGene.getProducts()}. No gene product is deleted through a DAO here; presumably the caller does
 * that with the returned collection (TODO confirm against caller).
 *
 * @param existingGene the gene whose current products are checked
 * @param usedGIs      gene products keyed by NCBI GI; presumably the products from the current import, as the
 *                     inline comments refer to them as "the imports" (TODO confirm against caller)
 * @return the gene products scheduled for removal (never {@code null}, possibly empty). This can include products
 *         that were not attached to {@code existingGene} (orphans that held a GI needed by one of its products).
 */
private Collection<GeneProduct> handleGeneProductChangedGIs(Gene existingGene, Map<String, GeneProduct> usedGIs) {
    // GIs that have already been assigned to a product of this gene during this call.
    Collection<String> switchedGis = new HashSet<>();
    Collection<GeneProduct> toRemove = new HashSet<>();
    for (GeneProduct existingGp : existingGene.getProducts()) {
        // Nothing to check if the product has no GI, or if its GI is still in use.
        if (StringUtils.isBlank(existingGp.getNcbiGi()) || usedGIs.containsKey(existingGp.getNcbiGi())) {
            continue;
        }
        /*
         * Check to make sure this isn't an updated GI situation (actually common, whenever a sequence is updated).
         * That is, this gene product (already in the system) is actually a match for one of the imports: it's just
         * that the GI of our version is no longer valid. There are two situations. In the simplest case, we just
         * have to update the GI on our record. However, it might be that we _also_ have the one with the correct
         * GI. If that happens there are three situations. First, if the other one is already associated with this
         * gene, we should proceed with deleting the outdated copy and just keep the other one. Second, if the other
         * one is not associated with any gene, we should remove that one and update the outdated record. Third, the
         * other one might be associated with a _different_ gene, in which case we remove _that gp_ and update the
         * outdated record attached to _this_ gene.
         */
        // Default: remove the product unless a same-named entry below rescues it. Note that every same-named
        // entry reassigns this flag and the loop never breaks, so the last same-named entry decides.
        boolean deleteIt = true;
        for (GeneProduct ngp : usedGIs.values()) {
            // NOTE(review): throws NullPointerException if existingGp.getName() returns null.
            if (!existingGp.getName().equals(ngp.getName())) {
                // Names differ, so this entry is not treated as a match for the existing product.
                // NOTE(review): the original comment here was truncated ("... identifiers when you don't have a
                // GI, this is reasonable."); presumably it justified matching on name -- confirm.
                continue;
            }
            /*
             * Check if this GI is already associated with some other gene.
             */
            GeneProduct otherGpUsingThisGi = geneProductDao.findByNcbiId(ngp.getNcbiGi());
            if (otherGpUsingThisGi == null) {
                /*
                 * HOWEVER, if we ALREADY applied the same GI to some other product of the same gene, we have to
                 * remove the duplicate. This is due to cruft, we shouldn't have such duplicates.
                 */
                if (switchedGis.contains(ngp.getNcbiGi())) {
                    AbstractPersister.log.warn("Another gene product with the same intended GI will be deleted: " + existingGp);
                    deleteIt = true;
                    continue;
                }
                // ok: simplest case, nothing else holds the new GI, so just update our record.
                AbstractPersister.log.warn("Updating the GI for " + existingGp + " -> GI:" + ngp.getNcbiGi());
                existingGp.setNcbiGi(ngp.getNcbiGi());
                deleteIt = false;
                switchedGis.add(ngp.getNcbiGi());
                continue;
            }
            // handle less common cases, largely due to database cruft.
            otherGpUsingThisGi = geneProductDao.thaw(otherGpUsingThisGi);
            Gene oldGeneForExistingGeneProduct = otherGpUsingThisGi.getGene();
            if (oldGeneForExistingGeneProduct == null) {
                // The product that holds the new GI is an orphan: take over its GI and schedule it for removal.
                AbstractPersister.log.warn("Updating the GI for " + existingGp + " -> GI:" + ngp.getNcbiGi() + " and deleting orphan GP with same GI: " + otherGpUsingThisGi);
                existingGp.setNcbiGi(ngp.getNcbiGi());
                // remove the old one, which was an orphan already.
                toRemove.add(otherGpUsingThisGi);
                deleteIt = false;
            } else if (oldGeneForExistingGeneProduct.equals(existingGene)) {
                // this is the common case, for crufted database: this gene already has a product with the new
                // GI, so the outdated copy is the one to drop.
                AbstractPersister.log.warn("Removing outdated gp for which there is already an existing copy: " + existingGp + " (already have " + otherGpUsingThisGi + ")");
                deleteIt = true;
            } else {
                /*
                 * That GI is associated with another gene's products. In effect, switch it to this gene. This
                 * should not generally happen.
                 */
                AbstractPersister.log.warn("Removing gene product: " + otherGpUsingThisGi + " and effectively switching to " + existingGene + " -- detected during GI update checks ");
                // Here we just remove its old association. The other product is detached from its gene but is
                // not added to toRemove.
                oldGeneForExistingGeneProduct = geneDao.thaw(oldGeneForExistingGeneProduct);
                oldGeneForExistingGeneProduct.getProducts().remove(otherGpUsingThisGi);
                geneDao.update(oldGeneForExistingGeneProduct);
                // but we keep the one we have here.
                existingGp.setNcbiGi(ngp.getNcbiGi());
                deleteIt = false;
            }
        }
        if (deleteIt) {
            toRemove.add(existingGp);
            // we are erasing this association as we assume it is no longer valid.
            existingGp.setGene(null);
            AbstractPersister.log.warn("Removing gene product from system: " + existingGp + ", it is no longer listed as a product of " + existingGene);
        }
    }
    // finalize any deletions: done after the loop so the products collection is not modified while iterating.
    if (!toRemove.isEmpty()) {
        existingGene.getProducts().removeAll(toRemove);
    }
    return toRemove;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) HashSet(java.util.HashSet)

Example 17 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class GenomePersister method persistOrUpdateGeneProduct.

/**
 * Persists the given gene product if no matching one is found, otherwise updates the one that was found.
 * <p>
 * The existing product is looked up by identifier when the argument has one, and through
 * {@code geneProductDao.find} otherwise.
 *
 * @param geneProduct the product to persist or to update from; may be {@code null}
 * @return {@code null} for a {@code null} argument; the result of {@code persistGeneProduct} when nothing was
 *         found; otherwise the existing product after {@code updateGeneProduct} was applied to it
 */
private GeneProduct persistOrUpdateGeneProduct(GeneProduct geneProduct) {
    if (geneProduct == null) {
        return null;
    }

    // Prefer the lookup by identifier; fall back to the DAO's own matching when there is no identifier yet.
    final GeneProduct existing = geneProduct.getId() == null
            ? geneProductDao.find(geneProduct)
            : geneProductDao.load(geneProduct.getId());

    if (existing != null) {
        this.updateGeneProduct(existing, geneProduct);
        return existing;
    }
    return this.persistGeneProduct(geneProduct);
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct)

Example 18 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class GenomePersister method persistGene.

/**
 * Creates a new gene together with its gene products.
 * <p>
 * The gene is created first without products. Each product is then either attached as new, or, if
 * {@code geneProductDao.find} returns an existing product, that existing product is moved from its previous gene
 * (if it has one) to the new gene. The products are created in a separate step and the gene is updated afterwards.
 *
 * @param gene       the gene to persist; may be {@code null}
 * @param checkFirst if {@code true}, look the gene up with {@code geneDao.find} first and return the existing
 *                   gene unchanged when there is one
 * @return {@code null} for a {@code null} argument; the argument itself if it is not transient; the existing gene
 *         if {@code checkFirst} found one; otherwise the newly created gene
 * @throws RuntimeException wrapping any exception raised while creating the products or updating the gene
 */
private Gene persistGene(Gene gene, boolean checkFirst) {
    if (gene == null)
        return null;
    if (!this.isTransient(gene))
        return gene;
    if (checkFirst) {
        Gene existingGene = geneDao.find(gene);
        if (existingGene != null) {
            if (AbstractPersister.log.isDebugEnabled())
                AbstractPersister.log.debug("Gene exists, will not update");
            return existingGene;
        }
    }
    // An empty collection simply skips the loop, so no size check is needed.
    for (DatabaseEntry de : gene.getAccessions()) {
        this.fillInDatabaseEntry(de);
    }
    // Detach the products so the gene can be created on its own; they are re-attached below.
    Collection<GeneProduct> tempGeneProduct = gene.getProducts();
    gene.setProducts(null);
    gene.setTaxon(this.persistTaxon(gene.getTaxon()));
    this.fillChromosomeLocationAssociations(gene.getPhysicalLocation(), gene.getTaxon());
    if (AbstractPersister.log.isInfoEnabled())
        AbstractPersister.log.info("New gene: " + gene);
    gene = geneDao.create(gene);
    Collection<GeneProduct> geneProductsForNewGene = new HashSet<>();
    for (GeneProduct product : tempGeneProduct) {
        GeneProduct existingProduct = geneProductDao.find(product);
        if (existingProduct != null) {
            /*
             * A geneProduct is being moved to a gene that didn't exist in the system already
             */
            Gene previousGeneForProduct = existingProduct.getGene();
            // The existing product may be an orphan (no gene); only detach it when there is a previous gene.
            if (previousGeneForProduct != null) {
                previousGeneForProduct.getProducts().remove(existingProduct);
            }
            // we aren't going to make it, this isn't really necessary.
            product.setGene(null);
            existingProduct.setGene(gene);
            geneProductsForNewGene.add(existingProduct);
            AbstractPersister.log.warn("While creating new gene: Gene product: [New=" + product + "] is already associated with a gene [Old=" + existingProduct + "], will move to associate with new gene: " + gene);
        } else {
            product.setGene(gene);
            geneProductsForNewGene.add(product);
        }
    }
    // attach the products.
    gene.setProducts(geneProductsForNewGene);
    for (GeneProduct gp : gene.getProducts()) {
        this.fillInGeneProductAssociations(gp);
    }
    try {
        // we do a separate create because the cascade doesn't trigger auditing correctly - otherwise the
        // products are not persistent until the session is flushed, later. There might be a better way around this,
        // but so far as I know this is the only place this happens.
        // noinspection unchecked
        gene.setProducts(geneProductDao.create(gene.getProducts()));
        geneDao.update(gene);
        return gene;
    } catch (Exception e) {
        AbstractPersister.log.error("**** Error while creating gene: " + gene + "; products:");
        // Report the products through the logger as well, so they end up next to the message above.
        for (GeneProduct gp : gene.getProducts()) {
            AbstractPersister.log.error(String.valueOf(gp));
        }
        throw new RuntimeException(e);
    }
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) HashSet(java.util.HashSet)

Example 19 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class GenomePersister method removeGeneProducts.

/**
 * Removes the given gene products, after first removing the BLAT associations and annotation associations that
 * the respective DAOs find for them.
 * <p>
 * Before a product is removed, any of its accessions for which {@code bioSequenceDao.findByAccession} returns a
 * result is taken out of the product's accession collection.
 *
 * @param toRemove the gene products to remove
 */
private void removeGeneProducts(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    if (!blatHits.isEmpty()) {
        String blatMessage = "Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size() + " products.";
        AbstractPersister.log.info(blatMessage);
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    if (!annotationHits.isEmpty()) {
        String annotationMessage = "Removing " + annotationHits.size() + " annotationAssociations involving up to " + toRemove.size() + " products.";
        AbstractPersister.log.info(annotationMessage);
        this.annotationAssociationDao.remove(annotationHits);
    }

    for (GeneProduct product : toRemove) {
        // Collect the accessions that a sequence still refers to, then release them from the product in one go
        // (collected first so the accession collection is not modified while it is being iterated).
        Collection<DatabaseEntry> stillUsedBySequences = new HashSet<>();
        for (DatabaseEntry accession : product.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) == null) {
                continue;
            }
            stillUsedBySequences.add(accession);
        }
        product.getAccessions().removeAll(stillUsedBySequences);
        this.geneProductDao.remove(product);
    }
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Example 20 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class BlatAssociationScorer method organizeBlatAssociationsByGeneProductAndInitializeScores.

/**
 * Groups BLAT associations by gene product. Along the way each association is scored with
 * {@code BlatAssociationScorer.computeScore} and its specificity is initialized to 1.0.
 * <p>
 * The associations of a gene product are held in a set, so associations that are equal to each other are kept
 * only once.
 *
 * @param blatAssociations associations to organize; they must all be for the same query sequence
 * @return map of gene product to the associations that refer to it
 * @throws IllegalArgumentException if the associations refer to more than one distinct query sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> associationsByProduct = new HashMap<>();
    // Distinct query sequences seen so far; more than one is an error.
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation association : blatAssociations) {
        assert association.getBioSequence() != null;
        BlatAssociationScorer.computeScore(association);

        querySequences.add(association.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        assert association.getGeneProduct() != null;
        GeneProduct product = association.getGeneProduct();
        Collection<BlatAssociation> forProduct = associationsByProduct.get(product);
        if (forProduct == null) {
            // First association seen for this product: start its group.
            forProduct = new HashSet<>();
            associationsByProduct.put(product, forProduct);
        }
        forProduct.add(association);

        // an initial value.
        association.setSpecificity(1.0);
    }
    return associationsByProduct;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) HashMap(java.util.HashMap) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Aggregations

GeneProduct (ubic.gemma.model.genome.gene.GeneProduct) 41, Gene (ubic.gemma.model.genome.Gene) 20, HashSet (java.util.HashSet) 16, BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct) 12, DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry) 8, BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) 8, Test (org.junit.Test) 6, BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest) 5, BioSequence (ubic.gemma.model.genome.biosequence.BioSequence) 5, AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) 5, HashMap (java.util.HashMap) 4, PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation) 4, Criteria (org.hibernate.Criteria) 3, CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence) 3, IOException (java.io.IOException) 2, ArrayList (java.util.ArrayList) 2, Collection (java.util.Collection) 2, GeneProductValueObject (ubic.gemma.model.genome.gene.GeneProductValueObject) 2, BufferedReader (java.io.BufferedReader) 1, FileReader (java.io.FileReader) 1