use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class GenomePersister method handleGeneProductChangedGIs.
/**
* Check the products already attached to {@code existingGene} for deletions or changed GIs. A product whose NCBI GI
* is blank, or is a key of {@code usedGIs}, is left alone. Any other product is compared by name against the values
* of {@code usedGIs}: on a match its GI may be overwritten with the matching product's GI; otherwise (or when a
* copy with the right GI is already attached to this gene) it is scheduled for removal.
* <p>
* Side effects visible in this method: GIs of existing products may be changed; products of {@code existingGene}
* that are scheduled for removal have their gene set to null and are taken out of
* {@code existingGene.getProducts()}; and the product collection of a different gene may be modified and saved via
* {@code geneDao.update}. This method does not itself call a remove on {@code geneProductDao}; presumably the
* caller acts on the returned collection (TODO confirm).
*
* @param existingGene gene whose current products are checked
* @param usedGIs gene products keyed by NCBI GI (presumably the products from the current import - confirm
* against caller)
* @return the gene products to remove; may include an orphan product that was not attached to
* {@code existingGene}
*/
private Collection<GeneProduct> handleGeneProductChangedGIs(Gene existingGene, Map<String, GeneProduct> usedGIs) {
// GIs that were assigned to some product of this gene earlier in this call.
Collection<String> switchedGis = new HashSet<>();
// Accumulates everything the caller should remove.
Collection<GeneProduct> toRemove = new HashSet<>();
for (GeneProduct existingGp : existingGene.getProducts()) {
// Nothing to check if the product has no GI, or if its GI is still one of the used ones.
if (StringUtils.isBlank(existingGp.getNcbiGi()) || usedGIs.containsKey(existingGp.getNcbiGi())) {
continue;
}
/*
* Check to make sure this isn't an updated GI situation (actually common, whenever a sequence is updated).
* That is, this gene product (already in the system) is actually a match for one of the imports: it's just
* that the GI of our version is no longer valid. There are two situations. In the simplest case, we just
* have to update the GI on our record. However, it might be that we _also_ have the one with the correct
* GI. If that happens there are three situations. First, if the other one is already associated with this
* gene, we should proceed with deleting the outdated copy and just keep the other one. Second, if the other
* one is not associated with any gene, we should remove that one and update the outdated record. Third, the
* other one might be associated with a _different_ gene, in which case we remove _that gp_ and update the
* outdated record attached to _this_ gene.
*/
// Default: remove the product unless a name match below decides otherwise. Note that the flag is
// reassigned on every name match, so the last matching entry of usedGIs determines the outcome.
boolean deleteIt = true;
for (GeneProduct ngp : usedGIs.values()) {
// NOTE(review): getName() is dereferenced without a null check - confirm names are always set.
if (!existingGp.getName().equals(ngp.getName())) {
// Not the same product by name; matching on name is the fallback here since the GI no longer matches.
continue;
}
/*
* Check if this GI is already associated with some other gene.
*/
GeneProduct otherGpUsingThisGi = geneProductDao.findByNcbiId(ngp.getNcbiGi());
if (otherGpUsingThisGi == null) {
/*
* HOWEVER, if we ALREADY applied the same GI to some other product of the same gene, we have to
* remove the duplicate. This is due to cruft, we shouldn't have such duplicates.
*/
if (switchedGis.contains(ngp.getNcbiGi())) {
AbstractPersister.log.warn("Another gene product with the same intended GI will be deleted: " + existingGp);
deleteIt = true;
continue;
}
// ok: no stored product uses the new GI yet, so simply update the GI on our record.
AbstractPersister.log.warn("Updating the GI for " + existingGp + " -> GI:" + ngp.getNcbiGi());
existingGp.setNcbiGi(ngp.getNcbiGi());
deleteIt = false;
// Remember the GI so a later product of this gene with the same intended GI is treated as a duplicate.
switchedGis.add(ngp.getNcbiGi());
continue;
}
// handle less common cases, largely due to database cruft.
otherGpUsingThisGi = geneProductDao.thaw(otherGpUsingThisGi);
// Despite the variable name, this is the gene of the other product that already carries the new GI.
Gene oldGeneForExistingGeneProduct = otherGpUsingThisGi.getGene();
if (oldGeneForExistingGeneProduct == null) {
AbstractPersister.log.warn("Updating the GI for " + existingGp + " -> GI:" + ngp.getNcbiGi() + " and deleting orphan GP with same GI: " + otherGpUsingThisGi);
existingGp.setNcbiGi(ngp.getNcbiGi());
// remove the old one, which was an orphan already.
toRemove.add(otherGpUsingThisGi);
deleteIt = false;
} else if (oldGeneForExistingGeneProduct.equals(existingGene)) {
// this is the common case, for crufted database: the up-to-date copy is already on this gene.
AbstractPersister.log.warn("Removing outdated gp for which there is already an existing copy: " + existingGp + " (already have " + otherGpUsingThisGi + ")");
deleteIt = true;
} else {
/*
* That GI is associated with another gene's products. In effect, switch it to this gene. This
* should not generally happen.
*/
AbstractPersister.log.warn("Removing gene product: " + otherGpUsingThisGi + " and effectively switching to " + existingGene + " -- detected during GI update checks ");
// Here we just remove its old association. The other product is not added to toRemove.
oldGeneForExistingGeneProduct = geneDao.thaw(oldGeneForExistingGeneProduct);
oldGeneForExistingGeneProduct.getProducts().remove(otherGpUsingThisGi);
geneDao.update(oldGeneForExistingGeneProduct);
// but we keep the one we have here.
existingGp.setNcbiGi(ngp.getNcbiGi());
deleteIt = false;
}
}
if (deleteIt) {
toRemove.add(existingGp);
// we are erasing this association as we assume it is no longer valid.
existingGp.setGene(null);
AbstractPersister.log.warn("Removing gene product from system: " + existingGp + ", it is no longer listed as a product of " + existingGene);
}
}
// finalize any deletions: detach everything scheduled for removal from this gene's product collection.
if (!toRemove.isEmpty()) {
existingGene.getProducts().removeAll(toRemove);
}
return toRemove;
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class GenomePersister method persistOrUpdateGeneProduct.
/**
 * Stores the given gene product, or updates the stored copy if one already exists.
 * <p>
 * The stored copy is looked up by ID when the argument has one, and through {@code geneProductDao.find}
 * otherwise.
 *
 * @param geneProduct the gene product to persist or update; may be null
 * @return null for a null argument; the stored copy (after {@code updateGeneProduct}) when one was found;
 *         otherwise the result of {@code persistGeneProduct}
 */
private GeneProduct persistOrUpdateGeneProduct(GeneProduct geneProduct) {
    if (geneProduct == null) {
        return null;
    }

    // Prefer a direct load when an ID is available; fall back to a business-key style lookup.
    GeneProduct stored = geneProduct.getId() != null
            ? geneProductDao.load(geneProduct.getId())
            : geneProductDao.find(geneProduct);

    if (stored != null) {
        this.updateGeneProduct(stored, geneProduct);
        return stored;
    }

    // Nothing stored yet, so create it.
    return this.persistGeneProduct(geneProduct);
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class GenomePersister method persistGene.
/**
 * Persists a transient gene together with its gene products.
 * <p>
 * The gene is created first without products. Each product is then looked up via {@code geneProductDao.find}:
 * a product that already exists is moved from its previous gene (if it has one) to the new gene, and any other
 * product is attached as-is. Finally the products are created in a separate DAO call and the gene is updated.
 *
 * @param gene       the gene to persist; may be null
 * @param checkFirst if true, look for an existing matching gene first and return it unchanged when found
 * @return null for a null argument; the argument itself if it is not transient; the existing gene if
 *         {@code checkFirst} found one; otherwise the newly created gene
 * @throws RuntimeException wrapping any exception raised while creating the products or updating the gene
 */
private Gene persistGene(Gene gene, boolean checkFirst) {
    if (gene == null) {
        return null;
    }
    if (!this.isTransient(gene)) {
        return gene;
    }
    if (checkFirst) {
        Gene existingGene = geneDao.find(gene);
        if (existingGene != null) {
            if (AbstractPersister.log.isDebugEnabled()) {
                AbstractPersister.log.debug("Gene exists, will not update");
            }
            return existingGene;
        }
    }

    // An empty collection simply skips the loop, so no explicit size check is needed.
    for (DatabaseEntry de : gene.getAccessions()) {
        this.fillInDatabaseEntry(de);
    }

    // Detach the products so the gene can be created on its own; they are re-attached below.
    Collection<GeneProduct> tempGeneProduct = gene.getProducts();
    gene.setProducts(null);
    gene.setTaxon(this.persistTaxon(gene.getTaxon()));
    this.fillChromosomeLocationAssociations(gene.getPhysicalLocation(), gene.getTaxon());
    if (AbstractPersister.log.isInfoEnabled()) {
        AbstractPersister.log.info("New gene: " + gene);
    }
    gene = geneDao.create(gene);

    Collection<GeneProduct> geneProductsForNewGene = new HashSet<>();
    // A gene without a product collection is treated like a gene with no products.
    if (tempGeneProduct != null) {
        for (GeneProduct product : tempGeneProduct) {
            GeneProduct existingProduct = geneProductDao.find(product);
            if (existingProduct != null) {
                /*
                 * A geneProduct is being moved to a gene that didn't exist in the system already
                 */
                Gene previousGeneForProduct = existingProduct.getGene();
                // The existing product may be an orphan (no gene); then there is no old association to undo.
                if (previousGeneForProduct != null) {
                    previousGeneForProduct.getProducts().remove(existingProduct);
                }
                // we aren't going to make it, this isn't really necessary.
                product.setGene(null);
                existingProduct.setGene(gene);
                geneProductsForNewGene.add(existingProduct);
                AbstractPersister.log.warn("While creating new gene: Gene product: [New=" + product + "] is already associated with a gene [Old=" + existingProduct + "], will move to associate with new gene: " + gene);
            } else {
                product.setGene(gene);
                geneProductsForNewGene.add(product);
            }
        }
    }

    // attach the products.
    gene.setProducts(geneProductsForNewGene);
    for (GeneProduct gp : gene.getProducts()) {
        this.fillInGeneProductAssociations(gp);
    }
    try {
        // we do a separate create because the cascade doesn't trigger auditing correctly - otherwise the
        // products are not persistent until the session is flushed, later. There might be a better way around this,
        // but so far as I know this is the only place this happens.
        // noinspection unchecked
        gene.setProducts(geneProductDao.create(gene.getProducts()));
        geneDao.update(gene);
        return gene;
    } catch (Exception e) {
        AbstractPersister.log.error("**** Error while creating gene: " + gene + "; products:");
        // Report the products through the same logger as the header line rather than on stderr.
        for (GeneProduct gp : gene.getProducts()) {
            AbstractPersister.log.error(String.valueOf(gp));
        }
        throw new RuntimeException(e);
    }
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class GenomePersister method removeGeneProducts.
/**
 * Removes the given gene products after clearing out records that refer to them.
 * <p>
 * BLAT associations and annotation associations found for the products are removed first. Then, for each product,
 * every accession that {@code bioSequenceDao.findByAccession} can also find is taken out of the product's accession
 * collection before the product itself is removed.
 *
 * @param toRemove the gene products to remove
 */
private void removeGeneProducts(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatHits = this.blatAssociationDao.find(toRemove);
    if (!blatHits.isEmpty()) {
        AbstractPersister.log.info("Removing " + blatHits.size() + " blat associations involving up to " + toRemove.size() + " products.");
        this.blatAssociationDao.remove(blatHits);
    }

    Collection<AnnotationAssociation> annotationHits = this.annotationAssociationDao.find(toRemove);
    if (!annotationHits.isEmpty()) {
        AbstractPersister.log.info("Removing " + annotationHits.size() + " annotationAssociations involving up to " + toRemove.size() + " products.");
        this.annotationAssociationDao.remove(annotationHits);
    }

    for (GeneProduct doomed : toRemove) {
        // Collect the database entries that are still associated with sequences ...
        Collection<DatabaseEntry> sharedWithSequences = new HashSet<>();
        for (DatabaseEntry accession : doomed.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) == null) {
                continue;
            }
            sharedWithSequences.add(accession);
        }
        // ... and release them from the product before the product goes away.
        doomed.getAccessions().removeAll(sharedWithSequences);
        this.geneProductDao.remove(doomed);
    }
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class BlatAssociationScorer method organizeBlatAssociationsByGeneProductAndInitializeScores.
/**
 * Groups BLAT associations by gene product. While grouping, each association has its score computed via
 * {@code computeScore} and its specificity set to an initial value of 1.0.
 *
 * @param blatAssociations associations to organize; all must be for the same query sequence
 * @return map from each gene product to the set of associations that refer to it
 * @throws IllegalArgumentException if the associations refer to more than one distinct query sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byProduct = new HashMap<>();
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation association : blatAssociations) {
        assert association.getBioSequence() != null;
        BlatAssociationScorer.computeScore(association);

        // A second distinct sequence means the caller mixed results from different queries.
        querySequences.add(association.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        assert association.getGeneProduct() != null;
        GeneProduct product = association.getGeneProduct();

        // Fetch the bucket for this product, creating it on first sight.
        Collection<BlatAssociation> bucket = byProduct.get(product);
        if (bucket == null) {
            bucket = new HashSet<BlatAssociation>();
            byProduct.put(product, bucket);
        }
        bucket.add(association);

        // an initial value.
        association.setSpecificity(1.0);
    }

    return byProduct;
}
Aggregations