Search in sources :

Example 1 with AnnotationAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.

In class ArrayDesignProbeMapperServiceImpl, method processArrayDesign.

/**
 * Rebuilds the probe-to-gene-product annotation associations of a platform from a tab-delimited file.
 * <p>
 * Existing gene product associations of the platform are deleted before the file is read. Blank lines and
 * lines starting with '#' are ignored; every other line must have exactly three tab-separated columns:
 * probe id, sequence name, and one or more gene identifiers separated by '|'. For each gene product of each
 * gene that is found, an {@link AnnotationAssociation} linking it to the probe's sequence is created. Lines
 * that cannot be processed are counted and skipped; the count is logged at the end.
 *
 * @param arrayDesign the platform to process; its technology type must not be {@code TechnologyType.NONE}
 * @param taxon       taxon used for official-symbol gene lookups and set on newly created sequences; may be
 *                    null only when {@code ncbiIds} is true
 * @param source      the annotation file to read
 * @param sourceDB    set as the source of every association created
 * @param ncbiIds     if true, tokens of the third column are parsed as integer NCBI gene ids; otherwise they
 *                    are looked up as official symbols within {@code taxon}
 * @throws IOException              if the file cannot be read or a data line does not have three columns
 * @throws IllegalArgumentException if {@code taxon} is null while {@code ncbiIds} is false, or the platform's
 *                                  technology type is NONE
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB, boolean ncbiIds) throws IOException {
    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    // NOTE(review): FileReader decodes with the platform default charset on older JDKs — confirm the
    // annotation files are plain ASCII or that this is acceptable.
    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        // Counts skipped lines, plus (further down) genes skipped because they have no products.
        int numSkipped = 0;
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        // The old associations are removed up front, before any line of the file has been validated.
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
        while ((line = b.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            // '#' marks a comment line.
            if (line.startsWith("#")) {
                continue;
            }
            // splitPreserveAllTokens keeps empty columns, so a line with an empty field still yields three fields.
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }
            String probeId = fields[0];
            String seqName = fields[1];
            /*
             * FIXME. We have to allow NCBI gene ids here.
             * (The column holds either official symbols or NCBI gene ids, depending on the ncbiIds flag.)
             */
            String geneSymbol = fields[2];
            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }
            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);
            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled())
                    ArrayDesignProbeMapperServiceImpl.log.debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                numSkipped++;
                continue;
            }
            // a probe can have more than one gene associated with it; if so they are separated by pipes (|)
            Collection<Gene> geneListProbe = new HashSet<>();
            // holds the lookup result for the current token
            Gene geneDetails;
            StringTokenizer st = new StringTokenizer(geneSymbol, "|");
            while (st.hasMoreTokens()) {
                String geneToken = st.nextToken().trim();
                if (ncbiIds) {
                    // NOTE(review): Integer.parseInt throws NumberFormatException for a non-numeric token and
                    // nothing in this method catches it, so a malformed id aborts the import after the old
                    // associations were already deleted — confirm this is intended.
                    geneDetails = geneService.findByNCBIId(Integer.parseInt(geneToken));
                } else {
                    geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
                }
                // Tokens that do not resolve to a gene are dropped silently.
                if (geneDetails != null) {
                    geneListProbe.add(geneDetails);
                }
            }
            if (geneListProbe.size() == 0) {
                ArrayDesignProbeMapperServiceImpl.log.warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // this is a common situation, when the geneSymbol actually has |-separated genes, so no need to
                // make a lot of fuss.
                ArrayDesignProbeMapperServiceImpl.log.debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }
            BioSequence bs = c.getBiologicalCharacteristic();
            if (bs != null) {
                // The probe already has a sequence: if a name was supplied it must match the existing one.
                if (StringUtils.isNotBlank(seqName)) {
                    bs = bioSequenceService.thaw(bs);
                    if (!bs.getName().equals(seqName)) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for " + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                        numSkipped++;
                        continue;
                    }
                }
            // otherwise we assume everything is okay.
            } else {
                // create one based on the text provided.
                if (StringUtils.isBlank(seqName)) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("You must provide sequence names for probes which are not already mapped. probeName=" + probeId + " had no sequence associated and no name provided; skipping");
                    numSkipped++;
                    continue;
                }
                bs = BioSequence.Factory.newInstance();
                bs.setName(seqName);
                // NOTE(review): taxon may be null here when ncbiIds is true (see the argument check at the
                // top), in which case the new sequence is created without a taxon — confirm.
                bs.setTaxon(taxon);
                bs.setDescription("Imported from annotation file");
                // Placeholder.
                bs.setType(SequenceType.OTHER);
                bs = bioSequenceService.create(bs);
                // Attach the newly created sequence to the probe and persist the change.
                c.setBiologicalCharacteristic(bs);
                compositeSequenceService.update(c);
            }
            assert bs != null;
            assert bs.getId() != null;
            for (Gene gene : geneListProbe) {
                gene = geneService.thaw(gene);
                if (gene.getProducts().size() == 0) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene + ", it cannot be mapped to probes. Skipping");
                    // Note: this increments the same counter as skipped lines, but per gene.
                    numSkipped++;
                    continue;
                }
                // One association is created per gene product of the gene.
                for (GeneProduct gp : gene.getProducts()) {
                    AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
                    association.setBioSequence(bs);
                    association.setGeneProduct(gp);
                    association.setSource(sourceDB);
                    annotationAssociationService.create(association);
                }
            }
        }
        // Post-processing once the whole file has been read: regenerate the platform report, then call
        // deleteOldFiles for the platform.
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
        this.deleteOldFiles(arrayDesign);
        ArrayDesignProbeMapperServiceImpl.log.info("Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}
Also used : AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) IOException(java.io.IOException) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) StringTokenizer(java.util.StringTokenizer) Gene(ubic.gemma.model.genome.Gene) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet)

Example 2 with AnnotationAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.

In class AnnotationAssociationDaoImpl, method thaw.

/**
 * Calls {@code thawAssociation} for every association in the collection that has an id, using a single
 * native Hibernate session for the whole batch.
 *
 * @param anCollection the associations to process; a null collection is a no-op, and elements without an
 *                     id are skipped
 */
@Override
public void thaw(final Collection<AnnotationAssociation> anCollection) {
    if (anCollection == null) {
        return;
    }
    HibernateTemplate hibernateTemplate = this.getHibernateTemplate();
    hibernateTemplate.executeWithNativeSession(new org.springframework.orm.hibernate3.HibernateCallback<Object>() {

        @Override
        public Object doInHibernate(org.hibernate.Session session) throws org.hibernate.HibernateException {
            for (AnnotationAssociation association : anCollection) {
                // Only associations that carry an id are handed to thawAssociation.
                if (association.getId() != null) {
                    AnnotationAssociationDaoImpl.this.thawAssociation(session, association);
                }
            }
            // The callback produces no result.
            return null;
        }
    });
}
Also used : AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) HibernateTemplate(org.springframework.orm.hibernate3.HibernateTemplate)

Example 3 with AnnotationAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.

In class CompositeSequenceServiceImpl, method getGeneMappingSummary.

/**
 * Summarizes the gene mappings of a probe, grouped by the alignment (BLAT result) that supports them.
 * <p>
 * Each sequence-to-gene-product association of the probe's sequence contributes its gene product and gene
 * to the summary keyed by {@code ProbeMapUtils.hashBlatResult} of its BLAT result. Annotation-based
 * associations have no BLAT result, so a dummy one carrying the sequence and the sequence id is used.
 * If nothing at all was collected, a single placeholder summary describing only the probe is returned.
 *
 * @param cs the probe (composite sequence) to summarize
 * @return the summaries; empty if the probe has no sequence or the sequence has no association collection
 */
@Override
public Collection<GeneMappingSummary> getGeneMappingSummary(CompositeSequence cs) {
    Map<Integer, GeneMappingSummary> results = new HashMap<>();
    BioSequence biologicalCharacteristic = cs.getBiologicalCharacteristic();
    // A probe may have no sequence; bail out before thawing rather than relying on thaw() accepting null.
    if (biologicalCharacteristic == null) {
        return results.values();
    }
    biologicalCharacteristic = bioSequenceService.thaw(biologicalCharacteristic);
    if (biologicalCharacteristic == null || biologicalCharacteristic.getBioSequence2GeneProduct() == null) {
        return results.values();
    }
    Collection<BioSequence2GeneProduct> bs2gps = biologicalCharacteristic.getBioSequence2GeneProduct();
    for (BioSequence2GeneProduct bs2gp : bs2gps) {
        GeneProductValueObject geneProduct = new GeneProductValueObject(geneProductService.thaw(bs2gp.getGeneProduct()));
        GeneValueObject gene = new GeneValueObject(bs2gp.getGeneProduct().getGene());
        BlatResultValueObject blatResult = null;
        if (bs2gp instanceof BlatAssociation) {
            BlatAssociation blatAssociation = (BlatAssociation) bs2gp;
            blatResult = new BlatResultValueObject(blatResultService.thaw(blatAssociation.getBlatResult()));
        } else if (bs2gp instanceof AnnotationAssociation) {
            /*
             * Make a dummy blat result
             */
            blatResult = new BlatResultValueObject();
            blatResult.setQuerySequence(BioSequenceValueObject.fromEntity(biologicalCharacteristic));
            blatResult.setId(biologicalCharacteristic.getId());
        }
        // Any other association type carries no alignment information and is ignored.
        if (blatResult == null) {
            continue;
        }
        // Compute the grouping key once and do a single map lookup.
        Integer key = ProbeMapUtils.hashBlatResult(blatResult);
        GeneMappingSummary existing = results.get(key);
        if (existing != null) {
            existing.addGene(geneProduct, gene);
        } else {
            GeneMappingSummary summary = new GeneMappingSummary();
            summary.addGene(geneProduct, gene);
            summary.setBlatResult(blatResult);
            summary.setCompositeSequence(this.loadValueObject(cs));
            results.put(key, summary);
        }
    }
    this.addBlatResultsLackingGenes(cs, results);
    if (results.isEmpty()) {
        // add a 'dummy' that at least contains the information about the CS. This is a bit of a hack...
        GeneMappingSummary summary = new GeneMappingSummary();
        summary.setCompositeSequence(this.loadValueObject(cs));
        BlatResultValueObject newInstance = new BlatResultValueObject(-1L);
        newInstance.setQuerySequence(BioSequenceValueObject.fromEntity(biologicalCharacteristic));
        summary.setBlatResult(newInstance);
        results.put(ProbeMapUtils.hashBlatResult(newInstance), summary);
    }
    return results.values();
}
Also used : GeneValueObject(ubic.gemma.model.genome.gene.GeneValueObject) AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) GeneProductValueObject(ubic.gemma.model.genome.gene.GeneProductValueObject) GeneMappingSummary(ubic.gemma.core.analysis.sequence.GeneMappingSummary) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BlatResultValueObject(ubic.gemma.model.genome.sequenceAnalysis.BlatResultValueObject) BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)

Example 4 with AnnotationAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.

In class ArrayDesignDaoImpl, method deleteGeneProductAssociations.

/**
 * Deletes the sequence-to-gene-product associations attached to the sequences of a platform's probes.
 * <p>
 * The platform is locked with a pessimistic write lock first. Two passes follow: one over
 * {@code BioSequence2GeneProduct}, after which the session is flushed, and one over
 * {@code AnnotationAssociation}.
 *
 * @param arrayDesign the platform whose associations are deleted
 */
@Override
public void deleteGeneProductAssociations(ArrayDesign arrayDesign) {
    this.getSessionFactory().getCurrentSession()
            .buildLockRequest(LockOptions.UPGRADE)
            .setLockMode(LockMode.PESSIMISTIC_WRITE)
            .lock(arrayDesign);

    // NOTE(review): BioSequence2GeneProduct is queried polymorphically, so this pass may already pick up
    // the annotation associations as well — TODO confirm.
    // language=HQL
    final String sequenceToProductHql = "select ba from CompositeSequence  cs " + "inner join cs.biologicalCharacteristic bs, BioSequence2GeneProduct ba " + "where ba.bioSequence = bs and cs.arrayDesign=:arrayDesign";
    List<?> sequenceToProductAssociations = this.getSessionFactory().getCurrentSession()
            .createQuery(sequenceToProductHql)
            .setParameter("arrayDesign", arrayDesign)
            .list();
    if (!sequenceToProductAssociations.isEmpty()) {
        for (Object association : sequenceToProductAssociations) {
            this.getSessionFactory().getCurrentSession().delete(association);
        }
        AbstractDao.log.info("Done deleting " + sequenceToProductAssociations.size() + " blat associations for " + arrayDesign);
    }

    // Push the deletions of the first pass before running the second query.
    this.getSessionFactory().getCurrentSession().flush();

    final String annotationHql = "select ba from CompositeSequence cs " + " inner join cs.biologicalCharacteristic bs, AnnotationAssociation ba " + " where ba.bioSequence = bs and cs.arrayDesign=:arrayDesign";
    // noinspection unchecked
    List<AnnotationAssociation> annotationAssociations = this.getSessionFactory().getCurrentSession()
            .createQuery(annotationHql)
            .setParameter("arrayDesign", arrayDesign)
            .list();
    if (!annotationAssociations.isEmpty()) {
        for (AnnotationAssociation association : annotationAssociations) {
            this.getSessionFactory().getCurrentSession().delete(association);
        }
        AbstractDao.log.info("Done deleting " + annotationAssociations.size() + " AnnotationAssociations for " + arrayDesign);
    }
}
Also used : AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) ArrayDesignValueObject(ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject)

Example 5 with AnnotationAssociation

use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.

In class GeneProductServiceImpl, method remove.

/**
 * Removes the given gene products together with the BLAT and annotation associations that refer to them.
 * <p>
 * Before a product is removed, any of its accessions for which a sequence is found by accession is taken
 * out of the product's accession collection.
 *
 * @param toRemove the gene products to remove
 */
@Override
@Transactional
public void remove(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatAssociations = this.blatAssociationDao.find(toRemove);
    if (!blatAssociations.isEmpty()) {
        AbstractService.log.info("Removing " + blatAssociations.size() + " blat associations involving up to " + toRemove.size() + " products.");
        this.blatAssociationDao.remove(blatAssociations);
    }

    Collection<AnnotationAssociation> annotAssociations = this.annotationAssociationDao.find(toRemove);
    if (!annotAssociations.isEmpty()) {
        AbstractService.log.info("Removing " + annotAssociations.size() + " annotationAssociations involving up to " + toRemove.size() + " products.");
        this.annotationAssociationDao.remove(annotAssociations);
    }

    // remove associations to database entries that are still associated with sequences.
    for (GeneProduct product : toRemove) {
        GeneProduct thawed = this.thaw(product);
        Collection<DatabaseEntry> stillInUse = new HashSet<>();
        for (DatabaseEntry accession : thawed.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) == null) {
                continue;
            }
            stillInUse.add(accession);
        }
        // Collected first and removed afterwards, so the accession collection is not modified while iterating.
        thawed.getAccessions().removeAll(stillInUse);
        this.geneProductDao.remove(thawed);
    }
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation)8 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)5 HashSet (java.util.HashSet)4 DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry)3 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)3 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)3 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)2 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)2 Gene (ubic.gemma.model.genome.Gene)2 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 StringTokenizer (java.util.StringTokenizer)1 Criteria (org.hibernate.Criteria)1 HibernateTemplate (org.springframework.orm.hibernate3.HibernateTemplate)1 Transactional (org.springframework.transaction.annotation.Transactional)1 GeneMappingSummary (ubic.gemma.core.analysis.sequence.GeneMappingSummary)1 ExternalDatabase (ubic.gemma.model.common.description.ExternalDatabase)1 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)1