Examples with BioSequence - ubic.gemma.model.genome.biosequence.BioSequence

Example 56 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class BioSequenceDaoImpl method find.

/**
 * Looks up the persistent sequence corresponding to the given one.
 * <p>
 * The query is built by {@code BusinessKey} and run read-only with manual flushing. When it yields exactly
 * one row, that row is returned. When it yields several, the candidates are reported through
 * {@code debug} and the first one that {@code equals} the argument is returned; if a second equal
 * candidate is seen a warning is logged and the first is still returned.
 *
 * @param bioSequence the sequence to search for; validated by {@code BusinessKey.checkValidKey}
 * @return the matching sequence, or {@code null} if the query returned nothing or none of several
 *         candidates equals the argument
 */
@Override
public BioSequence find(BioSequence bioSequence) {
    BusinessKey.checkValidKey(bioSequence);
    Criteria criteria = BusinessKey.createQueryObject(this.getSessionFactory().getCurrentSession(), bioSequence);
    criteria.setReadOnly(true);
    criteria.setFlushMode(FlushMode.MANUAL);

    // This initially matches on name and taxon only.
    java.util.List<?> candidates = criteria.list();
    if (candidates == null || candidates.isEmpty()) {
        return null;
    }
    if (candidates.size() == 1) {
        return (BioSequence) candidates.iterator().next();
    }

    // Several candidates: report them, then try to find the best match. See BusinessKey for more
    // explanation of why this is needed.
    this.debug(bioSequence, candidates);
    BioSequence best = null;
    for (Object candidate : candidates) {
        BioSequence sequence = (BioSequence) candidate;
        if (!sequence.equals(bioSequence)) {
            continue;
        }
        if (best != null) {
            AbstractDao.log.warn("More than one sequence in the database matches " + bioSequence + ", returning arbitrary match: " + best);
            break;
        }
        best = sequence;
    }
    return best;
}

Also used : java.util(java.util) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BioSequenceValueObject(ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject) Criteria(org.hibernate.Criteria)

Example 57 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class BioSequenceDaoImpl method debug.

/**
 * Logs, at debug level, a report listing the query sequence and every candidate that matched it.
 * <p>
 * Nothing is built or read when debug logging is disabled: assembling the report visits every result
 * and dereferences its database entry, which is wasted work if the message is going to be discarded.
 *
 * @param query   the sequence that was searched for; omitted from the report when {@code null}
 * @param results the candidates found; each element is cast to {@code BioSequence}
 */
private void debug(BioSequence query, List<?> results) {
    if (!AbstractDao.log.isDebugEnabled()) {
        return;
    }
    StringBuilder sb = new StringBuilder();
    sb.append("\nMultiple BioSequences found matching query:\n");
    if (query != null) {
        appendBioSequenceSummary(sb, "\tQuery: ID=", query);
    }
    for (Object object : results) {
        appendBioSequenceSummary(sb, "\tMatch: ID=", (BioSequence) object);
    }
    AbstractDao.log.debug(sb.toString());
}

/**
 * Appends a one-line summary of a sequence: the given prefix followed by the id, the name, an abbreviated
 * sequence (only when non-blank) and the accession (only when a database entry is present).
 */
private static void appendBioSequenceSummary(StringBuilder sb, String prefix, BioSequence sequence) {
    sb.append(prefix).append(sequence.getId()).append(" Name=").append(sequence.getName());
    if (StringUtils.isNotBlank(sequence.getSequence())) {
        sb.append(" Sequence=").append(StringUtils.abbreviate(sequence.getSequence(), 10));
    }
    if (sequence.getSequenceDatabaseEntry() != null) {
        sb.append(" acc=").append(sequence.getSequenceDatabaseEntry().getAccession());
    }
    sb.append("\n");
}

Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BioSequenceValueObject(ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject)

Example 58 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class DesignElementDataVectorDaoImpl method thawRawAndProcessed.

/**
 * Initializes ("thaws") the given vectors and the entities reachable from them, in four timed phases:
 * <ol>
 * <li>each vector and its quantitation type are initialized, its bioassay dimension, design element and
 * expression experiment are collected, and the vector and quantitation type are evicted from the session;</li>
 * <li>each collected expression experiment is initialized and evicted;</li>
 * <li>each collected bioassay dimension is re-fetched with a join-fetch query and the fetched instance is
 * set on every vector whose dimension has the same id;</li>
 * <li>for each collected design element that has a biological characteristic, that sequence and the
 * element's array design are initialized.</li>
 * </ol>
 * Each phase logs its elapsed time at info level when it exceeds a phase-specific threshold.
 *
 * @param designElementDataVectors the vectors to thaw; the method returns immediately when {@code null}
 */
@Override
public void thawRawAndProcessed(Collection<DesignElementDataVector> designElementDataVectors) {
    if (designElementDataVectors == null)
        return;
    Session session = this.getSessionFactory().getCurrentSession();
    Hibernate.initialize(designElementDataVectors);
    StopWatch timer = new StopWatch();
    timer.start();
    // Entities seen while walking the vectors; they are thawed in the later phases.
    Collection<ExpressionExperiment> ees = new HashSet<>();
    Map<BioAssayDimension, Collection<DesignElementDataVector>> dims = new HashMap<>();
    Collection<CompositeSequence> cs = new HashSet<>();
    // Phase 1: initialize each vector and record what it refers to.
    for (DesignElementDataVector vector : designElementDataVectors) {
        // Presumably reassociates the vector with the current session before initializing it -- confirm.
        session.buildLockRequest(LockOptions.NONE).lock(vector);
        Hibernate.initialize(vector);
        Hibernate.initialize(vector.getQuantitationType());
        BioAssayDimension bad = vector.getBioAssayDimension();
        // Group the vectors by the dimension they use.
        if (!dims.containsKey(bad)) {
            dims.put(bad, new HashSet<DesignElementDataVector>());
        }
        dims.get(bad).add(vector);
        cs.add(vector.getDesignElement());
        ees.add(vector.getExpressionExperiment());
        session.evict(vector.getQuantitationType());
        session.evict(vector);
    }
    // Only report when the phase took more than one millisecond per vector on average.
    if (timer.getTime() > designElementDataVectors.size()) {
        AbstractDao.log.info("Thaw phase 1, " + designElementDataVectors.size() + " vectors initialized in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();
    // Phase 2: lightly thaw the expression experiments we saw (initialize, then evict).
    for (ExpressionExperiment ee : ees) {
        Hibernate.initialize(ee);
        session.evict(ee);
    }
    if (timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 2, " + ees.size() + " vector-associated expression experiments in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();
    // Phase 3: thaw the bioassay dimensions we saw -- usually one, more rarely two.
    for (BioAssayDimension bad : dims.keySet()) {
        // Re-fetch the dimension by id, join-fetching its bioassays, their samples (with factor values)
        // and the array designs used.
        BioAssayDimension tbad = (BioAssayDimension) this.getSessionFactory().getCurrentSession().createQuery("select distinct bad from BioAssayDimension bad join fetch bad.bioAssays ba join fetch ba.sampleUsed " + "bm join fetch ba.arrayDesignUsed left join fetch bm.factorValues fetch all properties where bad.id= :bad ").setParameter("bad", bad.getId()).uniqueResult();
        // These checks only run when assertions are enabled; otherwise a null tbad fails at tbad.getId() below.
        assert tbad != null;
        // NOTE(review): this looks up by the re-fetched instance rather than the loop key `bad`, so it
        // relies on BioAssayDimension equals/hashCode treating the two as equal -- confirm.
        assert !dims.get(tbad).isEmpty();
        // Point every vector that uses this dimension (matched by id) at the fully fetched instance.
        for (DesignElementDataVector v : designElementDataVectors) {
            if (v.getBioAssayDimension().getId().equals(tbad.getId())) {
                v.setBioAssayDimension(tbad);
            }
        }
    }
    if (timer.getTime() > 1000) {
        AbstractDao.log.info("Thaw phase 3, " + dims.size() + " vector-associated bioassaydimensions in " + timer.getTime() + "ms ");
    }
    timer.reset();
    timer.start();
    // Phase 4: thaw the design elements we saw. SLOW
    long lastTime = 0;
    int count = 0;
    for (CompositeSequence de : cs) {
        BioSequence seq = de.getBiologicalCharacteristic();
        // Design elements without a sequence are skipped entirely, including their array design.
        if (seq == null)
            continue;
        session.buildLockRequest(LockOptions.NONE).lock(seq);
        Hibernate.initialize(seq);
        // is this really necessary?
        ArrayDesign arrayDesign = de.getArrayDesign();
        Hibernate.initialize(arrayDesign);
        // Progress report: checked every 10000 thawed probes, logged only if more than a second has
        // passed since the previous checkpoint.
        if (++count % 10000 == 0) {
            if (timer.getTime() - lastTime > 1000) {
                AbstractDao.log.info("Thawed " + count + " vector-associated probes " + timer.getTime() + " ms");
            }
            lastTime = timer.getTime();
        }
    }
    timer.stop();
    if (designElementDataVectors.size() >= 2000 || timer.getTime() > 200) {
        AbstractDao.log.info("Thaw phase 4 " + cs.size() + " vector-associated probes thawed in " + timer.getTime() + "ms");
    }
}

Also used : HashMap(java.util.HashMap) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) HashSet(java.util.HashSet)

Example 59 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignRepeatScanCli method processArrayDesign.

/**
 * Runs the repeat scan for one array design and persists the sequences it reports as altered.
 * <p>
 * When {@code inputFileName} is set, the sequences are processed against that file via
 * {@code RepeatScan.processRepeatMaskerOutput}; otherwise {@code RepeatScan.repeatScan} is called on
 * them directly. The altered sequences are saved and an audit entry is recorded on the thawed design.
 *
 * @param design the array design to process; it is thawed before use
 */
private void processArrayDesign(ArrayDesign design) {
    ArrayDesign platform = this.thaw(design);

    // No taxon is passed here, so all sequences are retrieved, even for multi-taxon arrays.
    Collection<BioSequence> platformSequences = ArrayDesignSequenceAlignmentServiceImpl.getSequences(platform);

    final boolean fromFile = this.inputFileName != null;
    RepeatScan repeatScan = new RepeatScan();
    Collection<BioSequence> updated = fromFile
            ? repeatScan.processRepeatMaskerOutput(platformSequences, this.inputFileName)
            : repeatScan.repeatScan(platformSequences);

    AbstractCLI.log.info("Saving...");
    bsService.update(updated);

    String auditNote = fromFile
            ? "Repeat scan data from file: " + this.inputFileName + ", updated " + updated.size() + " sequences."
            : "Repeat scan done, updated " + updated.size() + " sequences.";
    this.audit(platform, auditNote);

    AbstractCLI.log.info("Done with " + platform);
}

Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) RepeatScan(ubic.gemma.core.analysis.sequence.RepeatScan)

Example 60 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method processCompositeSequence.

/**
 * Maps a single composite sequence using the BLAT results stored for its biological characteristic.
 *
 * @param config            configuration handed through to the probe mapper
 * @param taxon             if non-null, the sequence's taxon must equal it or nothing is mapped
 * @param goldenPathDb      analysis database to use; when {@code null} a new one is created for the
 *                          sequence's taxon
 * @param compositeSequence the element whose biological characteristic is mapped
 * @return the result of {@code probeMapper.processBlatResults}, or {@code null} when the element has no
 *         sequence, the taxon does not match, or no BLAT results remain after duplicate removal
 */
@Override
@Transactional
public Map<String, Collection<BlatAssociation>> processCompositeSequence(ProbeMapperConfig config, Taxon taxon, GoldenPathSequenceAnalysis goldenPathDb, CompositeSequence compositeSequence) {
    BioSequence sequence = compositeSequence.getBiologicalCharacteristic();
    if (sequence == null) {
        return null;
    }

    // It isn't 100% clear what the right thing to do is on a taxon mismatch, but skipping the
    // sequence seems at least reasonable.
    boolean taxonMismatch = taxon != null && !sequence.getTaxon().equals(taxon);
    if (taxonMismatch) {
        return null;
    }

    GoldenPathSequenceAnalysis db = goldenPathDb != null
            ? goldenPathDb
            : new GoldenPathSequenceAnalysis(sequence.getTaxon());

    final Collection<BlatResult> hits = blatResultService.findByBioSequence(sequence);
    ProbeMapUtils.removeDuplicates(hits);
    return hits.isEmpty() ? null : probeMapper.processBlatResults(db, hits, config);
}

Also used : GoldenPathSequenceAnalysis(ubic.gemma.core.externalDb.GoldenPathSequenceAnalysis) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)105 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)40 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)24 Test (org.junit.Test)18 HashSet (java.util.HashSet)17 Taxon (ubic.gemma.model.genome.Taxon)15 BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult)12 InputStream (java.io.InputStream)11 Collection (java.util.Collection)11 HashMap (java.util.HashMap)10 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)10 GZIPInputStream (java.util.zip.GZIPInputStream)7 Gene (ubic.gemma.model.genome.Gene)7 GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform)6 DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry)6 StopWatch (org.apache.commons.lang3.time.StopWatch)5 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)5 BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject)5 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)4