Search in sources :

Example 46 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignMergeServiceImpl method makeNewProbes.

/**
 * Builds the transient (not yet persisted) probes needed for the merged array design: one new probe is made
 * for each existing probe listed in {@code globalBsMap} and is assigned to {@code arrayDesign}.
 * <p>
 * When {@code mergeWithExisting} is true, probes whose array design equals {@code arrayDesign} are skipped, so
 * only the probes that have to be added are returned.
 *
 * @param arrayDesign       the array design the new probes are assigned to
 * @param globalBsMap       sequence to probes map; in effect it tells us how many probes to make per sequence
 * @param mergeWithExisting whether probes already on {@code arrayDesign} are left out of the result
 * @return the newly made probes
 */
private Collection<CompositeSequence> makeNewProbes(ArrayDesign arrayDesign, Map<BioSequence, Collection<CompositeSequence>> globalBsMap, boolean mergeWithExisting) {
    ArrayDesignMergeServiceImpl.log.info(globalBsMap.size() + " unique sequences");
    Collection<CompositeSequence> mergedProbes = new HashSet<>();
    // Names handed out so far; passed to getProbeName for every probe we make.
    Collection<String> usedNames = new HashSet<>();
    for (Map.Entry<BioSequence, Collection<CompositeSequence>> entry : globalBsMap.entrySet()) {
        BioSequence sequence = entry.getKey();
        // Probes without a sequence are expected under the NULL_BIOSEQUENCE placeholder, never under null.
        assert sequence != null;
        for (CompositeSequence oldProbe : entry.getValue()) {
            boolean alreadyOnTarget = mergeWithExisting && oldProbe.getArrayDesign().equals(arrayDesign);
            if (alreadyOnTarget) {
                // Only probes from the _other_ array designs get a merged counterpart.
                assert arrayDesign.getId() != null;
                continue;
            }

            CompositeSequence mergedProbe = CompositeSequence.Factory.newInstance();
            // The placeholder stands for "no sequence", so the new probe is left without one.
            if (!sequence.equals(ExpressionExperimentPlatformSwitchService.NULL_BIOSEQUENCE)) {
                mergedProbe.setBiologicalCharacteristic(sequence);
            }

            String mergedName = this.getProbeName(usedNames, oldProbe);
            usedNames.add(mergedName);
            mergedProbe.setName(mergedName);

            String oldDescription = oldProbe.getDescription();
            if (oldDescription == null) {
                oldDescription = "";
            }
            mergedProbe.setDescription(oldDescription + " (via merge)");

            mergedProbe.setArrayDesign(arrayDesign);
            mergedProbes.add(mergedProbe);

            if (ArrayDesignMergeServiceImpl.log.isDebugEnabled()) {
                ArrayDesignMergeServiceImpl.log.debug("Made merged probe for " + sequence + ": " + mergedProbe + " for old probe on " + oldProbe.getArrayDesign().getShortName());
            }
        }
    }
    ArrayDesignMergeServiceImpl.log.info("Made " + mergedProbes.size() + " new probes");
    return mergedProbes;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) HashSet(java.util.HashSet)

Example 47 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class CompositeSequenceParser method parseOneLine.

/**
 * Parses one tab-delimited line holding three fields (probe id, GenBank accession, description) into a
 * probe that carries a newly created sequence.
 *
 * @param line the raw line to parse
 * @return the parsed probe, or {@code null} when the line does not split into exactly three fields
 */
@Override
public CompositeSequence parseOneLine(String line) {
    final String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
    if (fields.length != 3) {
        return null;
    }
    final String probeName = fields[0];
    final String accession = fields[1];
    final String probeDescription = fields[2];

    CompositeSequence probe = CompositeSequence.Factory.newInstance();
    probe.setName(probeName);
    probe.setDescription(probeDescription);

    DatabaseEntry accessionEntry = ExternalDatabaseUtils.getGenbankAccession(accession);

    // Name and description of the sequence are provisional; they will typically be changed later.
    BioSequence sequence = BioSequence.Factory.newInstance();
    sequence.setName(accession);
    sequence.setDescription(probeDescription + " (From platform source)");
    sequence.setSequenceDatabaseEntry(accessionEntry);

    probe.setBiologicalCharacteristic(sequence);
    return probe;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 48 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ExpressionExperimentPrePersistServiceImpl method loadOrPersistArrayDesignAndAddToCache.

/**
 * Returns the persistent version of the given array design and makes sure it is registered in the cache,
 * together with its probes. Having the probes cached avoids repeated, one-at-a-time fetching of design
 * elements, for example when loading design element data vectors.
 *
 * @param arrayDesign the array design to look up or persist; must have a non-blank short name
 * @param cache       the cache to consult and fill, keyed by short name
 * @return the persistent array design.
 * @throws IllegalArgumentException if the array design has a blank short name
 */
private ArrayDesign loadOrPersistArrayDesignAndAddToCache(ArrayDesign arrayDesign, ArrayDesignsForExperimentCache cache) {
    assert arrayDesign != null;
    final String shortName = arrayDesign.getShortName();
    if (StringUtils.isBlank(shortName)) {
        throw new IllegalArgumentException("Array design must have a 'short name'");
    }

    // Nothing to do when this platform was handled before.
    if (cache.getArrayDesignCache().containsKey(shortName)) {
        return cache.getArrayDesignCache().get(shortName);
    }

    StopWatch timer = new StopWatch();
    timer.start();

    // Runs in a transaction, but is fast if the design already exists.
    arrayDesign = (ArrayDesign) persisterHelper.persist(arrayDesign);

    // The design is deliberately not thawed here (wasteful for an existing design); only the
    // probe -> sequence map is fetched, and its keys (the probes) go into the cache.
    Map<CompositeSequence, BioSequence> probeToSequence = arrayDesignService.getBioSequences(arrayDesign);
    cache.add(arrayDesign, probeToSequence.keySet());

    // Only report when this took longer than 20 seconds.
    if (timer.getTime() > 20000) {
        ExpressionExperimentPrePersistServiceImpl.log.info("Load/persist & thawRawAndProcessed array design: " + timer.getTime() + "ms");
    }
    return arrayDesign;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 49 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ExpressionExperimentPrePersistServiceImpl method addNewDesignElementToPersistentArrayDesign.

/**
 * Assigns a transient probe to the given (already persistent) array design and creates it. The probe's
 * sequence is persisted first if the persister helper reports it as transient.
 *
 * @param arrayDesign   the array design the probe is assigned to; expected to have an id
 * @param designElement the probe to add; may be {@code null}
 * @return {@code null} if {@code designElement} is {@code null}; {@code designElement} itself if it is not
 *         transient; otherwise the result of creating it through the composite sequence service
 */
private CompositeSequence addNewDesignElementToPersistentArrayDesign(ArrayDesign arrayDesign, CompositeSequence designElement) {
    // A missing probe, or one that is not transient, is handed back untouched.
    if (designElement == null || !persisterHelper.isTransient(designElement)) {
        return designElement;
    }

    /*
     * There may be no sequence, or the sequence name may not be provided. Of course, if there is no sequence
     * the probe isn't going to be very useful.
     */
    BioSequence sequence = designElement.getBiologicalCharacteristic();

    assert arrayDesign.getId() != null;
    designElement.setArrayDesign(arrayDesign);

    if (persisterHelper.isTransient(sequence)) {
        // Runs in a transaction.
        BioSequence persistedSequence = (BioSequence) persisterHelper.persist(sequence);
        designElement.setBiologicalCharacteristic(persistedSequence);
    }

    return compositeSequenceService.create(designElement);
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence)

Example 50 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class BlatAssociationScorer method organizeBlatAssociationsByGeneProductAndInitializeScores.

/**
 * Groups the given blat associations by gene product. Along the way each association gets its score computed
 * and its specificity set to an initial value of 1.0. Associations are collected in a set per gene product,
 * so equal associations are kept only once.
 *
 * @param blatAssociations blat associations, all for the same query sequence
 * @return map of gene product to the associations found for it
 * @throws IllegalArgumentException if the associations refer to more than one query sequence
 */
private static Map<GeneProduct, Collection<BlatAssociation>> organizeBlatAssociationsByGeneProductAndInitializeScores(Collection<BlatAssociation> blatAssociations) {
    Map<GeneProduct, Collection<BlatAssociation>> byGeneProduct = new HashMap<>();
    // Distinct query sequences seen so far; more than one is an error.
    Collection<BioSequence> querySequences = new HashSet<>();

    for (BlatAssociation association : blatAssociations) {
        assert association.getBioSequence() != null;
        BlatAssociationScorer.computeScore(association);

        querySequences.add(association.getBioSequence());
        if (querySequences.size() > 1) {
            throw new IllegalArgumentException("Blat associations must all be for the same query sequence");
        }

        assert association.getGeneProduct() != null;
        GeneProduct product = association.getGeneProduct();

        Collection<BlatAssociation> forProduct = byGeneProduct.get(product);
        if (forProduct == null) {
            // First association for this gene product: start its group.
            forProduct = new HashSet<BlatAssociation>();
            byGeneProduct.put(product, forProduct);
        }
        forProduct.add(association);

        // Just an initial value.
        association.setSpecificity(1.0);
    }
    return byGeneProduct;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) HashMap(java.util.HashMap) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 105, CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 40, ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 24, Test (org.junit.Test): 18, HashSet (java.util.HashSet): 17, Taxon (ubic.gemma.model.genome.Taxon): 15, BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 12, InputStream (java.io.InputStream): 11, Collection (java.util.Collection): 11, HashMap (java.util.HashMap): 10, BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 10, GZIPInputStream (java.util.zip.GZIPInputStream): 7, Gene (ubic.gemma.model.genome.Gene): 7, GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform): 6, DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 6, StopWatch (org.apache.commons.lang3.time.StopWatch): 5, GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 5, BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject): 5, BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 5, AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 4