Search in sources :

Example 26 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class DatabaseViewGeneratorImpl method formatDiffExResult.

/**
 * Renders a differential expression result as tab-delimited text, one line per contrast.
 * <p>
 * Each line holds: experiment id, experiment short name, NCBI gene id, gene id, factor name, factor URI,
 * baseline description, the pretty-printed factor value of the contrast, and the direction of change
 * ("-" when the log fold change is negative, "+" otherwise).
 *
 * @param ee                  experiment the result belongs to
 * @param probeAnalysisResult result whose probe and contrasts are formatted
 * @param factorName          factor name written verbatim into each line
 * @param factorURI           factor URI written verbatim into each line
 * @param baselineDescription baseline description written verbatim into each line
 * @return the formatted lines (empty if there are no contrasts), or {@code null} when the probe does not map to
 * exactly one gene or that gene has no NCBI gene id
 */
private String formatDiffExResult(ExpressionExperiment ee, DifferentialExpressionAnalysisResult probeAnalysisResult, String factorName, String factorURI, String baselineDescription) {
    CompositeSequence probe = probeAnalysisResult.getProbe();
    Collection<Gene> mappedGenes = compositeSequenceService.getGenes(probe);
    // Only probes mapping to exactly one gene are reported.
    if (mappedGenes.size() != 1) {
        return null;
    }
    Gene gene = mappedGenes.iterator().next();
    if (gene.getNcbiGeneId() == null) {
        return null;
    }
    Collection<ContrastResult> contrastResults = probeAnalysisResult.getContrasts();
    StringBuilder lines = new StringBuilder();
    final String lineFormat = "%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n";
    for (ContrastResult contrast : contrastResults) {
        FactorValue contrastFactorValue = contrast.getFactorValue();
        String sign;
        if (contrast.getLogFoldChange() < 0) {
            sign = "-";
        } else {
            sign = "+";
        }
        String prettyFactorValue = ExperimentalDesignUtils.prettyString(contrastFactorValue);
        String line = String.format(lineFormat, ee.getId(), ee.getShortName(), gene.getNcbiGeneId().toString(), gene.getId(), factorName, factorURI, baselineDescription, prettyFactorValue, sign);
        lines.append(line);
    }
    return lines.toString();
}
Also used : FactorValue(ubic.gemma.model.expression.experiment.FactorValue) Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult)

Example 27 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class ArrayDesignAnnotationServiceImpl method populateProbeNameToIdMap.

/**
 * Seeds the two lookup maps from the probes (composite sequences) of a platform.
 * <p>
 * For every probe, an empty gene set is registered in {@code results} under the probe id, and the probe name is
 * mapped to the probe id in {@code probeNameToId}. When a name is already present a warning is logged and the
 * earlier mapping is overwritten.
 *
 * @param arrayDesign   platform whose composite sequences are traversed
 * @param results       receives an empty, mutable gene collection per probe id
 * @param probeNameToId receives the probe name to probe id mapping
 */
private static void populateProbeNameToIdMap(ArrayDesign arrayDesign, Map<Long, Collection<Gene>> results, Map<String, Long> probeNameToId) {
    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        Long probeId = probe.getId();
        String probeName = probe.getName();
        Collection<Gene> noGenesYet = new HashSet<Gene>();
        results.put(probeId, noGenesYet);
        boolean nameSeenBefore = probeNameToId.containsKey(probeName);
        if (nameSeenBefore) {
            ArrayDesignAnnotationServiceImpl.log.warn("Duplicate probe name: " + probeName);
        }
        // Last one wins: a duplicate name is re-pointed at the most recent probe id.
        probeNameToId.put(probeName, probeId);
    }
}
Also used : Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 28 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class ArrayDesignAnnotationServiceImpl method generateAnnotationFile.

/**
 * Writes one annotation line per composite sequence (probe) and closes the writer.
 * <p>
 * Three cases are distinguished per probe: no gene products (a line with empty fields is written), exactly one
 * gene product (the gene's symbol, name, GO terms, id and NCBI id are written directly), and several gene products
 * (symbols, names, ids and NCBI ids are each joined with "|" and the GO terms of all genes are pooled).
 * <p>
 * The writer is closed when this method finishes, whether it returns normally or throws.
 *
 * @param writer               destination; closed by this method
 * @param genesWithSpecificity probe to gene-product associations; every key yields one output line
 * @param ty                   output type, passed through to the GO term lookup
 * @return the number of composite sequences processed
 * @throws IOException if writing a line or closing the writer fails
 */
@Override
public int generateAnnotationFile(Writer writer, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> genesWithSpecificity, OutputType ty) throws IOException {
    int compositeSequencesProcessed = 0;
    int simple = 0;
    int empty = 0;
    int complex = 0;
    // We use LinkedHashSets to keep everything in a predictable order - this is important for the gene symbols,
    // descriptions and NCBI ids (but not important for GO terms). When a probe maps to multiple genes, we list
    // those items for the genes in the same order. There is a feature request to make the order deterministic
    // (i.e., lexicographic sort); this could be done by using little gene objects or similar.
    // These accumulators are reused (cleared) for every multi-gene probe and must never be re-pointed at a
    // collection owned by someone else.
    Collection<OntologyTerm> goTerms = new LinkedHashSet<>();
    Set<String> genes = new LinkedHashSet<>();
    Set<String> geneDescriptions = new LinkedHashSet<>();
    Set<String> geneIds = new LinkedHashSet<>();
    Set<String> ncbiIds = new LinkedHashSet<>();
    // try-with-resources guarantees the writer is closed even if a lookup or a write fails part-way through.
    try (Writer out = writer) {
        Map<Gene, Collection<VocabCharacteristic>> goMappings = this.getGOMappings(genesWithSpecificity);
        for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : genesWithSpecificity.entrySet()) {
            CompositeSequence cs = entry.getKey();
            Collection<BioSequence2GeneProduct> geneclusters = entry.getValue();
            if (++compositeSequencesProcessed % 2000 == 0 && ArrayDesignAnnotationServiceImpl.log.isInfoEnabled()) {
                ArrayDesignAnnotationServiceImpl.log.info("Processed " + compositeSequencesProcessed + "/" + genesWithSpecificity.size() + " compositeSequences " + empty + " empty; " + simple + " simple; " + complex + " complex;");
            }
            if (geneclusters.isEmpty()) {
                this.writeAnnotationLine(out, cs.getName(), "", "", null, "", "");
                empty++;
                continue;
            }
            if (geneclusters.size() == 1) {
                // common case, do it quickly.
                BioSequence2GeneProduct b2g = geneclusters.iterator().next();
                Gene g = b2g.getGeneProduct().getGene();
                // Keep the lookup result in its own local: assigning it to the shared accumulator would make a
                // later multi-gene probe clear() and addAll() into a collection this method does not own.
                Collection<OntologyTerm> singleGeneGoTerms = this.getGoTerms(goMappings.get(g), ty);
                String gemmaId = g.getId() == null ? "" : g.getId().toString();
                String ncbiId = g.getNcbiGeneId() == null ? "" : g.getNcbiGeneId().toString();
                this.writeAnnotationLine(out, cs.getName(), g.getOfficialSymbol(), g.getOfficialName(), singleGeneGoTerms, gemmaId, ncbiId);
                simple++;
                continue;
            }
            goTerms.clear();
            genes.clear();
            geneDescriptions.clear();
            geneIds.clear();
            ncbiIds.clear();
            for (BioSequence2GeneProduct bioSequence2GeneProduct : geneclusters) {
                Gene g = bioSequence2GeneProduct.getGeneProduct().getGene();
                genes.add(g.getOfficialSymbol());
                geneDescriptions.add(g.getOfficialName());
                // Missing ids are skipped (as for NCBI ids below) instead of failing with a NullPointerException;
                // the single-gene branch tolerates a missing id as well.
                Long gemmaGeneId = g.getId();
                if (gemmaGeneId != null) {
                    geneIds.add(gemmaGeneId.toString());
                }
                Integer ncbiGeneId = g.getNcbiGeneId();
                if (ncbiGeneId != null) {
                    ncbiIds.add(ncbiGeneId.toString());
                }
                goTerms.addAll(this.getGoTerms(goMappings.get(g), ty));
            }
            String geneString = StringUtils.join(genes, "|");
            String geneDescriptionString = StringUtils.join(geneDescriptions, "|");
            String geneIdsString = StringUtils.join(geneIds, "|");
            String ncbiIdsString = StringUtils.join(ncbiIds, "|");
            this.writeAnnotationLine(out, cs.getName(), geneString, geneDescriptionString, goTerms, geneIdsString, ncbiIdsString);
            complex++;
        }
    }
    return compositeSequencesProcessed;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) OntologyTerm(ubic.basecode.ontology.model.OntologyTerm) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Gene(ubic.gemma.model.genome.Gene)

Example 29 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class GeneMultifunctionalityPopulationServiceImpl method computeMultifunctionality.

/**
 * Implementation of multifunctionality computations as described in Gillis and Pavlidis (2011) PLoS ONE 6:2:e17258.
 * <p>
 * For each gene, the score is the sum over its GO groups of {@code 1 / (inGroup * outGroup)}, where
 * {@code inGroup} is the number of genes annotated with the group and {@code outGroup} is the number of genes
 * that are not. Groups that contain every gene are skipped. Genes are then ranked by score and the rank is stored
 * as a value in [0, 1] where bigger means "more multifunctional".
 *
 * @param gomap for each gene, the strings identifying the GO groups it belongs to; must not be empty
 * @return for each gene in {@code gomap}, a {@link Multifunctionality} with the number of GO terms, the score and
 * the rank populated
 */
private Map<Gene, Multifunctionality> computeMultifunctionality(Map<Gene, Collection<String>> gomap) {
    assert !gomap.isEmpty();
    // Count how many genes carry each GO group.
    Map<String, Integer> goGroupSizes = new HashMap<>();
    for (Gene g : gomap.keySet()) {
        for (String go : gomap.get(g)) {
            if (!goGroupSizes.containsKey(go)) {
                goGroupSizes.put(go, 1);
            } else {
                goGroupSizes.put(go, goGroupSizes.get(go) + 1);
            }
        }
    }
    GeneMultifunctionalityPopulationServiceImpl.log.info("Computed GO group sizes");
    Map<Gene, Double> geneMultifunctionalityScore = new HashMap<>();
    Map<Gene, Multifunctionality> geneMultifunctionality = new HashMap<>();
    int numGenes = gomap.size();
    for (Gene gene : gomap.keySet()) {
        Multifunctionality mf = Multifunctionality.Factory.newInstance();
        double mfscore = 0.0;
        Collection<String> sets = gomap.get(gene);
        for (String goset : sets) {
            assert goGroupSizes.containsKey(goset);
            int inGroup = goGroupSizes.get(goset);
            assert inGroup > 0;
            int outGroup = numGenes - inGroup;
            if (outGroup == 0) {
                // this doesn't meaningfully contribute to multifunctionality since every gene has it.
                continue;
            }
            // Multiply in double: as an int product, inGroup * outGroup silently overflows once the gene count
            // exceeds roughly 92,000 (both factors near 46,341), which would corrupt or negate the score.
            mfscore += 1.0 / ((double) inGroup * outGroup);
        }
        assert mfscore >= 0.0 && mfscore <= 1.0;
        mf.setNumGoTerms(sets.size());
        mf.setScore(mfscore);
        geneMultifunctionalityScore.put(gene, mfscore);
        geneMultifunctionality.put(gene, mf);
    }
    Map<Gene, Double> rawGeneMultifunctionalityRanks = Rank.rankTransform(geneMultifunctionalityScore, true);
    assert numGenes == rawGeneMultifunctionalityRanks.size();
    for (Gene gene : rawGeneMultifunctionalityRanks.keySet()) {
        // 1-base the rank before calculating ratio
        double relRank = (rawGeneMultifunctionalityRanks.get(gene) + 1) / numGenes;
        assert relRank >= 0.0 && relRank <= 1.0;
        // big values are "more multifunctional".
        geneMultifunctionality.get(gene).setRank(Math.max(0.0, 1.0 - relRank));
    }
    GeneMultifunctionalityPopulationServiceImpl.log.info("Computed multifunctionality");
    return geneMultifunctionality;
}
Also used : Multifunctionality(ubic.gemma.model.genome.gene.Multifunctionality) Gene(ubic.gemma.model.genome.Gene) HashMap(java.util.HashMap)

Example 30 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class GeneMultifunctionalityPopulationServiceImpl method updateMultifunctionality.

/**
 * Recomputes multifunctionality for every gene of a taxon and saves the results in batches.
 * <p>
 * All genes of the taxon are loaded, their GO annotations fetched, multifunctionality computed, and the genes are
 * then handed to {@code saveBatch} in groups of 200 together with the computed values. Progress is logged every
 * 1000 genes. If the taxon has no genes a warning is logged and nothing else happens.
 *
 * @param taxon taxon whose genes are updated
 */
@Override
public void updateMultifunctionality(Taxon taxon) {
    Collection<Gene> genes = geneService.loadAll(taxon);
    if (genes.isEmpty()) {
        GeneMultifunctionalityPopulationServiceImpl.log.warn("No genes found for " + taxon);
        return;
    }

    Map<Gene, Multifunctionality> mfs = this.computeMultifunctionality(this.fetchGoAnnotations(genes));
    GeneMultifunctionalityPopulationServiceImpl.log.info("Saving multifunctionality for " + genes.size() + " genes");

    final int batchSize = 200;
    Collection<Gene> pending = new HashSet<>();
    int processed = 0;
    for (Gene gene : genes) {
        pending.add(gene);
        // Flush as soon as a full batch has accumulated.
        if (pending.size() == batchSize) {
            this.saveBatch(pending, mfs);
            pending.clear();
        }
        processed++;
        if (processed % 1000 == 0) {
            GeneMultifunctionalityPopulationServiceImpl.log.info("Updated " + processed + " genes/" + genes.size());
        }
    }

    // Save whatever is left over from the last, partial batch.
    if (!pending.isEmpty()) {
        this.saveBatch(pending, mfs);
    }
    GeneMultifunctionalityPopulationServiceImpl.log.info("Done");
}
Also used : Multifunctionality(ubic.gemma.model.genome.gene.Multifunctionality) Gene(ubic.gemma.model.genome.Gene) Collection(java.util.Collection) HashSet(java.util.HashSet)

Aggregations

Gene (ubic.gemma.model.genome.Gene)186 Taxon (ubic.gemma.model.genome.Taxon)34 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)32 StopWatch (org.apache.commons.lang3.time.StopWatch)31 Test (org.junit.Test)24 HashSet (java.util.HashSet)23 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)20 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)18 Element (org.w3c.dom.Element)16 ArrayList (java.util.ArrayList)13 Transactional (org.springframework.transaction.annotation.Transactional)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 Collection (java.util.Collection)11 OntologyTerm (ubic.basecode.ontology.model.OntologyTerm)11 CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)10 HashMap (java.util.HashMap)8 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)7