use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class DatabaseViewGeneratorImpl method formatDiffExResult.
/**
 * Renders a single differential expression result as tab-delimited text, emitting one line per contrast.
 * <p>
 * Each line holds, in order: experiment id, experiment short name, NCBI gene id, gene id, factor name,
 * factor URI, baseline description, a description of the contrast's factor value, and the direction of
 * change ("-" when the log fold change is below zero, "+" otherwise).
 *
 * @param ee                  experiment the result belongs to
 * @param probeAnalysisResult result whose probe and contrasts are formatted
 * @param factorName          factor name written verbatim into each line
 * @param factorURI           factor URI written verbatim into each line
 * @param baselineDescription baseline description written verbatim into each line
 * @return the formatted lines (empty if the result has no contrasts), or null when the probe does not map
 *         to exactly one gene or that gene has no NCBI gene id
 */
private String formatDiffExResult(ExpressionExperiment ee, DifferentialExpressionAnalysisResult probeAnalysisResult, String factorName, String factorURI, String baselineDescription) {
    CompositeSequence probe = probeAnalysisResult.getProbe();
    Collection<Gene> mappedGenes = compositeSequenceService.getGenes(probe);

    // Only probes that map to exactly one gene are reported.
    if (mappedGenes.size() != 1) {
        return null;
    }

    Gene gene = mappedGenes.iterator().next();
    if (gene.getNcbiGeneId() == null) {
        return null;
    }

    StringBuilder lines = new StringBuilder();
    for (ContrastResult contrast : probeAnalysisResult.getContrasts()) {
        FactorValue contrastValue = contrast.getFactorValue();
        String sign;
        if (contrast.getLogFoldChange() < 0) {
            sign = "-";
        } else {
            sign = "+";
        }
        String contrastValueLabel = ExperimentalDesignUtils.prettyString(contrastValue);
        String line = String.format("%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n", ee.getId(), ee.getShortName(), gene.getNcbiGeneId().toString(), gene.getId(), factorName, factorURI, baselineDescription, contrastValueLabel, sign);
        lines.append(line);
    }
    return lines.toString();
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class ArrayDesignAnnotationServiceImpl method populateProbeNameToIdMap.
/**
 * Seeds both lookup maps from the composite sequences of an array design.
 * <p>
 * For every composite sequence, an empty gene set is registered in {@code results} under the sequence id,
 * and the sequence name is mapped to its id in {@code probeNameToId}. When a name is already present, a
 * warning is logged and the earlier mapping is overwritten.
 *
 * @param arrayDesign   design whose composite sequences are visited
 * @param results       map from composite sequence id to its (initially empty) gene collection; modified
 * @param probeNameToId map from composite sequence name to id; modified
 */
private static void populateProbeNameToIdMap(ArrayDesign arrayDesign, Map<Long, Collection<Gene>> results, Map<String, Long> probeNameToId) {
    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        Long probeId = probe.getId();
        String probeName = probe.getName();

        results.put(probeId, new HashSet<Gene>());

        boolean nameAlreadySeen = probeNameToId.containsKey(probeName);
        if (nameAlreadySeen) {
            ArrayDesignAnnotationServiceImpl.log.warn("Duplicate probe name: " + probeName);
        }
        // Last occurrence of a name wins.
        probeNameToId.put(probeName, probeId);
    }
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class ArrayDesignAnnotationServiceImpl method generateAnnotationFile.
/**
 * Writes one annotation line per composite sequence and closes the writer when done.
 * <p>
 * Three cases are distinguished for each composite sequence:
 * <ul>
 * <li>no gene products: a line with empty gene fields is written;</li>
 * <li>exactly one gene product: the gene's symbol, name, GO terms and ids are written directly;</li>
 * <li>several gene products: symbols, names, ids and NCBI ids are de-duplicated and joined with "|",
 * and the GO terms of all genes are pooled.</li>
 * </ul>
 *
 * @param writer               destination; closed by this method once all lines were written
 * @param genesWithSpecificity composite sequences mapped to their gene product associations
 * @param ty                   output type forwarded to the GO term lookup
 * @return the number of composite sequences processed
 * @throws IOException if writing or closing fails
 */
@Override
public int generateAnnotationFile(Writer writer, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> genesWithSpecificity, OutputType ty) throws IOException {
    int compositeSequencesProcessed = 0;
    int simple = 0;
    int empty = 0;
    int complex = 0;
    // We use LinkedHashSets to keep everything in a predictable order - this is important for the gene symbols,
    // descriptions and NCBI ids (but not important for GO terms). When a probe maps to multiple genes, we list
    // those three items for the genes in the same order. There is a feature request to make the order
    // deterministic (i.e., lexicographic sort); this could be done by using little gene objects or whatever.
    Collection<OntologyTerm> goTerms = new LinkedHashSet<>();
    Set<String> genes = new LinkedHashSet<>();
    Set<String> geneDescriptions = new LinkedHashSet<>();
    Set<String> geneIds = new LinkedHashSet<>();
    Set<String> ncbiIds = new LinkedHashSet<>();
    Map<Gene, Collection<VocabCharacteristic>> goMappings = this.getGOMappings(genesWithSpecificity);
    for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : genesWithSpecificity.entrySet()) {
        CompositeSequence cs = entry.getKey();
        Collection<BioSequence2GeneProduct> geneclusters = entry.getValue();
        if (++compositeSequencesProcessed % 2000 == 0 && ArrayDesignAnnotationServiceImpl.log.isInfoEnabled()) {
            ArrayDesignAnnotationServiceImpl.log.info("Processed " + compositeSequencesProcessed + "/" + genesWithSpecificity.size() + " compositeSequences " + empty + " empty; " + simple + " simple; " + complex + " complex;");
        }
        if (geneclusters.isEmpty()) {
            this.writeAnnotationLine(writer, cs.getName(), "", "", null, "", "");
            empty++;
            continue;
        }
        if (geneclusters.size() == 1) {
            // common case, do it quickly.
            BioSequence2GeneProduct b2g = geneclusters.iterator().next();
            Gene g = b2g.getGeneProduct().getGene();
            // Keep the lookup result in its own local: assigning it to the shared accumulator would make the
            // later clear()/addAll() calls mutate a collection this method does not own and would drop the
            // LinkedHashSet used for the multi-gene case.
            Collection<OntologyTerm> singleGeneGoTerms = this.getGoTerms(goMappings.get(g), ty);
            String gemmaId = g.getId() == null ? "" : g.getId().toString();
            String ncbiId = g.getNcbiGeneId() == null ? "" : g.getNcbiGeneId().toString();
            this.writeAnnotationLine(writer, cs.getName(), g.getOfficialSymbol(), g.getOfficialName(), singleGeneGoTerms, gemmaId, ncbiId);
            simple++;
            continue;
        }
        // Multi-gene case: reset the reusable accumulators.
        goTerms.clear();
        genes.clear();
        geneDescriptions.clear();
        geneIds.clear();
        ncbiIds.clear();
        for (BioSequence2GeneProduct bioSequence2GeneProduct : geneclusters) {
            Gene g = bioSequence2GeneProduct.getGeneProduct().getGene();
            genes.add(g.getOfficialSymbol());
            geneDescriptions.add(g.getOfficialName());
            // A missing id is skipped rather than dereferenced, mirroring the NCBI id handling below and the
            // null tolerance of the single-gene case.
            Long geneId = g.getId();
            if (geneId != null) {
                geneIds.add(geneId.toString());
            }
            Integer ncbiGeneId = g.getNcbiGeneId();
            if (ncbiGeneId != null) {
                ncbiIds.add(ncbiGeneId.toString());
            }
            goTerms.addAll(this.getGoTerms(goMappings.get(g), ty));
        }
        String geneString = StringUtils.join(genes, "|");
        String geneDescriptionString = StringUtils.join(geneDescriptions, "|");
        String geneIdsString = StringUtils.join(geneIds, "|");
        String ncbiIdsString = StringUtils.join(ncbiIds, "|");
        this.writeAnnotationLine(writer, cs.getName(), geneString, geneDescriptionString, goTerms, geneIdsString, ncbiIdsString);
        complex++;
    }
    writer.close();
    return compositeSequencesProcessed;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class GeneMultifunctionalityPopulationServiceImpl method computeMultifunctionality.
/**
 * Implementation of multifunctionality computations as described in Gillis and Pavlidis (2011) PLoS ONE 6:2:e17258.
 * <p>
 * For every gene the score is the sum, over its GO terms, of {@code 1 / (inGroup * outGroup)}, where
 * {@code inGroup} is the number of genes annotated with the term and {@code outGroup} is the number of genes
 * without it. Terms carried by every gene are skipped. Genes are then ranked by score and the rank is stored
 * as a relative value in [0, 1], with larger values meaning "more multifunctional".
 *
 * @param gomap map of gene to the GO term identifiers it is annotated with; must not be empty
 * @return map of each gene in {@code gomap} to its populated multifunctionality (number of GO terms, score, rank)
 */
private Map<Gene, Multifunctionality> computeMultifunctionality(Map<Gene, Collection<String>> gomap) {
    assert !gomap.isEmpty();
    // Count how many genes carry each GO term.
    Map<String, Integer> goGroupSizes = new HashMap<>();
    for (Collection<String> terms : gomap.values()) {
        for (String go : terms) {
            Integer seen = goGroupSizes.get(go);
            goGroupSizes.put(go, seen == null ? 1 : seen + 1);
        }
    }
    GeneMultifunctionalityPopulationServiceImpl.log.info("Computed GO group sizes");
    Map<Gene, Double> geneMultifunctionalityScore = new HashMap<>();
    Map<Gene, Multifunctionality> geneMultifunctionality = new HashMap<>();
    int numGenes = gomap.size();
    for (Map.Entry<Gene, Collection<String>> entry : gomap.entrySet()) {
        Gene gene = entry.getKey();
        Collection<String> sets = entry.getValue();
        Multifunctionality mf = Multifunctionality.Factory.newInstance();
        double mfscore = 0.0;
        for (String goset : sets) {
            assert goGroupSizes.containsKey(goset);
            int inGroup = goGroupSizes.get(goset);
            assert inGroup > 0;
            int outGroup = numGenes - inGroup;
            if (outGroup == 0) {
                // this doesn't meaningfully contribute to multifunctionality since every gene has it.
                continue;
            }
            // Multiply in double: the int product silently overflows once there are more than ~92k genes,
            // which would yield a wrong (possibly negative) contribution.
            mfscore += 1.0 / ((double) inGroup * outGroup);
        }
        assert mfscore >= 0.0 && mfscore <= 1.0;
        mf.setNumGoTerms(sets.size());
        mf.setScore(mfscore);
        geneMultifunctionalityScore.put(gene, mfscore);
        geneMultifunctionality.put(gene, mf);
    }
    Map<Gene, Double> rawGeneMultifunctionalityRanks = Rank.rankTransform(geneMultifunctionalityScore, true);
    assert numGenes == rawGeneMultifunctionalityRanks.size();
    for (Map.Entry<Gene, Double> ranked : rawGeneMultifunctionalityRanks.entrySet()) {
        // 1-base the rank before calculating ratio
        double relRank = (ranked.getValue() + 1) / numGenes;
        assert relRank >= 0.0 && relRank <= 1.0;
        // big values are "more multifunctional".
        geneMultifunctionality.get(ranked.getKey()).setRank(Math.max(0.0, 1.0 - relRank));
    }
    GeneMultifunctionalityPopulationServiceImpl.log.info("Computed multifunctionality");
    return geneMultifunctionality;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class GeneMultifunctionalityPopulationServiceImpl method updateMultifunctionality.
/**
 * Recomputes multifunctionality for all genes of a taxon and saves the results in batches.
 * <p>
 * Genes are loaded for the taxon, their GO annotations fetched, multifunctionality computed, and the genes
 * then handed to {@code saveBatch} in groups of 200. Progress is logged every 1000 genes. If the taxon has
 * no genes, a warning is logged and nothing else happens.
 *
 * @param taxon taxon whose genes are updated
 */
@Override
public void updateMultifunctionality(Taxon taxon) {
    Collection<Gene> genes = geneService.loadAll(taxon);
    if (genes.isEmpty()) {
        GeneMultifunctionalityPopulationServiceImpl.log.warn("No genes found for " + taxon);
        return;
    }

    Map<Gene, Multifunctionality> mfs = this.computeMultifunctionality(this.fetchGoAnnotations(genes));

    final int total = genes.size();
    GeneMultifunctionalityPopulationServiceImpl.log.info("Saving multifunctionality for " + total + " genes");

    final int batchSize = 200;
    Collection<Gene> pending = new HashSet<>();
    int processed = 0;
    for (Gene gene : genes) {
        pending.add(gene);
        processed++;

        // Flush as soon as a full batch has accumulated.
        if (pending.size() == batchSize) {
            this.saveBatch(pending, mfs);
            pending.clear();
        }

        if (processed % 1000 == 0) {
            GeneMultifunctionalityPopulationServiceImpl.log.info("Updated " + processed + " genes/" + total);
        }
    }

    // Save whatever is left over from the last, partial batch.
    if (!pending.isEmpty()) {
        this.saveBatch(pending, mfs);
    }

    GeneMultifunctionalityPopulationServiceImpl.log.info("Done");
}
Aggregations