use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class DifferentialExpressionSearchController method getDifferentialExpressionMetaAnalysis.
/**
* Returns the results of the meta-analysis.
*/
private DifferentialExpressionMetaAnalysisValueObject getDifferentialExpressionMetaAnalysis(Long geneId, Collection<DiffExpressionSelectedFactorCommand> selectedFactors, double threshold) {
Gene g = geneService.load(geneId);
if (g == null) {
log.warn("No Gene with id=" + geneId);
return null;
}
/* find experiments that have had the diff cli run on it and have the gene g (analyzed) - security filtered. */
Collection<BioAssaySet> experimentsAnalyzed = differentialExpressionAnalysisService.findExperimentsWithAnalyses(g);
if (experimentsAnalyzed.size() == 0) {
throw new EntityNotFoundException("No results were found: no experiment analyzed those genes");
}
/* the 'chosen' factors (and their associated experiments) */
Map<Long, Long> eeFactorsMap = new HashMap<>();
for (DiffExpressionSelectedFactorCommand selectedFactor : selectedFactors) {
Long eeId = selectedFactor.getEeId();
eeFactorsMap.put(eeId, selectedFactor.getEfId());
if (log.isDebugEnabled())
log.debug(eeId + " --> " + selectedFactor.getEfId());
}
/*
* filter experiments that had the diff cli run on it and are in the scope of eeFactorsMap eeIds
* (active/available to the user).
*/
Collection<BioAssaySet> activeExperiments;
if (eeFactorsMap.keySet() == null || eeFactorsMap.isEmpty()) {
activeExperiments = experimentsAnalyzed;
} else {
activeExperiments = new ArrayList<>();
for (BioAssaySet ee : experimentsAnalyzed) {
if (eeFactorsMap.keySet().contains(ee.getId())) {
activeExperiments.add(ee);
}
}
}
if (activeExperiments.isEmpty()) {
throw new EntityNotFoundException("No results were found: none of the experiments selected analyzed those genes");
}
return geneDifferentialExpressionService.getDifferentialExpressionMetaAnalysis(threshold, g, eeFactorsMap, activeExperiments);
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class BlatAssociationScorer method clusterGenes.
/**
* @param associations assocs.
* @return map of physical locations for the alignments, and which genes are found there.
*/
private static Map<PhysicalLocation, Collection<Gene>> clusterGenes(Map<Gene, Collection<BlatAssociation>> associations) {
Map<PhysicalLocation, Collection<Gene>> clusters = new HashMap<>();
for (Gene gene : associations.keySet()) {
Collection<BlatAssociation> geneAssoc = associations.get(gene);
for (BlatAssociation ba : geneAssoc) {
PhysicalLocation pl = ba.getBlatResult().getTargetAlignedRegion();
if (!clusters.containsKey(pl)) {
clusters.put(pl, new HashSet<Gene>());
}
clusters.get(pl).add(gene);
}
}
// debugging information about clusters.
if (BlatAssociationScorer.log.isDebugEnabled()) {
for (PhysicalLocation pl : clusters.keySet()) {
if (clusters.get(pl).size() > 1) {
BlatAssociationScorer.log.debug("Cluster at " + pl + " with " + clusters.get(pl).size() + " members:\n" + StringUtils.join(clusters.get(pl).iterator(), "\n"));
}
}
}
return clusters;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class BlatAssociationScorer method scoreResults.
/**
* From a collection of BlatAssociations from a single BioSequence, reduce redundancy, fill in the specificity and
* score and pick the one with the best scoring statistics.
* This is a little complicated because a single sequence can yield many BlatResults to the same gene and/or gene
* product. We reduce the results down to a single (best) result for any given gene product. We also score
* specificity by the gene: if a sequence 'hits' multiple genes, then the specificity of the generated associations
* will be less than 1.
*
* @param blatAssociations for a single sequence.
* @return the highest-scoring result (if there are ties this will be a random one). Note that this return value is
* not all that useful because it assumes there is a "clear winner". The passed-in blatAssociations will be
* pruned to remove redundant entries, and will have score information filled in as well. It is intended
* that these 'refined' BlatAssociations will be used in further analysis.
* @throws IllegalArgumentException if the blatAssociations are from multiple biosequences.
*/
public static BlatAssociation scoreResults(Collection<BlatAssociation> blatAssociations) {
Map<GeneProduct, Collection<BlatAssociation>> geneProducts2Associations = BlatAssociationScorer.organizeBlatAssociationsByGeneProductAndInitializeScores(blatAssociations);
BlatAssociation globalBest = BlatAssociationScorer.removeExtraHitsPerGeneProduct(blatAssociations, geneProducts2Associations);
assert blatAssociations.size() > 0;
Map<Gene, Collection<BlatAssociation>> genes2Associations = BlatAssociationScorer.organizeBlatAssociationsByGene(blatAssociations);
assert genes2Associations.size() > 0;
/*
* At this point there should be just one blatAssociation per gene product. However, all of these really might
* be for the same gene. It is only in the case of truly multiple genes that we flag a lower specificity.
*/
if (genes2Associations.size() == 1) {
return globalBest;
}
Map<PhysicalLocation, Collection<Gene>> geneClusters = BlatAssociationScorer.clusterGenes(genes2Associations);
// compute specificity at the level of genes. First, get the best score for each gene cluster.
Map<PhysicalLocation, Double> scores = new HashMap<>();
for (PhysicalLocation pl : geneClusters.keySet()) {
Double geneScore = 0.0;
for (Gene cgene : geneClusters.get(pl)) {
for (BlatAssociation blatAssociation : genes2Associations.get(cgene)) {
Double alignScore = blatAssociation.getScore();
if (alignScore > geneScore) {
geneScore = alignScore;
}
}
}
scores.put(pl, geneScore);
}
for (PhysicalLocation pl : geneClusters.keySet()) {
Double alignScore = scores.get(pl);
for (Gene cgene : geneClusters.get(pl)) {
// All members of the cluster get the same specificity.
for (BlatAssociation blatAssociation : genes2Associations.get(cgene)) {
blatAssociation.setSpecificity(BlatAssociationScorer.computeSpecificity(scores.values(), alignScore));
}
}
}
return globalBest;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method computeHitListSizes.
@Override
public Collection<HitListSize> computeHitListSizes(Collection<DifferentialExpressionAnalysisResult> results, Map<CompositeSequence, Collection<Gene>> probeToGeneMap) {
Collection<HitListSize> hitListSizes = new HashSet<>();
StopWatch timer = new StopWatch();
timer.start();
double maxThreshold = MathUtil.max(LinearModelAnalyzer.qValueThresholdsForHitLists);
assert probeToGeneMap != null;
Collection<Gene> allGenes = new HashSet<>();
for (Collection<Gene> genes : probeToGeneMap.values()) {
allGenes.addAll(genes);
}
// maps from Doubles are a bit dodgy...
Map<Double, Integer> upCounts = new HashMap<>();
Map<Double, Integer> downCounts = new HashMap<>();
Map<Double, Integer> eitherCounts = new HashMap<>();
Map<Double, Integer> upCountGenes = new HashMap<>();
Map<Double, Integer> downCountGenes = new HashMap<>();
Map<Double, Integer> eitherCountGenes = new HashMap<>();
Collection<Gene> seenGenes = new HashSet<>();
for (DifferentialExpressionAnalysisResult r : results) {
Double corrP = r.getCorrectedPvalue();
if (corrP == null || corrP > maxThreshold) {
continue;
}
CompositeSequence probe = r.getProbe();
Collection<Gene> genesForProbe = probeToGeneMap.get(probe);
int numGenes = 0;
if (genesForProbe != null) {
numGenes = this.countNumberOfGenesNotSeenAlready(genesForProbe, seenGenes);
}
// if ( numGenes == 0 ) // This is okay; it might mean we already counted it as differentially expressed.
Collection<ContrastResult> crs = r.getContrasts();
boolean up = false;
boolean down = false;
/*
* We set up and down to be true (either or both) if at least on contrast is shown.
*/
for (ContrastResult cr : crs) {
Double lf = cr.getLogFoldChange();
// noinspection StatementWithEmptyBody // Better readability
if (lf == null) {
/*
* A contrast which is actually not valid, so it won't be counted in the hit list.
*/
} else if (lf < 0) {
down = true;
} else if (lf > 0) {
up = true;
}
}
for (double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists) {
if (!upCounts.containsKey(thresh)) {
upCounts.put(thresh, 0);
upCountGenes.put(thresh, 0);
}
if (!downCounts.containsKey(thresh)) {
downCounts.put(thresh, 0);
downCountGenes.put(thresh, 0);
}
if (!eitherCounts.containsKey(thresh)) {
eitherCounts.put(thresh, 0);
eitherCountGenes.put(thresh, 0);
}
if (corrP < thresh) {
if (up) {
upCounts.put(thresh, upCounts.get(thresh) + 1);
upCountGenes.put(thresh, upCountGenes.get(thresh) + numGenes);
}
if (down) {
downCounts.put(thresh, downCounts.get(thresh) + 1);
downCountGenes.put(thresh, downCountGenes.get(thresh) + numGenes);
}
eitherCounts.put(thresh, eitherCounts.get(thresh) + 1);
eitherCountGenes.put(thresh, eitherCountGenes.get(thresh) + numGenes);
}
}
}
for (double thresh : LinearModelAnalyzer.qValueThresholdsForHitLists) {
// Ensure we don't set values to null.
Integer up = upCounts.get(thresh) == null ? 0 : upCounts.get(thresh);
Integer down = downCounts.get(thresh) == null ? 0 : downCounts.get(thresh);
Integer either = eitherCounts.get(thresh) == null ? 0 : eitherCounts.get(thresh);
Integer upGenes = upCountGenes.get(thresh) == null ? 0 : upCountGenes.get(thresh);
Integer downGenes = downCountGenes.get(thresh) == null ? 0 : downCountGenes.get(thresh);
Integer eitherGenes = eitherCountGenes.get(thresh) == null ? 0 : eitherCountGenes.get(thresh);
assert !(allGenes.size() < upGenes || allGenes.size() < downGenes || allGenes.size() < eitherGenes) : "There were more genes differentially expressed than exist in the experiment";
HitListSize upS = HitListSize.Factory.newInstance(thresh, up, Direction.UP, upGenes);
HitListSize downS = HitListSize.Factory.newInstance(thresh, down, Direction.DOWN, downGenes);
HitListSize eitherS = HitListSize.Factory.newInstance(thresh, either, Direction.EITHER, eitherGenes);
hitListSizes.add(upS);
hitListSizes.add(downS);
hitListSizes.add(eitherS);
assert upGenes <= eitherGenes : "More genes upregulated than upregulated+downregulated";
assert downGenes <= eitherGenes : "More genes downregulated than upregulated+downregulated";
}
if (timer.getTime() > 1000) {
LinearModelAnalyzer.log.info("Hitlist computation: " + timer.getTime() + "ms");
}
return hitListSizes;
}
use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.
the class ArrayDesignAnnotationServiceImpl method parseAnnotationFile.
private static Map<Long, Collection<Gene>> parseAnnotationFile(Map<Long, Collection<Gene>> results, InputStream is, Map<String, Long> probeNameToId) {
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line;
while ((line = br.readLine()) != null) {
if (StringUtils.isBlank(line) || line.startsWith(ArrayDesignAnnotationServiceImpl.COMMENT_CHARACTER)) {
continue;
}
String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
if (fields.length < 3)
// means there are no gene annotations.
continue;
String probeName = fields[0];
if (!probeNameToId.containsKey(probeName))
continue;
Long probeId = probeNameToId.get(probeName);
List<String> geneSymbols = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[1], '|'));
List<String> geneNames = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[2], '|'));
if (geneSymbols.size() != geneNames.size()) {
ArrayDesignAnnotationServiceImpl.log.warn("Annotation file format error: Unequal number of gene symbols and names for probe=" + probeName + ", skipping row");
continue;
}
List<String> gemmaGeneIds = null;
List<String> ncbiIds = null;
if (fields.length > 4) {
// new style. fields[3] is the GO annotations.
gemmaGeneIds = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[4], '|'));
}
if (fields.length > 5) {
ncbiIds = Arrays.asList(StringUtils.splitPreserveAllTokens(fields[5], '|'));
}
for (int i = 0; i < geneSymbols.size(); i++) {
String symbol = geneSymbols.get(i);
String name = geneNames.get(i);
if (StringUtils.isBlank(symbol)) {
continue;
}
String[] symbolsB = StringUtils.split(symbol, ',');
String[] namesB = StringUtils.split(name, '$');
for (int j = 0; j < symbolsB.length; j++) {
String s = symbolsB[j];
Gene g = Gene.Factory.newInstance();
g.setOfficialSymbol(s);
try {
if (gemmaGeneIds != null) {
g.setId(Long.parseLong(gemmaGeneIds.get(j)));
}
if (ncbiIds != null) {
g.setNcbiGeneId(Integer.parseInt(ncbiIds.get(j)));
}
} catch (NumberFormatException e) {
// oh well, couldn't populate extra info.
}
if (namesB.length >= j + 1) {
String n = namesB[j];
g.setName(n);
}
results.get(probeId).add(g);
}
}
}
return results;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
Aggregations