use of ubic.gemma.model.genome.gene.GeneValueObject in project Gemma by PavlidisLab.
the class GeneDifferentialExpressionServiceImpl method getDifferentialExpressionMetaAnalysis.
@Override
public DifferentialExpressionMetaAnalysisValueObject getDifferentialExpressionMetaAnalysis(double threshold, Gene g, Map<Long, Long> eeFactorsMap, Collection<BioAssaySet> activeExperiments) {
StopWatch timer = new StopWatch();
timer.start();
/*
* Get results for each active experiment on given gene. Handling the threshold check below since we ignore this
* for the meta analysis. The results returned are for all factors, not just the factors we are seeking.
*/
Map<ExpressionExperimentValueObject, List<DifferentialExpressionValueObject>> resultsMap = differentialExpressionResultService.find(g, EntityUtils.getIds(activeExperiments));
GeneDifferentialExpressionServiceImpl.log.debug(resultsMap.size() + " results for " + g + " in " + activeExperiments);
DifferentialExpressionMetaAnalysisValueObject mavo = new DifferentialExpressionMetaAnalysisValueObject();
DoubleArrayList pvaluesToCombine = new DoubleArrayList();
/* a gene can have multiple probes that map to it, so store one diff value object for each probe */
Collection<DifferentialExpressionValueObject> devos = new ArrayList<>();
Collection<Long> eesThatMetThreshold = new HashSet<>();
for (ExpressionExperimentValueObject ee : resultsMap.keySet()) {
ExpressionExperimentValueObject eevo = this.configExpressionExperimentValueObject(ee);
Collection<DifferentialExpressionValueObject> proberesults = resultsMap.get(ee);
Collection<DifferentialExpressionValueObject> filteredResults = new HashSet<>();
for (DifferentialExpressionValueObject r : proberesults) {
Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();
assert efs.size() > 0;
if (efs.size() > 1) {
// We always ignore interaction effects.
continue;
}
ExperimentalFactorValueObject ef = efs.iterator().next();
/*
* note that we don't care about the reverse: the eefactorsmap can have stuff we don't need. We focus on
* the experiments because they are easy to select & secure. The eefactorsmap provides additional
* details.
*/
assert eeFactorsMap.containsKey(ee.getId()) : "eeFactorsMap does not contain ee=" + ee.getId();
Long sfId = eeFactorsMap.get(ee.getId());
if (!ef.getId().equals(sfId)) {
/*
* Screen out factors we're not using.
*/
continue;
}
/* filtered result with chosen factor */
filteredResults.add(r);
}
if (filteredResults.size() == 0) {
GeneDifferentialExpressionServiceImpl.log.warn("No result for ee=" + ee);
continue;
}
/*
* For the diff expression meta analysis, ignore threshold. Select the 'best' penalized probe if multiple
* probes map to the same gene.
*/
DifferentialExpressionValueObject res = this.findMinPenalizedProbeResult(filteredResults);
if (res == null)
continue;
Double p = res.getP();
if (p == null)
continue;
/*
* Moderate the pvalues by setting all values to be no smaller than PVALUE_CLIP_THRESHOLD
*/
pvaluesToCombine.add(Math.max(p, GeneDifferentialExpressionService.PVALUE_CLIP_THRESHOLD));
for (DifferentialExpressionValueObject r : filteredResults) {
Collection<ExperimentalFactorValueObject> efs = r.getExperimentalFactors();
if (efs == null) {
// This should not happen any more, but just in case.
GeneDifferentialExpressionServiceImpl.log.warn("No experimentalFactor(s) for DifferentialExpressionAnalysisResult: " + r.getId());
continue;
}
Boolean metThreshold = r.getCorrP() != null && (r.getCorrP() <= threshold);
r.setMetThreshold(metThreshold);
if (metThreshold) {
eesThatMetThreshold.add(eevo.getId());
}
Boolean fisherContribution = r.equals(res);
r.setFisherContribution(fisherContribution);
}
}
/*
* Meta-analysis part.
*/
double fisherPval = MetaAnalysis.fisherCombinePvalues(pvaluesToCombine);
mavo.setFisherPValue(fisherPval);
mavo.setGene(new GeneValueObject(g));
mavo.setActiveExperiments(activeExperiments);
mavo.setProbeResults(devos);
mavo.setNumMetThreshold(eesThatMetThreshold.size());
mavo.setSortKey();
timer.stop();
if (timer.getTime() > 1000) {
GeneDifferentialExpressionServiceImpl.log.info("Meta-analysis results: " + timer.getTime() + " ms");
}
return mavo;
}
use of ubic.gemma.model.genome.gene.GeneValueObject in project Gemma by PavlidisLab.
the class GeneCoexpressionSearchServiceImpl method addExtCoexpressionValueObjects.
/**
* Convert CoexpressionValueObjects to CoexpressionValueObjectExts
*
* @param queryGeneIds all those included in the query
*/
private List<CoexpressionValueObjectExt> addExtCoexpressionValueObjects(GeneValueObject queryGene, Collection<CoexpressionValueObject> coexp, boolean queryGenesOnly, Collection<Long> queryGeneIds) {
List<CoexpressionValueObjectExt> results = new ArrayList<>();
Collection<Long> coexpGenes = new HashSet<>();
for (CoexpressionValueObject cvo : coexp) {
coexpGenes.add(cvo.getCoexGeneId());
}
// database hit. loadValueObjects is too slow.
Map<Long, GeneValueObject> coexpedGenes = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(coexpGenes));
for (CoexpressionValueObject cvo : coexp) {
/*
* sanity check, this is sort of debug code. Each link has to contain at least one query gene.
*/
if (queryGenesOnly) {
if (!queryGeneIds.contains(cvo.getCoexGeneId())) {
GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getCoexGeneId() + " is not a query");
continue;
}
if (!queryGeneIds.contains(cvo.getQueryGeneId())) {
GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getQueryGeneId() + " is not a query");
continue;
}
}
CoexpressionValueObjectExt ecVo = new CoexpressionValueObjectExt();
ecVo.setQueryGene(queryGene);
GeneValueObject foundGene = coexpedGenes.get(cvo.getCoexGeneId());
if (!queryGeneIds.contains(foundGene.getId())) {
foundGene.setIsQuery(false);
} else {
foundGene.setIsQuery(true);
}
ecVo.setFoundGene(foundGene);
if (cvo.isPositiveCorrelation()) {
ecVo.setPosSupp(cvo.getNumDatasetsSupporting());
} else {
ecVo.setNegSupp(cvo.getNumDatasetsSupporting());
}
// when 'quick', these will not necessarily be set.
ecVo.setNumTestedIn(cvo.getNumDatasetsTestedIn());
ecVo.setSupportingExperiments(cvo.getSupportingDatasets());
ecVo.setSortKey();
results.add(ecVo);
assert ecVo.getPosSupp() > 0 || ecVo.getNegSupp() > 0;
assert ecVo.getFoundGene() != null;
assert ecVo.getQueryGene() != null;
}
return results;
}
use of ubic.gemma.model.genome.gene.GeneValueObject in project Gemma by PavlidisLab.
the class GeneCoexpressionSearchServiceImpl method doCoexpressionSearch.
/**
* @param genes 1 or more.
* @param stringency; this may be modified to control the number of results, unless "queryGenesOnly" is true.
* @param maxResults per gene, not including the query genes themselves. Ignored if this is 'queryGenesOnly'
* @param queryGenesOnly will be ignored if number of genes is 1.
* @return CoexpressionMetaValueObject, in which the results are already populated and sorted.
*/
private CoexpressionMetaValueObject doCoexpressionSearch(Collection<Long> inputEeIds, Collection<Long> genes, int stringency, final int maxResults, final boolean queryGenesOnly, final boolean quick) {
if (genes.isEmpty()) {
CoexpressionMetaValueObject r = new CoexpressionMetaValueObject();
r.setErrorState("No genes selected");
return r;
}
boolean actuallyUseQueryGeneOnly = queryGenesOnly && genes.size() > 1;
Taxon taxon = this.geneService.load(genes.iterator().next()).getTaxon();
List<ExpressionExperimentValueObject> eevos = this.getFilteredEEVos(inputEeIds, taxon);
CoexpressionMetaValueObject result = this.initValueObject(genes, eevos);
if (eevos.isEmpty()) {
result = new CoexpressionMetaValueObject();
result.setErrorState("No experiments selected");
return result;
}
Collection<Long> eeIds = EntityUtils.getIds(eevos);
Map<Long, List<CoexpressionValueObject>> allCoexpressions;
if (genes.size() > GeneCoexpressionSearchServiceImpl.THRESHOLD_TRIGGER_QUERY_GENES_ONLY) {
if (!actuallyUseQueryGeneOnly) {
GeneCoexpressionSearchServiceImpl.log.info("Switching to 'query genes only'");
}
actuallyUseQueryGeneOnly = true;
}
if (stringency < 1)
stringency = 1;
if (!queryGenesOnly) {
stringency = Math.max(stringency, this.chooseStringency(actuallyUseQueryGeneOnly, eeIds.size(), genes.size()));
GeneCoexpressionSearchServiceImpl.log.info("Stringency set to " + stringency + " based on number of experiments (" + eeIds.size() + ") and genes (" + genes.size() + ") queried");
} else {
GeneCoexpressionSearchServiceImpl.log.info("Query gene only: stringency maintained at requested value=" + stringency);
}
assert stringency >= 1 || eeIds.size() == 1;
// HACK drop the stringency until we get some results.
int stepSize = 3;
while (true) {
if (actuallyUseQueryGeneOnly) {
// note that maxResults is ignored.
if (genes.size() < 2) {
// should be impossible - could assert.
throw new IllegalArgumentException("cannot do inter-gene coexpression search with only one gene");
}
allCoexpressions = coexpressionService.findInterCoexpressionRelationships(taxon, genes, eeIds, stringency, quick);
} else {
allCoexpressions = coexpressionService.findCoexpressionRelationships(taxon, genes, eeIds, stringency, maxResults, quick);
}
int minimum_stringency_for_requery = 2;
if (allCoexpressions.isEmpty() && stringency > minimum_stringency_for_requery) {
// step size completely made up.
stringency -= stepSize;
// keep stringency at least 2.
stringency = Math.max(minimum_stringency_for_requery, stringency);
GeneCoexpressionSearchServiceImpl.log.info("No results, re-querying with stringency=" + stringency);
} else {
// no results.
break;
}
}
GeneCoexpressionSearchServiceImpl.log.info("Final actual stringency used was " + stringency);
result.setQueryStringency(stringency);
result.setQueryGenesOnly(actuallyUseQueryGeneOnly);
Set<Long> queryGeneIds = allCoexpressions.keySet();
assert genes.containsAll(queryGeneIds);
Map<Long, GeneValueObject> idMap = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(queryGeneIds));
int k = 0;
for (Long queryGene : queryGeneIds) {
Collection<CoexpressionValueObject> coexpressions = allCoexpressions.get(queryGene);
List<CoexpressionValueObjectExt> results = this.addExtCoexpressionValueObjects(idMap.get(queryGene), coexpressions, actuallyUseQueryGeneOnly, genes);
result.getResults().addAll(results);
CoexpressionSummaryValueObject summary = new CoexpressionSummaryValueObject(queryGene);
summary.setDatasetsAvailable(eevos.size());
summary.setLinksFound(coexpressions.size());
result.getSummaries().put(queryGene, summary);
if (++k % 100 == 0) {
GeneCoexpressionSearchServiceImpl.log.info("Processed results for " + k + " query genes ...");
}
}
Collections.sort(result.getResults());
// FIXME we might want to suppress this in some situations
if (!queryGenesOnly) {
result.trim();
}
this.populateNodeDegrees(result);
return result;
}
use of ubic.gemma.model.genome.gene.GeneValueObject in project Gemma by PavlidisLab.
the class DifferentialExpressionSearchTaskImpl method addGenesToSearchResultValueObject.
/**
* No database calls here, just organization.
*
* @param searchResult the search results
*/
private void addGenesToSearchResultValueObject(DifferentialExpressionGenesConditionsValueObject searchResult) {
DifferentialExpressionSearchTaskImpl.log.info("Loading genes for " + geneGroupName + " ...");
for (GeneValueObject gene : geneGroup) {
DifferentialExpressionGenesConditionsValueObject.DiffExGene g = searchResult.new DiffExGene(gene.getId(), gene.getOfficialSymbol(), gene.getOfficialName());
g.setGroupName(geneGroupName);
searchResult.addGene(g);
}
}
use of ubic.gemma.model.genome.gene.GeneValueObject in project Gemma by PavlidisLab.
the class GeneCoreServiceImpl method searchGenes.
/**
* Search for genes (by name or symbol)
*
* @param taxonId, can be null to not constrain by taxon
* @return Collection of Gene entity objects
*/
@Override
public Collection<GeneValueObject> searchGenes(String query, Long taxonId) {
Taxon taxon = null;
if (taxonId != null) {
taxon = this.taxonService.load(taxonId);
}
SearchSettings settings = SearchSettingsImpl.geneSearch(query, taxon);
List<SearchResult> geneSearchResults = this.searchService.search(settings).get(Gene.class);
Collection<Gene> genes = new HashSet<>();
if (geneSearchResults == null || geneSearchResults.isEmpty()) {
GeneCoreServiceImpl.log.info("No Genes for search: " + query + " taxon=" + taxonId);
return new HashSet<>();
}
GeneCoreServiceImpl.log.info("Gene search: " + query + " taxon=" + taxonId + ", " + geneSearchResults.size() + " found");
for (SearchResult sr : geneSearchResults) {
Gene g = (Gene) sr.getResultObject();
g = geneService.thaw(g);
genes.add(g);
GeneCoreServiceImpl.log.debug("Gene search result: " + g.getOfficialSymbol());
}
Collection<GeneValueObject> geneValueObjects = geneService.loadValueObjects(genes);
GeneCoreServiceImpl.log.debug("Gene search: " + geneValueObjects.size() + " value objects returned.");
return geneValueObjects;
}
Aggregations