use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.
the class LinkAnalysisServiceTest method checkResults.
private int checkResults(Collection<BioAssaySet> ees, int expectedMinimumMaxSupport) {
boolean foundOne = false;
int maxSupport = 0;
Taxon mouse = taxonService.findByCommonName("mouse");
Collection<Gene> genesWithLinks = new ArrayList<>();
int totalLinks = 0;
// numdatasetstesting will not be set so we won't bother checking.
assertTrue(!geneCoexpressionService.getCoexpression(ee, true).isEmpty());
Collection<CoexpressionValueObject> eeResults = geneCoexpressionService.getCoexpression(ee, false);
assertTrue(!eeResults.isEmpty());
for (CoexpressionValueObject coex : eeResults) {
this.checkResult(coex);
}
Map<Long, GeneCoexpressionNodeDegreeValueObject> nodeDegrees = geneCoexpressionService.getNodeDegrees(EntityUtils.getIds(geneService.loadAll()));
assertTrue(!nodeDegrees.isEmpty());
// experiment-major query
Map<Long, List<CoexpressionValueObject>> allLinks = geneCoexpressionService.findCoexpressionRelationships(mouse, new HashSet<Long>(), EntityUtils.getIds(ees), ees.size(), 10, false);
assertTrue(!allLinks.isEmpty());
for (Long g : allLinks.keySet()) {
for (CoexpressionValueObject coex : allLinks.get(g)) {
this.checkResult(coex);
}
}
for (Gene gene : geneService.loadAll(mouse)) {
Collection<CoexpressionValueObject> links = geneCoexpressionService.findCoexpressionRelationships(gene, EntityUtils.getIds(ees), 1, 0, false);
if (links == null || links.isEmpty()) {
continue;
}
assertEquals(geneCoexpressionService.findCoexpressionRelationships(gene, Collections.singleton(ee.getId()), 0, false).size(), geneCoexpressionService.countLinks(ee, gene).intValue());
GeneCoexpressionNodeDegreeValueObject nodeDegree = geneCoexpressionService.getNodeDegree(gene);
if (links.size() != nodeDegree.getLinksWithMinimumSupport(1)) {
log.info(nodeDegree);
assertEquals("Node degree check failed for gene " + gene, links.size(), nodeDegree.getLinksWithMinimumSupport(1).intValue());
}
assertTrue(nodeDegree.getLinksWithMinimumSupport(1) >= nodeDegree.getLinksWithMinimumSupport(2));
totalLinks += links.size();
log.debug(links.size() + " hits for " + gene);
for (CoexpressionValueObject coex : links) {
this.checkResult(coex);
if (coex.getNumDatasetsSupporting() > maxSupport) {
maxSupport = coex.getNumDatasetsSupporting();
}
}
foundOne = true;
if (genesWithLinks.size() == 5) {
// without specifying stringency
Map<Long, List<CoexpressionValueObject>> multiGeneResults = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 100, false);
if (multiGeneResults.isEmpty()) {
// noinspection ConstantConditions // these strange structures are to help with debugger.
assertTrue(!multiGeneResults.isEmpty());
}
for (Long id : multiGeneResults.keySet()) {
for (CoexpressionValueObject coex : multiGeneResults.get(id)) {
this.checkResult(coex);
}
}
// with stringency specified, quick.
Map<Long, List<CoexpressionValueObject>> multiGeneResults2 = geneCoexpressionService.findCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), ees.size(), 100, true);
if (multiGeneResults.size() != multiGeneResults2.size()) {
assertEquals(multiGeneResults.size(), multiGeneResults2.size());
}
for (Long id : multiGeneResults2.keySet()) {
for (CoexpressionValueObject coex : multiGeneResults2.get(id)) {
this.checkResult(coex);
}
}
}
genesWithLinks.add(gene);
}
assertTrue(foundOne);
Map<Long, List<CoexpressionValueObject>> mygeneresults = geneCoexpressionService.findInterCoexpressionRelationships(mouse, EntityUtils.getIds(genesWithLinks), EntityUtils.getIds(ees), 1, false);
if (mygeneresults.isEmpty()) {
// noinspection ConstantConditions // these strange structures are to help with debugger.
assertTrue(!mygeneresults.isEmpty());
}
for (Long id : mygeneresults.keySet()) {
for (CoexpressionValueObject coex : mygeneresults.get(id)) {
this.checkResult(coex);
}
}
assertTrue(maxSupport >= expectedMinimumMaxSupport);
return totalLinks;
}
use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.
the class ExpressionDataFileServiceImpl method writeCoexpressionData.
/**
* Loads the probe to probe coexpression link information for a given expression experiment and writes it to disk.
*/
private void writeCoexpressionData(File file, ExpressionExperiment ee) throws IOException {
Taxon tax = expressionExperimentService.getTaxon(ee);
assert tax != null;
Collection<CoexpressionValueObject> geneLinks = gene2geneCoexpressionService.getCoexpression(ee, true);
Date timestamp = new Date(System.currentTimeMillis());
StringBuilder buf = new StringBuilder();
// Write header information
buf.append("# Coexpression data for: ").append(ee.getShortName()).append(" : ").append(ee.getName()).append(" \n");
buf.append("# Generated On: ").append(timestamp).append(" \n");
buf.append("# Links are listed in an arbitrary order with an indication of positive or negative correlation\n");
buf.append(ExpressionDataFileService.DISCLAIMER);
buf.append("GeneSymbol1\tGeneSymbol2\tDirection\tSupport\n");
// Data
for (CoexpressionValueObject link : geneLinks) {
buf.append(link.getQueryGeneSymbol()).append("\t").append(link.getCoexGeneSymbol()).append("\t");
buf.append(link.isPositiveCorrelation() ? "+" : "-" + "\n");
}
// Write coexpression data to file (zipped of course)
try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)))) {
writer.write(buf.toString());
}
}
use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.
the class GeneCoexpressionSearchServiceImpl method addExtCoexpressionValueObjects.
/**
* Convert CoexpressionValueObjects to CoexpressionValueObjectExts
*
* @param queryGeneIds all those included in the query
*/
private List<CoexpressionValueObjectExt> addExtCoexpressionValueObjects(GeneValueObject queryGene, Collection<CoexpressionValueObject> coexp, boolean queryGenesOnly, Collection<Long> queryGeneIds) {
List<CoexpressionValueObjectExt> results = new ArrayList<>();
Collection<Long> coexpGenes = new HashSet<>();
for (CoexpressionValueObject cvo : coexp) {
coexpGenes.add(cvo.getCoexGeneId());
}
// database hit. loadValueObjects is too slow.
Map<Long, GeneValueObject> coexpedGenes = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(coexpGenes));
for (CoexpressionValueObject cvo : coexp) {
/*
* sanity check, this is sort of debug code. Each link has to contain at least one query gene.
*/
if (queryGenesOnly) {
if (!queryGeneIds.contains(cvo.getCoexGeneId())) {
GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getCoexGeneId() + " is not a query");
continue;
}
if (!queryGeneIds.contains(cvo.getQueryGeneId())) {
GeneCoexpressionSearchServiceImpl.log.warn("coexpression for non-query genes obtained unexpectedly when doing queryGenesOnly " + cvo.getQueryGeneId() + " is not a query");
continue;
}
}
CoexpressionValueObjectExt ecVo = new CoexpressionValueObjectExt();
ecVo.setQueryGene(queryGene);
GeneValueObject foundGene = coexpedGenes.get(cvo.getCoexGeneId());
if (!queryGeneIds.contains(foundGene.getId())) {
foundGene.setIsQuery(false);
} else {
foundGene.setIsQuery(true);
}
ecVo.setFoundGene(foundGene);
if (cvo.isPositiveCorrelation()) {
ecVo.setPosSupp(cvo.getNumDatasetsSupporting());
} else {
ecVo.setNegSupp(cvo.getNumDatasetsSupporting());
}
// when 'quick', these will not necessarily be set.
ecVo.setNumTestedIn(cvo.getNumDatasetsTestedIn());
ecVo.setSupportingExperiments(cvo.getSupportingDatasets());
ecVo.setSortKey();
results.add(ecVo);
assert ecVo.getPosSupp() > 0 || ecVo.getNegSupp() > 0;
assert ecVo.getFoundGene() != null;
assert ecVo.getQueryGene() != null;
}
return results;
}
use of ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject in project Gemma by PavlidisLab.
the class GeneCoexpressionSearchServiceImpl method doCoexpressionSearch.
/**
* @param genes 1 or more.
* @param stringency; this may be modified to control the number of results, unless "queryGenesOnly" is true.
* @param maxResults per gene, not including the query genes themselves. Ignored if this is 'queryGenesOnly'
* @param queryGenesOnly will be ignored if number of genes is 1.
* @return CoexpressionMetaValueObject, in which the results are already populated and sorted.
*/
private CoexpressionMetaValueObject doCoexpressionSearch(Collection<Long> inputEeIds, Collection<Long> genes, int stringency, final int maxResults, final boolean queryGenesOnly, final boolean quick) {
if (genes.isEmpty()) {
CoexpressionMetaValueObject r = new CoexpressionMetaValueObject();
r.setErrorState("No genes selected");
return r;
}
boolean actuallyUseQueryGeneOnly = queryGenesOnly && genes.size() > 1;
Taxon taxon = this.geneService.load(genes.iterator().next()).getTaxon();
List<ExpressionExperimentValueObject> eevos = this.getFilteredEEVos(inputEeIds, taxon);
CoexpressionMetaValueObject result = this.initValueObject(genes, eevos);
if (eevos.isEmpty()) {
result = new CoexpressionMetaValueObject();
result.setErrorState("No experiments selected");
return result;
}
Collection<Long> eeIds = EntityUtils.getIds(eevos);
Map<Long, List<CoexpressionValueObject>> allCoexpressions;
if (genes.size() > GeneCoexpressionSearchServiceImpl.THRESHOLD_TRIGGER_QUERY_GENES_ONLY) {
if (!actuallyUseQueryGeneOnly) {
GeneCoexpressionSearchServiceImpl.log.info("Switching to 'query genes only'");
}
actuallyUseQueryGeneOnly = true;
}
if (stringency < 1)
stringency = 1;
if (!queryGenesOnly) {
stringency = Math.max(stringency, this.chooseStringency(actuallyUseQueryGeneOnly, eeIds.size(), genes.size()));
GeneCoexpressionSearchServiceImpl.log.info("Stringency set to " + stringency + " based on number of experiments (" + eeIds.size() + ") and genes (" + genes.size() + ") queried");
} else {
GeneCoexpressionSearchServiceImpl.log.info("Query gene only: stringency maintained at requested value=" + stringency);
}
assert stringency >= 1 || eeIds.size() == 1;
// HACK drop the stringency until we get some results.
int stepSize = 3;
while (true) {
if (actuallyUseQueryGeneOnly) {
// note that maxResults is ignored.
if (genes.size() < 2) {
// should be impossible - could assert.
throw new IllegalArgumentException("cannot do inter-gene coexpression search with only one gene");
}
allCoexpressions = coexpressionService.findInterCoexpressionRelationships(taxon, genes, eeIds, stringency, quick);
} else {
allCoexpressions = coexpressionService.findCoexpressionRelationships(taxon, genes, eeIds, stringency, maxResults, quick);
}
int minimum_stringency_for_requery = 2;
if (allCoexpressions.isEmpty() && stringency > minimum_stringency_for_requery) {
// step size completely made up.
stringency -= stepSize;
// keep stringency at least 2.
stringency = Math.max(minimum_stringency_for_requery, stringency);
GeneCoexpressionSearchServiceImpl.log.info("No results, re-querying with stringency=" + stringency);
} else {
// no results.
break;
}
}
GeneCoexpressionSearchServiceImpl.log.info("Final actual stringency used was " + stringency);
result.setQueryStringency(stringency);
result.setQueryGenesOnly(actuallyUseQueryGeneOnly);
Set<Long> queryGeneIds = allCoexpressions.keySet();
assert genes.containsAll(queryGeneIds);
Map<Long, GeneValueObject> idMap = EntityUtils.getIdMap(geneService.loadValueObjectsByIds(queryGeneIds));
int k = 0;
for (Long queryGene : queryGeneIds) {
Collection<CoexpressionValueObject> coexpressions = allCoexpressions.get(queryGene);
List<CoexpressionValueObjectExt> results = this.addExtCoexpressionValueObjects(idMap.get(queryGene), coexpressions, actuallyUseQueryGeneOnly, genes);
result.getResults().addAll(results);
CoexpressionSummaryValueObject summary = new CoexpressionSummaryValueObject(queryGene);
summary.setDatasetsAvailable(eevos.size());
summary.setLinksFound(coexpressions.size());
result.getSummaries().put(queryGene, summary);
if (++k % 100 == 0) {
GeneCoexpressionSearchServiceImpl.log.info("Processed results for " + k + " query genes ...");
}
}
Collections.sort(result.getResults());
// FIXME we might want to suppress this in some situations
if (!queryGenesOnly) {
result.trim();
}
this.populateNodeDegrees(result);
return result;
}
Aggregations