use of ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis in project Gemma by PavlidisLab.
the class LinkAnalysisConfig method toAnalysis.
/**
 * Creates a fresh analysis object describing this configuration. The only thing set on it is a protocol named
 * "Link analysis settings" whose description is the string form of this configuration.
 *
 * @return representation of this analysis (not completely filled in - only the basic parameters)
 */
public CoexpressionAnalysis toAnalysis() {
    // The protocol carries the settings as free text.
    Protocol settings = Protocol.Factory.newInstance();
    settings.setName("Link analysis settings");
    settings.setDescription(this.toString());

    CoexpressionAnalysis result = CoexpressionAnalysis.Factory.newInstance();
    result.setProtocol(settings);
    return result;
}
use of ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis in project Gemma by PavlidisLab.
the class LinkAnalysisPersisterImpl method saveLinks.
/**
 * Expands probe-level links to gene-level links, writes them through the gene-to-gene coexpression service and
 * then updates the counts stored on the analysis object.
 * <p>
 * Links between a gene and itself, and gene pairs that were already collected, are skipped and counted; the counts
 * are logged at info level.
 *
 * @param la    the link analysis providing the probes, the probe-to-gene map, the genes tested, the experiment and
 *              the analysis object to update
 * @param links the probe-level links to convert; null entries are ignored
 * @return how many links were saved
 * @throws RuntimeException if no links remain after skipping self-links and duplicates
 * @throws AssertionError   if any gene that received a link is not among the genes tested
 */
private int saveLinks(LinkAnalysis la, ObjectArrayList links) {
    LinkCreator c = this.getLinkCreator(la);

    int selfLinksSkipped = 0;
    int duplicateLinksSkipped = 0;
    Set<Gene> genesWithLinks = new HashSet<>();
    Set<NonPersistentNonOrderedCoexpLink> linksForDb = new HashSet<>();

    for (int i = 0, n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null) {
            continue;
        }

        Link m = (Link) val;
        Double w = m.getWeight();
        int x = m.getx();
        int y = m.gety();
        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);

        /*
         * we have to deal with all the possible genes pairs, if probes map to more than one pair. A single pair of
         * probes could result in more than one link. This assumes that preprocessing of the data allowed retention
         * of probes that map to more than one gene.
         */
        for (Gene g1 : la.getProbeToGeneMap().get(p1)) {
            boolean g1HasLinks = false;

            for (Gene g2 : la.getProbeToGeneMap().get(p2)) {
                if (g1.equals(g2)) {
                    selfLinksSkipped++;
                    continue;
                }

                NonPersistentNonOrderedCoexpLink link = new NonPersistentNonOrderedCoexpLink(this.initCoexp(w, c, g1, g2));

                /*
                 * FIXME what do we do when a pair of genes is both positively and negatively correlated in the
                 * same experiment? Currently they are both kept, but if we go to a completely gene-based
                 * analysis we wouldn't do that, so it's an inconsistency;
                 */
                if (linksForDb.contains(link)) {
                    /*
                     * This happens if there is more than one probe retained for a gene (or both genes) and the
                     * coexpression shows up more than once (different pair of probes, same genes).
                     */
                    if (LinkAnalysisPersisterImpl.log.isDebugEnabled()) {
                        LinkAnalysisPersisterImpl.log.debug("Skipping duplicate: " + link);
                    }
                    duplicateLinksSkipped++;
                    continue;
                }

                if (LinkAnalysisPersisterImpl.log.isDebugEnabled()) {
                    LinkAnalysisPersisterImpl.log.debug("Adding : " + link);
                }
                linksForDb.add(link);
                g1HasLinks = true;
                genesWithLinks.add(g2);
            }

            // g1 only counts as linked if at least one of its pairs was actually added.
            if (g1HasLinks) {
                genesWithLinks.add(g1);
            }
        }

        // Periodic progress report.
        if (i > 0 && i % 200000 == 0) {
            LinkAnalysisPersisterImpl.log.info(i + " links checked");
        }
    }

    if (selfLinksSkipped > 0) {
        LinkAnalysisPersisterImpl.log.info(selfLinksSkipped + " self-links skipped");
    }
    if (duplicateLinksSkipped > 0) {
        LinkAnalysisPersisterImpl.log.info(duplicateLinksSkipped + " duplicate links skipped (likely cause: more than one probe supporting the same link)");
    }

    if (linksForDb.isEmpty()) {
        throw new RuntimeException("No links left!");
    }
    LinkAnalysisPersisterImpl.log.info(linksForDb.size() + " links ready for saving to db");

    // Sanity check: every gene that ended up with a link must be one of the genes tested.
    if (!la.getGenesTested().containsAll(genesWithLinks)) {
        Set<Gene> untested = new HashSet<>(genesWithLinks);
        untested.removeAll(la.getGenesTested());
        throw new AssertionError(untested.size() + " gene(s) with links are not among the genes tested");
    }

    /*
     * Do the actual database writing. It's a good idea to do this part in one (big) transaction. The genes tested
     * are handed over together with the links. (This point is only reached when there is at least one link; the
     * empty case throws above.)
     */
    this.gene2GeneCoexpressionService.createOrUpdate(la.getExpressionExperiment(), new ArrayList<>(linksForDb), c, la.getGenesTested());

    /*
     * Update the meta-data about the analysis
     */
    CoexpressionAnalysis analysisObj = la.getAnalysisObj();
    assert analysisObj.getId() != null;
    analysisObj.setNumberOfElementsAnalyzed(la.getGenesTested().size());
    analysisObj.setNumberOfLinks(linksForDb.size());
    coexpressionAnalysisService.update(analysisObj);

    /*
     * Updating node degree cannot be done here, since we need to know the support. We have to do that
     * "periodically" if we want it available in summary form.
     */
    return linksForDb.size();
}
use of ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis in project Gemma by PavlidisLab.
the class AnalysisUtilServiceImpl method deleteOldAnalyses.
/**
 * Removes the analyses stored for an experiment: the PCA (only if one is loaded for the experiment), then every
 * differential expression analysis, then every coexpression analysis. A removal that throws is logged as a warning
 * and the remaining removals are still attempted.
 *
 * @param expExp the experiment whose analyses are to be removed
 * @return true if no removal threw an exception, false if at least one did
 */
@Override
public boolean deleteOldAnalyses(ExpressionExperiment expExp) {
    boolean removedAll = true;
    AnalysisUtilServiceImpl.log.info("Removing old analyses for " + expExp);

    if (principalComponentAnalysisService.loadForExperiment(expExp) != null) {
        try {
            principalComponentAnalysisService.removeForExperiment(expExp);
        } catch (Exception e) {
            // Report the cause like the other removals do, and keep the stack trace.
            AnalysisUtilServiceImpl.log.warn("Could not remove pca for: " + expExp + ": " + e.getMessage(), e);
            removedAll = false;
        }
    }

    for (DifferentialExpressionAnalysis diff : differentialExpressionAnalysisService.findByInvestigation(expExp)) {
        try {
            differentialExpressionAnalysisService.remove(diff);
        } catch (Exception e) {
            AnalysisUtilServiceImpl.log.warn("Could not remove analysis: " + diff + ": " + e.getMessage(), e);
            removedAll = false;
        }
    }

    for (CoexpressionAnalysis coex : coexpressionAnalysisService.findByInvestigation(expExp)) {
        try {
            coexpressionAnalysisService.remove(coex);
        } catch (Exception e) {
            AnalysisUtilServiceImpl.log.warn("Could not remove analysis: " + coex + ": " + e.getMessage(), e);
            removedAll = false;
        }
    }

    return removedAll;
}
use of ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis in project Gemma by PavlidisLab.
the class LinkAnalysisServiceTest method testLoadAnalyzeSaveAndCoexpSearch.
/**
 * End-to-end check of link analysis: run it, delete the results, run it again and verify the stored analysis, redo
 * it and verify the link count is unchanged, then run it on a second experiment given the same raw data and check
 * the results across both experiments.
 */
@Test
public void testLoadAnalyzeSaveAndCoexpSearch() {
    ee = this.getTestPersistentCompleteExpressionExperimentWithSequences();
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    tableMaintenanceUtil.disableEmail();
    tableMaintenanceUtil.updateGene2CsEntries();

    linkAnalysisConfig.setCdfCut(0.1);
    linkAnalysisConfig.setSingularThreshold(SingularThreshold.cdfcut);
    linkAnalysisConfig.setProbeDegreeThreshold(25);
    linkAnalysisConfig.setCheckCorrelationDistribution(false);
    linkAnalysisConfig.setCheckForBatchEffect(false);
    filterConfig.setIgnoreMinimumSampleThreshold(true);

    // first time.
    // noinspection UnusedAssignment // we still want to do this for the testing sake
    LinkAnalysis la = linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);

    // test remove is clean; to check this properly requires checking the db.
    linkAnalysisPersisterService.deleteAnalyses(ee);
    this.checkUnsupportedLinksHaveNoSupport();
    assertEquals(0, geneCoexpressionService.getCoexpression(ee, true).size());

    // second run, after the delete: this is the one whose stored analysis is inspected.
    la = linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);
    CoexpressionAnalysis analysisObj = la.getAnalysisObj();
    assertEquals(151, analysisObj.getNumberOfElementsAnalyzed().intValue());
    assertTrue(analysisObj.getNumberOfLinks() > 0);
    assertNotNull(analysisObj.getCoexpCorrelationDistribution());

    Collection<BioAssaySet> ees = new HashSet<>();
    ees.add(ee);
    this.updateNodeDegree();
    int totalLinksFirstPass = this.checkResults(ees, 1);
    // should be ~1140.
    assertTrue(totalLinksFirstPass > 1000);

    // test redo: processing the same experiment again must give the same number of links.
    linkAnalysisService.process(ee, filterConfig, linkAnalysisConfig);
    this.updateNodeDegree();
    int totalLinksRedo = this.checkResults(ees, 1);
    assertEquals(totalLinksFirstPass, totalLinksRedo);

    // now add another experiment that has overlapping links (same data): remember the raw data per design element...
    Map<CompositeSequence, byte[]> dataMap = new HashMap<>();
    ee = eeService.thaw(ee);
    for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
        dataMap.put(v.getDesignElement(), v.getData());
    }

    // ...and copy it onto the vectors of the second experiment.
    ExpressionExperiment ee2 = this.getTestPersistentCompleteExpressionExperimentWithSequences(ee);
    // eeService.thawRawAndProcessed( ee2 );
    for (RawExpressionDataVector v : ee2.getRawExpressionDataVectors()) {
        byte[] data = dataMap.get(v.getDesignElement());
        // A test-framework assertion rather than the 'assert' keyword, so the check runs even without -ea.
        assertNotNull(data);
        v.setData(data);
    }
    eeService.update(ee2);

    processedExpressionDataVectorService.computeProcessedExpressionData(ee2);
    linkAnalysisService.process(ee2, filterConfig, linkAnalysisConfig);
    this.updateNodeDegree();

    // expect to get at least one link with support >1
    ees.add(ee2);
    this.checkResults(ees, 2);
}
use of ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis in project Gemma by PavlidisLab.
the class CoexpressionAnalysisDaoImpl method findByInvestigations.
/**
 * Looks up the coexpression analyses of several investigations by calling {@code findByInvestigation} once per
 * investigation.
 *
 * @param investigations the investigations to look up
 * @return a map from each given investigation to whatever {@code findByInvestigation} returned for it
 */
@Override
public Map<Investigation, Collection<CoexpressionAnalysis>> findByInvestigations(Collection<Investigation> investigations) {
    Map<Investigation, Collection<CoexpressionAnalysis>> analysesByInvestigation = new HashMap<>();

    for (Investigation investigation : investigations) {
        analysesByInvestigation.put(investigation, this.findByInvestigation(investigation));
    }

    return analysesByInvestigation;
}
Aggregations