use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class LegacySessionLoadTest method test_1_LoadedLegacyData.
/**
 * Loads the session file named in {@code @SessionFile} and checks that the
 * restored EnrichmentMap model (map, creation parameters, edge table, data set,
 * gene sets, enrichments, expressions, rankings and file references) holds the
 * expected values.
 */
@Test
@SessionFile("em_session_2.2.cys")
public void test_1_LoadedLegacyData() throws Exception {
    EnrichmentMap map = getEnrichmentMap();
    assertEquals("EM1_Enrichment Map", map.getName());

    CyNetwork network = networkManager.getNetwork(map.getNetworkID());
    assertNotNull(network);
    assertEquals(1, map.getDataSetCount());
    assertEquals(14067, map.getNumberOfGenes());
    assertEquals(14067, map.getAllGenes().size());

    // Number of edges: 3339 - one edge row is expected per gene set similarity object.
    CyTable edgeTable = network.getDefaultEdgeTable();
    assertEquals(3339, edgeTable.getRowCount());

    EMCreationParameters params = map.getParams();
    String prefix = params.getAttributePrefix();
    assertEquals("EM1_", prefix);
    verifyLegacyCreationParameters(params);
    verifyLegacyOverlapEdge(edgeTable, prefix);

    EMDataSet dataset = map.getDataSet("Dataset 1");
    assertNotNull(dataset);
    assertSame(map, dataset.getMap());
    assertEquals(Method.GSEA, dataset.getMethod());
    assertEquals(12653, dataset.getDataSetGenes().size());
    assertEquals(389, dataset.getGeneSetsOfInterest().getGeneSets().size());
    // assertEquals(17259, dataset.getSetofgenesets().getGenesets().size()); // MKTODO why? what is this used for
    assertEndsWith(dataset.getSetOfGeneSets().getFilename(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");

    // Every node SUID recorded on the data set must resolve to a node in the network.
    for (long suid : dataset.getNodeSuids()) {
        assertNotNull(network.getNode(suid));
    }

    verifyLegacyGeneSet(dataset);
    verifyLegacyEnrichments(dataset);
    verifyLegacyExpressions(dataset);
    verifyLegacyDataSetFiles(dataset);
}

/** Checks the creation parameters restored from the session (everything except the prefix). */
private void verifyLegacyCreationParameters(EMCreationParameters params) throws Exception {
    assertEquals(0.5, params.getCombinedConstant(), 0.0);
    assertFalse(params.isEMgmt());
    assertEquals("Geneset_Overlap", params.getEnrichmentEdgeType());
    assertTrue(params.isFDR());
    assertEquals(GreatFilter.HYPER, params.getGreatFilter());
    assertEquals(0.005, params.getPvalue(), 0.0);
    assertEquals(1.0, params.getPvalueMin(), 0.0);
    assertEquals(0.1, params.getQvalue(), 0.0);
    assertEquals(1.0, params.getQvalueMin(), 0.0);
    assertEquals(0.5, params.getSimilarityCutoff(), 0.0);
    assertEquals(SimilarityMetric.OVERLAP, params.getSimilarityMetric());
    // assertFalse(params.isDistinctExpressionSets());
}

/** Looks up one known overlap edge by name and checks its interaction type and similarity coefficient. */
private void verifyLegacyOverlapEdge(CyTable edgeTable, String prefix) throws Exception {
    String geneset1 = "RESOLUTION OF SISTER CHROMATID COHESION%REACTOME%REACT_150425.2";
    String geneset2 = "CHROMOSOME, CENTROMERIC REGION%GO%GO:0000775";
    String edgeName = geneset1 + " (Geneset_Overlap) " + geneset2;

    Collection<CyRow> rows = edgeTable.getMatchingRows(CyNetwork.NAME, edgeName);
    assertEquals(1, rows.size());

    CyRow row = rows.iterator().next();
    assertEquals("Geneset_Overlap", row.get(CyEdge.INTERACTION, String.class));
    assertEquals(0.6097560975609756, EMStyleBuilder.Columns.EDGE_SIMILARITY_COEFF.get(row, prefix), 0.0);
}

/** Checks one known gene set among the data set's gene sets of interest. */
private void verifyLegacyGeneSet(EMDataSet dataset) throws Exception {
    GeneSet geneset = dataset.getGeneSetsOfInterest().getGeneSets().get("NCRNA PROCESSING%GO%GO:0034470");
    assertEquals(88, geneset.getGenes().size());
    assertEquals("NCRNA PROCESSING%GO%GO:0034470", geneset.getName());
    assertEquals("ncRNA processing", geneset.getDescription());
    assertEquals(Optional.of("GO"), geneset.getSource());
}

/** Checks the enrichment result set and one known GSEA result inside it. */
private void verifyLegacyEnrichments(EMDataSet dataset) throws Exception {
    SetOfEnrichmentResults enrichments = dataset.getEnrichments();
    assertEquals(4756, enrichments.getEnrichments().size());
    assertEndsWith(enrichments.getFilename1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(enrichments.getFilename2(), "gsea_report_for_NT12_1473194913081.xls");
    assertEquals("ES12", enrichments.getPhenotype1());
    assertEquals("NT12", enrichments.getPhenotype2());

    EnrichmentResult result = enrichments.getEnrichments().get("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201");
    assertTrue(result instanceof GSEAResult);
    GSEAResult gseaResult = (GSEAResult) result;
    assertEquals("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201", gseaResult.getName());
    assertEquals(0.42844063, gseaResult.getES(), 0.0);
    assertEquals(0.45225498, gseaResult.getFdrqvalue(), 0.0);
    assertEquals(1.0, gseaResult.getFwerqvalue(), 0.0);
    assertEquals(23, gseaResult.getGsSize());
    assertEquals(1.1938541, gseaResult.getNES(), 0.0);
    assertEquals(0.2457786, gseaResult.getPvalue(), 0.0);
    assertEquals(4689, gseaResult.getRankAtMax());
    assertEquals(Optional.of("GO"), gseaResult.getSource());
}

/** Checks the expression matrix, its first entry, and the "GSEARanking" ranking. */
private void verifyLegacyExpressions(EMDataSet dataset) throws Exception {
    GeneExpressionMatrix expressions = dataset.getExpressionSets();
    assertEquals(20326, expressions.getExpressionUniverse());
    assertEquals(3.686190609, expressions.getClosesttoZero(), 0.0);
    // assertEndsWith(expressions.getFilename(), "MCF7_ExprMx_v2_names.gct");
    assertEquals(15380.42388, expressions.getMaxExpression(), 0.0);
    assertEquals(3.686190609, expressions.getMinExpression(), 0.0);
    assertEquals(20, expressions.getNumConditions());
    assertEquals(12653, expressions.getExpressionMatrix().size());
    assertEquals(12653, expressions.getExpressionMatrix_rowNormalized().size());

    GeneExpression expression = expressions.getExpressionMatrix().get(0);
    assertEquals("MOCOS", expression.getName());
    assertEquals("MOCOS (molybdenum cofactor sulfurase)", expression.getDescription());
    assertEquals(18, expression.getExpression().length);

    Ranking ranking = expressions.getRanks().get("GSEARanking");
    assertEquals(12653, ranking.getAllRanks().size());
    assertEquals(12653, ranking.getRanking().size());

    Rank rank = ranking.getRanking().get(0);
    assertEquals("MOCOS", rank.getName());
    assertEquals(1238, rank.getRank().intValue());
    assertEquals(0.54488367, rank.getScore(), 0.0);
}

/** Checks the file references and phenotypes stored on the data set. */
private void verifyLegacyDataSetFiles(EMDataSet dataset) throws Exception {
    DataSetFiles files = dataset.getDataSetFiles();
    assertEndsWith(files.getClassFile(), "ES_NT.cls");
    assertEndsWith(files.getEnrichmentFileName1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(files.getEnrichmentFileName2(), "gsea_report_for_NT12_1473194913081.xls");
    // assertEndsWith(files.getExpressionFileName(), "MCF7_ExprMx_v2_names.gct");
    assertEndsWith(files.getGMTFileName(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    assertEndsWith(files.getGseaHtmlReportFile(), "estrogen_treatment_12hr_gsea_enrichment_results.Gsea.1473194913081/index.html");
    assertEndsWith(files.getRankedFile(), "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    assertEquals("ES12", files.getPhenotype1());
    assertEquals("NT12", files.getPhenotype2());
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class CreateDiseaseSignatureNetworkTask method passesCutoff.
/**
 * Decides whether the similarity stored under the given key satisfies the
 * post-analysis filter returned by {@code params.getRankTestParameters()}.
 *
 * <p>Original author's note: this check could arguably live in
 * CreateDiseaseSignatureTaskParallel, so that a GenesetSimilarity that fails
 * the filter is never created in the first place.
 *
 * @param similarityKey key used to look up the similarity in {@code geneSetSimilarities};
 *        its name is also used to look up the data set for the PERCENT filter
 * @param sigDataSet signature data set used to look up the hub gene set for the SPECIFIC filter
 * @return {@code true} if the similarity passes the configured filter; {@code false}
 *         if it fails or the filter type is not one of those handled here
 */
private boolean passesCutoff(SimilarityKey similarityKey, EMSignatureDataSet sigDataSet) {
    GenesetSimilarity sim = geneSetSimilarities.get(similarityKey);
    PostAnalysisFilterParameters filter = params.getRankTestParameters();
    double threshold = filter.getValue();

    switch (filter.getType()) {
        case HYPERGEOM:
            return sim.getHypergeomPValue() <= threshold;

        // The Mann-Whitney variants can never pass when ranks are missing.
        case MANN_WHIT_TWO_SIDED:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueTwoSided() <= threshold;
        case MANN_WHIT_GREATER:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueGreater() <= threshold;
        case MANN_WHIT_LESS:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueLess() <= threshold;

        case NUMBER:
            return sim.getSizeOfOverlap() >= threshold;

        case PERCENT: {
            // Overlap as a fraction of the enrichment gene set (gene set 2 of the similarity).
            EMDataSet enrDataSet = map.getDataSet(similarityKey.getName());
            GeneSet enrGeneSet = enrDataSet.getGeneSetsOfInterest().getGeneSetByName(sim.getGeneset2Name());
            double fraction = (double) sim.getSizeOfOverlap() / (double) enrGeneSet.getGenes().size();
            return fraction >= threshold / 100.0;
        }

        case SPECIFIC: {
            // Overlap as a fraction of the signature (hub) gene set (gene set 1 of the similarity).
            GeneSet hubGeneSet = sigDataSet.getGeneSetsOfInterest().getGeneSetByName(sim.getGeneset1Name());
            double fraction = (double) sim.getSizeOfOverlap() / (double) hubGeneSet.getGenes().size();
            return fraction >= threshold / 100.0;
        }

        default:
            return false;
    }
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class CreateDiseaseSignatureTaskParallel method startBuildDiseaseSignatureParallel.
/**
 * Submits one job per signature gene set to the executor; each job compares that
 * signature gene set against every enrichment gene set name in every data set and
 * records a GenesetSimilarity for each pair with a non-empty overlap.
 *
 * <p>Returns immediately: the returned map is filled in by the submitted jobs, so the
 * caller must wait for the executor to finish before reading it.
 *
 * @param tm task monitor, advanced once per signature gene set
 * @param executor executor the comparison jobs are submitted to
 * @param enrichmentGeneSetNames names of the enrichment gene sets to compare against
 * @param signatureGeneSets signature gene sets keyed by hub name
 * @return the concurrent map that the jobs populate
 */
private Map<SimilarityKey, GenesetSimilarity> startBuildDiseaseSignatureParallel(TaskMonitor tm, ExecutorService executor, Set<String> enrichmentGeneSetNames, Map<String, GeneSet> signatureGeneSets) {
    DiscreteTaskMonitor progress = discreteTaskMonitor(tm, signatureGeneSets.size());
    // Gene universe is all enrichment genes in the map
    Set<Integer> universe = map.getAllEnrichmentGenes();
    Map<SimilarityKey, GenesetSimilarity> results = new ConcurrentHashMap<>();

    for (Map.Entry<String, GeneSet> signature : signatureGeneSets.entrySet()) {
        String hubName = signature.getKey();
        GeneSet sigGeneSet = signature.getValue();
        Set<Integer> sigGenesInUniverse = Sets.intersection(sigGeneSet.getGenes(), universe);

        // One job per signature gene set
        executor.execute(() -> {
            compareSignatureToEnrichmentGeneSets(hubName, sigGeneSet, sigGenesInUniverse, universe, enrichmentGeneSetNames, results);
            progress.inc();
        });
    }
    return results;
}

/**
 * Compares one signature gene set against all enrichment gene sets in all data sets and
 * stores a similarity in {@code results} for every pair whose overlap is non-empty.
 * Stops early, without storing anything further, once the current thread is interrupted.
 */
private void compareSignatureToEnrichmentGeneSets(String hubName, GeneSet sigGeneSet, Set<Integer> sigGenesInUniverse, Set<Integer> universe, Set<String> enrichmentGeneSetNames, Map<SimilarityKey, GenesetSimilarity> results) {
    for (String geneSetName : enrichmentGeneSetNames) {
        for (EMDataSet dataSet : dataSets) {
            if (Thread.interrupted()) {
                return;
            }

            GeneSet enrGeneSet = dataSet.getSetOfGeneSets().getGeneSetByName(geneSetName);
            if (enrGeneSet == null) {
                continue;
            }

            // restrict to a common gene universe
            Set<Integer> enrGenes = Sets.intersection(enrGeneSet.getGenes(), universe);
            Set<Integer> union = Sets.union(sigGeneSet.getGenes(), enrGenes);
            Set<Integer> overlap = Sets.intersection(sigGenesInUniverse, enrGenes);
            if (overlap.isEmpty()) {
                continue;
            }

            double coefficient = ComputeSimilarityTaskParallel.computeSimilarityCoeffecient(map.getParams(), overlap, union, sigGeneSet.getGenes(), enrGenes);
            GenesetSimilarity similarity = new GenesetSimilarity(hubName, geneSetName, coefficient, INTERACTION, overlap);

            PostAnalysisFilterType filterType = params.getRankTestParameters().getType();
            switch (filterType) {
                case HYPERGEOM:
                    hypergeometric(getHypergeometricUniverseSize(dataSet), sigGenesInUniverse, enrGenes, overlap, similarity);
                    break;
                case MANN_WHIT_TWO_SIDED:
                case MANN_WHIT_GREATER:
                case MANN_WHIT_LESS:
                    mannWhitney(overlap, similarity, dataSet);
                    // intentional fall-through: Mann-Whitney also gets the hypergeometric test (#70)
                default:
                    hypergeometric(map.getNumberOfGenes(), sigGenesInUniverse, enrGenes, overlap, similarity);
                    break;
            }

            results.put(new SimilarityKey(hubName, geneSetName, INTERACTION, dataSet.getName()), similarity);
        }
    }
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class CreateDiseaseSignatureTaskParallel method run.
/**
 * Compares the enrichment gene sets to the signature gene sets on a fixed-size thread
 * pool (one thread per available processor), waits for the comparison jobs to finish,
 * and, unless the task was cancelled, queues the task that creates the network from
 * the computed similarities.
 *
 * <p>The cancellation timer and the thread pool are released on every exit path,
 * including when this thread is interrupted while waiting or when one of the setup
 * calls throws.
 *
 * @param tm task monitor passed on to the comparison jobs
 * @throws InterruptedException if this thread is interrupted while waiting for the
 *         comparison jobs to finish
 */
@Override
public void run(TaskMonitor tm) throws InterruptedException {
    int cpus = Runtime.getRuntime().availableProcessors();
    ExecutorService executor = Executors.newFixedThreadPool(cpus);
    Timer timer = new Timer();

    Map<String, GeneSet> signatureGeneSets;
    Map<SimilarityKey, GenesetSimilarity> geneSetSimilarities;
    boolean completed = false;
    try {
        // Compare enrichment gene sets to signature gene sets
        Set<String> enrichmentGeneSetNames = getEnrichmentGeneSetNames();
        signatureGeneSets = getSignatureGeneSets();
        handleDuplicateNames(enrichmentGeneSetNames, signatureGeneSets);
        geneSetSimilarities = startBuildDiseaseSignatureParallel(tm, executor, enrichmentGeneSetNames, signatureGeneSets);

        // Support cancellation: poll the cancelled flag once a second and interrupt the workers.
        timer.scheduleAtFixedRate(new TimerTask() {
            @Override
            public void run() {
                if (cancelled) {
                    executor.shutdownNow();
                }
            }
        }, 0, 1000);

        executor.shutdown();
        executor.awaitTermination(3, TimeUnit.HOURS);
        completed = true;
    } finally {
        // Always stop the polling thread; a Timer's thread is non-daemon by default
        // and would otherwise keep running after an exception.
        timer.cancel();
        if (!completed) {
            // Abnormal exit (interrupt or failure during setup): do not leave pool threads behind.
            executor.shutdownNow();
        }
    }

    // create the network here
    if (!cancelled) {
        Task networkTask = networkTaskFactory.create(map, params, signatureGeneSets, geneSetSimilarities);
        insertTasksAfterCurrentTask(networkTask);
    }
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class GMTFileReaderTask method parse.
/**
 * Reads the GMT file one line at a time, turns each line into a gene set via
 * {@code readGeneSet}, and registers it under its name in {@code setOfgenesets}.
 * Lines for which {@code readGeneSet} returns {@code null} are skipped; when
 * {@code setOfgenesets} is {@code null} the lines are still parsed but nothing is stored.
 *
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException if the task is found to be cancelled after reading a line
 */
public void parse() throws IOException, InterruptedException {
    try (BufferedReader gmtReader = new BufferedReader(new FileReader(gmtFileName))) {
        String line = gmtReader.readLine();
        while (line != null) {
            if (cancelled) {
                throw new InterruptedException();
            }

            GeneSet parsed = readGeneSet(map, line);
            if (parsed != null && setOfgenesets != null) {
                setOfgenesets.getGeneSets().put(parsed.getName(), parsed);
            }

            line = gmtReader.readLine();
        }
    }
}
Aggregations