Search in sources:

Example 6 with GeneSet

use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.

the class LegacySessionLoadTest method test_1_LoadedLegacyData.

/**
 * Checks the EnrichmentMap model returned by {@code getEnrichmentMap()} for the
 * legacy session named in the {@code @SessionFile} annotation
 * (presumably loaded by the test harness before this method runs; confirm against BaseIntegrationTest).
 * The assertions walk the model top-down: map, network/edge table, creation parameters,
 * one specific edge, the single data set, one gene set, the enrichment results,
 * the expression matrix, the ranking and finally the data set file paths.
 */
@Test
@SessionFile("em_session_2.2.cys")
public void test_1_LoadedLegacyData() throws Exception {
    // --- Map-level properties and the backing network ---
    EnrichmentMap map = getEnrichmentMap();
    assertEquals("EM1_Enrichment Map", map.getName());
    CyNetwork network = networkManager.getNetwork(map.getNetworkID());
    assertNotNull(network);
    assertEquals(1, map.getDataSetCount());
    assertEquals(14067, map.getNumberOfGenes());
    assertEquals(14067, map.getAllGenes().size());
    // The edge table should hold 3339 rows: that is how many gene set similarity objects there should be.
    CyTable edgeTable = network.getDefaultEdgeTable();
    assertEquals(3339, edgeTable.getRowCount());
    // --- Creation parameters ---
    EMCreationParameters params = map.getParams();
    String prefix = params.getAttributePrefix();
    assertEquals("EM1_", prefix);
    assertEquals(0.5, params.getCombinedConstant(), 0.0);
    assertFalse(params.isEMgmt());
    assertEquals("Geneset_Overlap", params.getEnrichmentEdgeType());
    assertTrue(params.isFDR());
    assertEquals(GreatFilter.HYPER, params.getGreatFilter());
    assertEquals(0.005, params.getPvalue(), 0.0);
    assertEquals(1.0, params.getPvalueMin(), 0.0);
    assertEquals(0.1, params.getQvalue(), 0.0);
    assertEquals(1.0, params.getQvalueMin(), 0.0);
    assertEquals(0.5, params.getSimilarityCutoff(), 0.0);
    assertEquals(SimilarityMetric.OVERLAP, params.getSimilarityMetric());
    //		assertFalse(params.isDistinctExpressionSets());
    // --- One specific edge, looked up by its "<geneset1> (<interaction>) <geneset2>" name ---
    String geneset1 = "RESOLUTION OF SISTER CHROMATID COHESION%REACTOME%REACT_150425.2";
    String geneset2 = "CHROMOSOME, CENTROMERIC REGION%GO%GO:0000775";
    Collection<CyRow> rows = edgeTable.getMatchingRows(CyNetwork.NAME, geneset1 + " (Geneset_Overlap) " + geneset2);
    assertEquals(1, rows.size());
    CyRow row = rows.iterator().next();
    assertEquals("Geneset_Overlap", row.get(CyEdge.INTERACTION, String.class));
    assertEquals(0.6097560975609756, EMStyleBuilder.Columns.EDGE_SIMILARITY_COEFF.get(row, prefix), 0.0);
    // --- The single data set ---
    EMDataSet dataset = map.getDataSet("Dataset 1");
    assertNotNull(dataset);
    assertSame(map, dataset.getMap());
    assertEquals(Method.GSEA, dataset.getMethod());
    assertEquals(12653, dataset.getDataSetGenes().size());
    assertEquals(389, dataset.getGeneSetsOfInterest().getGeneSets().size());
    //		assertEquals(17259, dataset.getSetofgenesets().getGenesets().size()); // MKTODO why? what is this used for
    assertEndsWith(dataset.getSetOfGeneSets().getFilename(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    // Every node SUID recorded on the data set must resolve to a node in the network.
    for (long suid : dataset.getNodeSuids()) {
        assertNotNull(network.getNode(suid));
    }
    // --- One gene set of interest ---
    GeneSet geneset = dataset.getGeneSetsOfInterest().getGeneSets().get("NCRNA PROCESSING%GO%GO:0034470");
    assertEquals(88, geneset.getGenes().size());
    assertEquals("NCRNA PROCESSING%GO%GO:0034470", geneset.getName());
    assertEquals("ncRNA processing", geneset.getDescription());
    assertEquals(Optional.of("GO"), geneset.getSource());
    // --- Enrichment results ---
    SetOfEnrichmentResults enrichments = dataset.getEnrichments();
    assertEquals(4756, enrichments.getEnrichments().size());
    assertEndsWith(enrichments.getFilename1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(enrichments.getFilename2(), "gsea_report_for_NT12_1473194913081.xls");
    assertEquals("ES12", enrichments.getPhenotype1());
    assertEquals("NT12", enrichments.getPhenotype2());
    // One individual result; it is expected to be a GSEAResult.
    EnrichmentResult result = enrichments.getEnrichments().get("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201");
    assertTrue(result instanceof GSEAResult);
    GSEAResult gseaResult = (GSEAResult) result;
    assertEquals("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201", gseaResult.getName());
    assertEquals(0.42844063, gseaResult.getES(), 0.0);
    assertEquals(0.45225498, gseaResult.getFdrqvalue(), 0.0);
    assertEquals(1.0, gseaResult.getFwerqvalue(), 0.0);
    assertEquals(23, gseaResult.getGsSize());
    assertEquals(1.1938541, gseaResult.getNES(), 0.0);
    assertEquals(0.2457786, gseaResult.getPvalue(), 0.0);
    assertEquals(4689, gseaResult.getRankAtMax());
    assertEquals(Optional.of("GO"), gseaResult.getSource());
    // --- Expression matrix ---
    GeneExpressionMatrix expressions = dataset.getExpressionSets();
    assertEquals(20326, expressions.getExpressionUniverse());
    assertEquals(3.686190609, expressions.getClosesttoZero(), 0.0);
    //		assertEndsWith(expressions.getFilename(), "MCF7_ExprMx_v2_names.gct");
    assertEquals(15380.42388, expressions.getMaxExpression(), 0.0);
    assertEquals(3.686190609, expressions.getMinExpression(), 0.0);
    assertEquals(20, expressions.getNumConditions());
    assertEquals(12653, expressions.getExpressionMatrix().size());
    assertEquals(12653, expressions.getExpressionMatrix_rowNormalized().size());
    GeneExpression expression = expressions.getExpressionMatrix().get(0);
    assertEquals("MOCOS", expression.getName());
    assertEquals("MOCOS (molybdenum cofactor sulfurase)", expression.getDescription());
    // NOTE(review): 18 values here versus getNumConditions() == 20 above; presumably the
    // condition count includes the name and description columns. Confirm against GeneExpressionMatrix.
    assertEquals(18, expression.getExpression().length);
    // --- Ranking ---
    Ranking ranking = expressions.getRanks().get("GSEARanking");
    assertEquals(12653, ranking.getAllRanks().size());
    assertEquals(12653, ranking.getRanking().size());
    Rank rank = ranking.getRanking().get(0);
    assertEquals("MOCOS", rank.getName());
    assertEquals(1238, rank.getRank().intValue());
    assertEquals(0.54488367, rank.getScore(), 0.0);
    // --- File paths recorded for the data set (only the path suffix is compared) ---
    DataSetFiles files = dataset.getDataSetFiles();
    assertEndsWith(files.getClassFile(), "ES_NT.cls");
    assertEndsWith(files.getEnrichmentFileName1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(files.getEnrichmentFileName2(), "gsea_report_for_NT12_1473194913081.xls");
    //		assertEndsWith(files.getExpressionFileName(), "MCF7_ExprMx_v2_names.gct");
    assertEndsWith(files.getGMTFileName(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    assertEndsWith(files.getGseaHtmlReportFile(), "estrogen_treatment_12hr_gsea_enrichment_results.Gsea.1473194913081/index.html");
    assertEndsWith(files.getRankedFile(), "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    assertEquals("ES12", files.getPhenotype1());
    assertEquals("NT12", files.getPhenotype2());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EMCreationParameters(org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters) GSEAResult(org.baderlab.csplugins.enrichmentmap.model.GSEAResult) CyNetwork(org.cytoscape.model.CyNetwork) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) CyRow(org.cytoscape.model.CyRow) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) CyTable(org.cytoscape.model.CyTable) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) SetOfEnrichmentResults(org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults) BaseIntegrationTest(org.baderlab.csplugins.enrichmentmap.integration.BaseIntegrationTest) Test(org.junit.Test) SessionFile(org.baderlab.csplugins.enrichmentmap.integration.SessionFile)

Example 7 with GeneSet

use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.

the class CreateDiseaseSignatureNetworkTask method passesCutoff.

/**
 * Tells whether the similarity stored under {@code similarityKey} satisfies the
 * filter returned by {@code params.getRankTestParameters()}.
 * <p>
 * P-value based filters pass when the p-value is at most the filter value (the
 * Mann-Whitney variants additionally fail when ranks are missing); NUMBER passes when
 * the overlap size is at least the filter value; PERCENT and SPECIFIC pass when the
 * overlap size divided by the size of the enrichment gene set (PERCENT) or of the
 * signature gene set (SPECIFIC) is at least the filter value divided by 100.
 * Any other filter type fails. Looked-up objects are not null-checked.
 * <p>
 * Design note carried over from the original author: this check could live in
 * CreateDiseaseSignatureTaskParallel so that a failing GenesetSimilarity is never
 * created in the first place.
 *
 * @param similarityKey key of the similarity to test; its name is used to look up the data set in the PERCENT case
 * @param sigDataSet data set queried for the signature gene set in the SPECIFIC case
 * @return {@code true} if the similarity passes the configured filter
 */
private boolean passesCutoff(SimilarityKey similarityKey, EMSignatureDataSet sigDataSet) {
    GenesetSimilarity sim = geneSetSimilarities.get(similarityKey);
    PostAnalysisFilterParameters filter = params.getRankTestParameters();
    switch(filter.getType()) {
        case HYPERGEOM:
            return sim.getHypergeomPValue() <= filter.getValue();
        case MANN_WHIT_TWO_SIDED:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueTwoSided() <= filter.getValue();
        case MANN_WHIT_GREATER:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueGreater() <= filter.getValue();
        case MANN_WHIT_LESS:
            if (sim.isMannWhitMissingRanks()) {
                return false;
            }
            return sim.getMannWhitPValueLess() <= filter.getValue();
        case NUMBER:
            return sim.getSizeOfOverlap() >= filter.getValue();
        case PERCENT: {
            // Overlap relative to the enrichment gene set (geneset 2 of the similarity).
            EMDataSet owner = map.getDataSet(similarityKey.getName());
            String enrichmentName = sim.getGeneset2Name();
            GeneSet enrichmentSet = owner.getGeneSetsOfInterest().getGeneSetByName(enrichmentName);
            int enrichmentSize = enrichmentSet.getGenes().size();
            double fraction = (double) sim.getSizeOfOverlap() / (double) enrichmentSize;
            return fraction >= filter.getValue() / 100.0;
        }
        case SPECIFIC: {
            // Overlap relative to the signature gene set (geneset 1 of the similarity).
            String signatureName = sim.getGeneset1Name();
            GeneSet signatureSet = sigDataSet.getGeneSetsOfInterest().getGeneSetByName(signatureName);
            int signatureSize = signatureSet.getGenes().size();
            double fraction = (double) sim.getSizeOfOverlap() / (double) signatureSize;
            return fraction >= filter.getValue() / 100.0;
        }
        default:
            return false;
    }
}
Also used : PostAnalysisFilterParameters(org.baderlab.csplugins.enrichmentmap.model.PostAnalysisFilterParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) GenesetSimilarity(org.baderlab.csplugins.enrichmentmap.model.GenesetSimilarity) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet)

Example 8 with GeneSet

use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.

the class CreateDiseaseSignatureTaskParallel method startBuildDiseaseSignatureParallel.

/**
 * Submits one job per signature gene set to {@code executor}; each job compares that
 * signature gene set with every named enrichment gene set in every data set and records
 * a {@link GenesetSimilarity} for each non-empty overlap.
 * <p>
 * Returns immediately: the returned map is filled by the submitted jobs, so the caller
 * has to wait for the executor to finish before relying on its contents.
 *
 * @param tm task monitor wrapped into a discrete monitor that is incremented once per finished job
 * @param executor executor that runs the comparison jobs
 * @param enrichmentGeneSetNames names of the enrichment gene sets to compare against
 * @param signatureGeneSets signature gene sets keyed by name
 * @return the (concurrent) map the jobs write their results into
 */
private Map<SimilarityKey, GenesetSimilarity> startBuildDiseaseSignatureParallel(TaskMonitor tm, ExecutorService executor, Set<String> enrichmentGeneSetNames, Map<String, GeneSet> signatureGeneSets) {
    DiscreteTaskMonitor progress = discreteTaskMonitor(tm, signatureGeneSets.size());
    // All enrichment genes in the map form the gene universe.
    Set<Integer> universe = map.getAllEnrichmentGenes();
    Map<SimilarityKey, GenesetSimilarity> results = new ConcurrentHashMap<>();
    for (Map.Entry<String, GeneSet> signature : signatureGeneSets.entrySet()) {
        String hubName = signature.getKey();
        GeneSet sigGeneSet = signature.getValue();
        Set<Integer> sigInUniverse = Sets.intersection(sigGeneSet.getGenes(), universe);
        // One job per signature gene set.
        executor.execute(() -> {
            scan:
            for (String enrName : enrichmentGeneSetNames) {
                for (EMDataSet ds : dataSets) {
                    // Stop the whole job as soon as this worker thread has been interrupted.
                    if (Thread.interrupted()) {
                        break scan;
                    }
                    GeneSet enrSet = ds.getSetOfGeneSets().getGeneSetByName(enrName);
                    if (enrSet == null) {
                        continue;
                    }
                    // Restrict the enrichment genes to the common gene universe.
                    Set<Integer> enrGenes = Sets.intersection(enrSet.getGenes(), universe);
                    Set<Integer> combined = Sets.union(sigGeneSet.getGenes(), enrGenes);
                    Set<Integer> overlap = Sets.intersection(sigInUniverse, enrGenes);
                    if (overlap.isEmpty()) {
                        continue;
                    }
                    double coefficient = ComputeSimilarityTaskParallel.computeSimilarityCoeffecient(map.getParams(), overlap, combined, sigGeneSet.getGenes(), enrGenes);
                    GenesetSimilarity similarity = new GenesetSimilarity(hubName, enrName, coefficient, INTERACTION, overlap);
                    PostAnalysisFilterType type = params.getRankTestParameters().getType();
                    switch(type) {
                        case HYPERGEOM:
                            int dataSetUniverseSize = getHypergeometricUniverseSize(ds);
                            hypergeometric(dataSetUniverseSize, sigInUniverse, enrGenes, overlap, similarity);
                            break;
                        case MANN_WHIT_TWO_SIDED:
                        case MANN_WHIT_GREATER:
                        case MANN_WHIT_LESS:
                            mannWhitney(overlap, similarity, ds);
                            // deliberate fall-through: Mann-Whitney also gets the hypergeometric test (#70)
                        default:
                            int mapUniverseSize = map.getNumberOfGenes();
                            hypergeometric(mapUniverseSize, sigInUniverse, enrGenes, overlap, similarity);
                            break;
                    }
                    results.put(new SimilarityKey(hubName, enrName, INTERACTION, ds.getName()), similarity);
                }
            }
            progress.inc();
        });
    }
    return results;
}
Also used : DiscreteTaskMonitor(org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) Set(java.util.Set) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) PostAnalysisFilterType(org.baderlab.csplugins.enrichmentmap.model.PostAnalysisFilterType) SimilarityKey(org.baderlab.csplugins.enrichmentmap.model.SimilarityKey) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) GenesetSimilarity(org.baderlab.csplugins.enrichmentmap.model.GenesetSimilarity) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet)

Example 9 with GeneSet

use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.

the class CreateDiseaseSignatureTaskParallel method run.

/**
 * Computes the similarities between enrichment and signature gene sets on a fixed
 * thread pool (one thread per available processor), waits up to three hours for the
 * pool to finish and, unless the task was cancelled, queues the network-creation task.
 * <p>
 * While waiting, a timer polls the {@code cancelled} flag once per second and stops the
 * pool as soon as it is set. The timer and the pool are always released, including when
 * this thread is interrupted while waiting or an earlier step throws.
 *
 * @param tm task monitor handed to the similarity computation for progress reporting
 * @throws InterruptedException if this thread is interrupted while waiting for the pool
 */
@Override
public void run(TaskMonitor tm) throws InterruptedException {
    int cpus = Runtime.getRuntime().availableProcessors();
    ExecutorService executor = Executors.newFixedThreadPool(cpus);
    Map<String, GeneSet> signatureGeneSets;
    Map<SimilarityKey, GenesetSimilarity> geneSetSimilarities;
    try {
        // Compare enrichment gene sets to signature gene sets
        Set<String> enrichmentGeneSetNames = getEnrichmentGeneSetNames();
        signatureGeneSets = getSignatureGeneSets();
        handleDuplicateNames(enrichmentGeneSetNames, signatureGeneSets);
        geneSetSimilarities = startBuildDiseaseSignatureParallel(tm, executor, enrichmentGeneSetNames, signatureGeneSets);
        // Support cancellation
        Timer timer = new Timer();
        try {
            timer.scheduleAtFixedRate(new TimerTask() {

                @Override
                public void run() {
                    if (cancelled) {
                        executor.shutdownNow();
                    }
                }
            }, 0, 1000);
            executor.shutdown();
            executor.awaitTermination(3, TimeUnit.HOURS);
        } finally {
            // Without this, an InterruptedException above would leave the timer thread running forever.
            timer.cancel();
        }
    } finally {
        // Has no effect once the pool has terminated normally; otherwise (interrupt, exception,
        // or the three-hour wait elapsing) it stops the workers instead of leaking them.
        executor.shutdownNow();
    }
    // create the network here
    if (!cancelled) {
        Task networkTask = networkTaskFactory.create(map, params, signatureGeneSets, geneSetSimilarities);
        insertTasksAfterCurrentTask(networkTask);
    }
}
Also used : Task(org.cytoscape.work.Task) AbstractTask(org.cytoscape.work.AbstractTask) TimerTask(java.util.TimerTask) Timer(java.util.Timer) TimerTask(java.util.TimerTask) ExecutorService(java.util.concurrent.ExecutorService) SimilarityKey(org.baderlab.csplugins.enrichmentmap.model.SimilarityKey) GenesetSimilarity(org.baderlab.csplugins.enrichmentmap.model.GenesetSimilarity) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet)

Example 10 with GeneSet

use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.

the class GMTFileReaderTask method parse.

/**
 * Reads the file {@code gmtFileName} line by line, turns each line into a gene set via
 * {@code readGeneSet(map, line)} and, when both the gene set and {@code setOfgenesets}
 * are non-null, stores the gene set in {@code setOfgenesets} under its name.
 *
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException if {@code cancelled} is set when the next line is about to be processed
 */
public void parse() throws IOException, InterruptedException {
    try (BufferedReader in = new BufferedReader(new FileReader(gmtFileName))) {
        String currentLine = in.readLine();
        while (currentLine != null) {
            // Cancellation is checked once per line, before the line is parsed.
            if (cancelled) {
                throw new InterruptedException();
            }
            GeneSet parsed = readGeneSet(map, currentLine);
            if (parsed != null && setOfgenesets != null) {
                setOfgenesets.getGeneSets().put(parsed.getName(), parsed);
            }
            currentLine = in.readLine();
        }
    }
}
Also used : BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet)

Aggregations

GeneSet (org.baderlab.csplugins.enrichmentmap.model.GeneSet): 14; EnrichmentMap (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap): 8; EMDataSet (org.baderlab.csplugins.enrichmentmap.model.EMDataSet): 7; EnrichmentResult (org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult): 7; GenericResult (org.baderlab.csplugins.enrichmentmap.model.GenericResult): 5; NullTaskMonitor (org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor): 5; ImmutableSet (com.google.common.collect.ImmutableSet): 4; GenesetSimilarity (org.baderlab.csplugins.enrichmentmap.model.GenesetSimilarity): 4; SetOfEnrichmentResults (org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults): 4; Set (java.util.Set): 3; Ranking (org.baderlab.csplugins.enrichmentmap.model.Ranking): 3; SimilarityKey (org.baderlab.csplugins.enrichmentmap.model.SimilarityKey): 3; DiscreteTaskMonitor (org.baderlab.csplugins.enrichmentmap.util.DiscreteTaskMonitor): 3; Map (java.util.Map): 2; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2; DataSetFiles (org.baderlab.csplugins.enrichmentmap.model.DataSetFiles): 2; EMCreationParameters (org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters): 2; GeneExpressionMatrix (org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix): 2; Rank (org.baderlab.csplugins.enrichmentmap.model.Rank): 2; CyNetwork (org.cytoscape.model.CyNetwork): 2