Search in sources :

Example 1 with Rank

use of org.baderlab.csplugins.enrichmentmap.model.Rank in project EnrichmentMapApp by BaderLab.

the class LegacySessionLoader method loadSession.

/**
	 * Restores the enrichment maps that were saved with a legacy version of the app.
	 * <p>
	 * The app files attached to the session are processed in several passes:
	 * first the ".props" files (one EnrichmentMap per file, bound to the network
	 * found by name), then the remaining data files (gene sets, genes, enrichments,
	 * ranks), then the expression files. Finally every restored map has its gene
	 * sets of interest initialized and is registered with the manager.
	 * <p>
	 * Files whose network or parameters cannot be resolved are skipped instead of
	 * aborting the whole load. Any exception raised while loading is caught and
	 * its stack trace is printed.
	 *
	 * @param session - the session whose app file list (keyed by CyActivator.APP_NAME) is restored
	 */
@SuppressWarnings("unchecked")
public void loadSession(CySession session) {
    Map<Long, EnrichmentMapParameters> paramsMap = new HashMap<>();
    Map<Long, EnrichmentMap> enrichmentMapMap = new HashMap<>();
    List<File> fileList = session.getAppFileListMap().get(CyActivator.APP_NAME);
    //no files were stored for this app, nothing to restore
    if (fileList == null)
        return;
    try {
        //go through the prop files first to create the correct objects to be able to add other files to.
        for (File prop_file : fileList) {
            if (prop_file.getName().contains(".props")) {
                String fullText;
                //Scanner.next() throws on empty input, so check hasNext(); closing the scanner closes the stream
                try (InputStream reader = streamUtil.getInputStream(prop_file.getAbsolutePath());
                    Scanner scanner = new Scanner(reader, "UTF-8")) {
                    scanner.useDelimiter("\\A");
                    fullText = scanner.hasNext() ? scanner.next() : "";
                }
                //an empty props file cannot describe a map
                if (fullText.isEmpty()) {
                    System.out.println("Properties file " + prop_file.getName() + " is empty.");
                    continue;
                }
                //Given the file with all the parameters create a new parameter
                EnrichmentMapParameters params = enrichmentMapParametersFactory.create(fullText);
                EnrichmentMap em = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
                //get the network name
                String param_name = em.getName();
                //TODO:distinguish between GSEA and EM saved sessions
                String props_name = (prop_file.getName().split("\\."))[0];
                String networkName = param_name;
                //related to bug ticket #49
                if (!props_name.equalsIgnoreCase(param_name))
                    networkName = props_name;
                //after associated the properties with the network
                //initialized each Dataset that we have files for
                HashMap<String, DataSetFiles> files = params.getFiles();
                Method datasetMethod = EnrichmentMapParameters.stringToMethod(params.getMethod());
                for (String current_dataset : files.keySet()) {
                    em.createDataSet(current_dataset, datasetMethod, files.get(current_dataset));
                }
                CyNetwork network = getNetworkByName(networkName);
                //without a network there is nothing to attach this map to
                if (network == null) {
                    System.out.println("network for file" + prop_file.getName() + " does not exist.");
                    continue;
                }
                Long suid = network.getSUID();
                em.setNetworkID(suid);
                paramsMap.put(suid, params);
                enrichmentMapMap.put(suid, em);
            }
        }
        // go through the rest of the files
        for (File propFile : fileList) {
            FileNameParts parts = ParseFileName(propFile);
            if (parts == null || parts.name == null || propFile.getName().contains(".props"))
                continue;
            CyNetwork net = getNetworkByName(parts.name);
            EnrichmentMap em = net == null ? null : enrichmentMapMap.get(net.getSUID());
            EnrichmentMapParameters params = net == null ? null : paramsMap.get(net.getSUID());
            //the map and its parameters are always stored together, skip files that belong to neither
            if (em == null || params == null) {
                System.out.println("network for file" + propFile.getName() + " does not exist.");
                continue;
            }
            Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
            //expression files are handled in the next pass
            if ((!propFile.getName().contains(".expression1.txt")) && (!propFile.getName().contains(".expression2.txt"))) {
                HashMap<String, String> props = params.getProps();
                //if this a dataset specific file make sure there is a dataset object for it
                if (!(parts.dataset == null) && em.getDataSet(parts.dataset) == null && !parts.dataset.equalsIgnoreCase("signature"))
                    em.createDataSet(parts.dataset, method, params.getFiles().get(parts.dataset));
                if (parts.type == null)
                    System.out.println("Sorry, unable to determine the type of the file: " + propFile.getName());
                //read the file
                String fullText;
                try (InputStream reader = streamUtil.getInputStream(propFile.getAbsolutePath());
                    Scanner scanner = new Scanner(reader, "UTF-8")) {
                    scanner.useDelimiter("\\A");
                    fullText = scanner.hasNext() ? scanner.next() : "";
                }
                //if the file is empty then skip it
                if (fullText.isEmpty())
                    continue;
                if (propFile.getName().contains(".gmt")) {
                    HashMap<String, GeneSet> gsMap = (HashMap<String, GeneSet>) params.repopulateHashmap(fullText, 1);
                    if (propFile.getName().contains(".signature.gmt")) {
                        // TODO Find a better way to serialize EMSignatureDataSet
                        String sdsName = propFile.getName().replace(".signature.gmt", "");
                        sdsName = NamingUtil.getUniqueName(sdsName, em.getSignatureDataSets().keySet());
                        EMSignatureDataSet sigDataSet = new EMSignatureDataSet(sdsName);
                        em.addSignatureDataSet(sigDataSet);
                        SetOfGeneSets sigGeneSets = sigDataSet.getGeneSetsOfInterest();
                        gsMap.forEach((k, v) -> sigGeneSets.addGeneSet(k, v));
                    } else if (propFile.getName().contains(".set2.gmt")) {
                        // account for legacy session files
                        // NOTE(review): the gene sets built here are never attached to the map - confirm whether that is intended
                        if (em.getAllGeneSets().containsKey(LegacySupport.DATASET2)) {
                            SetOfGeneSets gs = new SetOfGeneSets(LegacySupport.DATASET2, props);
                            gs.setGeneSets(gsMap);
                        }
                    } else {
                        SetOfGeneSets gs = new SetOfGeneSets(parts.dataset, props);
                        gs.setGeneSets(gsMap);
                        em.getDataSets().get(parts.dataset).setSetOfGeneSets(gs);
                    }
                }
                if (propFile.getName().contains(".genes.txt")) {
                    HashMap<String, Integer> genes = params.repopulateHashmap(fullText, 2);
                    //ticket #188 - unable to open session files that have empty enrichment maps.
                    if (genes != null && !genes.isEmpty()) {
                        genes.forEach(em::addGene);
                        // Ticket #107 : restore also gene count (needed to determine the next free hash in case we do PostAnalysis with a restored session)
                        em.setNumberOfGenes(Math.max(em.getNumberOfGenes(), Collections.max(genes.values()) + 1));
                    }
                }
                if (propFile.getName().contains(".hashkey2genes.txt")) {
                    HashMap<Integer, String> hashkey2gene = params.repopulateHashmap(fullText, 5);
                    //ticket #188 - unable to open session files that have empty enrichment maps.
                    if (hashkey2gene != null && !hashkey2gene.isEmpty())
                        // Ticket #107 : restore also gene count (needed to determine the next free hash in case we do PostAnalysis with a restored session)
                        em.setNumberOfGenes(Math.max(em.getNumberOfGenes(), Collections.max(hashkey2gene.keySet()) + 1));
                }
                if ((parts.type != null && (parts.type.equalsIgnoreCase("ENR") || (parts.type.equalsIgnoreCase("SubENR")))) || propFile.getName().contains(".ENR1.txt") || propFile.getName().contains(".SubENR1.txt")) {
                    SetOfEnrichmentResults enrichments;
                    //check to see if this dataset has enrichment results already
                    if (parts.dataset != null && em.getDataSet(parts.dataset).getEnrichments() != null) {
                        enrichments = em.getDataSet(parts.dataset).getEnrichments();
                    } else if (parts.dataset == null) {
                        enrichments = em.getDataSet(LegacySupport.DATASET1).getEnrichments();
                    } else {
                        enrichments = new SetOfEnrichmentResults(parts.dataset, props);
                        em.getDataSet(parts.dataset).setEnrichments(enrichments);
                    }
                    //parts.type may be null when the file was only recognized by its legacy name
                    if ((parts.type != null && parts.type.equalsIgnoreCase("ENR")) || propFile.getName().contains(".ENR1.txt")) {
                        if (params.getMethod().equalsIgnoreCase(EnrichmentMapParameters.method_GSEA))
                            enrichments.setEnrichments(params.repopulateHashmap(fullText, 3));
                        else
                            enrichments.setEnrichments(params.repopulateHashmap(fullText, 4));
                    }
                }
                //it would only happen for sessions saved with version 0.8
                if (propFile.getName().contains(".RANKS1.txt") || propFile.getName().contains(".RANKS1Genes.txt")) {
                    Ranking new_ranking;
                    //Check to see if there is already GSEARanking
                    if (em.getDataSet(LegacySupport.DATASET1).getExpressionSets().getAllRanksNames().contains(Ranking.GSEARanking)) {
                        new_ranking = em.getDataSet(LegacySupport.DATASET1).getExpressionSets().getRanksByName(Ranking.GSEARanking);
                    } else {
                        new_ranking = new Ranking();
                        em.getDataSet(LegacySupport.DATASET1).getExpressionSets().addRanks(Ranking.GSEARanking, new_ranking);
                    }
                    if (propFile.getName().contains(".RANKS1.txt")) {
                        Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 7);
                        ranks.forEach(new_ranking::addRank);
                    }
                }
                if (propFile.getName().contains(".RANKS.txt")) {
                    if (parts.ranks_name == null) {
                        //we need to get the name of this set of rankings
                        // network_name.ranking_name.ranks.txt --> split by "." and get 2
                        String[] file_name_tokens = (propFile.getName()).split("\\.");
                        if ((file_name_tokens.length == 4) && (file_name_tokens[1].equals("Dataset 1 Ranking") || file_name_tokens[1].equals("Dataset 2 Ranking")) || (propFile.getName().contains(Ranking.GSEARanking)))
                            parts.ranks_name = Ranking.GSEARanking;
                        else //this is an extra rank file for backwards compatability.  Ignore it.
                        if ((file_name_tokens.length == 4) && (file_name_tokens[1].equals("Dataset 1") || file_name_tokens[1].equals("Dataset 2")) && file_name_tokens[2].equals("RANKS"))
                            continue;
                        else
                            //file name is not structured properly --> default to file name
                            parts.ranks_name = propFile.getName();
                    }
                    Ranking new_ranking = new Ranking();
                    Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 6);
                    ranks.forEach(new_ranking::addRank);
                    if (parts.dataset != null)
                        em.getDataSet(parts.dataset).getExpressionSets().addRanks(parts.ranks_name, new_ranking);
                    else
                        em.getDataSet(LegacySupport.DATASET1).getExpressionSets().addRanks(parts.ranks_name, new_ranking);
                }
                //Deal with legacy issues
                if (params.isTwoDatasets()) {
                    //make sure there is a Dataset2
                    if (!em.getDataSets().containsKey(LegacySupport.DATASET2))
                        em.createDataSet(LegacySupport.DATASET2, method, new DataSetFiles());
                    if (propFile.getName().contains(".ENR2.txt") || propFile.getName().contains(".SubENR2.txt")) {
                        SetOfEnrichmentResults enrichments;
                        //check to see if this dataset has enrichment results already
                        if (em.getDataSet(LegacySupport.DATASET2).getEnrichments() != null) {
                            enrichments = em.getDataSet(LegacySupport.DATASET2).getEnrichments();
                        } else {
                            enrichments = new SetOfEnrichmentResults(LegacySupport.DATASET2, props);
                            em.getDataSet(LegacySupport.DATASET2).setEnrichments(enrichments);
                        }
                        if (propFile.getName().contains(".ENR2.txt")) {
                            if (params.getMethod().equalsIgnoreCase(EnrichmentMapParameters.method_GSEA))
                                enrichments.setEnrichments(params.repopulateHashmap(fullText, 3));
                            else
                                enrichments.setEnrichments(params.repopulateHashmap(fullText, 4));
                        }
                    }
                    //it would only happen for sessions saved with version 0.8
                    if (propFile.getName().contains(".RANKS2.txt") || propFile.getName().contains(".RANKS2Genes.txt")) {
                        Ranking new_ranking;
                        // Check to see if there is already GSEARanking
                        if (em.getDataSet(LegacySupport.DATASET2).getExpressionSets().getAllRanksNames().contains(Ranking.GSEARanking)) {
                            new_ranking = em.getDataSet(LegacySupport.DATASET2).getExpressionSets().getRanksByName(Ranking.GSEARanking);
                        } else {
                            new_ranking = new Ranking();
                            em.getDataSet(LegacySupport.DATASET2).getExpressionSets().addRanks(Ranking.GSEARanking, new_ranking);
                        }
                        if (propFile.getName().contains(".RANKS2.txt")) {
                            Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 6);
                            ranks.forEach(new_ranking::addRank);
                        }
                    }
                }
            }
        }
        //info from the parameters
        for (File prop_file : fileList) {
            FileNameParts parts_exp = ParseFileName(prop_file);
            //unrecognized file
            if ((parts_exp == null) || (parts_exp.name == null))
                continue;
            CyNetwork net = getNetworkByName(parts_exp.name);
            //file belongs to a network that is not part of this session
            if (net == null)
                continue;
            EnrichmentMap map = enrichmentMapMap.get(net.getSUID());
            EnrichmentMapParameters params = paramsMap.get(net.getSUID());
            //no map was restored for this network in the first pass
            if (map == null || params == null)
                continue;
            Map<String, String> props = params.getProps();
            if (parts_exp.type != null && parts_exp.type.equalsIgnoreCase("expression")) {
                if (map.getDataSets().containsKey(parts_exp.dataset)) {
                    EMDataSet ds = map.getDataSet(parts_exp.dataset);
                    ds.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
                    ds.getExpressionSets().setFilename(prop_file.getAbsolutePath());
                    ExpressionFileReaderTask expressionFile1 = new ExpressionFileReaderTask(ds);
                    GeneExpressionMatrix matrix = expressionFile1.parse();
                    matrix.restoreProps(parts_exp.dataset, props);
                }
            }
            //Deal with legacy session files.
            if (prop_file.getName().contains("expression1.txt")) {
                EMDataSet ds1 = map.getDataSet(LegacySupport.DATASET1);
                ds1.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
                ds1.getExpressionSets().setFilename(prop_file.getAbsolutePath());
                ExpressionFileReaderTask expressionFile1 = new ExpressionFileReaderTask(ds1);
                expressionFile1.parse();
            }
            if (prop_file.getName().contains("expression2.txt")) {
                EMDataSet ds2 = map.getDataSet(LegacySupport.DATASET2);
                ds2.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
                ds2.getExpressionSets().setFilename(prop_file.getAbsolutePath());
                ExpressionFileReaderTask expressionFile2 = new ExpressionFileReaderTask(ds2);
                expressionFile2.parse();
                //are dealing with two distinct expression files.
                if (map.getDataSet(LegacySupport.DATASET2) != null && map.getDataSet(LegacySupport.DATASET2).getGeneSetsOfInterest() != null && !map.getDataSet(LegacySupport.DATASET2).getGeneSetsOfInterest().getGeneSets().isEmpty()) {
                    map.setDistinctExpressionSets(true);
                    map.getDataSet(LegacySupport.DATASET1).setDataSetGenes(new HashSet<Integer>((Set<Integer>) map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getGeneIds()));
                    map.getDataSet(LegacySupport.DATASET2).setDataSetGenes(new HashSet<Integer>((Set<Integer>) map.getDataSet(LegacySupport.DATASET2).getExpressionSets().getGeneIds()));
                }
            }
        }
        //iterate over the networks
        for (Iterator<Long> j = enrichmentMapMap.keySet().iterator(); j.hasNext(); ) {
            Long id = j.next();
            EnrichmentMap map = enrichmentMapMap.get(id);
            //only initialize objects if there is a map for this network
            if (map != null) {
                if (map.getDataSets().size() > 1) {
                    Set<Integer> dataset1_genes = map.getDataSets().get(LegacySupport.DATASET1).getDataSetGenes();
                    Set<Integer> dataset2_genes = map.getDataSets().get(LegacySupport.DATASET2).getDataSetGenes();
                    if (!dataset1_genes.equals(dataset2_genes))
                        map.setDistinctExpressionSets(true);
                }
                //initialize the Genesets (makes sure the leading edge is set correctly)
                //Initialize the set of genesets and GSEA results that we want to compute over
                InitializeGenesetsOfInterestTask genesets_init = new InitializeGenesetsOfInterestTask(map);
                // MKTODO really?
                genesets_init.setThrowIfMissing(false);
                genesets_init.initializeSets(null);
            }
        //end of if(map != null)
        }
        for (Iterator<Long> j = enrichmentMapMap.keySet().iterator(); j.hasNext(); ) {
            Long id = j.next();
            CyNetwork currentNetwork = cyNetworkManager.getNetwork(id);
            EnrichmentMap map = enrichmentMapMap.get(id);
            map.setLegacy(true);
            emManager.registerEnrichmentMap(map);
            if (!j.hasNext()) {
                //set the last network to be the one viewed and initialize the parameters panel
                cyApplicationManager.setCurrentNetwork(currentNetwork);
            }
        }
    } catch (Exception ee) {
        ee.printStackTrace();
    }
}
Also used : DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) NamingUtil(org.baderlab.csplugins.enrichmentmap.util.NamingUtil) CySession(org.cytoscape.session.CySession) Inject(com.google.inject.Inject) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) Scanner(java.util.Scanner) ExpressionFileReaderTask(org.baderlab.csplugins.enrichmentmap.parsers.ExpressionFileReaderTask) HashMap(java.util.HashMap) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) CyActivator(org.baderlab.csplugins.enrichmentmap.CyActivator) StreamUtil(org.cytoscape.io.util.StreamUtil) SetOfEnrichmentResults(org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults) HashSet(java.util.HashSet) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) CyNetwork(org.cytoscape.model.CyNetwork) Map(java.util.Map) CyServiceRegistrar(org.cytoscape.service.util.CyServiceRegistrar) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) Iterator(java.util.Iterator) LegacySupport(org.baderlab.csplugins.enrichmentmap.model.LegacySupport) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) Set(java.util.Set) CyNetworkManager(org.cytoscape.model.CyNetworkManager) EnrichmentMapManager(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapManager) SetOfGeneSets(org.baderlab.csplugins.enrichmentmap.model.SetOfGeneSets) File(java.io.File) List(java.util.List) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) CyApplicationManager(org.cytoscape.application.CyApplicationManager) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EMSignatureDataSet(org.baderlab.csplugins.enrichmentmap.model.EMSignatureDataSet) Collections(java.util.Collections) InitializeGenesetsOfInterestTask(org.baderlab.csplugins.enrichmentmap.task.InitializeGenesetsOfInterestTask) InputStream(java.io.InputStream) 
Scanner(java.util.Scanner) EMSignatureDataSet(org.baderlab.csplugins.enrichmentmap.model.EMSignatureDataSet) HashSet(java.util.HashSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) Set(java.util.Set) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) EMSignatureDataSet(org.baderlab.csplugins.enrichmentmap.model.EMSignatureDataSet) HashMap(java.util.HashMap) CyNetwork(org.cytoscape.model.CyNetwork) SetOfGeneSets(org.baderlab.csplugins.enrichmentmap.model.SetOfGeneSets) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) ExpressionFileReaderTask(org.baderlab.csplugins.enrichmentmap.parsers.ExpressionFileReaderTask) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) InputStream(java.io.InputStream) InitializeGenesetsOfInterestTask(org.baderlab.csplugins.enrichmentmap.task.InitializeGenesetsOfInterestTask) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) File(java.io.File) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) HashMap(java.util.HashMap) Map(java.util.Map) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) SetOfEnrichmentResults(org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults)

Example 2 with Rank

use of org.baderlab.csplugins.enrichmentmap.model.Rank in project EnrichmentMapApp by BaderLab.

the class GSEALeadingEdgeRankingOption method computeRanking.

/**
 * Builds the rank values for the genes of the expression matrix, flagging the
 * ranks that lie on the leading-edge side of the top rank as significant.
 *
 * @param genes the gene keys to keep in the result; every other key is removed
 * @return an already completed future holding the rank value of each retained gene
 */
@Override
public CompletableFuture<Optional<Map<Integer, RankValue>>> computeRanking(Collection<Integer> genes) {
    initializeLeadingEdge();
    final int topRank = getTopRank();
    final boolean isNegative = isNegativeGS();
    // a top rank of 0 or -1 means nothing can be marked significant
    final boolean topRankUsable = topRank != 0 && topRank != -1;

    Map<Integer, GeneExpression> expressions = dataset.getExpressionSets().getExpressionMatrix();
    Ranking ranking = dataset.getExpressionSets().getRanksByName(rankingName);
    Integer[] rankPerGene = new Integer[expressions.size()];
    Map<Integer, List<Integer>> genesByRank = new HashMap<>();
    Map<Integer, Rank> geneRanks = ranking.getRanking();

    // collect the rank of every expressed gene (-1 when unranked) and group the genes by rank
    int index = 0;
    for (Integer geneKey : expressions.keySet()) {
        rankPerGene[index] = geneRanks.containsKey(geneKey) ? geneRanks.get(geneKey).getRank() : Integer.valueOf(-1);
        genesByRank.computeIfAbsent(rankPerGene[index], unused -> new ArrayList<>()).add(geneKey);
        index++;
    }

    Map<Integer, RankValue> result = new HashMap<>();
    int lastRank = -1;
    for (int currentRank : rankPerGene) {
        // skip genes without a rank, and ranks handled on the previous iteration
        if (currentRank == -1 || currentRank == lastRank)
            continue;
        lastRank = currentRank;
        final boolean significant = topRankUsable && (isNegative ? currentRank >= topRank : currentRank <= topRank);
        for (Integer geneKey : genesByRank.get(currentRank)) {
            Rank geneRank = geneRanks.get(geneKey);
            result.put(geneKey, new RankValue(geneRank.getRank(), geneRank.getScore(), significant));
        }
    }

    // Remove genes that we don't need
    result.keySet().retainAll(genes);
    BasicRankingOption.normalizeRanks(result);
    return CompletableFuture.completedFuture(Optional.of(result));
}
Also used : Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression) DetermineEnrichmentResultFileReader(org.baderlab.csplugins.enrichmentmap.parsers.DetermineEnrichmentResultFileReader) Collection(java.util.Collection) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) GSEAResult(org.baderlab.csplugins.enrichmentmap.model.GSEAResult) ArrayList(java.util.ArrayList) List(java.util.List) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) SwingUtil(org.baderlab.csplugins.enrichmentmap.view.util.SwingUtil) Map(java.util.Map) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) Optional(java.util.Optional) RankValue(org.baderlab.csplugins.enrichmentmap.view.heatmap.table.RankValue) EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) RankValue(org.baderlab.csplugins.enrichmentmap.view.heatmap.table.RankValue) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression)

Example 3 with Rank

use of org.baderlab.csplugins.enrichmentmap.model.Rank in project EnrichmentMapApp by BaderLab.

the class LegacySessionLoadTest method test_1_LoadedLegacyData.

/**
 * Loads the 2.2 legacy session file and verifies the restored map, creation
 * parameters, edge table, data set, gene sets, enrichments, expressions,
 * ranking and data set files against the known values of that session.
 */
@Test
@SessionFile("em_session_2.2.cys")
public void test_1_LoadedLegacyData() throws Exception {
    // --- map and network ---
    EnrichmentMap em = getEnrichmentMap();
    assertEquals("EM1_Enrichment Map", em.getName());
    CyNetwork emNetwork = networkManager.getNetwork(em.getNetworkID());
    assertNotNull(emNetwork);
    assertEquals(1, em.getDataSetCount());
    assertEquals(14067, em.getNumberOfGenes());
    assertEquals(14067, em.getAllGenes().size());

    // Number of edges: 3339 - that's how many geneset similarity objects there should be!!!
    CyTable edges = emNetwork.getDefaultEdgeTable();
    assertEquals(3339, edges.getRowCount());

    // --- creation parameters ---
    EMCreationParameters creationParams = em.getParams();
    String attributePrefix = creationParams.getAttributePrefix();
    assertEquals("EM1_", attributePrefix);
    assertEquals(0.5, creationParams.getCombinedConstant(), 0.0);
    assertFalse(creationParams.isEMgmt());
    assertEquals("Geneset_Overlap", creationParams.getEnrichmentEdgeType());
    assertTrue(creationParams.isFDR());
    assertEquals(GreatFilter.HYPER, creationParams.getGreatFilter());
    assertEquals(0.005, creationParams.getPvalue(), 0.0);
    assertEquals(1.0, creationParams.getPvalueMin(), 0.0);
    assertEquals(0.1, creationParams.getQvalue(), 0.0);
    assertEquals(1.0, creationParams.getQvalueMin(), 0.0);
    assertEquals(0.5, creationParams.getSimilarityCutoff(), 0.0);
    assertEquals(SimilarityMetric.OVERLAP, creationParams.getSimilarityMetric());
    // (disabled check) assertFalse(params.isDistinctExpressionSets());

    // --- one specific similarity edge ---
    String sourceGeneSet = "RESOLUTION OF SISTER CHROMATID COHESION%REACTOME%REACT_150425.2";
    String targetGeneSet = "CHROMOSOME, CENTROMERIC REGION%GO%GO:0000775";
    String edgeName = sourceGeneSet + " (Geneset_Overlap) " + targetGeneSet;
    Collection<CyRow> matchingEdges = edges.getMatchingRows(CyNetwork.NAME, edgeName);
    assertEquals(1, matchingEdges.size());
    CyRow edgeRow = matchingEdges.iterator().next();
    assertEquals("Geneset_Overlap", edgeRow.get(CyEdge.INTERACTION, String.class));
    assertEquals(0.6097560975609756, EMStyleBuilder.Columns.EDGE_SIMILARITY_COEFF.get(edgeRow, attributePrefix), 0.0);

    // --- data set ---
    EMDataSet ds = em.getDataSet("Dataset 1");
    assertNotNull(ds);
    assertSame(em, ds.getMap());
    assertEquals(Method.GSEA, ds.getMethod());
    assertEquals(12653, ds.getDataSetGenes().size());
    assertEquals(389, ds.getGeneSetsOfInterest().getGeneSets().size());
    // (disabled check) assertEquals(17259, dataset.getSetofgenesets().getGenesets().size()); // MKTODO why? what is this used for
    assertEndsWith(ds.getSetOfGeneSets().getFilename(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    for (long nodeSuid : ds.getNodeSuids()) {
        assertNotNull(emNetwork.getNode(nodeSuid));
    }

    // --- one specific gene set ---
    String geneSetName = "NCRNA PROCESSING%GO%GO:0034470";
    GeneSet ncrnaGeneSet = ds.getGeneSetsOfInterest().getGeneSets().get(geneSetName);
    assertEquals(88, ncrnaGeneSet.getGenes().size());
    assertEquals("NCRNA PROCESSING%GO%GO:0034470", ncrnaGeneSet.getName());
    assertEquals("ncRNA processing", ncrnaGeneSet.getDescription());
    assertEquals(Optional.of("GO"), ncrnaGeneSet.getSource());

    // --- enrichment results ---
    SetOfEnrichmentResults enrichmentResults = ds.getEnrichments();
    assertEquals(4756, enrichmentResults.getEnrichments().size());
    assertEndsWith(enrichmentResults.getFilename1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(enrichmentResults.getFilename2(), "gsea_report_for_NT12_1473194913081.xls");
    assertEquals("ES12", enrichmentResults.getPhenotype1());
    assertEquals("NT12", enrichmentResults.getPhenotype2());

    String enrichmentName = "RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201";
    EnrichmentResult enrichment = enrichmentResults.getEnrichments().get(enrichmentName);
    assertTrue(enrichment instanceof GSEAResult);
    GSEAResult gsea = (GSEAResult) enrichment;
    assertEquals("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201", gsea.getName());
    assertEquals(0.42844063, gsea.getES(), 0.0);
    assertEquals(0.45225498, gsea.getFdrqvalue(), 0.0);
    assertEquals(1.0, gsea.getFwerqvalue(), 0.0);
    assertEquals(23, gsea.getGsSize());
    assertEquals(1.1938541, gsea.getNES(), 0.0);
    assertEquals(0.2457786, gsea.getPvalue(), 0.0);
    assertEquals(4689, gsea.getRankAtMax());
    assertEquals(Optional.of("GO"), gsea.getSource());

    // --- expression matrix ---
    GeneExpressionMatrix expressionMatrix = ds.getExpressionSets();
    assertEquals(20326, expressionMatrix.getExpressionUniverse());
    assertEquals(3.686190609, expressionMatrix.getClosesttoZero(), 0.0);
    // (disabled check) assertEndsWith(expressions.getFilename(), "MCF7_ExprMx_v2_names.gct");
    assertEquals(15380.42388, expressionMatrix.getMaxExpression(), 0.0);
    assertEquals(3.686190609, expressionMatrix.getMinExpression(), 0.0);
    assertEquals(20, expressionMatrix.getNumConditions());
    assertEquals(12653, expressionMatrix.getExpressionMatrix().size());
    assertEquals(12653, expressionMatrix.getExpressionMatrix_rowNormalized().size());
    GeneExpression firstExpression = expressionMatrix.getExpressionMatrix().get(0);
    assertEquals("MOCOS", firstExpression.getName());
    assertEquals("MOCOS (molybdenum cofactor sulfurase)", firstExpression.getDescription());
    assertEquals(18, firstExpression.getExpression().length);

    // --- ranking ---
    Ranking gseaRanking = expressionMatrix.getRanks().get("GSEARanking");
    assertEquals(12653, gseaRanking.getAllRanks().size());
    assertEquals(12653, gseaRanking.getRanking().size());
    Rank firstRank = gseaRanking.getRanking().get(0);
    assertEquals("MOCOS", firstRank.getName());
    assertEquals(1238, firstRank.getRank().intValue());
    assertEquals(0.54488367, firstRank.getScore(), 0.0);

    // --- data set files ---
    DataSetFiles dsFiles = ds.getDataSetFiles();
    assertEndsWith(dsFiles.getClassFile(), "ES_NT.cls");
    assertEndsWith(dsFiles.getEnrichmentFileName1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(dsFiles.getEnrichmentFileName2(), "gsea_report_for_NT12_1473194913081.xls");
    // (disabled check) assertEndsWith(files.getExpressionFileName(), "MCF7_ExprMx_v2_names.gct");
    assertEndsWith(dsFiles.getGMTFileName(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    assertEndsWith(dsFiles.getGseaHtmlReportFile(), "estrogen_treatment_12hr_gsea_enrichment_results.Gsea.1473194913081/index.html");
    assertEndsWith(dsFiles.getRankedFile(), "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    assertEquals("ES12", dsFiles.getPhenotype1());
    assertEquals("NT12", dsFiles.getPhenotype2());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EMCreationParameters(org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters) GSEAResult(org.baderlab.csplugins.enrichmentmap.model.GSEAResult) CyNetwork(org.cytoscape.model.CyNetwork) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) CyRow(org.cytoscape.model.CyRow) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) CyTable(org.cytoscape.model.CyTable) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) SetOfEnrichmentResults(org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults) BaseIntegrationTest(org.baderlab.csplugins.enrichmentmap.integration.BaseIntegrationTest) Test(org.junit.Test) SessionFile(org.baderlab.csplugins.enrichmentmap.integration.SessionFile)

Example 4 with Rank

use of org.baderlab.csplugins.enrichmentmap.model.Rank in project EnrichmentMapApp by BaderLab.

the class BasicRankingOption method computeRanking.

/**
 * Builds a gene-to-{@link RankValue} map from the underlying ranking, restricted
 * to the requested genes.
 *
 * <p>Every entry of {@code ranking.getRanking()} is converted first; entries whose
 * key is not contained in {@code genes} are then dropped, and the remaining values
 * are handed to {@code normalizeRanks} before being returned.
 *
 * @param genes keys of the genes to keep in the result
 * @return an already-completed future holding a non-empty {@link Optional} with the
 *         filtered map
 */
@Override
public CompletableFuture<Optional<Map<Integer, RankValue>>> computeRanking(Collection<Integer> genes) {
    Map<Integer, Rank> allRanks = ranking.getRanking();
    Map<Integer, RankValue> rankValues = new HashMap<>();
    for (Map.Entry<Integer, Rank> geneRank : allRanks.entrySet()) {
        Rank source = geneRank.getValue();
        RankValue converted = new RankValue(source.getRank(), source.getScore(), false);
        rankValues.put(geneRank.getKey(), converted);
    }
    // Drop every gene the caller did not ask for.
    rankValues.keySet().retainAll(genes);
    normalizeRanks(rankValues);
    Optional<Map<Integer, RankValue>> wrapped = Optional.of(rankValues);
    return CompletableFuture.completedFuture(wrapped);
}
Also used : HashMap(java.util.HashMap) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) HashMap(java.util.HashMap) Map(java.util.Map) RankValue(org.baderlab.csplugins.enrichmentmap.view.heatmap.table.RankValue)

Example 5 with Rank

use of org.baderlab.csplugins.enrichmentmap.model.Rank in project EnrichmentMapApp by BaderLab.

the class RanksFileReaderTask method parse.

/**
 * Parses the rank file named by {@code RankFileName} and registers the resulting
 * {@link Ranking} on the dataset's expression sets under {@code ranks_name}.
 *
 * <p>Two layouts are accepted per line (tab separated):
 * <ul>
 *   <li>5 columns (name, description, empty, empty, score) - the file generated by GSEA;</li>
 *   <li>2 columns (name, score) - the generic layout.</li>
 * </ul>
 * Lines starting with {@code #} are comments; a comment of the form
 * {@code # Ranks Name : My Ranks} sets {@code ranks_name}. Lines with any other
 * column count are reported on stdout and skipped. The first line whose score is
 * not a number is treated as a header and skipped; any later one is an error.
 *
 * <p>When the ranks are GSEA-defined (5-column line, or a GSEA dataset that is not
 * being loaded from the heat map) the rank of a gene is its position among the
 * scored lines of the file. Otherwise ranks are derived afterwards from the sorted
 * scores.
 *
 * @param taskMonitor monitor used for status/progress; a {@link NullTaskMonitor}
 *                    is substituted when {@code null}
 * @throws IOException declared by the signature; presumably raised while reading the file
 * @throws IllegalThreadStateException if a score after the header line is not a valid
 *                                     number, or if none of the genes in the file are known
 *                                     to the enrichment map
 */
public void parse(TaskMonitor taskMonitor) throws IOException {
    if (taskMonitor == null) {
        taskMonitor = new NullTaskMonitor();
    }
    List<String> lines = DatasetLineParser.readLines(RankFileName);
    int currentProgress = 0;
    int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Rank file - " + maxValue + " rows");
    EnrichmentMap map = dataset.getMap();

    // The number of scores is not known yet, but it cannot exceed the number of lines.
    Double[] scoreCollector = new Double[lines.size()];
    boolean gseaDefinedRanks = false;
    // Only the first unparsable score is tolerated (assumed to be the header row).
    boolean headerSkipped = false;
    Map<Integer, Rank> ranks = new HashMap<>();
    // number of found scores
    int nScores = 0;

    for (String line : lines) {
        // Comment lines are ignored, except that they may carry the name of the ranks.
        if (line.startsWith("#")) {
            // e.g.: "# Ranks Name : My Ranks"
            if (Pattern.matches("^# *Ranks[ _-]?Name *:.+", line)) {
                String rawName = line.split(":", 2)[1];
                int start = 0;
                while (start < rawName.length() && rawName.charAt(start) == ' ') {
                    start++;
                }
                this.ranks_name = rawName.substring(start);
            }
            continue;
        }

        String[] tokens = line.split("\t");
        String name = tokens[0].toUpperCase();

        // The score is the last column of the 5-column layout, or the 2nd of the 2-column one.
        int scoreColumn;
        if (tokens.length == 5) {
            scoreColumn = 4;
        } else if (tokens.length == 2) {
            scoreColumn = 1;
        } else {
            System.out.println("Invalid number of tokens line of Rank File (should be 5 or 2)");
            // skip invalid line
            continue;
        }

        double score;
        try {
            score = Double.parseDouble(tokens[scoreColumn]);
        } catch (NumberFormatException nfe) {
            if (!headerSkipped) {
                headerSkipped = true;
                continue;
            }
            IllegalThreadStateException invalidScore =
                    new IllegalThreadStateException("rank value for " + tokens[0] + " is not a valid number");
            invalidScore.initCause(nfe);
            throw invalidScore;
        }
        nScores++;

        boolean rankedByFileOrder =
                (tokens.length == 5) || (dataset.getMethod() == Method.GSEA && !loadFromHeatmap);
        if (rankedByFileOrder) {
            gseaDefinedRanks = true;
        }

        // add score to array of scores
        scoreCollector[nScores - 1] = score;

        // Only genes known to the enrichment map receive a Rank.
        Integer geneKey = map.getHashFromGene(name);
        if (geneKey != null) {
            Rank currentRanking = rankedByFileOrder
                    ? new Rank(name, score, nScores)
                    : new Rank(name, score);
            ranks.put(geneKey, currentRanking);
        }

        // TaskMonitor progress is a fraction between 0.0 and 1.0.
        taskMonitor.setProgress((double) currentProgress / maxValue);
        currentProgress++;
    }

    // none of the genes are in the gene list
    if (ranks.isEmpty()) {
        throw new IllegalThreadStateException("None of the genes in the rank file are found in the expression file.  Make sure the identifiers of the two files match.");
    }

    // Trim the collector down to the scores that were actually read.
    Double[] sortedScores = Arrays.copyOf(scoreCollector, nScores);

    // Sort descending by default; if every score lies within [-1, 1] sort ascending
    // instead, so that the smallest value gets the best rank.
    Arrays.sort(sortedScores, Collections.reverseOrder());
    if (sortedScores[0] <= 1 && sortedScores[sortedScores.length - 1] >= -1) {
        Arrays.sort(sortedScores);
    }

    // Map each distinct score to the first position at which it occurs.
    Map<Double, Integer> score2ranks = new HashMap<>();
    for (int j = 0; j < sortedScores.length; j++) {
        if (!score2ranks.containsKey(sortedScores[j])) {
            score2ranks.put(sortedScores[j], j);
        }
    }

    // Only update the ranks if they were not already defined by the order of the file.
    if (!gseaDefinedRanks) {
        for (Rank currentRanking : ranks.values()) {
            Integer rank = score2ranks.get(currentRanking.getScore());
            currentRanking.setRank(rank);
        }
    }

    // create a new Ranking
    Ranking newRanking = new Ranking();
    ranks.forEach(newRanking::addRank);
    // add the Ranks to the expression file ranking
    dataset.getExpressionSets().addRanks(ranks_name, newRanking);
}
Also used : HashMap(java.util.HashMap) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) NullTaskMonitor(org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor) HashSet(java.util.HashSet)

Aggregations

Rank (org.baderlab.csplugins.enrichmentmap.model.Rank)5 HashMap (java.util.HashMap)4 Ranking (org.baderlab.csplugins.enrichmentmap.model.Ranking)4 Map (java.util.Map)3 EMDataSet (org.baderlab.csplugins.enrichmentmap.model.EMDataSet)3 EnrichmentMap (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap)3 HashSet (java.util.HashSet)2 List (java.util.List)2 DataSetFiles (org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)2 Method (org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method)2 EnrichmentResult (org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult)2 GSEAResult (org.baderlab.csplugins.enrichmentmap.model.GSEAResult)2 GeneExpression (org.baderlab.csplugins.enrichmentmap.model.GeneExpression)2 GeneExpressionMatrix (org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix)2 GeneSet (org.baderlab.csplugins.enrichmentmap.model.GeneSet)2 SetOfEnrichmentResults (org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults)2 CyNetwork (org.cytoscape.model.CyNetwork)2 Inject (com.google.inject.Inject)1 File (java.io.File)1 InputStream (java.io.InputStream)1