Search in sources :

Example 6 with EnrichmentResult

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult in project EnrichmentMapApp by BaderLab.

the class FileReaderTest method testGSEAEDBEnrichmentsReader.

// Checks that ParseEDBEnrichmentResults loads the expected results from a GSEA results.edb file.
@Test
public void testGSEAEDBEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    // Only one enrichment file (the edb file) is configured for this test.
    final String edbFile = "src/test/resources/org/baderlab/csplugins/enrichmentmap/task/LoadDataset/GSEA_example_results/edb/results.edb";

    // Fresh parameters pointing dataset 1 at the edb file.
    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setEnrichmentFileName1(edbFile);

    // Build the map and its default dataset, then run the parser on that dataset.
    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles files = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, files);
    new ParseEDBEnrichmentResults(dataset).run(taskMonitor);

    // The parsed results are read back through the map, keyed by gene set name.
    Map<String, EnrichmentResult> enrichments = map.getDataSet(LegacySupport.DATASET1).getEnrichments().getEnrichments();
    assertEquals(14, enrichments.size());

    // First spot check: PROTEASOME ACTIVATOR COMPLEX (positive ES).
    GSEAResult activatorComplex = (GSEAResult) enrichments.get("PROTEASOME ACTIVATOR COMPLEX%GO%GO:0008537");
    assertEquals(0.2271, activatorComplex.getPvalue(), 0.0);
    assertEquals(0.2447, activatorComplex.getFdrqvalue(), 0.0);
    assertEquals(0.7852, activatorComplex.getES(), 0.0);
    assertEquals(1.1793, activatorComplex.getNES(), 0.0);
    assertEquals(6, activatorComplex.getRankAtMax());
    assertEquals(2, activatorComplex.getGsSize());

    // Second spot check: PROTEASOME COMPLEX (negative ES).
    GSEAResult proteasomeComplex = (GSEAResult) enrichments.get("PROTEASOME COMPLEX%GO%GO:0000502");
    assertEquals(0.4545, proteasomeComplex.getPvalue(), 0.0);
    assertEquals(0.8650, proteasomeComplex.getFdrqvalue(), 0.0);
    assertEquals(-0.4707, proteasomeComplex.getES(), 0.0);
    assertEquals(-0.9696, proteasomeComplex.getNES(), 0.0);
    // Rank at max in the edb file differs from the excel reports: the excel files convert the
    // rank as if counting from the bottom of the ranked list, whereas the edb file counts from
    // the top (going from positive to negative ES scores).
    assertEquals(15, proteasomeComplex.getRankAtMax());
    assertEquals(39, proteasomeComplex.getGsSize());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Test(org.junit.Test)

Example 7 with EnrichmentResult

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult in project EnrichmentMapApp by BaderLab.

the class FileReaderTest method testGSEAEnrichmentsReader.

// Checks that ParseGSEAEnrichmentResults loads the expected results from the two GSEA report files.
@Test
public void testGSEAEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    // GSEA creates two enrichment result files; both are configured on dataset 1.
    final String firstReport = "src/test/resources/org/baderlab/csplugins/enrichmentmap/GSEA_enrichments1.xls";
    final String secondReport = "src/test/resources/org/baderlab/csplugins/enrichmentmap/GSEA_enrichments2.xls";

    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setEnrichmentFileName1(firstReport);
    params.getFiles().get(LegacySupport.DATASET1).setEnrichmentFileName2(secondReport);

    // Build the map and its default dataset, then run the parser on that dataset.
    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles files = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, files);
    new ParseGSEAEnrichmentResults(dataset).run(taskMonitor);

    // The parsed results are read back through the map, keyed by gene set name.
    Map<String, EnrichmentResult> enrichments = map.getDataSet(LegacySupport.DATASET1).getEnrichments().getEnrichments();
    assertEquals(40, enrichments.size());

    // Example from file 1: ANTIGEN PROCESSING AND PRESENTATION (positive ES).
    GSEAResult antigenProcessing = (GSEAResult) enrichments.get("ANTIGEN PROCESSING AND PRESENTATION%KEGG%HSA04612");
    assertEquals(0.0, antigenProcessing.getPvalue(), 0.0);
    assertEquals(0.086938426, antigenProcessing.getFdrqvalue(), 0.0);
    assertEquals(0.6854155, antigenProcessing.getES(), 0.0);
    assertEquals(2.1194055, antigenProcessing.getNES(), 0.0);
    assertEquals(836, antigenProcessing.getRankAtMax());
    assertEquals(27, antigenProcessing.getGsSize());

    // Example from file 2: EMBRYONIC HEART TUBE MORPHOGENESIS (negative ES).
    GSEAResult heartTube = (GSEAResult) enrichments.get("EMBRYONIC HEART TUBE MORPHOGENESIS%GO%GO:0003143");
    assertEquals(0.040152963, heartTube.getPvalue(), 0.0);
    assertEquals(1.0, heartTube.getFdrqvalue(), 0.0);
    assertEquals(-0.49066687, heartTube.getES(), 0.0);
    assertEquals(-1.477554, heartTube.getNES(), 0.0);
    assertEquals(1597, heartTube.getRankAtMax());
    assertEquals(17, heartTube.getGsSize());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Test(org.junit.Test)

Example 8 with EnrichmentResult

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult in project EnrichmentMapApp by BaderLab.

the class LegacySessionLoadTest method test_1_LoadedLegacyData.

// Integration test: loads the session file named in @SessionFile and checks that the
// EnrichmentMap model obtained from getEnrichmentMap() matches the expected values,
// section by section (map, parameters, one edge, dataset, gene set, enrichments,
// expressions, ranks, and file paths).
@Test
@SessionFile("em_session_2.2.cys")
public void test_1_LoadedLegacyData() throws Exception {
    // --- Map-level data and the backing network ---
    EnrichmentMap map = getEnrichmentMap();
    assertEquals("EM1_Enrichment Map", map.getName());
    CyNetwork network = networkManager.getNetwork(map.getNetworkID());
    assertNotNull(network);
    assertEquals(1, map.getDataSetCount());
    assertEquals(14067, map.getNumberOfGenes());
    assertEquals(14067, map.getAllGenes().size());
    // Number of edges: 3339 - that's how many geneset similarity objects there should be!!!
    CyTable edgeTable = network.getDefaultEdgeTable();
    assertEquals(3339, edgeTable.getRowCount());
    // --- Creation parameters ---
    EMCreationParameters params = map.getParams();
    String prefix = params.getAttributePrefix();
    assertEquals("EM1_", prefix);
    assertEquals(0.5, params.getCombinedConstant(), 0.0);
    assertFalse(params.isEMgmt());
    assertEquals("Geneset_Overlap", params.getEnrichmentEdgeType());
    assertTrue(params.isFDR());
    assertEquals(GreatFilter.HYPER, params.getGreatFilter());
    assertEquals(0.005, params.getPvalue(), 0.0);
    assertEquals(1.0, params.getPvalueMin(), 0.0);
    assertEquals(0.1, params.getQvalue(), 0.0);
    assertEquals(1.0, params.getQvalueMin(), 0.0);
    assertEquals(0.5, params.getSimilarityCutoff(), 0.0);
    assertEquals(SimilarityMetric.OVERLAP, params.getSimilarityMetric());
    //		assertFalse(params.isDistinctExpressionSets());
    // --- A single edge, looked up by its name "<geneset1> (Geneset_Overlap) <geneset2>" ---
    String geneset1 = "RESOLUTION OF SISTER CHROMATID COHESION%REACTOME%REACT_150425.2";
    String geneset2 = "CHROMOSOME, CENTROMERIC REGION%GO%GO:0000775";
    Collection<CyRow> rows = edgeTable.getMatchingRows(CyNetwork.NAME, geneset1 + " (Geneset_Overlap) " + geneset2);
    assertEquals(1, rows.size());
    CyRow row = rows.iterator().next();
    assertEquals("Geneset_Overlap", row.get(CyEdge.INTERACTION, String.class));
    assertEquals(0.6097560975609756, EMStyleBuilder.Columns.EDGE_SIMILARITY_COEFF.get(row, prefix), 0.0);
    // --- The single dataset ---
    EMDataSet dataset = map.getDataSet("Dataset 1");
    assertNotNull(dataset);
    assertSame(map, dataset.getMap());
    assertEquals(Method.GSEA, dataset.getMethod());
    assertEquals(12653, dataset.getDataSetGenes().size());
    assertEquals(389, dataset.getGeneSetsOfInterest().getGeneSets().size());
    //		assertEquals(17259, dataset.getSetofgenesets().getGenesets().size()); // MKTODO why? what is this used for
    // NOTE(review): assertEndsWith is a helper defined outside this excerpt; presumably it
    // asserts that the first argument ends with the second - confirm against its definition.
    assertEndsWith(dataset.getSetOfGeneSets().getFilename(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    // Every node SUID recorded on the dataset must resolve to a node in the network.
    for (long suid : dataset.getNodeSuids()) {
        assertNotNull(network.getNode(suid));
    }
    // --- One gene set of interest ---
    GeneSet geneset = dataset.getGeneSetsOfInterest().getGeneSets().get("NCRNA PROCESSING%GO%GO:0034470");
    assertEquals(88, geneset.getGenes().size());
    assertEquals("NCRNA PROCESSING%GO%GO:0034470", geneset.getName());
    assertEquals("ncRNA processing", geneset.getDescription());
    assertEquals(Optional.of("GO"), geneset.getSource());
    // --- Enrichment results and one GSEA result in detail ---
    SetOfEnrichmentResults enrichments = dataset.getEnrichments();
    assertEquals(4756, enrichments.getEnrichments().size());
    assertEndsWith(enrichments.getFilename1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(enrichments.getFilename2(), "gsea_report_for_NT12_1473194913081.xls");
    assertEquals("ES12", enrichments.getPhenotype1());
    assertEquals("NT12", enrichments.getPhenotype2());
    EnrichmentResult result = enrichments.getEnrichments().get("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201");
    assertTrue(result instanceof GSEAResult);
    GSEAResult gseaResult = (GSEAResult) result;
    assertEquals("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201", gseaResult.getName());
    assertEquals(0.42844063, gseaResult.getES(), 0.0);
    assertEquals(0.45225498, gseaResult.getFdrqvalue(), 0.0);
    assertEquals(1.0, gseaResult.getFwerqvalue(), 0.0);
    assertEquals(23, gseaResult.getGsSize());
    assertEquals(1.1938541, gseaResult.getNES(), 0.0);
    assertEquals(0.2457786, gseaResult.getPvalue(), 0.0);
    assertEquals(4689, gseaResult.getRankAtMax());
    assertEquals(Optional.of("GO"), gseaResult.getSource());
    // --- Expression matrix ---
    GeneExpressionMatrix expressions = dataset.getExpressionSets();
    assertEquals(20326, expressions.getExpressionUniverse());
    assertEquals(3.686190609, expressions.getClosesttoZero(), 0.0);
    //		assertEndsWith(expressions.getFilename(), "MCF7_ExprMx_v2_names.gct");
    assertEquals(15380.42388, expressions.getMaxExpression(), 0.0);
    assertEquals(3.686190609, expressions.getMinExpression(), 0.0);
    assertEquals(20, expressions.getNumConditions());
    assertEquals(12653, expressions.getExpressionMatrix().size());
    assertEquals(12653, expressions.getExpressionMatrix_rowNormalized().size());
    // The expression entry stored under key 0 is expected to be MOCOS.
    GeneExpression expression = expressions.getExpressionMatrix().get(0);
    assertEquals("MOCOS", expression.getName());
    assertEquals("MOCOS (molybdenum cofactor sulfurase)", expression.getDescription());
    assertEquals(18, expression.getExpression().length);
    // --- Ranks stored under the "GSEARanking" name ---
    Ranking ranking = expressions.getRanks().get("GSEARanking");
    assertEquals(12653, ranking.getAllRanks().size());
    assertEquals(12653, ranking.getRanking().size());
    Rank rank = ranking.getRanking().get(0);
    assertEquals("MOCOS", rank.getName());
    assertEquals(1238, rank.getRank().intValue());
    assertEquals(0.54488367, rank.getScore(), 0.0);
    // --- File paths recorded on the dataset (only the path suffixes are compared) ---
    DataSetFiles files = dataset.getDataSetFiles();
    assertEndsWith(files.getClassFile(), "ES_NT.cls");
    assertEndsWith(files.getEnrichmentFileName1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(files.getEnrichmentFileName2(), "gsea_report_for_NT12_1473194913081.xls");
    //		assertEndsWith(files.getExpressionFileName(), "MCF7_ExprMx_v2_names.gct");
    assertEndsWith(files.getGMTFileName(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    assertEndsWith(files.getGseaHtmlReportFile(), "estrogen_treatment_12hr_gsea_enrichment_results.Gsea.1473194913081/index.html");
    assertEndsWith(files.getRankedFile(), "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    assertEquals("ES12", files.getPhenotype1());
    assertEquals("NT12", files.getPhenotype2());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EMCreationParameters(org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters) GSEAResult(org.baderlab.csplugins.enrichmentmap.model.GSEAResult) CyNetwork(org.cytoscape.model.CyNetwork) Rank(org.baderlab.csplugins.enrichmentmap.model.Rank) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) CyRow(org.cytoscape.model.CyRow) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) CyTable(org.cytoscape.model.CyTable) Ranking(org.baderlab.csplugins.enrichmentmap.model.Ranking) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) SetOfEnrichmentResults(org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults) BaseIntegrationTest(org.baderlab.csplugins.enrichmentmap.integration.BaseIntegrationTest) Test(org.junit.Test) SessionFile(org.baderlab.csplugins.enrichmentmap.integration.SessionFile)

Example 9 with EnrichmentResult

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult in project EnrichmentMapApp by BaderLab.

the class ParseBingoEnrichmentResults method parseLines.

/**
 * Parses the lines of a BiNGO enrichment result file into gene sets and enrichment results
 * on the given dataset.
 *
 * <p>BiNGO files carry no separate gene set definitions, so each result row both defines a
 * gene set (added to {@code dataset.getSetOfGeneSets()}) and yields a {@link GenericResult}
 * (added to {@code dataset.getEnrichments()}). Everything before the header row is skipped.
 * The expected tab-separated columns are:
 * <pre>
 * GO-ID  p-value  corr p-value  x  n  X  N  Description  Genes in test set
 *  (1) GO-ID             numerical part of the GO term
 *  (2) p-value           nominal p-value
 *  (3) corr p-value      corrected p-value, used as the FDR q-value
 *  (4) x                 genes in the subset of interest with this annotation (gene set size)
 *  (5) n                 genes in the universe with this annotation
 *  (6) X                 genes in the subset
 *  (7) N                 genes in the universe
 *  (8) Description       GO term name, used as gene set name and description
 *  (9) Genes in test set '|'-separated genes, already filtered to the subset of interest
 * </pre>
 *
 * @param lines       all lines of the file, in order
 * @param dataset     dataset whose gene sets, enrichments and map genes are updated
 * @param taskMonitor progress sink; a {@link NullTaskMonitor} is substituted when {@code null}
 * @throws IllegalThreadStateException if no BiNGO header row is found (exception type kept for
 *         compatibility with existing callers)
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
    if (taskMonitor == null) {
        taskMonitor = new NullTaskMonitor();
    }
    taskMonitor.setTitle("Parsing Bingo Enrichment Result file");

    Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
    EnrichmentMap map = dataset.getMap();
    Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();

    final int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Generic Results file -" + maxValue + " rows");

    // Scan for the header row. Starting the scan inside the loop (rather than reading
    // line 0 up front) makes an empty file fail with the "missing data" error below
    // instead of an IndexOutOfBoundsException.
    int headerIndex = 0;
    for (; headerIndex < maxValue; headerIndex++) {
        String[] header = lines.get(headerIndex).split("\t");
        if (header.length == 9 && header[0].equalsIgnoreCase("GO-ID")
                && header[8].equalsIgnoreCase("Genes in test set")) {
            break;
        }
    }
    if (headerIndex == maxValue) {
        throw new IllegalThreadStateException("Bingo results file is missing data.");
    }

    for (int i = headerIndex + 1; i < maxValue; i++) {
        // Assuming org.cytoscape.work.TaskMonitor: setProgress takes a fraction in
        // [0.0, 1.0], not a 0..100 percentage.
        taskMonitor.setProgress((double) i / maxValue);

        String line = lines.get(i);
        // Blank lines (e.g. trailing newlines) carry no result; skip them.
        if (line.trim().isEmpty()) {
            continue;
        }
        String[] tokens = line.split("\t");

        // The 8th column is both the gene set name and its description.
        final String name = tokens[7].toUpperCase().trim();
        final String description = tokens[7].toUpperCase();

        // When there are two different species the gene set may already exist;
        // in that case the genes of this row are added to the existing ones.
        ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
        if (genesets.containsKey(name)) {
            builder.addAll(genesets.get(name).getGenes());
        }

        // String.split drops trailing empty fields, so a row with an empty gene
        // column has only 8 tokens; treat that as "no genes".
        String[] geneTokens = tokens.length > 8 ? tokens[8].split("\\|") : new String[0];
        for (String geneToken : geneTokens) {
            String gene = geneToken.toUpperCase();
            if (map.containsGene(gene)) {
                // Already known: reuse its key.
                builder.add(map.getHashFromGene(gene));
            } else if (!gene.isEmpty()) {
                Integer hash = map.addGene(gene).get();
                builder.add(hash);
            }
        }
        genesets.put(name, new GeneSet(name, description, builder.build()));

        // Empty numeric cells keep their defaults.
        double pvalue = tokens[1].isEmpty() ? 1.0 : Double.parseDouble(tokens[1]);
        // The count (x) is the size of the gene set restricted by the gene list.
        int gsSize = tokens[3].isEmpty() ? 0 : Integer.parseInt(tokens[3]);
        // The corrected p-value (3rd column) is used as the FDR q-value.
        double fdrQvalue = tokens[2].isEmpty() ? 1.0 : Double.parseDouble(tokens[2]);

        GenericResult result = new GenericResult(name, description, pvalue, gsSize, fdrQvalue);

        // A gene set can occur more than once (e.g. in both phenotypes); keep the
        // result with the lower p-value (ticket #149).
        GenericResult existing = (GenericResult) results.get(name);
        if (existing == null || result.getPvalue() < existing.getPvalue()) {
            results.put(name, result);
        }
    }

    // BiNGO results always provide a corrected p-value, so FDR is always enabled.
    dataset.getMap().getParams().setFDR(true);
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) GenericResult(org.baderlab.csplugins.enrichmentmap.model.GenericResult) ImmutableSet(com.google.common.collect.ImmutableSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) NullTaskMonitor(org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor)

Example 10 with EnrichmentResult

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult in project EnrichmentMapApp by BaderLab.

the class ParseDavidEnrichmentResults method parseLines.

/**
 * Parses the lines of a DAVID enrichment result file into gene sets and enrichment results
 * on the given dataset.
 *
 * <p>DAVID files carry no separate gene set definitions, so each result row both defines a
 * gene set (added to {@code dataset.getSetOfGeneSets()}) and yields a {@link GenericResult}
 * (added to {@code dataset.getEnrichments()}). The first line must be the 13-column header:
 * <pre>
 * Category  Term  Count  %  PValue  Genes  List Total  Pop Hits  Pop Total
 * Fold Enrichment  Bonferroni  Benjamini  FDR
 *  (1)  Category    used as the gene set description
 *  (2)  Term        gene set name
 *  (3)  Count       number of input-list genes mapping to the term (gene set size)
 *  (5)  PValue      nominal p-value
 *  (6)  Genes       ", "-separated genes from the loaded list, i.e. already filtered
 *  (7)  List Total  number of genes in the gene list mapping to the category
 *  (8)  Pop Hits    number of background genes mapping to the term
 *  (9)  Pop Total   number of background genes mapping to the category
 *  (12) Benjamini   used as the FDR q-value
 * </pre>
 *
 * @param lines       all lines of the file, header first
 * @param dataset     dataset whose gene sets, enrichments and map genes are updated
 * @param taskMonitor progress sink; a {@link NullTaskMonitor} is substituted when {@code null}
 * @throws IllegalThreadStateException if the file is empty or its header does not have 13
 *         columns (exception type kept for compatibility with existing callers)
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
    if (taskMonitor == null) {
        taskMonitor = new NullTaskMonitor();
    }
    taskMonitor.setTitle("Parsing David Enrichment Result file");

    Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
    EnrichmentMap map = dataset.getMap();
    Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();

    final int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Generic Results file - " + maxValue + " rows");

    // Validate the header. An empty file is reported with the same "missing data"
    // error instead of failing with an IndexOutOfBoundsException.
    if (lines.isEmpty() || lines.get(0).split("\t").length != 13) {
        throw new IllegalThreadStateException("David results file is missing data.");
    }

    // Line 0 is the header, so data rows start at index 1.
    for (int i = 1; i < maxValue; i++) {
        // Assuming org.cytoscape.work.TaskMonitor: setProgress takes a fraction in
        // [0.0, 1.0], not a 0..100 percentage.
        taskMonitor.setProgress((double) i / maxValue);

        String line = lines.get(i);
        // Blank lines (e.g. trailing newlines) carry no result; skip them.
        if (line.trim().isEmpty()) {
            continue;
        }
        String[] tokens = line.split("\t");

        // Column 2 is the gene set name, column 1 (category) serves as description.
        final String name = tokens[1].toUpperCase().trim();
        final String description = tokens[0].toUpperCase();

        // When there are two different species the gene set may already exist;
        // in that case the genes of this row are added to the existing ones.
        ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
        if (genesets.containsKey(name)) {
            builder.addAll(genesets.get(name).getGenes());
        }

        for (String geneToken : tokens[5].split(", ")) {
            String gene = geneToken.toUpperCase();
            if (map.containsGene(gene)) {
                // Already known: reuse its key.
                builder.add(map.getHashFromGene(gene));
            } else if (!gene.isEmpty()) {
                Integer hash = map.addGene(gene).get();
                builder.add(hash);
            }
        }
        genesets.put(name, new GeneSet(name, description, builder.build()));

        // Empty numeric cells keep their defaults.
        double pvalue = tokens[4].isEmpty() ? 1.0 : Double.parseDouble(tokens[4]);
        // The Count is the size of the gene set (restricted by the gene list).
        int gsSize = tokens[2].isEmpty() ? 0 : Integer.parseInt(tokens[2]);
        // The Benjamini value is used as FDR q-value. String.split drops trailing
        // empty fields, so the column may be missing altogether when the last
        // columns of a row are empty.
        double fdrQvalue = (tokens.length > 11 && !tokens[11].isEmpty())
                ? Double.parseDouble(tokens[11])
                : 1.0;

        GenericResult result = new GenericResult(name, description, pvalue, gsSize, fdrQvalue);

        // A gene set can occur more than once (e.g. in both phenotypes); keep the
        // result with the lower p-value (ticket #149).
        GenericResult existing = (GenericResult) results.get(name);
        if (existing == null || result.getPvalue() < existing.getPvalue()) {
            results.put(name, result);
        }
    }

    // The Benjamini column is always read as the q-value, so FDR is always enabled.
    dataset.getMap().getParams().setFDR(true);
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) GenericResult(org.baderlab.csplugins.enrichmentmap.model.GenericResult) ImmutableSet(com.google.common.collect.ImmutableSet) GeneSet(org.baderlab.csplugins.enrichmentmap.model.GeneSet) NullTaskMonitor(org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor)

Aggregations

EnrichmentResult (org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult)15 EnrichmentMap (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap)10 EMDataSet (org.baderlab.csplugins.enrichmentmap.model.EMDataSet)8 GeneSet (org.baderlab.csplugins.enrichmentmap.model.GeneSet)7 DataSetFiles (org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)6 GenericResult (org.baderlab.csplugins.enrichmentmap.model.GenericResult)6 NullTaskMonitor (org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor)6 Test (org.junit.Test)6 Method (org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method)5 EnrichmentMapParameters (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters)5 ImmutableSet (com.google.common.collect.ImmutableSet)4 GSEAResult (org.baderlab.csplugins.enrichmentmap.model.GSEAResult)4 SetOfEnrichmentResults (org.baderlab.csplugins.enrichmentmap.model.SetOfEnrichmentResults)3 EMCreationParameters (org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters)2 Ranking (org.baderlab.csplugins.enrichmentmap.model.Ranking)2 CyRow (org.cytoscape.model.CyRow)2 HashMap (java.util.HashMap)1 Set (java.util.Set)1 BaseIntegrationTest (org.baderlab.csplugins.enrichmentmap.integration.BaseIntegrationTest)1 SessionFile (org.baderlab.csplugins.enrichmentmap.integration.SessionFile)1