Search in sources :

Example 16 with DataSetFiles

use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.

the class GSEAResolver method resolveRPT.

/**
 * Builds the parameters of a GSEA data set from the contents of an RPT file.
 *
 * <p>The RPT parameters are parsed first; when parsing yields nothing the result is empty.
 * Otherwise the GMT, the two enrichment reports, the ranked gene list and the expression
 * file are looked up. If none of those five could be resolved the result is empty as well.
 * The class file is looked up too, but it does not take part in that check.
 *
 * @param gseaFolder the GSEA results folder, used to locate the GMT and the result files
 *                   and to derive the data set name
 * @param rptFile    the RPT file whose parameters are parsed
 * @return the resolved data set parameters (method {@code GSEA}), or an empty Optional
 */
private static Optional<DataSetParameters> resolveRPT(Path gseaFolder, Path rptFile) {
    Optional<Map<String, String>> parsed = parseRPTParameters(rptFile);
    if (!parsed.isPresent()) {
        return Optional.empty();
    }
    Map<String, String> params = parsed.get();

    // Files and values that come straight from the RPT parameters
    Optional<Path> gmt = getRptGmt(gseaFolder, params);
    String[] phenotypes = getRptPhenotypes(params);
    Optional<Path> classFile = getRptClassFile(params);

    // Result file names are built from the two phenotypes and the producer timestamp
    String suffix = "_" + params.get("producer_timestamp") + ".xls";
    String first = phenotypes[0];
    String second = phenotypes[1];

    Optional<Path> enrichments1 = getRptResultsFile(gseaFolder, "gsea_report_for_" + first + suffix, params);
    Optional<Path> enrichments2 = getRptResultsFile(gseaFolder, "gsea_report_for_" + second + suffix, params);
    Optional<Path> ranks = getRptResultsFile(gseaFolder, "ranked_gene_list_" + first + "_versus_" + second + suffix, params);
    Optional<Path> expression = getRptExpressionFile(params);

    boolean anyResolved = gmt.isPresent()
            || enrichments1.isPresent()
            || enrichments2.isPresent()
            || ranks.isPresent()
            || expression.isPresent();
    if (!anyResolved) {
        return Optional.empty();
    }

    DataSetFiles files = new DataSetFiles();
    files.setPhenotype1(phenotypes[0]);
    files.setPhenotype2(phenotypes[1]);
    if (gmt.isPresent()) {
        files.setGMTFileName(gmt.get().toString());
    }
    if (enrichments1.isPresent()) {
        files.setEnrichmentFileName1(enrichments1.get().toString());
    }
    if (enrichments2.isPresent()) {
        files.setEnrichmentFileName2(enrichments2.get().toString());
    }
    if (ranks.isPresent()) {
        files.setRankedFile(ranks.get().toString());
    }
    if (expression.isPresent()) {
        files.setExpressionFileName(expression.get().toString());
    }
    if (classFile.isPresent()) {
        files.setClassFile(classFile.get().toString());
    }

    DataSetParameters dataSet = new DataSetParameters(getDatasetNameGSEA(gseaFolder), Method.GSEA, files);
    return Optional.of(dataSet);
}
Also used : Path(java.nio.file.Path) Map(java.util.Map) HashMap(java.util.HashMap) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)

Example 17 with DataSetFiles

use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.

the class DataSetResolver method createDataSets.

/**
 * Turns the classified input paths into a list of data set parameters.
 *
 * <p>Every GSEA folder that {@code GSEAResolver.resolveGSEAResultsFolder} can resolve
 * contributes one data set. Each generic enrichment file then contributes one data set
 * (method {@code Generic}), paired with the closest matching expression file and rank
 * file when such a match is found. A matched file is removed from the candidate pool so
 * that it is not paired with a later enrichment file.
 *
 * @param types the paths to process, grouped by their detected type
 * @return the data sets in the order they were created (GSEA first, then generic)
 */
private static List<DataSetParameters> createDataSets(Map<Type, List<Path>> types) {
    List<DataSetParameters> result = new ArrayList<>();

    // Every resolvable GSEA results folder becomes a data set as-is
    for (Path folder : types.get(Type.GSEA_FOLDER)) {
        GSEAResolver.resolveGSEAResultsFolder(folder).ifPresent(result::add);
    }

    // Work on copies: candidates are removed from these pools once they have been paired
    // MKTODO add other enrichment types
    List<Path> unpairedExpressions = new ArrayList<>(types.get(Type.EXPRESSION));
    List<Path> unpairedRanks = new ArrayList<>(types.get(Type.RANKS));

    // MKTODO what about other enrichment types?
    for (Path enrichment : types.get(Type.ENRICHMENT_GENERIC)) {
        DataSetFiles files = new DataSetFiles();
        files.setEnrichmentFileName1(enrichment.toAbsolutePath().toString());

        Optional<Path> expressionMatch = findClosestMatch(enrichment, unpairedExpressions);
        Optional<Path> ranksMatch = findClosestMatch(enrichment, unpairedRanks);

        if (expressionMatch.isPresent()) {
            Path expression = expressionMatch.get();
            unpairedExpressions.remove(expression);
            files.setExpressionFileName(expression.toAbsolutePath().toString());
        }
        if (ranksMatch.isPresent()) {
            Path ranks = ranksMatch.get();
            unpairedRanks.remove(ranks);
            files.setRankedFile(ranks.toAbsolutePath().toString());
        }

        String dataSetName = getDatasetNameGeneric(enrichment.getFileName());
        result.add(new DataSetParameters(dataSetName, Method.Generic, files));
    }
    return result;
}
Also used : Path(java.nio.file.Path) ArrayList(java.util.ArrayList) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)

Example 18 with DataSetFiles

use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.

the class FileReaderTest method testDavidEnrichmentsReader.

/**
 * Tests the David enrichment results reader: parses {@code DavidResults.txt} into the
 * legacy first data set and checks the number of enrichments plus the p-value, FDR
 * q-value and gene set size of two of them.
 */
@Test
public void testDavidEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    // David test enrichment file
    String davidResultsPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/DavidResults.txt";

    // Fresh parameters, with the enrichment file registered on the first data set
    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setEnrichmentFileName1(davidResultsPath);

    // Build the map and its default data set
    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles files = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, files);

    // Run the parser
    ParseDavidEnrichmentResults parser = new ParseDavidEnrichmentResults(dataset);
    parser.run(taskMonitor);

    Map<String, EnrichmentResult> enrichments = map.getDataSet(LegacySupport.DATASET1).getEnrichments().getEnrichments();
    assertEquals(215, enrichments.size());

    // First enrichment: p-value, FDR q-value, gene set size
    GenericResult lumen = (GenericResult) enrichments.get("GO:0031974~MEMBRANE-ENCLOSED LUMEN");
    assertEquals(0.00000005210169741980237, lumen.getPvalue(), 0.0);
    assertEquals(0.000016724505445320226, lumen.getFdrqvalue(), 0.0);
    assertEquals(95, lumen.getGsSize());

    // Second enrichment, looked up by the upper-cased form of its name
    String leucineZipperKey = "domain:Leucine-zipper".toUpperCase();
    GenericResult leucineZipper = (GenericResult) enrichments.get(leucineZipperKey);
    assertEquals(0.0009179741851709047, leucineZipper.getPvalue(), 0.0);
    assertEquals(0.46717397126592464, leucineZipper.getFdrqvalue(), 0.0);
    assertEquals(11, leucineZipper.getGsSize());
}
Also used : EnrichmentResult(org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult) EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Test(org.junit.Test)

Example 19 with DataSetFiles

use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.

the class FileReaderTest method testGMTFileReader.

/**
 * Tests the GMT file reader: loads {@code Genesetstestfile.gmt} into the legacy first
 * data set and checks the number of gene sets and genes held by the map afterwards.
 */
@Test
public void testGMTFileReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    String gmtPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/Genesetstestfile.gmt";

    // Fresh parameters, with the GMT file registered on the first data set
    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setGMTFileName(gmtPath);

    // Build the map and its default data set
    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles files = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, files);

    // Run the reader
    GMTFileReaderTask reader = new GMTFileReaderTask(dataset);
    reader.run(taskMonitor);

    // The file is expected to yield 10 gene sets covering 75 genes in total
    assertEquals(10, map.getAllGeneSets().size());
    assertEquals(75, map.getNumberOfGenes());
}
Also used : EnrichmentMapParameters(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Method(org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Test(org.junit.Test)

Example 20 with DataSetFiles

use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.

the class LoadBingoResultsTest method testLoadBingoResult_withoutexpression.

/**
 * Loads a Bingo result file without an expression file: parses the enrichments, creates
 * a dummy expression set, filters the gene sets, initializes the gene sets of interest
 * and computes similarities, then checks the resulting counts.
 */
@Test
public void testLoadBingoResult_withoutexpression() throws Exception {
    // Only the enrichment results are supplied; no expression file is set
    String bingoResultsPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/task/bingo_output/12Hr_topgenes.bgo";
    DataSetFiles files = new DataSetFiles();
    files.setEnrichmentFileName1(bingoResultsPath);

    // Cutoffs: p-value 5.0 x 10^-5, q-value 5.0 x 10^-8
    double pvalue = 0.00005;
    double qvalue = 0.00000005;
    double similarityCutoff = 0.25;
    EMCreationParameters params = new EMCreationParameters("EM1_", pvalue, qvalue, NESFilter.ALL, Optional.empty(), SimilarityMetric.JACCARD, similarityCutoff, 0.5);

    // Create the enrichment map and its data set
    EnrichmentMap em = new EnrichmentMap(params, serviceRegistrar);
    EMDataSet dataset = em.createDataSet(LegacySupport.DATASET1, Method.Specialized, files);

    // Run the pipeline steps in order
    ParseBingoEnrichmentResults parseTask = new ParseBingoEnrichmentResults(dataset);
    parseTask.run(taskMonitor);

    CreateDummyExpressionTask dummyExpressionTask = new CreateDummyExpressionTask(dataset);
    dummyExpressionTask.run(taskMonitor);

    em.filterGenesets();

    InitializeGenesetsOfInterestTask genesetsInit = new InitializeGenesetsOfInterestTask(em);
    genesetsInit.run(taskMonitor);

    Baton<Map<SimilarityKey, GenesetSimilarity>> baton = new Baton<>();
    ComputeSimilarityTaskParallel similarityTask = new ComputeSimilarityTaskParallel(em, baton.consumer());
    similarityTask.run(taskMonitor);

    // 74 gene sets loaded
    assertEquals(74, dataset.getSetOfGeneSets().getGeneSets().size());
    // 74 enrichments as well
    assertEquals(74, dataset.getEnrichments().getEnrichments().size());
    // 5 gene sets of interest
    assertEquals(5, dataset.getGeneSetsOfInterest().getGeneSets().size());
    // 6 similarities (edges)
    assertEquals(6, baton.supplier().get().size());
    // 446 genes in total
    assertEquals(446, em.getNumberOfGenes());
    // 43 genes in the gene set "NUCLEOLUS"
    assertEquals(43, em.getAllGeneSets().get("NUCLEOLUS").getGenes().size());
    // The expression set and the data set genes both hold 446 genes
    assertEquals(446, dataset.getExpressionSets().getNumGenes());
    assertEquals(446, dataset.getDataSetGenes().size());
}
Also used : EMCreationParameters(org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) EMDataSet(org.baderlab.csplugins.enrichmentmap.model.EMDataSet) Baton(org.baderlab.csplugins.enrichmentmap.util.Baton) ParseBingoEnrichmentResults(org.baderlab.csplugins.enrichmentmap.parsers.ParseBingoEnrichmentResults) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) Map(java.util.Map) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Test(org.junit.Test)

Aggregations

DataSetFiles (org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)33 EnrichmentMap (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap)21 Test (org.junit.Test)21 EMDataSet (org.baderlab.csplugins.enrichmentmap.model.EMDataSet)20 EMCreationParameters (org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters)14 Method (org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method)14 EnrichmentMapParameters (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters)11 Map (java.util.Map)6 EnrichmentResult (org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult)6 DataSetParameters (org.baderlab.csplugins.enrichmentmap.resolver.DataSetParameters)5 CyNetwork (org.cytoscape.model.CyNetwork)5 GMTFileReaderTask (org.baderlab.csplugins.enrichmentmap.parsers.GMTFileReaderTask)4 Baton (org.baderlab.csplugins.enrichmentmap.util.Baton)4 ArrayList (java.util.ArrayList)3 ExpressionFileReaderTask (org.baderlab.csplugins.enrichmentmap.parsers.ExpressionFileReaderTask)3 File (java.io.File)2 Path (java.nio.file.Path)2 HashMap (java.util.HashMap)2 BaseIntegrationTest (org.baderlab.csplugins.enrichmentmap.integration.BaseIntegrationTest)2 SimilarityMetric (org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters.SimilarityMetric)2