use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.
the class GSEAResolver method resolveRPT.
/**
 * Builds the data set parameters for a GSEA results folder from the parameters of its RPT file.
 *
 * @param gseaFolder the GSEA results folder; handed to the file look-up helpers and used to name the data set
 * @param rptFile the RPT file whose parameters are parsed
 * @return the resolved parameters, or an empty Optional when no parameters could be parsed from
 *         the RPT file, or when none of the GMT, enrichment report, ranked list and expression
 *         files were resolved
 */
private static Optional<DataSetParameters> resolveRPT(Path gseaFolder, Path rptFile) {
    Optional<Map<String, String>> parsed = parseRPTParameters(rptFile);
    if (!parsed.isPresent()) {
        return Optional.empty();
    }
    Map<String, String> params = parsed.get();

    // Look up every file the RPT refers to.
    Optional<Path> gmt = getRptGmt(gseaFolder, params);
    String[] phenotypes = getRptPhenotypes(params);
    Optional<Path> classFile = getRptClassFile(params);

    // The result file names are derived from the two phenotypes and the producer timestamp.
    String timestamp = params.get("producer_timestamp");
    String phenotype1 = phenotypes[0];
    String phenotype2 = phenotypes[1];
    String suffix = "_" + timestamp + ".xls";
    String report1Name = "gsea_report_for_" + phenotype1 + suffix;
    String report2Name = "gsea_report_for_" + phenotype2 + suffix;
    String rankedName = "ranked_gene_list_" + phenotype1 + "_versus_" + phenotype2 + suffix;

    Optional<Path> report1 = getRptResultsFile(gseaFolder, report1Name, params);
    Optional<Path> report2 = getRptResultsFile(gseaFolder, report2Name, params);
    Optional<Path> ranked = getRptResultsFile(gseaFolder, rankedName, params);
    Optional<Path> expression = getRptExpressionFile(params);

    // The class file is deliberately not part of this check: on its own it does not make a data set.
    boolean anyResolved = gmt.isPresent()
            || report1.isPresent()
            || report2.isPresent()
            || ranked.isPresent()
            || expression.isPresent();
    if (!anyResolved) {
        return Optional.empty();
    }

    DataSetFiles files = new DataSetFiles();
    files.setPhenotype1(phenotype1);
    files.setPhenotype2(phenotype2);
    if (gmt.isPresent()) {
        files.setGMTFileName(gmt.get().toString());
    }
    if (report1.isPresent()) {
        files.setEnrichmentFileName1(report1.get().toString());
    }
    if (report2.isPresent()) {
        files.setEnrichmentFileName2(report2.get().toString());
    }
    if (ranked.isPresent()) {
        files.setRankedFile(ranked.get().toString());
    }
    if (expression.isPresent()) {
        files.setExpressionFileName(expression.get().toString());
    }
    if (classFile.isPresent()) {
        files.setClassFile(classFile.get().toString());
    }

    DataSetParameters dataSet = new DataSetParameters(getDatasetNameGSEA(gseaFolder), Method.GSEA, files);
    return Optional.of(dataSet);
}
use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.
the class DataSetResolver method createDataSets.
/**
 * Turns the classified paths into data set parameters.
 *
 * <p>Every GSEA folder that {@code GSEAResolver.resolveGSEAResultsFolder} can resolve becomes a
 * data set. Every generic enrichment file becomes a data set as well, paired with the expression
 * and rank files picked by {@code findClosestMatch}; a picked file is removed from the candidates
 * so it is not offered to a later enrichment file.
 *
 * @param types the paths grouped by detected type; the GSEA_FOLDER, EXPRESSION, RANKS and
 *        ENRICHMENT_GENERIC entries are read
 * @return the data sets that were created, GSEA ones first
 */
private static List<DataSetParameters> createDataSets(Map<Type, List<Path>> types) {
    List<DataSetParameters> result = new ArrayList<>();

    // GSEA folders are resolved on their own, no pairing needed.
    for (Path folder : types.get(Type.GSEA_FOLDER)) {
        GSEAResolver.resolveGSEAResultsFolder(folder).ifPresent(result::add);
    }

    // Working copies, so that matched files can be taken out of the running.
    // MKTODO add other enrichment types
    List<Path> unusedExpressions = new ArrayList<>(types.get(Type.EXPRESSION));
    List<Path> unusedRanks = new ArrayList<>(types.get(Type.RANKS));

    // MKTODO what about other enrichment types?
    for (Path enrichment : types.get(Type.ENRICHMENT_GENERIC)) {
        DataSetFiles files = new DataSetFiles();
        files.setEnrichmentFileName1(enrichment.toAbsolutePath().toString());

        Optional<Path> expressionMatch = findClosestMatch(enrichment, unusedExpressions);
        Optional<Path> ranksMatch = findClosestMatch(enrichment, unusedRanks);

        if (expressionMatch.isPresent()) {
            Path expression = expressionMatch.get();
            unusedExpressions.remove(expression);
            files.setExpressionFileName(expression.toAbsolutePath().toString());
        }
        if (ranksMatch.isPresent()) {
            Path ranks = ranksMatch.get();
            unusedRanks.remove(ranks);
            files.setRankedFile(ranks.toAbsolutePath().toString());
        }

        String dataSetName = getDatasetNameGeneric(enrichment.getFileName());
        result.add(new DataSetParameters(dataSetName, Method.Generic, files));
    }

    return result;
}
use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.
the class FileReaderTest method testDavidEnrichmentsReader.
/**
 * Runs the DAVID enrichment results parser on DavidResults.txt and checks the number of
 * enrichments plus the p-value, FDR q-value and gene set size of two of them.
 */
@Test
public void testDavidEnrichmentsReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    // DAVID results file used as input
    String davidResultsPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/DavidResults.txt";

    // Fresh parameters with the enrichment file registered on the first data set
    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setEnrichmentFileName1(davidResultsPath);

    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);

    // Create the default data set and parse the file into it
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles dataSetFiles = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, dataSetFiles);
    new ParseDavidEnrichmentResults(dataset).run(taskMonitor);

    Map<String, EnrichmentResult> enrichments = map.getDataSet(LegacySupport.DATASET1).getEnrichments().getEnrichments();
    assertEquals(215, enrichments.size());

    // First entry: p-value, FDR q-value, gene set size
    GenericResult lumen = (GenericResult) enrichments.get("GO:0031974~MEMBRANE-ENCLOSED LUMEN");
    assertEquals(0.00000005210169741980237, lumen.getPvalue(), 0.0);
    assertEquals(0.000016724505445320226, lumen.getFdrqvalue(), 0.0);
    assertEquals(95, lumen.getGsSize());

    // Second entry, looked up by its upper-cased name: p-value, FDR q-value, gene set size
    GenericResult leucineZipper = (GenericResult) enrichments.get("domain:Leucine-zipper".toUpperCase());
    assertEquals(0.0009179741851709047, leucineZipper.getPvalue(), 0.0);
    assertEquals(0.46717397126592464, leucineZipper.getFdrqvalue(), 0.0);
    assertEquals(11, leucineZipper.getGsSize());
}
use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.
the class FileReaderTest method testGMTFileReader.
/**
 * Runs the GMT file reader on Genesetstestfile.gmt and checks how many gene sets and genes
 * end up in the enrichment map.
 */
@Test
public void testGMTFileReader(Provider<EnrichmentMapParameters> empFactory) throws Exception {
    String gmtPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/Genesetstestfile.gmt";

    // Fresh parameters with the GMT file registered on the first data set
    EnrichmentMapParameters params = empFactory.get();
    params.getFiles().get(LegacySupport.DATASET1).setGMTFileName(gmtPath);

    EnrichmentMap map = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);

    // Create the default data set and read the GMT file into it
    Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
    DataSetFiles dataSetFiles = params.getFiles().get(LegacySupport.DATASET1);
    EMDataSet dataset = map.createDataSet(LegacySupport.DATASET1, method, dataSetFiles);
    new GMTFileReaderTask(dataset).run(taskMonitor);

    // Expect 10 gene sets covering 75 genes in total
    assertEquals(10, map.getAllGeneSets().size());
    assertEquals(75, map.getNumberOfGenes());
}
use of org.baderlab.csplugins.enrichmentmap.model.DataSetFiles in project EnrichmentMapApp by BaderLab.
the class LoadBingoResultsTest method testLoadBingoResult_withoutexpression.
/**
 * Loads a BiNGO result file with no expression file, fills in a dummy expression set, and
 * checks the resulting gene sets, enrichments, similarities and gene counts.
 */
@Test
public void testLoadBingoResult_withoutexpression() throws Exception {
    // Only the enrichment results are supplied; no expression file is set on the data set files.
    String bingoResultsPath = "src/test/resources/org/baderlab/csplugins/enrichmentmap/task/bingo_output/12Hr_topgenes.bgo";
    DataSetFiles dataSetFiles = new DataSetFiles();
    dataSetFiles.setEnrichmentFileName1(bingoResultsPath);

    // Thresholds for the map; the q-value is 5.0 x 10^-8
    double pvalue = 0.00005;
    double qvalue = 0.00000005;
    double similarityCutoff = 0.25;
    EMCreationParameters creationParams = new EMCreationParameters("EM1_", pvalue, qvalue, NESFilter.ALL, Optional.empty(), SimilarityMetric.JACCARD, similarityCutoff, 0.5);

    EnrichmentMap em = new EnrichmentMap(creationParams, serviceRegistrar);
    EMDataSet dataset = em.createDataSet(LegacySupport.DATASET1, Method.Specialized, dataSetFiles);

    // Run the pipeline steps in order: parse, dummy expression, filter, gene sets of interest, similarity
    new ParseBingoEnrichmentResults(dataset).run(taskMonitor);
    new CreateDummyExpressionTask(dataset).run(taskMonitor);
    em.filterGenesets();
    new InitializeGenesetsOfInterestTask(em).run(taskMonitor);

    Baton<Map<SimilarityKey, GenesetSimilarity>> similarityBaton = new Baton<>();
    new ComputeSimilarityTaskParallel(em, similarityBaton.consumer()).run(taskMonitor);

    // 74 gene sets loaded
    assertEquals(74, dataset.getSetOfGeneSets().getGeneSets().size());
    // 74 enrichments as well
    assertEquals(74, dataset.getEnrichments().getEnrichments().size());
    // 5 gene sets of interest
    assertEquals(5, dataset.getGeneSetsOfInterest().getGeneSets().size());
    // 6 similarity entries (edges)
    assertEquals(6, similarityBaton.supplier().get().size());
    // 446 genes in total
    assertEquals(446, em.getNumberOfGenes());
    // 43 genes in the gene set "NUCLEOLUS"
    assertEquals(43, em.getAllGeneSets().get("NUCLEOLUS").getGenes().size());
    // The dummy expression set and the data set genes both cover all 446 genes
    assertEquals(446, dataset.getExpressionSets().getNumGenes());
    assertEquals(446, dataset.getDataSetGenes().size());
}
Aggregations