use of org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters in project EnrichmentMapApp by BaderLab.
the class EMBuildCommandTask method buildEnrichmentMap.
/**
 * Gathers the command arguments into per-data-set file descriptions and creation
 * parameters, then queues the enrichment map creation tasks after the current task.
 */
public void buildEnrichmentMap() {
    // The old constants from EnrichmentMapParameters are still used for backwards compatibility.
    final Method selectedMethod = EnrichmentMapParameters.stringToMethod(analysisType.getSelectedValue());
    final SimilarityMetric selectedMetric = EnrichmentMapParameters.stringToSimilarityMetric(coeffecients.getSelectedValue());

    // Files for data set 1: only the arguments that are non-null are copied over.
    final DataSetFiles firstFiles = new DataSetFiles();
    if (gmtFile != null) {
        firstFiles.setGMTFileName(gmtFile.getAbsolutePath());
    }
    if (expressionDataset1 != null) {
        firstFiles.setExpressionFileName(expressionDataset1.getAbsolutePath());
    }
    if (enrichmentsDataset1 != null) {
        firstFiles.setEnrichmentFileName1(enrichmentsDataset1.getAbsolutePath());
    }
    if (enrichments2Dataset1 != null) {
        firstFiles.setEnrichmentFileName2(enrichments2Dataset1.getAbsolutePath());
    }
    if (ranksDataset1 != null) {
        firstFiles.setRankedFile(ranksDataset1.getAbsolutePath());
    }
    if (classDataset1 != null) {
        firstFiles.setClassFile(classDataset1.getAbsolutePath());
    }
    if (phenotype1Dataset1 != null) {
        firstFiles.setPhenotype1(phenotype1Dataset1);
    }
    if (phenotype2Dataset1 != null) {
        firstFiles.setPhenotype2(phenotype2Dataset1);
    }

    // Files for data set 2 (no GMT file is set for the second data set).
    final DataSetFiles secondFiles = new DataSetFiles();
    if (expressionDataset2 != null) {
        secondFiles.setExpressionFileName(expressionDataset2.getAbsolutePath());
    }
    if (enrichmentsDataset2 != null) {
        secondFiles.setEnrichmentFileName1(enrichmentsDataset2.getAbsolutePath());
    }
    if (enrichments2Dataset2 != null) {
        secondFiles.setEnrichmentFileName2(enrichments2Dataset2.getAbsolutePath());
    }
    if (ranksDataset2 != null) {
        secondFiles.setRankedFile(ranksDataset2.getAbsolutePath());
    }
    if (classDataset2 != null) {
        secondFiles.setClassFile(classDataset2.getAbsolutePath());
    }
    if (phenotype1Dataset2 != null) {
        secondFiles.setPhenotype1(phenotype1Dataset2);
    }
    if (phenotype2Dataset2 != null) {
        secondFiles.setPhenotype2(phenotype2Dataset2);
    }

    // Data set 1 is always registered; data set 2 only when at least one of its files was given.
    final List<DataSetParameters> dataSetList = new ArrayList<>(2);
    dataSetList.add(new DataSetParameters(LegacySupport.DATASET1, selectedMethod, firstFiles));
    if (!secondFiles.isEmpty()) {
        dataSetList.add(new DataSetParameters(LegacySupport.DATASET2, selectedMethod, secondFiles));
    }

    final String attributePrefix = legacySupport.getNextAttributePrefix();
    final EMCreationParameters emParams = new EMCreationParameters(attributePrefix, pvalue, qvalue, NESFilter.ALL, Optional.empty(), selectedMetric, similaritycutoff, propertyManager.getDefaultCombinedConstant());

    // An explicit distinctEdges argument wins; otherwise distinct edges are switched on
    // whenever a second data set is present.
    if (distinctEdges != null) {
        emParams.setCreateDistinctEdges(distinctEdges);
    } else if (!secondFiles.isEmpty()) {
        emParams.setCreateDistinctEdges(true);
    }

    final CreateEnrichmentMapTaskFactory factory = taskFactoryFactory.create(emParams, dataSetList);
    insertTasksAfterCurrentTask(factory.createTaskIterator());
}
use of org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters in project EnrichmentMapApp by BaderLab.
the class ParseGREATEnrichmentResults method parseLines.
/**
 * Parses the lines of a GREAT results export and registers one gene set and one
 * enrichment result per data row on the given data set.
 * <p>
 * The layout is detected from the header row: a 24-column row whose fourth column is
 * "BinomRank" (binomial and hypergeometric statistics), or a 20-column row whose fourth
 * column is "Rank" (hypergeometric statistics only). Lines before the header are skipped,
 * as are later lines whose column count does not match the detected layout. When no
 * header row is found (including when {@code lines} is empty) no rows are parsed.
 * <p>
 * Export format details: http://bejerano.stanford.edu/help/display/GREAT/Export
 *
 * @param lines       lines of the GREAT results file, tab separated
 * @param dataset     data set whose gene sets, enrichments and map parameters are updated
 * @param taskMonitor progress monitor; a {@code NullTaskMonitor} is used when {@code null}
 * @throws NumberFormatException if a non-empty statistic or size column is not numeric
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
    if (taskMonitor == null)
        taskMonitor = new NullTaskMonitor();
    taskMonitor.setTitle("Parsing Enrichment Result file");

    EnrichmentMap map = dataset.getMap();
    EMCreationParameters params = map.getParams();

    // The filter the user chose for the GREAT results decides which statistics are kept:
    //   HYPER  - hypergeometric p-value and FDR
    //   BINOM  - binomial p-value and FDR
    //   BOTH   - the larger of the two p-values and of the two FDRs
    //   EITHER - the smaller of the two p-values and of the two FDRs
    GreatFilter filterType = params.getGreatFilter();

    Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
    Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();

    int currentProgress = 0;
    int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Great Results file - " + maxValue + " rows");

    // GREAT files always carry an FDR.
    params.setFDR(true);

    // Scan for the header line; its column count and fourth column identify the layout.
    // The loop simply does not run for an empty list, so empty input is not an error.
    boolean hasBackground = false;
    int headerIndex = 0;
    for (; headerIndex < lines.size(); headerIndex++) {
        String[] header = lines.get(headerIndex).split("\t");
        if (header.length == 24 && header[3].equalsIgnoreCase("BinomRank")) {
            break;
        }
        // No binom rank means there is no binomial data (background layout).
        if (header.length == 20 && header[3].equalsIgnoreCase("Rank")) {
            hasBackground = true;
            break;
        }
    }

    // Zero-based column positions that differ between the two layouts.
    final int expectedColumns = hasBackground ? 20 : 24;
    final int genesColumn = hasBackground ? 18 : 23;
    final int sizeColumn = hasBackground ? 15 : 19;

    // Process every line after the header.
    for (int i = headerIndex + 1; i < lines.size(); i++) {
        String[] tokens = lines.get(i).split("\t");

        // There are extra lines at the end of the file that should be ignored.
        if (tokens.length != expectedColumns)
            continue;

        // The gene set name combines the second and third columns; the third column
        // alone is used as the description.
        final String name = tokens[1].trim() + "-" + tokens[2].trim();
        final String description = tokens[2].trim();

        // When there are two different species the gene set may already exist; in that
        // case the genes of this row are added to the genes already recorded for it.
        ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
        if (genesets.containsKey(name))
            builder = builder.addAll(genesets.get(name).getGenes());

        // The genes column holds a comma separated list of the genes in this gene set.
        for (String geneToken : tokens[genesColumn].split(",")) {
            String gene = geneToken.toUpperCase();
            if (map.containsGene(gene)) {
                // Already known: reuse the key the map assigned to this gene.
                builder.add(map.getHashFromGene(gene));
            } else if (!gene.isEmpty()) {
                Integer hash = map.addGene(gene).get();
                builder.add(hash);
            }
        }

        // Finished parsing the gene set; add (or replace) it in the map of gene sets.
        genesets.put(name, new GeneSet(name, description, builder.build()));

        // GREAT runs two tests: the binomial test on regions and the hypergeometric test
        // on genes. Statistics that are missing (empty column) default to 1.
        double hyperPvalue = 1;
        double hyperFdr = 1;
        double binomPvalue = 1;
        double binomFdr = 1;
        if (!hasBackground) {
            binomPvalue = parseDoubleOrDefault(tokens[4], 1);
            binomFdr = parseDoubleOrDefault(tokens[6], 1);
            hyperPvalue = parseDoubleOrDefault(tokens[13], 1);
            hyperFdr = parseDoubleOrDefault(tokens[15], 1);
        } else {
            hyperPvalue = parseDoubleOrDefault(tokens[4], 1);
            hyperFdr = parseDoubleOrDefault(tokens[6], 1);
        }

        double pvalue = 1.0;
        double fdrQvalue = 1.0;
        if (filterType == GreatFilter.HYPER) {
            pvalue = hyperPvalue;
            fdrQvalue = hyperFdr;
        } else if (filterType == GreatFilter.BINOM) {
            pvalue = binomPvalue;
            fdrQvalue = binomFdr;
        } else if (filterType == GreatFilter.BOTH) {
            pvalue = Math.max(hyperPvalue, binomPvalue);
            fdrQvalue = Math.max(hyperFdr, binomFdr);
        } else if (filterType == GreatFilter.EITHER) {
            pvalue = Math.min(hyperPvalue, binomPvalue);
            fdrQvalue = Math.min(hyperFdr, binomFdr);
        } else {
            System.out.println("Invalid attribute setting for GREAT p-value specification");
        }

        // Keep track of the minimum p-value and q-value seen so far.
        if (pvalue < params.getPvalueMin())
            params.setPvalueMin(pvalue);
        if (fdrQvalue < params.getQvalueMin())
            params.setQvalueMin(fdrQvalue);

        // The gene set size column depends on the layout; an empty column leaves the size at 0.
        int gsSize = 0;
        if (!tokens[sizeColumn].isEmpty())
            gsSize = Integer.parseInt(tokens[sizeColumn]);

        GenericResult result = new GenericResult(name, description, pvalue, gsSize, fdrQvalue);

        // TaskMonitor.setProgress expects a fraction between 0.0 and 1.0, not a percentage.
        taskMonitor.setProgress((double) currentProgress / maxValue);
        currentProgress++;

        // A gene set can appear more than once (e.g. in both phenotypes). When a result
        // for it already exists, retain the one with the lower p-value (ticket #149).
        GenericResult existing = (GenericResult) results.get(name);
        if (existing == null || result.getPvalue() < existing.getPvalue())
            results.put(name, result);
    }
}

/** Parses {@code token} as a double, or returns {@code defaultValue} when the token is empty. */
private static double parseDoubleOrDefault(String token, double defaultValue) {
    return token.isEmpty() ? defaultValue : Double.parseDouble(token);
}
use of org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters in project EnrichmentMapApp by BaderLab.
the class LoadBingoResultsTest method testLoad2BingoResult_withoutexpression.
/**
 * Loads two BiNGO result files as two data sets without expression files and checks the
 * gene set, enrichment, dummy-expression and gene counts of each data set.
 */
@Test
public void testLoad2BingoResult_withoutexpression() throws Exception {
    // Each data set here consists of an enrichment results file only.
    DataSetFiles firstFiles = new DataSetFiles();
    firstFiles.setEnrichmentFileName1("src/test/resources/org/baderlab/csplugins/enrichmentmap/task/bingo_output/12Hr_topgenes.bgo");

    DataSetFiles secondFiles = new DataSetFiles();
    secondFiles.setEnrichmentFileName1("src/test/resources/org/baderlab/csplugins/enrichmentmap/task/bingo_output/24Hr_topgenes.bgo");

    // Cutoffs: p-value 5.0 x 10^-5, q-value 5.0 x 10^-8, Jaccard similarity 0.25.
    double pvalueCutoff = 0.00005;
    double qvalueCutoff = 0.00000005;
    double similarityCutoff = 0.25;
    EMCreationParameters params = new EMCreationParameters("EM1_", pvalueCutoff, qvalueCutoff, NESFilter.ALL, Optional.empty(), SimilarityMetric.JACCARD, similarityCutoff, 0.5);

    // Create the enrichment map, then create and parse the first data set.
    EnrichmentMap em = new EnrichmentMap(params, serviceRegistrar);
    EMDataSet dataset = em.createDataSet(LegacySupport.DATASET1, Method.Specialized, firstFiles);
    new ParseBingoEnrichmentResults(dataset).run(taskMonitor);

    // Create and parse the second data set.
    EMDataSet dataset2 = em.createDataSet(LegacySupport.DATASET2, Method.Specialized, secondFiles);
    new ParseBingoEnrichmentResults(dataset2).run(taskMonitor);

    // No expression files were given, so dummy expression data is generated for both.
    new CreateDummyExpressionTask(dataset).run(taskMonitor);
    new CreateDummyExpressionTask(dataset2).run(taskMonitor);

    // The expression sets are distinct unless both data sets cover exactly the same genes.
    if (!dataset.getDataSetGenes().containsAll(dataset2.getDataSetGenes())
            || !dataset2.getDataSetGenes().containsAll(dataset.getDataSetGenes())) {
        em.setDistinctExpressionSets(true);
    }

    em.filterGenesets();
    new InitializeGenesetsOfInterestTask(em).run(taskMonitor);
    // Similarity computation is not exercised by this test:
    // ComputeSimilarityTask similarities = new ComputeSimilarityTask(em);
    // similarities.run(taskMonitor);

    // ---- data set 1 ----
    dataset = em.getDataSet(LegacySupport.DATASET1);
    // 74 gene sets and 74 enrichments (the gene sets are built from the bgo file)
    assertEquals(74, dataset.getSetOfGeneSets().getGeneSets().size());
    assertEquals(74, dataset.getEnrichments().getEnrichments().size());
    // 5 gene sets of interest
    assertEquals(5, dataset.getGeneSetsOfInterest().getGeneSets().size());
    // 43 genes in the gene set "NUCLEOLUS"
    assertEquals(43, dataset.getSetOfGeneSets().getGeneSets().get("NUCLEOLUS").getGenes().size());
    // the dummy expression has values for all 446 genes
    assertEquals(446, dataset.getExpressionSets().getNumGenes());
    assertEquals(446, dataset.getDataSetGenes().size());

    // ---- data set 2 ----
    dataset2 = em.getDataSet(LegacySupport.DATASET2);
    // 87 gene sets and 87 enrichments (the gene sets are built from the bgo file)
    assertEquals(87, dataset2.getSetOfGeneSets().getGeneSets().size());
    assertEquals(87, dataset2.getEnrichments().getEnrichments().size());
    // 2 gene sets of interest
    assertEquals(2, dataset2.getGeneSetsOfInterest().getGeneSets().size());
    // 318 genes in the gene set "INTRACELLULAR"
    assertEquals(318, dataset2.getSetOfGeneSets().getGeneSets().get("INTRACELLULAR").getGenes().size());
    // the dummy expression has values for all 398 genes
    assertEquals(398, dataset2.getExpressionSets().getNumGenes());
    assertEquals(398, dataset2.getDataSetGenes().size());

    // The edge count check is disabled together with the similarity computation:
    //assertEquals(24,em.getGenesetSimilarity().size());

    // 704 genes in total across the map
    assertEquals(704, em.getNumberOfGenes());
}
use of org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters in project EnrichmentMapApp by BaderLab.
the class LoadEdbDatasetTest method testEdbLoad.
/**
 * Loads a GSEA data set from an edb results file together with its GMT and rank files,
 * and checks the resulting gene set, enrichment and gene counts.
 */
@Test
public void testEdbLoad() throws Exception {
    // Input files: edb enrichment results, gene sets (GMT) and ranks.
    final String resourceDir = "src/test/resources/org/baderlab/csplugins/enrichmentmap/task/LoadDataset/GSEA_example_results/edb/";
    DataSetFiles edbFiles = new DataSetFiles();
    edbFiles.setEnrichmentFileName1(resourceDir + "results.edb");
    edbFiles.setGMTFileName(resourceDir + "gene_sets.gmt");
    edbFiles.setRankedFile(resourceDir + "Expressionfile.rnk");

    // Cutoffs of 1.0 for both p-value and q-value, Jaccard similarity cutoff of 0.5.
    double similarityCutoff = 0.5;
    double pvalueCutoff = 1.0;
    double qvalueCutoff = 1.0;
    EMCreationParameters params = new EMCreationParameters("EM1_", pvalueCutoff, qvalueCutoff, NESFilter.ALL, Optional.empty(), SimilarityMetric.JACCARD, similarityCutoff, 0.5);

    // Create the enrichment map and a GSEA data set on it.
    EnrichmentMap em = new EnrichmentMap(params, serviceRegistrar);
    EMDataSet dataset = em.createDataSet(LegacySupport.DATASET1, Method.GSEA, edbFiles);

    // Load the gene sets first, then the enrichment results, then dummy expression data.
    new GMTFileReaderTask(dataset).run(taskMonitor);
    new ParseEDBEnrichmentResults(dataset).run(taskMonitor);
    new CreateDummyExpressionTask(dataset).run(taskMonitor);

    em.filterGenesets();
    new InitializeGenesetsOfInterestTask(em).run(taskMonitor);

    // Although the original analysis had 193 gene sets, the edb version only stores the
    // gene sets that overlapped with the analyzed data set.
    assertEquals(14, dataset.getSetOfGeneSets().getGeneSets().size());
    assertEquals(14, dataset.getEnrichments().getEnrichments().size());
    assertEquals(41, dataset.getDataSetGenes().size());
    assertEquals(41, dataset.getExpressionSets().getNumGenes());
}
use of org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters in project EnrichmentMapApp by BaderLab.
the class LegacySessionLoadTest method test_1_LoadedLegacyData.
/**
 * Checks the enrichment map model restored from the session file named in the
 * {@code @SessionFile} annotation: map and network, creation parameters, one sample edge,
 * the single data set, and its gene sets, enrichments, expressions, ranks and files.
 */
@Test
@SessionFile("em_session_2.2.cys")
public void test_1_LoadedLegacyData() throws Exception {
    // ---- enrichment map and its network ----
    EnrichmentMap em = getEnrichmentMap();
    assertEquals("EM1_Enrichment Map", em.getName());
    CyNetwork cyNetwork = networkManager.getNetwork(em.getNetworkID());
    assertNotNull(cyNetwork);
    assertEquals(1, em.getDataSetCount());
    assertEquals(14067, em.getNumberOfGenes());
    assertEquals(14067, em.getAllGenes().size());

    // Number of edges: 3339 - that is how many geneset similarity objects there should be.
    CyTable edges = cyNetwork.getDefaultEdgeTable();
    assertEquals(3339, edges.getRowCount());

    // ---- creation parameters ----
    EMCreationParameters creationParams = em.getParams();
    String attributePrefix = creationParams.getAttributePrefix();
    assertEquals("EM1_", attributePrefix);
    assertEquals(0.5, creationParams.getCombinedConstant(), 0.0);
    assertFalse(creationParams.isEMgmt());
    assertEquals("Geneset_Overlap", creationParams.getEnrichmentEdgeType());
    assertTrue(creationParams.isFDR());
    assertEquals(GreatFilter.HYPER, creationParams.getGreatFilter());
    assertEquals(0.005, creationParams.getPvalue(), 0.0);
    assertEquals(1.0, creationParams.getPvalueMin(), 0.0);
    assertEquals(0.1, creationParams.getQvalue(), 0.0);
    assertEquals(1.0, creationParams.getQvalueMin(), 0.0);
    assertEquals(0.5, creationParams.getSimilarityCutoff(), 0.0);
    assertEquals(SimilarityMetric.OVERLAP, creationParams.getSimilarityMetric());
    // assertFalse(creationParams.isDistinctExpressionSets());

    // ---- one sample edge, looked up by its name ----
    String sourceSet = "RESOLUTION OF SISTER CHROMATID COHESION%REACTOME%REACT_150425.2";
    String targetSet = "CHROMOSOME, CENTROMERIC REGION%GO%GO:0000775";
    String edgeName = sourceSet + " (Geneset_Overlap) " + targetSet;
    Collection<CyRow> matches = edges.getMatchingRows(CyNetwork.NAME, edgeName);
    assertEquals(1, matches.size());
    CyRow edgeRow = matches.iterator().next();
    assertEquals("Geneset_Overlap", edgeRow.get(CyEdge.INTERACTION, String.class));
    assertEquals(0.6097560975609756, EMStyleBuilder.Columns.EDGE_SIMILARITY_COEFF.get(edgeRow, attributePrefix), 0.0);

    // ---- the data set ----
    EMDataSet ds = em.getDataSet("Dataset 1");
    assertNotNull(ds);
    assertSame(em, ds.getMap());
    assertEquals(Method.GSEA, ds.getMethod());
    assertEquals(12653, ds.getDataSetGenes().size());
    assertEquals(389, ds.getGeneSetsOfInterest().getGeneSets().size());
    // assertEquals(17259, ds.getSetofgenesets().getGenesets().size()); // MKTODO why? what is this used for
    assertEndsWith(ds.getSetOfGeneSets().getFilename(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    // Every node SUID recorded on the data set must resolve to a node of the network.
    for (long nodeSuid : ds.getNodeSuids()) {
        assertNotNull(cyNetwork.getNode(nodeSuid));
    }

    // ---- one sample gene set of interest ----
    GeneSet sampleGeneSet = ds.getGeneSetsOfInterest().getGeneSets().get("NCRNA PROCESSING%GO%GO:0034470");
    assertEquals(88, sampleGeneSet.getGenes().size());
    assertEquals("NCRNA PROCESSING%GO%GO:0034470", sampleGeneSet.getName());
    assertEquals("ncRNA processing", sampleGeneSet.getDescription());
    assertEquals(Optional.of("GO"), sampleGeneSet.getSource());

    // ---- enrichment results ----
    SetOfEnrichmentResults enrichmentSet = ds.getEnrichments();
    assertEquals(4756, enrichmentSet.getEnrichments().size());
    assertEndsWith(enrichmentSet.getFilename1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(enrichmentSet.getFilename2(), "gsea_report_for_NT12_1473194913081.xls");
    assertEquals("ES12", enrichmentSet.getPhenotype1());
    assertEquals("NT12", enrichmentSet.getPhenotype2());

    EnrichmentResult sampleResult = enrichmentSet.getEnrichments().get("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201");
    assertTrue(sampleResult instanceof GSEAResult);
    GSEAResult gsea = (GSEAResult) sampleResult;
    assertEquals("RIBONUCLEOSIDE TRIPHOSPHATE BIOSYNTHETIC PROCESS%GO%GO:0009201", gsea.getName());
    assertEquals(0.42844063, gsea.getES(), 0.0);
    assertEquals(0.45225498, gsea.getFdrqvalue(), 0.0);
    assertEquals(1.0, gsea.getFwerqvalue(), 0.0);
    assertEquals(23, gsea.getGsSize());
    assertEquals(1.1938541, gsea.getNES(), 0.0);
    assertEquals(0.2457786, gsea.getPvalue(), 0.0);
    assertEquals(4689, gsea.getRankAtMax());
    assertEquals(Optional.of("GO"), gsea.getSource());

    // ---- expression matrix ----
    GeneExpressionMatrix matrix = ds.getExpressionSets();
    assertEquals(20326, matrix.getExpressionUniverse());
    assertEquals(3.686190609, matrix.getClosesttoZero(), 0.0);
    // assertEndsWith(matrix.getFilename(), "MCF7_ExprMx_v2_names.gct");
    assertEquals(15380.42388, matrix.getMaxExpression(), 0.0);
    assertEquals(3.686190609, matrix.getMinExpression(), 0.0);
    assertEquals(20, matrix.getNumConditions());
    assertEquals(12653, matrix.getExpressionMatrix().size());
    assertEquals(12653, matrix.getExpressionMatrix_rowNormalized().size());

    GeneExpression firstExpression = matrix.getExpressionMatrix().get(0);
    assertEquals("MOCOS", firstExpression.getName());
    assertEquals("MOCOS (molybdenum cofactor sulfurase)", firstExpression.getDescription());
    assertEquals(18, firstExpression.getExpression().length);

    // ---- ranks ----
    Ranking gseaRanking = matrix.getRanks().get("GSEARanking");
    assertEquals(12653, gseaRanking.getAllRanks().size());
    assertEquals(12653, gseaRanking.getRanking().size());
    Rank firstRank = gseaRanking.getRanking().get(0);
    assertEquals("MOCOS", firstRank.getName());
    assertEquals(1238, firstRank.getRank().intValue());
    assertEquals(0.54488367, firstRank.getScore(), 0.0);

    // ---- file names recorded on the data set ----
    DataSetFiles dsFiles = ds.getDataSetFiles();
    assertEndsWith(dsFiles.getClassFile(), "ES_NT.cls");
    assertEndsWith(dsFiles.getEnrichmentFileName1(), "gsea_report_for_ES12_1473194913081.xls");
    assertEndsWith(dsFiles.getEnrichmentFileName2(), "gsea_report_for_NT12_1473194913081.xls");
    // assertEndsWith(dsFiles.getExpressionFileName(), "MCF7_ExprMx_v2_names.gct");
    assertEndsWith(dsFiles.getGMTFileName(), "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    assertEndsWith(dsFiles.getGseaHtmlReportFile(), "estrogen_treatment_12hr_gsea_enrichment_results.Gsea.1473194913081/index.html");
    assertEndsWith(dsFiles.getRankedFile(), "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    assertEquals("ES12", dsFiles.getPhenotype1());
    assertEquals("NT12", dsFiles.getPhenotype2());
}
Aggregations