use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class CreateGMTEnrichmentMapTask method buildEnrichmentMap.
/**
 * Populates the dataset's enrichment results directly from its gene sets.
 *
 * The dataset method is set to {@code Method.Generic}, every gene set is
 * marked as being of interest, and one {@code GenericResult} is created per
 * gene set (fixed p-value of 0.01, size equal to the number of genes in the
 * set). The results are keyed by gene set name and stored on the dataset.
 */
public void buildEnrichmentMap() {
    dataset.setMethod(Method.Generic);

    // No filtering here: the full collection of gene sets is the set of interest.
    dataset.setGeneSetsOfInterest(dataset.getSetOfGeneSets());
    Map<String, GeneSet> geneSetsByName = dataset.getSetOfGeneSets().getGeneSets();

    // Fresh result container; its backing map is filled in below.
    SetOfEnrichmentResults enrichmentResults = new SetOfEnrichmentResults();
    Map<String, EnrichmentResult> resultsByName = enrichmentResults.getEnrichments();

    // Every gene set gets a placeholder enrichment result of its own.
    for (GeneSet geneSet : geneSetsByName.values()) {
        GenericResult placeholder = new GenericResult(
                geneSet.getName(), geneSet.getDescription(), 0.01, geneSet.getGenes().size());
        resultsByName.put(geneSet.getName(), placeholder);
    }

    dataset.setEnrichments(enrichmentResults);
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class GMTFileReaderTask method parse.
/**
 * Reads the GMT file named by {@code gmtFileName} one line at a time and
 * registers each successfully parsed gene set under its name.
 *
 * Each line is handed to {@code readGeneSet}; a non-null result is stored in
 * {@code setOfgenesets} when that container is non-null.
 *
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException if the {@code cancelled} flag is set while
 *         lines are still being processed
 */
public void parse() throws IOException, InterruptedException {
    try (BufferedReader in = new BufferedReader(new FileReader(gmtFileName))) {
        String row = in.readLine();
        while (row != null) {
            // Cancellation is checked once per line, before the line is parsed.
            if (cancelled) {
                throw new InterruptedException();
            }

            GeneSet parsed = readGeneSet(map, row);
            if (parsed != null && setOfgenesets != null) {
                setOfgenesets.getGeneSets().put(parsed.getName(), parsed);
            }

            row = in.readLine();
        }
    }
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class ParseBingoEnrichmentResults method parseLines.
/**
 * Parses the lines of a Bingo enrichment results file into the given dataset.
 *
 * The method scans for the header row (nine tab-separated columns, the first
 * being "GO-ID" and the last "Genes in test set") and treats every following
 * non-blank line as one result row. For each row it builds a gene set from
 * column 9, merges it with any gene set already stored under the same name,
 * and records a {@code GenericResult}; when a result with the same name is
 * already present, the one with the lower p-value is kept.
 *
 * @param lines       the file content, one entry per line
 * @param dataset     the dataset whose gene sets, genes and enrichment results are updated
 * @param taskMonitor progress sink; a {@code NullTaskMonitor} is substituted when null
 * @throws IllegalThreadStateException if no header row is found, or a data row
 *         has fewer than nine columns
 * @throws NumberFormatException if a non-empty p-value, corrected p-value or
 *         count column is not a valid number
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
    if (taskMonitor == null)
        taskMonitor = new NullTaskMonitor();
    taskMonitor.setTitle("Parsing Bingo Enrichment Result file");

    // With Bingo results there are no gene sets defined up front, so they are
    // built from the result rows themselves.
    // The Bingo file has a block of info lines at the top before the results.
    // Columns of the results table:
    //   GO-ID  p-value  corr p-value  x  n  X  N  Description  Genes in test set
    // (column 1) GO-ID - just the numerical part of the GO term
    // (column 2) p-value
    // (column 3) corr p-value
    // (column 4) x - number of genes in the subset of interest with this annotation
    // (column 5) n - number of genes in the universe with this annotation
    // (column 6) X - number of genes in the subset
    // (column 7) N - number of genes in the universe
    // (column 8) Description - GO term name, used as the gene set name
    // (column 9) Genes in test set - genes of the subset annotated to this term
    //            (therefore pre-filtered)
    Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
    EnrichmentMap map = dataset.getMap();
    Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();

    int currentProgress = 0;
    int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Generic Results file -" + maxValue + " rows");

    // Go through each line until we find the header line. Starting the scan
    // inside the loop (rather than reading line 0 first) means an empty file
    // ends up in the "missing data" error below instead of an index error.
    int k = 0;
    for (; k < lines.size(); k++) {
        String[] header = lines.get(k).split("\t");
        if ((header.length == 9) && header[0].equalsIgnoreCase("GO-ID") && header[8].equalsIgnoreCase("Genes in test set")) {
            break;
        }
    }
    if (k == lines.size())
        throw new IllegalThreadStateException("Bingo results file is missing data.");

    for (int i = k + 1; i < lines.size(); i++) {
        String line = lines.get(i);

        // Blank lines (e.g. at the end of the file) carry no result.
        if (line.trim().isEmpty()) {
            currentProgress++;
            continue;
        }

        // A negative limit keeps trailing empty columns, so a row whose gene
        // list is empty still yields nine tokens.
        String[] tokens = line.split("\t", -1);
        if (tokens.length < 9)
            throw new IllegalThreadStateException("Bingo results file is missing data (row " + (i + 1)
                    + " has " + tokens.length + " columns, expected 9).");

        double pvalue = 1.0;
        double FDRqvalue = 1.0;
        int gs_size = 0;

        // The 8th column of the file is the name of the gene set ...
        final String name = tokens[7].toUpperCase().trim();
        // ... and is also used (untrimmed) as the description.
        final String description = tokens[7].toUpperCase();

        // The gene set may already exist (e.g. from another input); if so,
        // start from its genes and add the genes of this row to them.
        ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
        if (genesets.containsKey(name))
            builder.addAll(genesets.get(name).getGenes());

        String[] gene_tokens = tokens[8].split("\\|");
        for (int j = 0; j < gene_tokens.length; j++) {
            String gene = gene_tokens[j].toUpperCase();
            // Known gene: reuse its hash. Unknown non-empty gene: register it first.
            if (map.containsGene(gene)) {
                builder.add(map.getHashFromGene(gene));
            } else if (!gene.isEmpty()) {
                Integer hash = map.addGene(gene).get();
                builder.add(hash);
            }
        }

        // Finished parsing that gene set; store (or replace) it by name.
        GeneSet gs = new GeneSet(name, description, builder.build());
        genesets.put(name, gs);

        // Empty numeric cells keep their defaults (p-value 1.0, size 0, q-value 1.0).
        // The 2nd column is the nominal p-value.
        if (!tokens[1].isEmpty())
            pvalue = Double.parseDouble(tokens[1]);
        // The 4th column (x) is the size of the gene set restricted to the gene list.
        if (!tokens[3].isEmpty())
            gs_size = Integer.parseInt(tokens[3]);
        // The 3rd column is the corrected p-value.
        if (!tokens[2].isEmpty())
            FDRqvalue = Double.parseDouble(tokens[2]);

        GenericResult result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue);

        // NOTE(review): the value passed here is a whole-number percentage
        // (0..100); confirm this is the scale TaskMonitor.setProgress expects.
        int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
        taskMonitor.setProgress(percentComplete);
        currentProgress++;

        // The same gene set can appear more than once (ticket #149); keep the
        // result with the lower p-value.
        GenericResult temp = (GenericResult) results.get(name);
        if (temp == null || result.getPvalue() < temp.getPvalue())
            results.put(name, result);
    }

    // Corrected p-values are always read from this format, so FDR is flagged as available.
    dataset.getMap().getParams().setFDR(true);
}
use of org.baderlab.csplugins.enrichmentmap.model.GeneSet in project EnrichmentMapApp by BaderLab.
the class ParseDavidEnrichmentResults method parseLines.
/**
 * Parse david enrichment results file.
 *
 * The first line must be the 13-column, tab-separated header; every following
 * non-blank line is one result row. For each row the method builds a gene set
 * from column 6, merges it with any gene set already stored under the same
 * name, and records a {@code GenericResult}; when a result with the same name
 * is already present, the one with the lower p-value is kept.
 *
 * @param lines       the file content, one entry per line
 * @param dataset     the dataset whose gene sets, genes and enrichment results are updated
 * @param taskMonitor progress sink; a {@code NullTaskMonitor} is substituted when null
 * @throws IllegalThreadStateException if the file is empty, the header does not
 *         have 13 columns, or a data row has fewer than 12 columns
 * @throws NumberFormatException if a non-empty p-value, count or Benjamini
 *         column is not a valid number
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
    if (taskMonitor == null)
        taskMonitor = new NullTaskMonitor();
    taskMonitor.setTitle("Parsing David Enrichment Result file");

    // With David results there are no gene sets defined up front, so they are
    // built from the result rows themselves.
    // Columns of a David file:
    //   Category  Term  Count  %  PValue  Genes  List Total  Pop Hits  Pop Total
    //   Fold Enrichment  Bonferroni  Benjamini  FDR
    // Count      - number of genes from the input list mapping to a specific term
    // List Total - number of genes in the gene list mapping to the category (e.g. GO Cellular Component)
    // Pop Hits   - number of genes in the background gene list mapping to a specific term
    // Pop Total  - number of genes in the background gene list mapping to the category
    // Column 2 is the gene set name.
    // Column 1 is the category (used for the description).
    // Column 6 is the list of genes (from the loaded list) in this gene set -- therefore pre-filtered.
    Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
    EnrichmentMap map = dataset.getMap();
    Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();

    int currentProgress = 0;
    int maxValue = lines.size();
    taskMonitor.setStatusMessage("Parsing Generic Results file - " + maxValue + " rows");

    // The first line holds the field names; it must exist and have 13 columns.
    // Checking for an empty list first reports it as missing data rather than
    // failing with an index error.
    if (lines.isEmpty() || lines.get(0).split("\t").length != 13)
        throw new IllegalThreadStateException("David results file is missing data.");

    for (int i = 1; i < lines.size(); i++) {
        String line = lines.get(i);

        // Blank lines (e.g. at the end of the file) carry no result.
        if (line.trim().isEmpty()) {
            currentProgress++;
            continue;
        }

        // A negative limit keeps trailing empty columns, so rows whose last
        // cells are empty still expose column 12 for the emptiness check below.
        String[] tokens = line.split("\t", -1);
        if (tokens.length < 12)
            throw new IllegalThreadStateException("David results file is missing data (row " + (i + 1)
                    + " has " + tokens.length + " columns, expected 13).");

        double pvalue = 1.0;
        double FDRqvalue = 1.0;
        int gs_size = 0;

        // The second column of the file is the name of the gene set.
        final String name = tokens[1].toUpperCase().trim();
        // The first column of the file is the description.
        final String description = tokens[0].toUpperCase();

        // The gene set may already exist (e.g. from another input); if so,
        // start from its genes and add the genes of this row to them.
        ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
        if (genesets.containsKey(name))
            builder.addAll(genesets.get(name).getGenes());

        String[] gene_tokens = tokens[5].split(", ");
        for (int j = 0; j < gene_tokens.length; j++) {
            String gene = gene_tokens[j].toUpperCase();
            // Known gene: reuse its hash. Unknown non-empty gene: register it first.
            if (map.containsGene(gene)) {
                builder.add(map.getHashFromGene(gene));
            } else if (!gene.isEmpty()) {
                Integer hash = map.addGene(gene).get();
                builder.add(hash);
            }
        }

        // Finished parsing that gene set; store (or replace) it by name.
        GeneSet gs = new GeneSet(name, description, builder.build());
        genesets.put(name, gs);

        // Empty numeric cells keep their defaults (p-value 1.0, size 0, q-value 1.0).
        // The 5th column is the nominal p-value.
        if (!tokens[4].isEmpty())
            pvalue = Double.parseDouble(tokens[4]);
        // The Count (3rd column) is the size of the gene set restricted to the gene list.
        if (!tokens[2].isEmpty())
            gs_size = Integer.parseInt(tokens[2]);
        // The Benjamini value (12th column) is used for the FDR.
        if (!tokens[11].isEmpty())
            FDRqvalue = Double.parseDouble(tokens[11]);

        GenericResult result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue);

        // NOTE(review): the value passed here is a whole-number percentage
        // (0..100); confirm this is the scale TaskMonitor.setProgress expects.
        int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
        taskMonitor.setProgress(percentComplete);
        currentProgress++;

        // The same gene set can appear more than once (ticket #149); keep the
        // result with the lower p-value.
        GenericResult temp = (GenericResult) results.get(name);
        if (temp == null || result.getPvalue() < temp.getPvalue())
            results.put(name, result);
    }

    // Benjamini values are always read from this format, so FDR is flagged as available.
    dataset.getMap().getParams().setFDR(true);
}
Aggregations