Use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
From the class CreateEnrichmentMapTaskFactory, method createTaskIterator:
@Override
public TaskIterator createTaskIterator() {
    TaskIterator taskIterator = new TaskIterator();
    if (dataSets.isEmpty())
        return taskIterator;

    taskIterator.append(new TitleTask("Building EnrichmentMap"));

    EnrichmentMap map = new EnrichmentMap(params, serviceRegistrar);

    // Queue one group of loader tasks per data set.
    for (DataSetParameters dataSetParameters : dataSets) {
        String dataSetName = dataSetParameters.getName();
        EMDataSet dataSet = map.createDataSet(dataSetName, dataSetParameters.getMethod(), dataSetParameters.getFiles());

        // Gene sets (GMT), when a file was supplied.
        if (!Strings.isNullOrEmpty(dataSet.getSetOfGeneSets().getFilename()))
            taskIterator.append(new GMTFileReaderTask(dataSet));

        // Enrichment results (reader chosen by file inspection).
        taskIterator.append(new DetermineEnrichmentResultFileReader(dataSet).getParsers());

        // Expression data; without a real file a dummy matrix is created so the
        // expression viewer (heat map) can still be used with this data set.
        if (Strings.isNullOrEmpty(dataSet.getDataSetFiles().getExpressionFileName()))
            taskIterator.append(new CreateDummyExpressionTask(dataSet));
        else
            taskIterator.append(new ExpressionFileReaderTask(dataSet));

        // Ranks, when present. GSEA data sets use the dedicated GSEA ranking name.
        String ranksName = dataSet.getMethod() == Method.GSEA ? Ranking.GSEARanking : dataSetName;
        if (dataSet.getExpressionSets().getRanksByName(ranksName) != null) {
            String rankFile = dataSet.getExpressionSets().getRanksByName(ranksName).getFilename();
            taskIterator.append(new RanksFileReaderTask(rankFile, dataSet, ranksName, false));
        }

        // Class (phenotype) file, when present.
        if (!Strings.isNullOrEmpty(dataSet.getDataSetFiles().getClassFile()))
            taskIterator.append(new ClassFileReaderTask(dataSet));
    }

    // First drop gene sets that fail the p-value/q-value thresholds, then trim
    // the remaining gene sets of interest down to genes from the expression data.
    InitializeGenesetsOfInterestTask initGenesetsTask = new InitializeGenesetsOfInterestTask(map);
    // initGenesetsTask.setThrowIfMissing(false); // TEMPORARY
    taskIterator.append(initGenesetsTask);
    taskIterator.append(new FilterGenesetsByDatasetGenes(map));

    // A Baton "pipe" hands the computed similarities to the network-creation task.
    Baton<Map<SimilarityKey, GenesetSimilarity>> baton = new Baton<>();
    taskIterator.append(new ComputeSimilarityTaskParallel(map, baton.consumer()));
    taskIterator.append(createEMNetworkTaskFactory.create(map, baton.supplier()));

    // Style and layout only make sense when a UI is available.
    if (!headless)
        taskIterator.append(createEMViewTaskFactory.create(map));

    return taskIterator;
}
Use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
From the class PostAnalysisTaskTest, method test_4_WidthFunction:
@Test
public void test_4_WidthFunction(@Continuous VisualMappingFunctionFactory cmFactory, EnrichmentMapManager emManager, Provider<WidthFunction> widthFunctionProvider) {
    // Stub a network manager that only knows about the test network.
    CyNetworkManager netManager = mock(CyNetworkManager.class);
    when(netManager.getNetworkSet()).thenReturn(Collections.singleton(emNetwork));
    mockContinuousMappingFactory(cmFactory);

    // Locate the two signature edges whose widths are checked below.
    EdgeSimilarities edgeSimilarities = TestUtils.getEdgeSimilarities(emNetwork);
    CyEdge firstSigEdge = edgeSimilarities.getEdge("PA_TOP8_MIDDLE8_BOTTOM8 (sig_Dataset 1) TOP8_PLUS100");
    CyEdge secondSigEdge = edgeSimilarities.getEdge("PA_TOP8_MIDDLE8_BOTTOM8 (sig_Dataset 1) TOP1_PLUS100");

    // The enrichment map for the test network must have been registered.
    assertNotNull(emManager.getEnrichmentMap(emNetwork.getSUID()));

    // Apply the width formula, then verify the value written for each edge.
    WidthFunction widthFunction = widthFunctionProvider.get();
    widthFunction.setEdgeWidths(emNetwork, "EM1_", null);
    String widthColumn = Columns.EDGE_WIDTH_FORMULA_COLUMN.with("EM1_", null);

    double firstWidth = emNetwork.getRow(firstSigEdge).get(widthColumn, Double.class);
    assertEquals(8.0, firstWidth, 0.0);
    double secondWidth = emNetwork.getRow(secondSigEdge).get(widthColumn, Double.class);
    assertEquals(1.0, secondWidth, 0.0);
}
Use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
From the class LoadGMTFileOnlyTest, method testGMTOnly:
@Test
public void testGMTOnly() throws Exception {
//A data set requires gene sets, enrichment results and (optionally) an expression
//file; here only the GMT file is supplied, so a dummy expression matrix is built.
String testGmtFileName = "src/test/resources/org/baderlab/csplugins/enrichmentmap/task/genesets_subset.gmt";
DataSetFiles files = new DataSetFiles();
files.setGMTFileName(testGmtFileName);
//create a new enrichment map with wide-open p/q thresholds and a 0.5 Jaccard cutoff
double similarityCutoff = 0.5;
double pvalue = 1.0;
double qvalue = 1.0;
EMCreationParameters params = new EMCreationParameters("EM1_", pvalue, qvalue, NESFilter.ALL, Optional.empty(), SimilarityMetric.JACCARD, similarityCutoff, 0.5);
EnrichmentMap em = new EnrichmentMap(params, serviceRegistrar);
//create the single data set (GSEA method) backed by the GMT file above
EMDataSet dataset = em.createDataSet(LegacySupport.DATASET1, Method.GSEA, files);
//load the gene sets from the GMT file
GMTFileReaderTask task = new GMTFileReaderTask(dataset);
task.run(taskMonitor);
dataset.setGeneSetsOfInterest(dataset.getSetOfGeneSets());
//generate the dummy expression matrix for the loaded genes
CreateDummyExpressionTask dummyExpressionTask = new CreateDummyExpressionTask(dataset);
dummyExpressionTask.run(taskMonitor);
em.filterGenesets();
InitializeGenesetsOfInterestTask genesets_init = new InitializeGenesetsOfInterestTask(em);
genesets_init.run(taskMonitor);
//compute pairwise gene-set similarities; the Baton carries them to the consumer
Baton<Map<SimilarityKey, GenesetSimilarity>> baton = new Baton<>();
ComputeSimilarityTaskParallel similarities = new ComputeSimilarityTaskParallel(em, baton.consumer());
similarities.run(taskMonitor);
//check to see if the dataset loaded - there should be 36 genesets
assertEquals(36, dataset.getSetOfGeneSets().getGeneSets().size());
//Of the (36 * 35)/2 possible gene-set pairs, 18 similarities are kept -
//presumably those passing the 0.5 similarity cutoff; TODO confirm the filter
assertEquals(18, baton.supplier().get().size());
//there should be 523 genes
assertEquals(523, em.getNumberOfGenes());
assertEquals(523, dataset.getExpressionSets().getNumGenes());
assertEquals(523, dataset.getDataSetGenes().size());
//dummy expression matrix has 3 columns (conditions)
assertEquals(3, dataset.getExpressionSets().getNumConditions());
}
Use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
From the class LegacySessionLoader, method loadSession:
/**
 * Restore Enrichment maps from a legacy session.
 *
 * Runs several passes over this app's session file list: the .props files are
 * parsed first to rebuild the EnrichmentMap objects, then the remaining data
 * files (gmt, genes, enrichments, ranks) are re-read into those maps, then
 * expression files are parsed, and finally each map is initialized and
 * registered with the manager.
 *
 * @param session the session being restored; the files to load come from its
 *                app file list entry for this app
 */
@SuppressWarnings("unchecked")
public void loadSession(CySession session) {
//both maps are keyed by the SUID of the network each EnrichmentMap belongs to
Map<Long, EnrichmentMapParameters> paramsMap = new HashMap<>();
Map<Long, EnrichmentMap> enrichmentMapMap = new HashMap<>();
List<File> fileList = session.getAppFileListMap().get(CyActivator.APP_NAME);
try {
//go through the prop files first to create the correct objects to be able to add other files to.
for (File prop_file : fileList) {
if (prop_file.getName().contains(".props")) {
// NOTE(review): the Scanner/stream is never closed - minor resource leak.
// Scanner with delimiter \A slurps the whole file as a single token.
InputStream reader = streamUtil.getInputStream(prop_file.getAbsolutePath());
String fullText = new Scanner(reader, "UTF-8").useDelimiter("\\A").next();
//Given the file with all the parameters create a new parameter
EnrichmentMapParameters params = enrichmentMapParametersFactory.create(fullText);
EnrichmentMap em = new EnrichmentMap(params.getCreationParameters(), serviceRegistrar);
//get the network name
String param_name = em.getName();
//TODO:distinguish between GSEA and EM saved sessions
String props_name = (prop_file.getName().split("\\."))[0];
String networkName = param_name;
//related to bug ticket #49 - if the two names disagree, trust the file name
if (!props_name.equalsIgnoreCase(param_name))
networkName = props_name;
//after associated the properties with the network
//initialized each Dataset that we have files for
HashMap<String, DataSetFiles> files = params.getFiles();
for (Iterator<String> j = params.getFiles().keySet().iterator(); j.hasNext(); ) {
String current_dataset = j.next();
Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
em.createDataSet(current_dataset, method, files.get(current_dataset));
}
// NOTE(review): getNetworkByName may return null, in which case
// network.getSUID() below throws NPE - confirm a props file always has a
// matching network in the session.
CyNetwork network = getNetworkByName(networkName);
Long suid = network.getSUID();
em.setNetworkID(suid);
paramsMap.put(suid, params);
enrichmentMapMap.put(suid, em);
}
}
// go through the rest of the files
for (File propFile : fileList) {
FileNameParts parts = ParseFileName(propFile);
if (parts == null || propFile.getName().contains(".props"))
continue;
CyNetwork net = getNetworkByName(parts.name);
EnrichmentMap em = net == null ? null : enrichmentMapMap.get(net.getSUID());
// NOTE(review): net is null-guarded for em above but dereferenced on the
// next line - a file naming an unknown network would NPE here before the
// em == null branch below can report it.
EnrichmentMapParameters params = paramsMap.get(net.getSUID());
Method method = EnrichmentMapParameters.stringToMethod(params.getMethod());
if (em == null) {
System.out.println("network for file" + propFile.getName() + " does not exist.");
} else if ((!propFile.getName().contains(".props")) && (!propFile.getName().contains(".expression1.txt")) && (!propFile.getName().contains(".expression2.txt"))) {
HashMap<String, String> props = params.getProps();
//if this a dataset specific file make sure there is a dataset object for it
if (!(parts.dataset == null) && em.getDataSet(parts.dataset) == null && !parts.dataset.equalsIgnoreCase("signature"))
em.createDataSet(parts.dataset, method, params.getFiles().get(parts.dataset));
if (parts.type == null)
System.out.println("Sorry, unable to determine the type of the file: " + propFile.getName());
//read the file
// NOTE(review): Scanner.next() throws NoSuchElementException on an empty
// file rather than returning null, so the null/empty check below is
// effectively dead code - verify empty session files cannot occur.
InputStream reader = streamUtil.getInputStream(propFile.getAbsolutePath());
String fullText = new Scanner(reader, "UTF-8").useDelimiter("\\A").next();
//if the file is empty then skip it
if (fullText == null || fullText.equalsIgnoreCase(""))
continue;
//gene set (GMT) files: signature, legacy "set2", or regular data set
if (propFile.getName().contains(".gmt")) {
HashMap<String, GeneSet> gsMap = (HashMap<String, GeneSet>) params.repopulateHashmap(fullText, 1);
if (propFile.getName().contains(".signature.gmt")) {
// TODO Find a better way to serialize EMSignatureDataSet
String sdsName = propFile.getName().replace(".signature.gmt", "");
sdsName = NamingUtil.getUniqueName(sdsName, em.getSignatureDataSets().keySet());
EMSignatureDataSet sigDataSet = new EMSignatureDataSet(sdsName);
em.addSignatureDataSet(sigDataSet);
SetOfGeneSets sigGeneSets = sigDataSet.getGeneSetsOfInterest();
gsMap.forEach((k, v) -> sigGeneSets.addGeneSet(k, v));
} else if (propFile.getName().contains(".set2.gmt")) {
// account for legacy session files
// NOTE(review): the SetOfGeneSets built here is never attached to the
// map - the .set2.gmt contents appear to be discarded; confirm intended.
if (em.getAllGeneSets().containsKey(LegacySupport.DATASET2)) {
SetOfGeneSets gs = new SetOfGeneSets(LegacySupport.DATASET2, props);
gs.setGeneSets(gsMap);
}
} else {
SetOfGeneSets gs = new SetOfGeneSets(parts.dataset, props);
gs.setGeneSets(gsMap);
em.getDataSets().get(parts.dataset).setSetOfGeneSets(gs);
}
}
//gene-name -> hash-key table
if (propFile.getName().contains(".genes.txt")) {
HashMap<String, Integer> genes = params.repopulateHashmap(fullText, 2);
genes.forEach(em::addGene);
//ticket #188 - unable to open session files that have empty enrichment maps.
if (genes != null && !genes.isEmpty())
// Ticket #107 : restore also gene count (needed to determine the next free hash in case we do PostAnalysis with a restored session)
em.setNumberOfGenes(Math.max(em.getNumberOfGenes(), Collections.max(genes.values()) + 1));
}
//hash-key -> gene-name table
if (propFile.getName().contains(".hashkey2genes.txt")) {
HashMap<Integer, String> hashkey2gene = params.repopulateHashmap(fullText, 5);
//ticket #188 - unable to open session files that have empty enrichment maps.
if (hashkey2gene != null && !hashkey2gene.isEmpty())
// Ticket #107 : restore also gene count (needed to determine the next free hash in case we do PostAnalysis with a restored session)
em.setNumberOfGenes(Math.max(em.getNumberOfGenes(), Collections.max(hashkey2gene.keySet()) + 1));
}
//enrichment result files (full or subset)
if ((parts.type != null && (parts.type.equalsIgnoreCase("ENR") || (parts.type.equalsIgnoreCase("SubENR")))) || propFile.getName().contains(".ENR1.txt") || propFile.getName().contains(".SubENR1.txt")) {
SetOfEnrichmentResults enrichments;
// NOTE(review): temp is never used.
int temp = 1;
//check to see if this dataset has enrichment results already
if (parts.dataset != null && em.getDataSet(parts.dataset).getEnrichments() != null) {
enrichments = em.getDataSet(parts.dataset).getEnrichments();
} else if (parts.dataset == null) {
enrichments = em.getDataSet(LegacySupport.DATASET1).getEnrichments();
/*enrichments = new SetOfEnrichmentResults(EnrichmentMap.DATASET1,props);
em.getDataset(EnrichmentMap.DATASET1).setEnrichments(enrichments);*/
} else {
enrichments = new SetOfEnrichmentResults(parts.dataset, props);
em.getDataSet(parts.dataset).setEnrichments(enrichments);
}
// NOTE(review): parts.type may be null here when this branch was entered
// via the ".ENR1.txt"/".SubENR1.txt" file-name disjunct above -
// equalsIgnoreCase would then NPE; confirm those files always carry a type.
if (parts.type.equalsIgnoreCase("ENR") || propFile.getName().contains(".ENR1.txt")) {
if (params.getMethod().equalsIgnoreCase(EnrichmentMapParameters.method_GSEA))
enrichments.setEnrichments(params.repopulateHashmap(fullText, 3));
else
enrichments.setEnrichments(params.repopulateHashmap(fullText, 4));
}
}
//it would only happen for sessions saved with version 0.8
if (propFile.getName().contains(".RANKS1.txt") || propFile.getName().contains(".RANKS1Genes.txt")) {
Ranking new_ranking;
//Check to see if there is already GSEARanking
if (em.getDataSet(LegacySupport.DATASET1).getExpressionSets().getAllRanksNames().contains(Ranking.GSEARanking)) {
new_ranking = em.getDataSet(LegacySupport.DATASET1).getExpressionSets().getRanksByName(Ranking.GSEARanking);
} else {
new_ranking = new Ranking();
em.getDataSet(LegacySupport.DATASET1).getExpressionSets().addRanks(Ranking.GSEARanking, new_ranking);
}
if (propFile.getName().contains(".RANKS1.txt")) {
Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 7);
ranks.forEach(new_ranking::addRank);
}
// if(prop_file.getName().contains(".RANKS1Genes.txt"))
// new_ranking.setRank2gene(em.getParams().repopulateHashmap(fullText,7));
// if(prop_file.getName().contains(".RANKS1.txt"))
// new_ranking.setRanking(em.getParams().repopulateHashmap(fullText,6));
}
//generic rank files (named rankings)
if (propFile.getName().contains(".RANKS.txt")) {
if (parts.ranks_name == null) {
//we need to get the name of this set of rankings
// network_name.ranking_name.ranks.txt --> split by "." and get 2
String[] file_name_tokens = (propFile.getName()).split("\\.");
if ((file_name_tokens.length == 4) && (file_name_tokens[1].equals("Dataset 1 Ranking") || file_name_tokens[1].equals("Dataset 2 Ranking")) || (propFile.getName().contains(Ranking.GSEARanking)))
parts.ranks_name = Ranking.GSEARanking;
else //this is an extra rank file for backwards compatability. Ignore it.
if ((file_name_tokens.length == 4) && (file_name_tokens[1].equals("Dataset 1") || file_name_tokens[1].equals("Dataset 2")) && file_name_tokens[2].equals("RANKS"))
continue;
else
//file name is not structured properly --> default to file name
parts.ranks_name = propFile.getName();
}
Ranking new_ranking = new Ranking();
Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 6);
ranks.forEach(new_ranking::addRank);
if (parts.dataset != null)
em.getDataSet(parts.dataset).getExpressionSets().addRanks(parts.ranks_name, new_ranking);
else
em.getDataSet(LegacySupport.DATASET1).getExpressionSets().addRanks(parts.ranks_name, new_ranking);
}
//Deal with legacy issues
if (params.isTwoDatasets()) {
//make sure there is a Dataset2
if (!em.getDataSets().containsKey(LegacySupport.DATASET2))
em.createDataSet(LegacySupport.DATASET2, method, new DataSetFiles());
if (propFile.getName().contains(".ENR2.txt") || propFile.getName().contains(".SubENR2.txt")) {
SetOfEnrichmentResults enrichments;
//check to see if this dataset has enrichment results already
if (em.getDataSet(LegacySupport.DATASET2).getEnrichments() != null) {
enrichments = em.getDataSet(LegacySupport.DATASET2).getEnrichments();
} else {
enrichments = new SetOfEnrichmentResults(LegacySupport.DATASET2, props);
em.getDataSet(LegacySupport.DATASET2).setEnrichments(enrichments);
}
if (propFile.getName().contains(".ENR2.txt")) {
if (params.getMethod().equalsIgnoreCase(EnrichmentMapParameters.method_GSEA))
enrichments.setEnrichments(params.repopulateHashmap(fullText, 3));
else
enrichments.setEnrichments(params.repopulateHashmap(fullText, 4));
}
}
//it would only happen for sessions saved with version 0.8
if (propFile.getName().contains(".RANKS2.txt") || propFile.getName().contains(".RANKS2Genes.txt")) {
Ranking new_ranking;
// Check to see if there is already GSEARanking
if (em.getDataSet(LegacySupport.DATASET2).getExpressionSets().getAllRanksNames().contains(Ranking.GSEARanking)) {
new_ranking = em.getDataSet(LegacySupport.DATASET2).getExpressionSets().getRanksByName(Ranking.GSEARanking);
} else {
new_ranking = new Ranking();
em.getDataSet(LegacySupport.DATASET2).getExpressionSets().addRanks(Ranking.GSEARanking, new_ranking);
}
if (propFile.getName().contains(".RANKS2.txt")) {
Map<Integer, Rank> ranks = (Map<Integer, Rank>) params.repopulateHashmap(fullText, 6);
ranks.forEach(new_ranking::addRank);
}
}
}
}
}
//third pass: expression files (skipped by the loop above)
for (int i = 0; i < fileList.size(); i++) {
File prop_file = fileList.get(i);
FileNameParts parts_exp = ParseFileName(prop_file);
//unrecognized file
if ((parts_exp == null) || (parts_exp.name == null))
continue;
CyNetwork net = getNetworkByName(parts_exp.name);
EnrichmentMap map = net == null ? null : enrichmentMapMap.get(net.getSUID());
// NOTE(review): as in the loop above, net.getSUID() and the later map
// dereferences will NPE when no network matched parts_exp.name.
EnrichmentMapParameters params = paramsMap.get(net.getSUID());
Map<String, String> props = params.getProps();
if (parts_exp.type != null && parts_exp.type.equalsIgnoreCase("expression")) {
if (map.getDataSets().containsKey(parts_exp.dataset)) {
EMDataSet ds = map.getDataSet(parts_exp.dataset);
ds.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
ds.getExpressionSets().setFilename(prop_file.getAbsolutePath());
ExpressionFileReaderTask expressionFile1 = new ExpressionFileReaderTask(ds);
GeneExpressionMatrix matrix = expressionFile1.parse();
matrix.restoreProps(parts_exp.dataset, props);
}
}
//Deal with legacy session files.
if (prop_file.getName().contains("expression1.txt")) {
EMDataSet ds1 = map.getDataSet(LegacySupport.DATASET1);
ds1.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
ds1.getExpressionSets().setFilename(prop_file.getAbsolutePath());
ExpressionFileReaderTask expressionFile1 = new ExpressionFileReaderTask(ds1);
expressionFile1.parse();
}
if (prop_file.getName().contains("expression2.txt")) {
EMDataSet ds2 = map.getDataSet(LegacySupport.DATASET2);
ds2.getDataSetFiles().setExpressionFileName(prop_file.getAbsolutePath());
ds2.getExpressionSets().setFilename(prop_file.getAbsolutePath());
ExpressionFileReaderTask expressionFile2 = new ExpressionFileReaderTask(ds2);
expressionFile2.parse();
//are dealing with two distinct expression files.
if (map.getDataSet(LegacySupport.DATASET2) != null && map.getDataSet(LegacySupport.DATASET2).getGeneSetsOfInterest() != null && !map.getDataSet(LegacySupport.DATASET2).getGeneSetsOfInterest().getGeneSets().isEmpty()) {
map.setDistinctExpressionSets(true);
map.getDataSet(LegacySupport.DATASET1).setDataSetGenes(new HashSet<Integer>((Set<Integer>) map.getDataSet(LegacySupport.DATASET1).getExpressionSets().getGeneIds()));
map.getDataSet(LegacySupport.DATASET2).setDataSetGenes(new HashSet<Integer>((Set<Integer>) map.getDataSet(LegacySupport.DATASET2).getExpressionSets().getGeneIds()));
}
}
}
//fourth pass: initialize every restored map
for (Iterator<Long> j = enrichmentMapMap.keySet().iterator(); j.hasNext(); ) {
Long id = j.next();
EnrichmentMap map = enrichmentMapMap.get(id);
//only initialize objects if there is a map for this network
if (map != null) {
if (map.getDataSets().size() > 1) {
Set<Integer> dataset1_genes = map.getDataSets().get(LegacySupport.DATASET1).getDataSetGenes();
Set<Integer> dataset2_genes = map.getDataSets().get(LegacySupport.DATASET2).getDataSetGenes();
if (!dataset1_genes.equals(dataset2_genes))
map.setDistinctExpressionSets(true);
}
//initialize the Genesets (makes sure the leading edge is set correctly)
//Initialize the set of genesets and GSEA results that we want to compute over
InitializeGenesetsOfInterestTask genesets_init = new InitializeGenesetsOfInterestTask(map);
// MKTODO really?
genesets_init.setThrowIfMissing(false);
genesets_init.initializeSets(null);
// //for each map compute the similarity matrix, (easier than storing it) compute the geneset similarities
// ComputeSimilarityTask similarities = new ComputeSimilarityTask(map, ComputeSimilarityTask.ENRICHMENT);
// Map<String, GenesetSimilarity> similarity_results = similarities.computeGenesetSimilarities(null);
// map.setGenesetSimilarity(similarity_results);
//
// // also compute geneset similarities between Enrichment- and Signature Genesets (if any)
// if (! map.getSignatureGenesets().isEmpty()){
// ComputeSimilarityTask sigSimilarities = new ComputeSimilarityTask(map, ComputeSimilarityTask.SIGNATURE);
// Map<String, GenesetSimilarity> sig_similarity_results = sigSimilarities.computeGenesetSimilarities(null);
// map.getGenesetSimilarity().putAll(sig_similarity_results);
// }
}
//end of if(map != null)
}
//final pass: register each map and make the last one current
for (Iterator<Long> j = enrichmentMapMap.keySet().iterator(); j.hasNext(); ) {
Long id = j.next();
CyNetwork currentNetwork = cyNetworkManager.getNetwork(id);
EnrichmentMap map = enrichmentMapMap.get(id);
map.setLegacy(true);
emManager.registerEnrichmentMap(map);
if (!j.hasNext()) {
//set the last network to be the one viewed and initialize the parameters panel
cyApplicationManager.setCurrentNetwork(currentNetwork);
}
}
} catch (Exception ee) {
// NOTE(review): blanket catch-and-print - a single bad file aborts the
// whole restore silently (console only); consider narrower handling.
ee.printStackTrace();
}
}
Use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
From the class ParseGREATEnrichmentResults, method parseLines:
/**
 * Parses the lines of a GREAT export file into gene sets and generic
 * enrichment results on the given data set.
 *
 * Two layouts are handled: 24 columns (binomial + hypergeometric results) and
 * 20 columns (a background set was used; hypergeometric only). Which p-value
 * and FDR are kept is controlled by the user's GreatFilter setting.
 */
@Override
public void parseLines(List<String> lines, EMDataSet dataset, TaskMonitor taskMonitor) {
if (taskMonitor == null)
taskMonitor = new NullTaskMonitor();
taskMonitor.setTitle("Parsing Enrichment Result file");
//true when the 20-column (background set) layout is detected below
boolean hasBackground = false;
EMCreationParameters params = dataset.getMap().getParams();
//Get the type of filter user specified on the GREAT results
//If it is hyper use column 14 Hypergeometric p-value and 16 FDR for hyper
//If it is binom use column 5 bionomial p-value and 7 FDR for binom
//If they specify both use the highest p-value and q-value from the above columns
GreatFilter filterType = dataset.getMap().getParams().getGreatFilter();
Map<String, GeneSet> genesets = dataset.getSetOfGeneSets().getGeneSets();
EnrichmentMap map = dataset.getMap();
Map<String, EnrichmentResult> results = dataset.getEnrichments().getEnrichments();
int currentProgress = 0;
int maxValue = lines.size();
taskMonitor.setStatusMessage("Parsing Great Results file - " + maxValue + " rows");
//for great files there is an FDR
dataset.getMap().getParams().setFDR(true);
//skip the header rows: scan forward until the column-header line is found,
//and use its width to decide which of the two layouts this file uses
// NOTE(review): lines.get(k) below throws IndexOutOfBoundsException when
// lines is empty - confirm callers never pass an empty file.
int k = 0;
String line = lines.get(k);
String[] tokens = line.split("\t");
for (; k < lines.size(); k++) {
line = lines.get(k);
tokens = line.split("\t");
int length = tokens.length;
if ((length == 24) && tokens[3].equalsIgnoreCase("BinomRank")) {
break;
} else //There is not binom rank and no binomial data.
if ((length == 20) && tokens[3].equalsIgnoreCase("Rank")) {
hasBackground = true;
break;
}
}
//go through the data rows that follow the header
for (int i = k + 1; i < lines.size(); i++) {
line = lines.get(i);
tokens = line.split("\t");
//there are extra lines at the end of the file that should be ignored.
if (!hasBackground && tokens.length != 24)
continue;
if (hasBackground && tokens.length != 20)
continue;
double pvalue = 1.0;
double FDRqvalue = 1.0;
GenericResult result;
int gs_size = 0;
// NOTE(review): NES is never used in this method.
double NES = 1.0;
//details of export file
//http://bejerano.stanford.edu/help/display/GREAT/Export
//geneset name: ontology name + term name (tokens are 0-based; these are
//columns 2 and 3 of the file)
final String name = tokens[1].trim() + "-" + tokens[2].trim();
//the term name alone serves as the description
final String description = tokens[2].trim();
//when there are two different species it is possible that the gene set could
//already exist in the set of genesets. if it does exist then add the genes
//in this set to the geneset
ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
if (genesets.containsKey(name))
builder = builder.addAll(genesets.get(name).getGenes());
//member genes: tokens[23] (24-col layout) or tokens[18] (20-col layout)
String[] gene_tokens;
if (!hasBackground)
gene_tokens = tokens[23].split(",");
else
gene_tokens = tokens[18].split(",");
//resolve each gene name to its integer hash, adding new genes to the map
for (int j = 0; j < gene_tokens.length; j++) {
String gene = gene_tokens[j].toUpperCase();
//if it is already in the hash then get its associated key and put it into the set of genes
if (map.containsGene(gene)) {
builder.add(map.getHashFromGene(gene));
} else if (!gene.isEmpty()) {
// NOTE(review): Optional.get() without a presence check - assumes
// addGene always succeeds for a non-empty name; confirm.
Integer hash = map.addGene(gene).get();
builder.add(hash);
}
}
//finished parsing that geneset
//add the current geneset to the hashmap of genesets
GeneSet gs = new GeneSet(name, description, builder.build());
genesets.put(name, gs);
//There are two tests run by GREAT, the binomial on regions and the hypergeometric based on genes
//The first pass of results shows only those that are significant both
//The user can then choose to use either or both together
//
//If it is hyper use column 14 Hypergeometric p-value and 16 FDR for hyper
//If it is binom use column 5 bionomial p-value and 7 FDR for binom
//If they specify both use the highest p-value and q-value from the above columns
double hyper_pvalue = 1;
double hyper_fdr = 1;
double binom_pvalue = 1;
double binom_fdr = 1;
if (!hasBackground) {
//24-col layout: binomial in tokens[4]/[6], hypergeometric in tokens[13]/[15]
if (!tokens[4].equalsIgnoreCase(""))
binom_pvalue = Double.parseDouble(tokens[4]);
if (!tokens[6].equalsIgnoreCase(""))
binom_fdr = Double.parseDouble(tokens[6]);
if (!tokens[13].equalsIgnoreCase(""))
hyper_pvalue = Double.parseDouble(tokens[13]);
if (!tokens[15].equalsIgnoreCase(""))
hyper_fdr = Double.parseDouble(tokens[15]);
} else {
//20-col layout: only the hypergeometric test is present
if (!tokens[4].equalsIgnoreCase(""))
hyper_pvalue = Double.parseDouble(tokens[4]);
if (!tokens[6].equalsIgnoreCase(""))
hyper_fdr = Double.parseDouble(tokens[6]);
}
//apply the user's filter: worst of both for BOTH, best of both for EITHER
if (filterType == GreatFilter.HYPER) {
pvalue = hyper_pvalue;
FDRqvalue = hyper_fdr;
} else if (filterType == GreatFilter.BINOM) {
pvalue = binom_pvalue;
FDRqvalue = binom_fdr;
} else if (filterType == GreatFilter.BOTH) {
pvalue = Math.max(hyper_pvalue, binom_pvalue);
FDRqvalue = Math.max(hyper_fdr, binom_fdr);
} else if (filterType == GreatFilter.EITHER) {
pvalue = Math.min(hyper_pvalue, binom_pvalue);
FDRqvalue = Math.min(hyper_fdr, binom_fdr);
} else {
System.out.println("Invalid attribute setting for GREAT p-value specification");
}
//Keep track of minimum p-value to better calculate jslider
if (pvalue < params.getPvalueMin())
params.setPvalueMin(pvalue);
if (FDRqvalue < params.getQvalueMin())
params.setQvalueMin(FDRqvalue);
//gene set size: tokens[19] (24-col layout) or tokens[15] (20-col layout)
// NOTE(review): the original comment claimed "the 16th column"; tokens[19]
// is the 20th column in the no-background case - confirm against the
// GREAT export layout.
if ((!hasBackground) && (!tokens[19].equalsIgnoreCase("")))
gs_size = Integer.parseInt(tokens[19]);
else if ((hasBackground) && (!tokens[15].equalsIgnoreCase("")))
gs_size = Integer.parseInt(tokens[15]);
result = new GenericResult(name, description, pvalue, gs_size, FDRqvalue);
// Calculate Percentage. This must be a value between 0..100.
int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
taskMonitor.setProgress(percentComplete);
currentProgress++;
//check to see if the gene set has already been entered in the results
//it is possible that one geneset will be in both phenotypes.
//if it is already exists then we want to make sure the one retained is the result with the
//lower p-value.
//ticket #149
GenericResult temp = (GenericResult) results.get(name);
if (temp == null)
results.put(name, result);
else {
if (result.getPvalue() < temp.getPvalue())
results.put(name, result);
}
}
}
Aggregations