Use of org.baderlab.csplugins.enrichmentmap.model.EMDataSet in project EnrichmentMapApp by BaderLab.
Class FilterGenesetsByDatasetGenes, method filterGenesets.
/**
 * Restricts the gene-sets of interest of every data set to the genes of that
 * same data set, so each data set is filtered by its own genes.
 *
 * A data set whose gene collection is null or empty is not filtered; a notice
 * is printed to standard output instead. Once all data sets are processed the
 * map is flagged as having distinct expression sets when
 * {@code datasetsAreDistinct} reports true.
 *
 * @param taskMonitor monitor that receives one status message per data set
 * @throws IllegalThreadStateException if {@code anyGenesLeftAfterFiltering}
 *         returns false for the data sets of the map
 */
public void filterGenesets(TaskMonitor taskMonitor) {
    Map<String, EMDataSet> datasets = map.getDataSets();

    for (Map.Entry<String, EMDataSet> entry : datasets.entrySet()) {
        taskMonitor.setStatusMessage("Filtering Data Set: " + entry.getKey());
        EMDataSet dataSet = entry.getValue();

        Set<Integer> genes = dataSet.getDataSetGenes();
        if (genes == null || genes.isEmpty()) {
            // Nothing to filter by, leave the gene-sets of this data set untouched.
            System.out.println("Dataset Genes is empty, because expression and ranks not provided: " + dataSet.getName());
            continue;
        }
        dataSet.getGeneSetsOfInterest().filterGeneSets(genes);
    }

    // Abort when the check on the filtered data sets fails; per the error message
    // this means none of the expression-file genes occur in the GMT file.
    boolean genesRemain = anyGenesLeftAfterFiltering(datasets.values());
    if (!genesRemain) {
        throw new IllegalThreadStateException("No genes in the expression file are found in the GMT file ");
    }

    // With several data sets, record whether their gene collections differ.
    boolean distinct = datasetsAreDistinct(datasets.values());
    if (distinct) {
        map.setDistinctExpressionSets(true);
    }
}
Use of org.baderlab.csplugins.enrichmentmap.model.EMDataSet in project EnrichmentMapApp by BaderLab.
Class FilterGenesetsByDatasetGenes, method datasetsAreDistinct.
/**
 * Tells whether the given data sets carry more than one distinct gene
 * collection, compared with {@code Set} equality.
 *
 * Evaluation stops as soon as a second distinct gene collection is seen.
 *
 * @param datasets the data sets to compare
 * @return true if at least two data sets have unequal gene collections;
 *         false otherwise (including for zero or one data set)
 */
private static boolean datasetsAreDistinct(Collection<EMDataSet> datasets) {
    return datasets.stream()
            .map(EMDataSet::getDataSetGenes)
            .distinct()
            // two distinct values are enough to decide, no need to look further
            .limit(2)
            .count() > 1;
}
Use of org.baderlab.csplugins.enrichmentmap.model.EMDataSet in project EnrichmentMapApp by BaderLab.
Class InitializeGenesetsOfInterestTask, method initializeSets.
/**
 * Fills the gene-sets of interest of every data set with the gene sets whose
 * enrichment result passes the thresholds held in the map parameters.
 *
 * For each accepted gene set the size stored on its enrichment result is
 * updated to the number of genes in the gene set. When a minimum experiment
 * count is configured, gene sets accepted in fewer data sets than that count
 * are removed again from every data set at the end.
 *
 * @param tm monitor to report progress to; a {@code NullTaskMonitor} is used
 *           when this is null
 * @return false as soon as a data set without enrichment results is
 *         encountered (data sets handled before it keep their changes);
 *         true otherwise
 * @throws IllegalThreadStateException if an accepted gene set is missing from
 *         the available gene sets and {@code throwIfMissing} is set
 */
public boolean initializeSets(TaskMonitor tm) {
    TaskMonitor parentMonitor = (tm != null) ? tm : new NullTaskMonitor();
    DiscreteTaskMonitor progress = new DiscreteTaskMonitor(parentMonitor, map.getDataSetCount());

    Map<String, EMDataSet> datasets = map.getDataSets();

    // Per gene-set name: in how many data sets it was accepted.
    // Only tracked when a minimum experiment count is configured.
    Optional<Integer> minExperiments = map.getParams().getMinExperiments();
    Map<String, Integer> occurrences = minExperiments.isPresent() ? new HashMap<>() : null;

    for (Map.Entry<String, EMDataSet> datasetEntry : datasets.entrySet()) {
        progress.inc();
        String datasetName = datasetEntry.getKey();
        EMDataSet dataset = datasetEntry.getValue();

        // All three maps are keyed by gene-set name.
        Map<String, EnrichmentResult> enrichmentResults = dataset.getEnrichments().getEnrichments();
        Map<String, GeneSet> availableGenesets = dataset.getSetOfGeneSets().getGeneSets();
        Map<String, GeneSet> selectedGenesets = dataset.getGeneSetsOfInterest().getGeneSets();

        // A data set without its own gene sets falls back to the gene sets of the whole map.
        if (availableGenesets == null || availableGenesets.isEmpty()) {
            availableGenesets = map.getAllGeneSets();
        }

        // Without enrichment results there is nothing to select from: give up.
        if (enrichmentResults == null || enrichmentResults.isEmpty()) {
            return false;
        }

        for (Map.Entry<String, EnrichmentResult> resultEntry : enrichmentResults.entrySet()) {
            String genesetName = resultEntry.getKey();
            EnrichmentResult result = resultEntry.getValue();

            // GSEA results get their rank-at-max refreshed before the threshold test.
            if (dataset.getMethod() == Method.GSEA) {
                Ranking ranks = dataset.getExpressionSets().getRanksByName(datasetName);
                updateRankAtMax((GSEAResult) result, ranks);
            }

            if (!result.geneSetOfInterest(map.getParams())) {
                continue;
            }

            GeneSet geneset = availableGenesets.get(genesetName);
            if (geneset == null) {
                if (throwIfMissing) {
                    throw new IllegalThreadStateException("The Geneset: " + genesetName + " is not found in the GMT file.");
                }
                continue;
            }

            // Keep the size on the result in sync with the gene set actually used.
            result.setGsSize(geneset.getGenes().size());
            if (occurrences != null) {
                occurrences.merge(genesetName, 1, Integer::sum);
            }
            selectedGenesets.put(genesetName, geneset);
        }
    }

    // Drop gene sets that were accepted in too few data sets.
    if (occurrences != null) {
        int cutoff = minExperiments.get();
        for (EMDataSet dataset : datasets.values()) {
            dataset.getGeneSetsOfInterest().getGeneSets().keySet()
                    .removeIf(name -> occurrences.getOrDefault(name, 0) < cutoff);
        }
    }
    return true;
}
Use of org.baderlab.csplugins.enrichmentmap.model.EMDataSet in project EnrichmentMapApp by BaderLab.
Class CreateEMNetworkTask, method createEdges.
/**
 * Adds one undirected edge to the network for every gene-set similarity
 * delivered by the supplier and fills in the edge's table row.
 *
 * Similarities that do not pass the cutoff are expected to have been removed
 * before this method runs; no filtering happens here.
 *
 * @param network the network that receives the edges
 * @param nodes   lookup from gene-set name to the node created for it
 */
private void createEdges(CyNetwork network, Map<String, CyNode> nodes) {
    Map<SimilarityKey, GenesetSimilarity> similarities = supplier.get();

    for (Map.Entry<SimilarityKey, GenesetSimilarity> entry : similarities.entrySet()) {
        SimilarityKey key = entry.getKey();
        GenesetSimilarity similarity = entry.getValue();

        CyNode source = nodes.get(similarity.getGeneset1Name());
        CyNode target = nodes.get(similarity.getGeneset2Name());
        CyEdge edge = network.addEdge(source, target, false);

        // Register the edge with the data set named by the key, if there is one.
        String datasetName = key.getName();
        EMDataSet dataset = (datasetName == null) ? null : map.getDataSet(datasetName);
        if (dataset != null) {
            dataset.addEdgeSuid(edge.getSUID());
        }

        // Translate the overlapping gene hash keys via the map's lookup.
        List<String> overlapGenes = similarity.getOverlappingGenes()
                .stream()
                .map(map::getGeneFromHashKey)
                .collect(Collectors.toList());

        CyRow row = network.getRow(edge);
        row.set(CyNetwork.NAME, key.toString());
        row.set(CyEdge.INTERACTION, similarity.getInteractionType());
        Columns.EDGE_SIMILARITY_COEFF.set(row, prefix, null, similarity.getSimilarityCoeffecient());
        Columns.EDGE_OVERLAP_SIZE.set(row, prefix, null, similarity.getSizeOfOverlap());
        Columns.EDGE_OVERLAP_GENES.set(row, prefix, null, overlapGenes);

        // Compound edges carry a fixed marker instead of a data set name.
        if (!key.isCompound()) {
            Columns.EDGE_DATASET.set(row, prefix, null, similarity.getDataSetName());
        } else {
            Columns.EDGE_DATASET.set(row, prefix, null, Columns.EDGE_DATASET_VALUE_COMPOUND);
        }
    }
}
Use of org.baderlab.csplugins.enrichmentmap.model.EMDataSet in project EnrichmentMapApp by BaderLab.
Class CreateEnrichmentMapTaskFactory, method createTaskIterator.
/**
 * Assembles the task sequence that builds an EnrichmentMap: per data set the
 * file-loading tasks, followed by gene-set selection, gene-set filtering,
 * similarity computation, network creation and (unless headless) view creation.
 *
 * @return the assembled tasks; an empty iterator when there are no data sets
 */
@Override
public TaskIterator createTaskIterator() {
    TaskIterator tasks = new TaskIterator();
    if (dataSets.isEmpty()) {
        return tasks;
    }

    tasks.append(new TitleTask("Building EnrichmentMap"));
    EnrichmentMap map = new EnrichmentMap(params, serviceRegistrar);

    for (DataSetParameters dataSetParameters : dataSets) {
        String datasetName = dataSetParameters.getName();
        EMDataSet dataset = map.createDataSet(datasetName, dataSetParameters.getMethod(), dataSetParameters.getFiles());

        // GMT file, only when the data set names one
        String gmtFile = dataset.getSetOfGeneSets().getFilename();
        if (!Strings.isNullOrEmpty(gmtFile)) {
            tasks.append(new GMTFileReaderTask(dataset));
        }

        // Enrichment results
        tasks.append(new DetermineEnrichmentResultFileReader(dataset).getParsers());

        // Expression data: read the file when given, otherwise create a dummy
        // expression set so the expression viewer (heat map) can still be used.
        String expressionFile = dataset.getDataSetFiles().getExpressionFileName();
        if (!Strings.isNullOrEmpty(expressionFile)) {
            tasks.append(new ExpressionFileReaderTask(dataset));
        } else {
            tasks.append(new CreateDummyExpressionTask(dataset));
        }

        // Ranks, if present: looked up under a fixed name for GSEA,
        // under the data set name for every other method.
        String ranksName = datasetName;
        if (dataset.getMethod() == Method.GSEA) {
            ranksName = Ranking.GSEARanking;
        }
        Ranking ranking = dataset.getExpressionSets().getRanksByName(ranksName);
        if (ranking != null) {
            tasks.append(new RanksFileReaderTask(ranking.getFilename(), dataset, ranksName, false));
        }

        // Class file, only when the data set names one
        String classFile = dataset.getDataSetFiles().getClassFile();
        if (!Strings.isNullOrEmpty(classFile)) {
            tasks.append(new ClassFileReaderTask(dataset));
        }
    }

    // Order matters: first keep only the gene sets that pass the p-value and
    // q-value thresholds, then trim the survivors to the genes of the data file.
    tasks.append(new InitializeGenesetsOfInterestTask(map));
    tasks.append(new FilterGenesetsByDatasetGenes(map));

    // The baton hands the computed similarities from the similarity task
    // (consumer side) over to the network-creation task (supplier side).
    Baton<Map<SimilarityKey, GenesetSimilarity>> pipe = new Baton<>();
    tasks.append(new ComputeSimilarityTaskParallel(map, pipe.consumer()));
    tasks.append(createEMNetworkTaskFactory.create(map, pipe.supplier()));

    // Style and layout are skipped in headless mode.
    if (!headless) {
        tasks.append(createEMViewTaskFactory.create(map));
    }
    return tasks;
}
Aggregations