Example use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
Class ExpressionFileReaderTask, method parse.
/**
 * Parses the dataset's expression (or rank) file into the dataset's expression matrix.
 *
 * <p>The file named by {@code dataset.getExpressionSets().getFilename()} is read as
 * tab-separated lines. When the matrix has neither rows nor column names yet, the header line
 * is located first: if the first cell of the file is {@code #1.2} the header is the third
 * line, otherwise it is the first line that does not start with {@code #}. A two-column file
 * whose second "header" cell is numeric is treated as a header-less rank file: the column
 * names {@code Name} and {@code Rank/Score} are used and that same line is then parsed as
 * data, so that the rank value ends up in the matrix as if it were an expression value.
 *
 * <p>Only rows whose gene name (first column, upper-cased and trimmed) has a hash in the
 * enrichment map are stored; every data row still counts towards the expression universe.
 *
 * @param taskMonitor receives the status message and progress updates; may be {@code null},
 *        in which case a {@code NullTaskMonitor} is substituted
 * @return the dataset's expression matrix after it has been filled and row-normalized
 * @throws IOException if {@code DatasetLineParser.readLines} fails, or if the file ends
 *         before a header line is found
 */
public GeneExpressionMatrix parse(TaskMonitor taskMonitor) throws IOException {
    if (taskMonitor == null) {
        taskMonitor = new NullTaskMonitor();
    }

    Set<Integer> datasetGenes = dataset.getDataSetGenes();
    EnrichmentMap map = dataset.getMap();
    GeneExpressionMatrix expressionMatrix = dataset.getExpressionSets();
    List<String> lines = DatasetLineParser.readLines(expressionMatrix.getFilename());

    // A file given as an expression file may really be a rank file (2 or 5 columns). Old
    // session files may also have imported a two-column rank file, so a two-column layout
    // is tracked and its second column is checked for a numeric value further down.
    boolean twoColumns = false;
    int currentProgress = 0;
    int maxValue = lines.size();
    int expressionUniverse = 0;
    taskMonitor.setStatusMessage("Parsing GCT file - " + maxValue + " rows");

    Map<Integer, GeneExpression> expression = expressionMatrix.getExpressionMatrix();

    for (int i = 0; i < lines.size(); i++) {
        String[] tokens = lines.get(i).split("\t");
        // The first column of the file is the gene name.
        String name = tokens[0].toUpperCase().trim();

        // First pass over a matrix that has no rows and no column names yet: find the header.
        if (i == 0 && expression.isEmpty() && expressionMatrix.getColumnNames() == null) {
            int headerIndex = findHeaderLineIndex(lines, name.equalsIgnoreCase("#1.2"));
            tokens = lines.get(headerIndex).split("\t");
            i = headerIndex;

            if (tokens.length == 2) {
                twoColumns = true;
                // A GSEA edb rnk file might not have column names. If the second cell is a
                // number this line is data, not a title: use default headings and step back
                // one line so the loop increment lands on this same line again. (Rewinding
                // to the start of the file instead would parse skipped comment lines as
                // data rows.)
                if (isParsableAsDouble(tokens[1])) {
                    tokens[0] = "Name";
                    tokens[1] = "Rank/Score";
                    i = headerIndex - 1;
                }
            }

            expressionMatrix.setColumnNames(tokens);
            expressionMatrix.setNumConditions(expressionMatrix.getColumnNames().length);
            expressionMatrix.setExpressionMatrix(expression);
            continue;
        }

        // Expression data is only loaded for genes that are already in the gene list (i.e.
        // listed in at least one gene set).
        // TODO: is there the possibility that we need all the expression genes? Currently
        // this greatly decreases space when saving sessions.
        Integer geneKey = map.getHashFromGene(name);

        // A row without a second column has neither a description nor a value; it is not
        // stored (reading tokens[1] would otherwise throw).
        if (geneKey != null && tokens.length > 1) {
            // Keep the genes map and the dataset genes keyed identically so they are easy
            // to compare.
            datasetGenes.add(geneKey);

            // In a two-column file a numeric second column is the value, not a description.
            String description = (twoColumns && isParsableAsDouble(tokens[1])) ? "" : tokens[1];

            GeneExpression geneExpression = new GeneExpression(name, description);
            geneExpression.setExpression(tokens);

            // NOTE(review): -100 looks like a "no change" sentinel returned by
            // newMax/newMin/newclosesttoZero; confirm against GeneExpression.
            double newMax = geneExpression.newMax(expressionMatrix.getMaxExpression());
            if (newMax != -100) {
                expressionMatrix.setMaxExpression(newMax);
            }
            double newMin = geneExpression.newMin(expressionMatrix.getMinExpression());
            if (newMin != -100) {
                expressionMatrix.setMinExpression(newMin);
            }
            double newClosest = geneExpression.newclosesttoZero(expressionMatrix.getClosesttoZero());
            if (newClosest != -100) {
                expressionMatrix.setClosesttoZero(newClosest);
            }

            expression.put(geneKey, geneExpression);
        }
        expressionUniverse++;

        // Calculate percentage, a value between 0..100.
        // NOTE(review): if taskMonitor is Cytoscape's org.cytoscape.work.TaskMonitor, its
        // setProgress is documented to take a fraction in 0.0..1.0; confirm the expected
        // scale before changing this.
        int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
        taskMonitor.setProgress(percentComplete);
        currentProgress++;
    }

    // Set the number of genes.
    expressionMatrix.setExpressionUniverse(expressionUniverse);
    // Row-normalize the expression set.
    expressionMatrix.rowNormalizeMatrix();

    // TODO: initialize phenotypes associated with class files from expression file load
    // (set the phenotypes per dataset from the classes restored by a session load, or
    // otherwise from the dataset's class file).
    return expressionMatrix;
}

/**
 * Returns the index of the header line: index 2 when the file starts with a version line,
 * otherwise the index of the first line that does not start with {@code #}.
 *
 * @throws IOException if the file ends before that line
 */
private int findHeaderLineIndex(List<String> lines, boolean hasVersionLine) throws IOException {
    int headerIndex = 0;
    if (hasVersionLine) {
        headerIndex = 2;
    } else {
        // Ignore all comment lines.
        while (headerIndex < lines.size() && lines.get(headerIndex).startsWith("#")) {
            headerIndex++;
        }
    }
    if (headerIndex >= lines.size()) {
        throw new IOException("Expression file ends before a header line was found ("
                + lines.size() + " lines read)");
    }
    return headerIndex;
}

/** Tells whether {@code value} is accepted by {@link Double#parseDouble(String)}. */
private boolean isParsableAsDouble(String value) {
    try {
        Double.parseDouble(value);
        return true;
    } catch (NumberFormatException ignored) {
        // Not a number: the caller treats the cell as text.
        return false;
    }
}
Example use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in the project EnrichmentMapApp by BaderLab.
Class HeatMapRanksTest, method setUp.
/**
 * Test fixture: checks that the manager holds no enrichment maps, builds one GSEA
 * enrichment map from the files under {@code PATH}, and checks that the manager then holds
 * exactly one.
 *
 * @param emManager the enrichment map manager queried before and after the build
 */
@Before
public void setUp(EnrichmentMapManager emManager) {
    EMCreationParameters creationParams = new EMCreationParameters(
            "HeatMapRanks_", 0.005, 0.1, NESFilter.ALL, Optional.empty(), SimilarityMetric.OVERLAP, 0.5, 0.5);

    // Input files for the single dataset.
    DataSetFiles inputFiles = new DataSetFiles();
    inputFiles.setGMTFileName(PATH + "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    inputFiles.setExpressionFileName(PATH + "MCF7_ExprMx_v2_names.gct");
    inputFiles.setEnrichmentFileName1(PATH + "gsea_report_for_ES12_1473194913081.xls");
    inputFiles.setEnrichmentFileName2(PATH + "gsea_report_for_NT12_1473194913081.xls");
    inputFiles.setRankedFile(PATH + "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    inputFiles.setClassFile(PATH + "ES_NT.cls");

    // No map may exist before the build ...
    Map<Long, EnrichmentMap> mapsBefore = emManager.getAllEnrichmentMaps();
    assertEquals(0, mapsBefore.size());

    buildEnrichmentMap(creationParams, inputFiles, Method.GSEA, LegacySupport.DATASET1);

    // ... and exactly one afterwards.
    Map<Long, EnrichmentMap> mapsAfter = emManager.getAllEnrichmentMaps();
    assertEquals(1, mapsAfter.size());
}
Aggregations