Search in sources :

Example 56 with EnrichmentMap

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in project EnrichmentMapApp by BaderLab.

the class ExpressionFileReaderTask method parse.

/**
 * Parses the expression (or rank) file of this task's data set into the data set's
 * {@link GeneExpressionMatrix}.
 *
 * <p>The file is read as tab-separated lines. A file given as an expression file may
 * actually be a rank file. When the header line has exactly two columns the file is
 * treated as a rank file, and if the second column of that line is numeric the file is
 * assumed to have no header at all: default column names are installed and the line is
 * parsed again as a data row. Only rows whose first column maps to a gene already known
 * to the enrichment map are stored; every data row is counted in the expression universe.
 *
 * @param taskMonitor monitor that receives status and progress updates; when {@code null}
 *        a {@link NullTaskMonitor} is used instead
 * @return the data set's expression matrix, updated with the parsed rows and row-normalized
 * @throws IOException if reading the file fails, or if the file ends before a header line
 *         is found
 */
public GeneExpressionMatrix parse(TaskMonitor taskMonitor) throws IOException {
    if (taskMonitor == null) {
        taskMonitor = new NullTaskMonitor();
    }

    // Set once the header (or first data) line turns out to have exactly two columns,
    // i.e. the file is a rank file rather than a full expression file.
    boolean twoColumns = false;

    Set<Integer> datasetGenes = dataset.getDataSetGenes();
    EnrichmentMap map = dataset.getMap();
    String expressionFileName = dataset.getExpressionSets().getFilename();
    List<String> lines = DatasetLineParser.readLines(expressionFileName);

    int currentProgress = 0;
    int maxValue = lines.size();
    int expressionUniverse = 0;
    taskMonitor.setStatusMessage("Parsing GCT file - " + maxValue + " rows");

    GeneExpressionMatrix expressionMatrix = dataset.getExpressionSets();
    Map<Integer, GeneExpression> expression = expressionMatrix.getExpressionMatrix();

    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] tokens = line.split("\t");
        // The first column of the file is the name of the gene.
        String name = tokens[0].toUpperCase().trim();

        // The header is only looked for on the very first line, and only when the matrix
        // has neither rows nor column names yet. Once column names are set this branch is
        // never taken again, which is what lets a header-less first line be re-read below.
        boolean needsHeader = i == 0
                && expressionMatrix.getExpressionMatrix().isEmpty()
                && expressionMatrix.getColumnNames() == null;
        if (needsHeader) {
            int headerIndex;
            if (name.equalsIgnoreCase("#1.2")) {
                // GCT layout: version line, dimensions line, then the header line.
                headerIndex = 2;
            } else {
                // Ignore all leading comment lines.
                headerIndex = 0;
                while (headerIndex < lines.size() && lines.get(headerIndex).startsWith("#")) {
                    headerIndex++;
                }
            }
            if (headerIndex >= lines.size()) {
                throw new IOException("Expression file '" + expressionFileName
                        + "' ends before a header line was found");
            }

            line = lines.get(headerIndex);
            i = headerIndex;
            tokens = line.split("\t");

            // Check whether the second column contains expression values.
            if (tokens.length == 2) {
                twoColumns = true;
                // A GSEA edb rnk file might not have column names. If the second column
                // is a number this line is data, not a title line.
                if (isNumericToken(tokens[1])) {
                    // Step back so the loop increment lands on this same line again and
                    // it gets parsed as a data row, without revisiting skipped comments.
                    i = headerIndex - 1;
                    tokens[0] = "Name";
                    tokens[1] = "Rank/Score";
                }
            }

            expressionMatrix.setColumnNames(tokens);
            expressionMatrix.setNumConditions(expressionMatrix.getColumnNames().length);
            expressionMatrix.setExpressionMatrix(expression);
            continue;
        }

        // Only load expression data for genes that are already in the gene list (i.e. are
        // listed in at least one gene set).
        // TODO: is there the possibility that we need all the expression genes? Currently
        // this greatly decreases space when saving sessions.
        Integer geneKey = map.getHashFromGene(name);
        if (geneKey != null) {
            // Keep the genes map and the data set genes keyed identically so they are
            // easy to compare.
            datasetGenes.add(geneKey);

            // NOTE(review): tokens[1] is read unguarded; a known gene on a line without
            // a tab would still fail here - confirm whether such rows can occur.
            String description = "";
            if (twoColumns) {
                // In a rank file the second column is normally the score; it is only a
                // description when it does not parse as a number.
                try {
                    Double.parseDouble(tokens[1]);
                } catch (NumberFormatException notANumber) {
                    description = tokens[1];
                }
            } else {
                description = tokens[1];
            }

            GeneExpression expres = new GeneExpression(name, description);
            expres.setExpression(tokens);

            // -100 is the value the GeneExpression helpers return when the current
            // matrix-wide value should be left unchanged.
            double newMax = expres.newMax(expressionMatrix.getMaxExpression());
            if (newMax != -100) {
                expressionMatrix.setMaxExpression(newMax);
            }
            double newMin = expres.newMin(expressionMatrix.getMinExpression());
            if (newMin != -100) {
                expressionMatrix.setMinExpression(newMin);
            }
            double newClosest = expres.newclosesttoZero(expressionMatrix.getClosesttoZero());
            if (newClosest != -100) {
                expressionMatrix.setClosesttoZero(newClosest);
            }
            expression.put(geneKey, expres);
        }
        expressionUniverse++;

        // Calculate percentage. This must be a value between 0..100.
        int percentComplete = (int) (((double) currentProgress / maxValue) * 100);
        taskMonitor.setProgress(percentComplete);
        currentProgress++;
    }

    // Set the number of genes.
    expressionMatrix.setExpressionUniverse(expressionUniverse);
    // Row-normalize the expression set.
    expressionMatrix.rowNormalizeMatrix();
    // TODO: initialize phenotypes associated with class files from expression file load
    return expressionMatrix;
}

/** Returns whether {@code token} parses as an {@code int} or, failing that, as a {@code double}. */
private static boolean isNumericToken(String token) {
    try {
        Integer.parseInt(token);
        return true;
    } catch (NumberFormatException notAnInt) {
        // Not an integer; it may still be a floating-point value.
    }
    try {
        Double.parseDouble(token);
        return true;
    } catch (NumberFormatException notADouble) {
        return false;
    }
}
Also used : EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) GeneExpressionMatrix(org.baderlab.csplugins.enrichmentmap.model.GeneExpressionMatrix) GeneExpression(org.baderlab.csplugins.enrichmentmap.model.GeneExpression) NullTaskMonitor(org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor)

Example 57 with EnrichmentMap

use of org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap in project EnrichmentMapApp by BaderLab.

the class HeatMapRanksTest method setUp.

/**
 * Test fixture: registers the input files of one GSEA data set, then builds a single
 * enrichment map from them.
 *
 * <p>The manager is asserted to hold no enrichment maps before the build and exactly one
 * afterwards.
 *
 * @param emManager the enrichment map manager whose registered maps are checked
 */
@Before
public void setUp(EnrichmentMapManager emManager) {
    // Input files for the data set, all resolved relative to PATH.
    DataSetFiles dataSetFiles = new DataSetFiles();
    dataSetFiles.setGMTFileName(PATH + "Human_GO_AllPathways_no_GO_iea_April_15_2013_symbol.gmt");
    dataSetFiles.setExpressionFileName(PATH + "MCF7_ExprMx_v2_names.gct");
    dataSetFiles.setEnrichmentFileName1(PATH + "gsea_report_for_ES12_1473194913081.xls");
    dataSetFiles.setEnrichmentFileName2(PATH + "gsea_report_for_NT12_1473194913081.xls");
    dataSetFiles.setRankedFile(PATH + "ranked_gene_list_ES12_versus_NT12_1473194913081.xls");
    dataSetFiles.setClassFile(PATH + "ES_NT.cls");

    EMCreationParameters creationParams = new EMCreationParameters(
            "HeatMapRanks_",
            0.005,
            0.1,
            NESFilter.ALL,
            Optional.empty(),
            SimilarityMetric.OVERLAP,
            0.5,
            0.5);

    // Nothing may be registered with the manager before the build ...
    assertEquals(0, emManager.getAllEnrichmentMaps().size());

    buildEnrichmentMap(creationParams, dataSetFiles, Method.GSEA, LegacySupport.DATASET1);

    // ... and exactly one enrichment map must exist afterwards.
    assertEquals(1, emManager.getAllEnrichmentMaps().size());
}
Also used : EMCreationParameters(org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters) EnrichmentMap(org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap) DataSetFiles(org.baderlab.csplugins.enrichmentmap.model.DataSetFiles) Before(org.junit.Before)

Aggregations

EnrichmentMap (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMap)57 EMDataSet (org.baderlab.csplugins.enrichmentmap.model.EMDataSet)27 Test (org.junit.Test)22 DataSetFiles (org.baderlab.csplugins.enrichmentmap.model.DataSetFiles)21 EMCreationParameters (org.baderlab.csplugins.enrichmentmap.model.EMCreationParameters)13 Method (org.baderlab.csplugins.enrichmentmap.model.EMDataSet.Method)13 CyNetwork (org.cytoscape.model.CyNetwork)12 EnrichmentMapParameters (org.baderlab.csplugins.enrichmentmap.model.EnrichmentMapParameters)11 EnrichmentResult (org.baderlab.csplugins.enrichmentmap.model.EnrichmentResult)10 CyNetworkView (org.cytoscape.view.model.CyNetworkView)10 Map (java.util.Map)8 CyEdge (org.cytoscape.model.CyEdge)7 CyRow (org.cytoscape.model.CyRow)7 NullTaskMonitor (org.baderlab.csplugins.enrichmentmap.util.NullTaskMonitor)6 TaskIterator (org.cytoscape.work.TaskIterator)6 ImmutableSet (com.google.common.collect.ImmutableSet)5 HashSet (java.util.HashSet)5 GeneSet (org.baderlab.csplugins.enrichmentmap.model.GeneSet)5 Inject (com.google.inject.Inject)4 ActionEvent (java.awt.event.ActionEvent)4