Search in sources :

Example 1 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExperimentDEDVEndpoint method invokeInternal.

/**
 * Reads the given <code>requestElement</code> and sends the response back.
 * <p>
 * The last <code>ee_id</code> value found in the request selects the experiment. When
 * <code>readReport</code> returns a document for that id, its root element is returned directly.
 * Otherwise the processed expression data matrix is fetched and written out row by row as a
 * <code>dedv</code> element (the encoded row) followed by a <code>geneIdist</code> element (the
 * delimited ids of the genes for the row's design element), and the result is passed to
 * <code>writeReport</code> before being returned.
 *
 * @param requestElement the contents of the SOAP message as DOM elements
 * @param document a DOM document to be used for constructing <code>Node</code>s
 * @return the response element
 * @throws NumberFormatException if no stored report was found and the request carried no ee_id, or
 *         the ee_id is not a valid long
 * @throws Exception if building or storing the response fails
 */
@Override
protected Element invokeInternal(Element requestElement, Document document) throws Exception {
    StopWatch watch = new StopWatch();
    watch.start();
    setLocalName(EXPERIMENT_LOCAL_NAME);
    // If the request holds several ee_id values, the last one wins.
    String eeid = "";
    Collection<String> eeResults = getSingleNodeValue(requestElement, "ee_id");
    for (String id : eeResults) {
        eeid = id;
    }
    // Check to make sure we haven't already generated this EE report.
    Document doc = readReport(DEFAULT_FILENAME + eeid + DEFAULT_EXTENSION);
    if (doc != null) {
        // Successfully got report from disk
        watch.stop();
        long time = watch.getTime();
        log.info("XML response for ee" + eeid + " retrieved from disk in " + time + "ms.");
        return doc.getDocumentElement();
    }
    // Fail with a descriptive message instead of the opaque one Long.parseLong gives for "".
    // The exception type is the one parseLong itself would have thrown.
    if (eeid == null || eeid.isEmpty()) {
        throw new NumberFormatException("No ee_id value was supplied in the request");
    }
    // Build the matrix
    ExpressionExperiment ee = expressionExperimentService.load(Long.parseLong(eeid));
    ee = expressionExperimentService.thawLite(ee);
    ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(ee);
    // Build the XML by hand rather than through the inherited buildWrapper.
    String elementName1 = "dedv";
    String elementName2 = "geneIdist";
    Element responseWrapper = document.createElementNS(NAMESPACE_URI, EXPERIMENT_LOCAL_NAME);
    Element responseElement = document.createElementNS(NAMESPACE_URI, EXPERIMENT_LOCAL_NAME + RESPONSE);
    responseWrapper.appendChild(responseElement);
    if (dmatrix == null || dmatrix.rows() == 0) {
        responseElement.appendChild(document.createTextNode("No " + elementName1 + " result"));
    } else {
        for (int rowNum = 0; rowNum < dmatrix.rows(); rowNum++) {
            // data vector string for output
            String elementString1 = encode(dmatrix.getRow(rowNum));
            // delimited list of the gene ids for this row's design element
            StringBuilder geneIds = new StringBuilder();
            CompositeSequence de = dmatrix.getDesignElementForRow(rowNum);
            Collection<Gene> geneCol = compositeSequenceService.getGenes(de);
            for (Gene gene : geneCol) {
                if (geneIds.length() == 0) {
                    geneIds.append(gene.getId().toString());
                } else {
                    geneIds.append(DELIMITER + gene.getId().toString());
                }
            }
            Element e1 = document.createElement(elementName1);
            e1.appendChild(document.createTextNode(elementString1));
            responseElement.appendChild(e1);
            Element e2 = document.createElement(elementName2);
            e2.appendChild(document.createTextNode(geneIds.toString()));
            responseElement.appendChild(e2);
        }
    }
    watch.stop();
    long time = watch.getTime();
    log.info("XML response for ee:" + eeid + " created from scratch in " + time + "ms.");
    writeReport(responseWrapper, document, DEFAULT_FILENAME + eeid);
    return responseWrapper;
}
Also used : Gene(ubic.gemma.model.genome.Gene) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) Element(org.w3c.dom.Element) Document(org.w3c.dom.Document) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 2 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdaterTest method testAddData.

/**
 * Loads GSE37646 through the GEO service, replaces its data with a made-up matrix on a test
 * platform, then checks that adding non-preferred data works while adding the preferred data a
 * second time is rejected with an {@link IllegalArgumentException}.
 */
@Test
public void testAddData() throws Exception {
    // Start from a regular data set that has no data; the platform is (basically) irrelevant.
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // The data set was left behind by an earlier run: use the existing copy.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thawLite(ee);

    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    int probeCount = targetArrayDesign.getCompositeSequences().size();
    int sampleCount = samples.size();
    DoubleMatrix<CompositeSequence, BioMaterial> fakeMatrix = new DenseDoubleMatrix<>(probeCount, sampleCount);

    // Make up some fake data on another platform, and match it to those samples.
    int rowCount = fakeMatrix.rows();
    int columnCount = fakeMatrix.columns();
    for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < columnCount; col++) {
            fakeMatrix.set(row, col, (row + 1) * (col + 1) * Math.random() / 100.0);
        }
    }
    List<CompositeSequence> probeList = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    fakeMatrix.setRowNames(probeList);
    fakeMatrix.setColumnNames(samples);

    QuantitationType quantType = this.makeQt(true);
    ExpressionDataDoubleMatrix preferredData = new ExpressionDataDoubleMatrix(ee, quantType, fakeMatrix);
    assertNotNull(preferredData.getBestBioAssayDimension());
    assertEquals(fakeMatrix.columns(), preferredData.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probeList.size(), preferredData.getMatrix().rows());

    // Replace the experiment's data; every assay must now point at the test platform,
    // both before and after a full thaw.
    ee = dataUpdater.replaceData(ee, targetArrayDesign, preferredData);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedVectors = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject processedVector : processedVectors) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    // Test adding data (non-preferred).
    quantType = this.makeQt(false);
    ExpressionDataDoubleMatrix extraData = new ExpressionDataDoubleMatrix(ee, quantType, fakeMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, extraData);
    ee = experimentService.thaw(ee);
    try {
        // Adding the preferred data a second time must be refused.
        dataUpdater.addData(ee, targetArrayDesign, preferredData);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // okay: this is the outcome the test wants.
    }
    dataUpdater.deleteData(ee, quantType);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ArrayList(java.util.ArrayList) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 3 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqDataWithMissingSamples.

/**
 * Test case where some samples cannot be used.
 * <p>
 * Loads GSE29006 fresh (removing any existing copy first), reads count and RPKM matrices from
 * test resources and adds them with {@code addCountData}. The first call, with the final flag
 * {@code false}, is expected to be rejected with an {@link IllegalArgumentException}; the second
 * call, with the flag {@code true}, is expected to succeed. The resulting vectors and bioassays
 * are then checked.
 */
@Test
public void testLoadRNASeqDataWithMissingSamples() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Make sure we start without a stale copy of the experiment.
    ExpressionExperiment ee = experimentService.findByShortName("GSE29006");
    if (ee != null) {
        experimentService.remove(ee);
    }
    assertTrue(experimentService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = experimentService.thaw(ee);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        try {
            // NOTE(review): the last argument presumably controls whether missing samples are
            // tolerated -- confirm against DataUpdater.addCountData.
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException expected) {
            // Expected
        }
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }
    /*
     * Check
     */
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    assertTrue(mat.getQuantitationTypes().iterator().next().getName().startsWith("log2cpm"));
    assertEquals(4, ee.getBioAssays().size());
    assertEquals(199 * 3, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 4 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method run.

/**
 * Runs the differential expression analysis on the given matrix, either on the whole experiment
 * or, when the configuration names a subset factor, once per subset.
 * <p>
 * Subsets whose factor value carries an excluded characteristic, whose design yields no usable
 * factors, or for which the analysis returns nothing are skipped with a warning.
 *
 * @param expressionExperiment the experiment being analyzed
 * @param dmatrix the data; it is re-ordered to follow the ordered samples before use
 * @param config the analysis configuration (factors to include, optional subset factor)
 * @return the analyses obtained; empty if none could be produced
 * @throws IllegalStateException if the subset factor is also one of the factors to include
 */
@Override
public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config) {
    /*
     * Four phases overall: (1) get the data matrix and factors, (2) determine baseline groups and
     * build the model and contrasts, (3) run the analysis, (4) postprocess. Phase 2 is by far the
     * most complex, since it depends on which factors are present and what kind they are.
     */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();

    // FIXME this is the place to strip out the outliers.
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(dmatrix, factors);
    // Rebuild the matrix so that its columns follow the sample ordering.
    dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, dmatrix);

    Collection<DifferentialExpressionAnalysis> results = new HashSet<>();
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    if (subsetFactor == null) {
        // No subsetting requested: analyze the whole thing as one.
        DifferentialExpressionAnalysis wholeAnalysis = this.doAnalysis(expressionExperiment, config, dmatrix, samplesUsed, factors, null);
        if (wholeAnalysis != null) {
            results.add(wholeAnalysis);
        } else {
            LinearModelAnalyzer.log.warn("No analysis results were obtained");
        }
        return results;
    }

    if (factors.contains(subsetFactor)) {
        throw new IllegalStateException("Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor + "]");
    }

    Map<FactorValue, ExpressionDataDoubleMatrix> subsets = this.makeSubSets(config, dmatrix, samplesUsed, subsetFactor);
    LinearModelAnalyzer.log.info("Total number of subsets: " + subsets.size());

    // Analyze each subset in turn.
    for (Map.Entry<FactorValue, ExpressionDataDoubleMatrix> subsetEntry : subsets.entrySet()) {
        FactorValue subsetFactorValue = subsetEntry.getKey();
        ExpressionDataDoubleMatrix subsetMatrix = subsetEntry.getValue();
        LinearModelAnalyzer.log.info("Analyzing subset: " + subsetFactorValue);

        // Subsets flagged with a DE_Exclude characteristic are left out of the analysis
        // (requested in bugzilla issue #4458).
        boolean excluded = false;
        for (Characteristic characteristic : subsetFactorValue.getCharacteristics()) {
            if (LinearModelAnalyzer.EXCLUDE_CHARACTERISTICS_VALUES.contains(characteristic.getValue())) {
                excluded = true;
                break;
            }
        }
        if (excluded) {
            LinearModelAnalyzer.log.warn(LinearModelAnalyzer.EXCLUDE_WARNING);
            continue;
        }

        List<BioMaterial> subsetSamples = ExperimentalDesignUtils.getOrderedSamples(subsetMatrix, factors);

        // Build the ExpressionExperimentSubSet holding the bioassays of this subset's samples.
        ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance();
        eeSubSet.setSourceExperiment(expressionExperiment);
        eeSubSet.setName("Subset for " + subsetFactorValue);
        Collection<BioAssay> subsetAssays = new HashSet<>();
        for (BioMaterial sample : subsetSamples) {
            subsetAssays.addAll(sample.getBioAssaysUsedIn());
        }
        eeSubSet.getBioAssays().addAll(subsetAssays);

        Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(subsetMatrix, eeSubSet, factors);
        DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(factors, config, subsetFactorValue);
        if (subsetFactors.isEmpty()) {
            LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
            continue;
        }

        // Run the analysis on the subset.
        DifferentialExpressionAnalysis subsetAnalysis = this.doAnalysis(eeSubSet, subsetConfig, subsetMatrix, subsetSamples, new ArrayList<>(subsetFactors), subsetFactorValue);
        if (subsetAnalysis == null) {
            LinearModelAnalyzer.log.warn("No analysis results were obtained for subset: " + subsetFactorValue);
            continue;
        }
        results.add(subsetAnalysis);
    }
    return results;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) Characteristic(ubic.gemma.model.common.description.Characteristic) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 5 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method run.

/**
 * Runs the analysis for a single, already-defined subset of an experiment.
 *
 * @param subset the subset to analyze; its source experiment supplies the data matrix
 * @param config the analysis configuration (factors to include and the subset factor)
 * @return the analysis, or {@code null} if no usable factors remain for the subset
 * @throws IllegalStateException if the subset's samples carry more than one value of the subset
 *         factor, or if the analysis produced no result
 */
@Override
public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset, DifferentialExpressionAnalysisConfig config) {
    // Begin exactly as for a full experiment: fetch the processed data of the source experiment.
    ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(subset.getSourceExperiment());
    ExperimentalFactor ef = config.getSubsetFactor();

    // Collect the distinct samples behind the subset's bioassays.
    Collection<BioMaterial> distinctSamples = new HashSet<>();
    for (BioAssay assay : subset.getBioAssays()) {
        distinctSamples.add(assay.getSampleUsed());
    }
    List<BioMaterial> samplesInSubset = new ArrayList<>(distinctSamples);

    // Find the one value of the subset factor shared by every sample; two different values are an error.
    FactorValue subsetFactorValue = null;
    for (BioMaterial sample : samplesInSubset) {
        for (FactorValue candidate : sample.getFactorValues()) {
            if (!candidate.getExperimentalFactor().equals(ef)) {
                continue;
            }
            if (subsetFactorValue == null) {
                subsetFactorValue = candidate;
                continue;
            }
            if (!subsetFactorValue.equals(candidate)) {
                throw new IllegalStateException("This subset has more than one factor value for the supposed subset factor: " + candidate + " and " + subsetFactorValue);
            }
        }
    }

    samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());
    // Slice the full matrix down to the subset's samples.
    ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, dmatrix);

    // NOTE(review): the full matrix, not the sliced one, is passed here -- confirm this is intended.
    Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(dmatrix, subset, config.getFactorsToInclude());
    if (subsetFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
        return null;
    }

    DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(), config, subsetFactorValue);
    DifferentialExpressionAnalysis analysis = this.doAnalysis(subset, subsetConfig, subsetMatrix, samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);
    if (analysis != null) {
        return analysis;
    }
    throw new IllegalStateException("Subset could not be analyzed with config: " + config);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3