Search in sources :

Example 36 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqData.

/**
 * More realistic test of RNA-seq loading, using GSE19166.
 * <p>
 * Fetches the experiment (or reuses it if it is already in the system), adds count and RPKM data read from test
 * resource files, and then checks the resulting quantitation types, platform assignment and vector counts.
 *
 * @throws Exception if fetching, reading or loading fails
 */
@Test
public void testLoadRNASeqData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee;
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE19166", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The experiment is already present; reuse the instance carried by the exception.
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    ee = experimentService.thaw(ee);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    String countResource = "/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt";
    String rpkmResource = "/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt";
    try (InputStream countData = this.getClass().getResourceAsStream(countResource);
        InputStream rpkmData = this.getClass().getResourceAsStream(rpkmResource)) {
        // getResourceAsStream returns null for a missing resource; fail here with a clear message
        // rather than with an obscure error from inside the reader.
        assertNotNull("Missing test resource: " + countResource, countData);
        assertNotNull("Missing test resource: " + rpkmResource, rpkmData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(199, targetArrayDesign.getCompositeSequences().size());
        // Main step.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
    }
    ee = experimentService.thaw(ee);
    // should have: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, ee.getQuantitationTypes().size());
    // Every assay must now use the target platform.
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    assertNotNull(ee.getNumberOfDataVectors());
    assertEquals(199, ee.getNumberOfDataVectors().intValue());
    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix mat = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, mat.rows());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM475204", 3949585);
    assertEquals(3 * 199, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(6, v.getBioAssays().size());
    }
    assertTrue(!dataVectorService.getProcessedDataVectors(experimentService.load(ee.getId())).isEmpty());
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 37 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method analyze.

/**
 * Runs the link analysis for one experiment: QC check, matrix filtering, set-up, removal of probes without a
 * mapped gene, normalization, the analysis itself, and an optional check of the correlation distribution.
 *
 * @param ee                 the experiment being analyzed
 * @param filterConfig       configuration used to obtain the filtered matrix
 * @param linkAnalysisConfig configuration for QC, normalization and the distribution check
 * @param la                 the analysis object that is set up and then run
 * @param dataVectors        processed vectors the filtered matrix is built from
 */
private void analyze(ExpressionExperiment ee, FilterConfig filterConfig, LinkAnalysisConfig linkAnalysisConfig, LinkAnalysis la, Collection<ProcessedExpressionDataVector> dataVectors) {
    this.qcCheck(linkAnalysisConfig, ee);

    ExpressionDataDoubleMatrix filteredMatrix = expressionDataMatrixService.getFilteredMatrix(ee, filterConfig, dataVectors);
    this.setUpForAnalysis(ee, la, dataVectors, filteredMatrix);

    Map<CompositeSequence, Set<Gene>> genesByProbe = la.getProbeToGeneMap();
    assert !genesByProbe.isEmpty();

    // Drop probes that have no gene mapped to them, not just those that have no sequence.
    ExpressionDataDoubleMatrix mappedMatrix = this.filterUnmappedProbes(filteredMatrix, genesByProbe);
    this.checkDatamatrix(mappedMatrix);

    LinkAnalysisServiceImpl.log.info("Starting link analysis... " + ee);
    this.normalize(mappedMatrix, linkAnalysisConfig);

    // Link analysis section.
    this.addAnalysisObj(ee, filterConfig, linkAnalysisConfig, la);
    la.analyze();
    CoexpCorrelationDistribution distribution = la.getCorrelationDistribution();

    // Another QC check, only when the configuration asks for it.
    if (!linkAnalysisConfig.isCheckCorrelationDistribution()) {
        return;
    }
    this.diagnoseCorrelationDistribution(ee, distribution);
}
Also used : CoexpCorrelationDistribution(ubic.gemma.model.analysis.expression.coexpression.CoexpCorrelationDistribution) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 38 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class GeeqServiceImpl method scoreMissingValues.

/**
 * Scores the experiment on missing values and records the outcome on the given {@code Geeq}.
 * <p>
 * The score is {@code P_10} when raw data is available, or when the processed data matrix could be obtained and
 * reports no missing values; otherwise it is {@code N_10}. The processed matrix is only consulted when there is
 * no raw data.
 *
 * @param ee         the experiment to score
 * @param gq         receives the no-vectors flag, any problem message and the score
 * @param hasRawData whether the experiment has raw data
 */
private void scoreMissingValues(ExpressionExperiment ee, Geeq gq, boolean hasRawData) {
    boolean vectorsAvailable = true;
    boolean missingValuesFound = false;
    String issues = "";

    if (!hasRawData) {
        try {
            missingValuesFound = expressionDataMatrixService.getProcessedExpressionDataMatrix(ee).hasMissingValues();
        } catch (IllegalArgumentException e) {
            // Recorded as "no processed vectors" without adding a problem message.
            vectorsAvailable = false;
        } catch (Exception e) {
            // Any other failure also counts as "no processed vectors", and is reported.
            vectorsAvailable = false;
            issues = GeeqServiceImpl.ERR_MSG_MISSING_VALS + e.getMessage();
        }
    }

    double score;
    if (hasRawData || (vectorsAvailable && !missingValuesFound)) {
        score = GeeqServiceImpl.P_10;
    } else {
        score = GeeqServiceImpl.N_10;
    }

    gq.setNoVectors(!vectorsAvailable);
    gq.addOtherIssues(issues);
    gq.setsScoreMissingValues(score);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)

Example 39 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdater method replaceData.

/**
 * Replaces the data associated with the experiment, or adds it if there is none. The supplied data become the
 * 'preferred' quantitation type; note that the "raw" data are what get replaced. Comparable to
 * AffyPowerToolsProbesetSummarize.convertDesignElementDataVectors and to code in
 * SimpleExpressionDataLoaderService.
 * <p>
 * This overload complements the other replaceData by accepting a matrix keyed by plain strings, as read directly
 * from a file, so that matching of elements (rows) and samples (columns) is done here.
 *
 * @param ee             the experiment whose data are replaced
 * @param targetPlatform the platform to match row names against (this only works for a single-platform data set)
 * @param qt             the quantitation type for the new data
 * @param data           the new data, with string row and column names
 * @return the experiment as returned by the delegated replaceData call
 */
// Possible external use
@SuppressWarnings("UnusedReturnValue")
public ExpressionExperiment replaceData(ExpressionExperiment ee, ArrayDesign targetPlatform, QuantitationType qt, DoubleMatrix<String, String> data) {
    ArrayDesign thawedPlatform = this.arrayDesignService.thaw(targetPlatform);
    ExpressionExperiment thawedExperiment = this.experimentService.thawLite(ee);

    // Rows are matched to platform elements first, then columns to the experiment's biomaterials.
    DoubleMatrix<CompositeSequence, BioMaterial> matched = this.matchElementsToRowNames(thawedPlatform, data);
    this.matchBioMaterialsToColNames(thawedExperiment, data, matched);

    return this.replaceData(thawedExperiment, thawedPlatform, new ExpressionDataDoubleMatrix(thawedExperiment, qt, matched));
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 40 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdater method addCountData.

/**
 * Replaces data. Starting with the count data, we compute the log2cpm, which is the preferred quantitation type we
 * use internally. Counts and FPKM (if provided) are stored in addition.
 *
 * @param ee                  ee
 * @param targetArrayDesign   - this should be one of the "Generic" gene-based platforms. The data set will be
 *                            switched to use it.
 * @param countMatrix         Representing 'raw' counts (added after rpkm, if provided). Must not be null.
 * @param rpkmMatrix          Representing per-gene normalized data, optional (RPKM or FPKM); may be null.
 * @param readLength          passed on to addTotalCountInformation along with the count matrix
 * @param isPairedReads       passed on to addTotalCountInformation along with the count matrix
 * @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
 * @throws IllegalArgumentException if countMatrix is null
 */
public void addCountData(ExpressionExperiment ee, ArrayDesign targetArrayDesign, DoubleMatrix<String, String> countMatrix, DoubleMatrix<String, String> rpkmMatrix, Integer readLength, Boolean isPairedReads, boolean allowMissingSamples) {
    if (countMatrix == null)
        throw new IllegalArgumentException("You must provide count matrix (rpkm is optional)");
    targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
    ee = experimentService.thawLite(ee);
    ee = this.dealWithMissingSamples(ee, countMatrix, allowMissingSamples);
    // Re-key the string-named count matrix: rows by platform element, columns by biomaterial.
    DoubleMatrix<CompositeSequence, BioMaterial> properCountMatrix = this.matchElementsToRowNames(targetArrayDesign, countMatrix);
    this.matchBioMaterialsToColNames(ee, countMatrix, properCountMatrix);
    assert !properCountMatrix.getColNames().isEmpty();
    assert !properCountMatrix.getRowNames().isEmpty();
    QuantitationType countqt = this.makeCountQt();
    ExpressionDataDoubleMatrix countEEMatrix = new ExpressionDataDoubleMatrix(ee, countqt, properCountMatrix);
    QuantitationType log2cpmQt = this.makelog2cpmQt();
    // Library sizes are the column sums of the input count matrix (not of the re-keyed matrix).
    DoubleMatrix1D librarySize = MatrixStats.colSums(countMatrix);
    DoubleMatrix<CompositeSequence, BioMaterial> log2cpmMatrix = MatrixStats.convertToLog2Cpm(properCountMatrix, librarySize);
    ExpressionDataDoubleMatrix log2cpmEEMatrix = new ExpressionDataDoubleMatrix(ee, log2cpmQt, log2cpmMatrix);
    // log2cpm goes in via replaceData; the counts are then added alongside it.
    ee = this.replaceData(ee, targetArrayDesign, log2cpmEEMatrix);
    ee = this.addData(ee, targetArrayDesign, countEEMatrix);
    this.addTotalCountInformation(ee, countEEMatrix, readLength, isPairedReads);
    if (rpkmMatrix != null) {
        // Optional RPKM/FPKM data: re-keyed the same way as the counts, then added as additional data.
        DoubleMatrix<CompositeSequence, BioMaterial> properRPKMMatrix = this.matchElementsToRowNames(targetArrayDesign, rpkmMatrix);
        this.matchBioMaterialsToColNames(ee, rpkmMatrix, properRPKMMatrix);
        assert !properRPKMMatrix.getColNames().isEmpty();
        assert !properRPKMMatrix.getRowNames().isEmpty();
        QuantitationType rpkmqt = this.makeRPKMQt();
        ExpressionDataDoubleMatrix rpkmEEMatrix = new ExpressionDataDoubleMatrix(ee, rpkmqt, properRPKMMatrix);
        ee = this.addData(ee, targetArrayDesign, rpkmEEMatrix);
    }
    // Sanity check (only active with assertions enabled): processed vectors should exist at this point.
    assert !processedExpressionDataVectorService.getProcessedDataVectors(ee).isEmpty();
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3