Search in sources :

Example 6 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class MeanVarianceServiceTest method testServiceCreateCountData.

/**
 * Loads GSE29006 through the GEO service, attaches count and RPKM matrices read from test
 * resources, and checks the mean-variance relation created for the experiment.
 *
 * @throws Exception if loading the experiment or reading the test resources fails
 */
@Test
public final void testServiceCreateCountData() throws Exception {
    // so it doesn't look for soft files
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Remove any existing copy of the data set so the load below starts clean.
    ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertNull(eeService.findByShortName("GSE29006"));
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure report shows where it came from.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    qt = this.createOrUpdateQt(ScaleType.COUNT);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        // we have to find the right generic platform to use.
        ArrayDesign targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // With the last flag set to false the call must be rejected.
        // NOTE(review): the meaning of the last flag is not visible here - confirm against DataUpdater.addCountData.
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException expected) {
            // Expected
        }
        // Same call with the last flag set to true is the one that should succeed.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }
    ee = eeService.thaw(this.ee);
    assertNotNull(ee.getId());
    MeanVarianceRelation mvr = meanVarianceService.create(ee, true);
    // convert byte[] to array[]
    double[] means = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getMeans());
    double[] variances = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getVariances());
    // JUnit assertions instead of the `assert` keyword, which is skipped unless the JVM runs with -ea.
    assertNotNull(means);
    assertNotNull(variances);
    // warning: order may have changed, so sort before comparing against fixed positions
    Arrays.sort(means);
    Arrays.sort(variances);
    // check sizes
    int expectedMeanVarianceLength = 199;
    // NAs removed
    int expectedLowessLength = 197;
    assertEquals(expectedMeanVarianceLength, means.length);
    assertEquals(expectedMeanVarianceLength, variances.length);
    // Smallest values after sorting.
    int idx = 0;
    assertEquals(1.037011, means[idx], 0.0001);
    assertEquals(0.00023724336, variances[idx], 0.000001);
    // NOTE(review): this index comes from expectedLowessLength (197), not from the array length (199),
    // so it does not address the largest sorted value - confirm this is intended.
    idx = expectedLowessLength - 1;
    assertEquals(15.23313, means[idx], 0.0001);
    assertEquals(4.84529, variances[idx], 0.0001);
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) MeanVarianceRelation(ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 7 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class DataUpdaterTest method testLoadRNASeqData.

/*
 * RNA-seq loading test on GSE19166: attaches count and RPKM matrices read from test
 * resources, then verifies the quantitation types, data vectors and bioassays on the
 * experiment.
 */
@Test
public void testLoadRNASeqData() throws Exception {
    final int expectedProbeCount = 199;
    final String countResource = "/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt";
    final String rpkmResource = "/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt";

    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee;
    try {
        Collection<?> loaded = geoService.fetchAndLoad("GSE19166", false, false, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // Reuse the experiment carried by the exception instead of failing.
        List<?> existing = (List<?>) alreadyLoaded.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thaw(ee);

    // Read both matrices from the classpath and attach them to the experiment.
    DoubleMatrixReader matrixReader = new DoubleMatrixReader();
    try (InputStream countStream = this.getClass().getResourceAsStream(countResource);
        InputStream rpkmStream = this.getClass().getResourceAsStream(rpkmResource)) {
        DoubleMatrix<String, String> counts = matrixReader.read(countStream);
        DoubleMatrix<String, String> rpkm = matrixReader.read(rpkmStream);
        List<String> probeNames = counts.getRowNames();
        assertEquals(expectedProbeCount, probeNames.size());

        // A generic platform covering exactly these probes is needed.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(expectedProbeCount, targetArrayDesign.getCompositeSequences().size());

        // The step under test.
        dataUpdater.addCountData(ee, targetArrayDesign, counts, rpkm, 36, true, false);
    }
    ee = experimentService.thaw(ee);

    // should have: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, ee.getQuantitationTypes().size());
    for (BioAssay bioAssay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, bioAssay.getArrayDesignUsed());
    }
    assertNotNull(ee.getNumberOfDataVectors());
    assertEquals(expectedProbeCount, ee.getNumberOfDataVectors().intValue());

    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix processedMatrix = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(expectedProbeCount, processedMatrix.rows());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM475204", 3949585);
    assertEquals(3 * expectedProbeCount, ee.getRawExpressionDataVectors().size());
    assertEquals(expectedProbeCount, ee.getProcessedExpressionDataVectors().size());

    Collection<DoubleVectorValueObject> vectorValueObjects = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(expectedProbeCount, vectorValueObjects.size());
    for (DoubleVectorValueObject vector : vectorValueObjects) {
        assertEquals(6, vector.getBioAssays().size());
    }

    // Processed vectors must also be found when the experiment is loaded again by id.
    boolean hasProcessedVectors = !dataVectorService.getProcessedDataVectors(experimentService.load(ee.getId())).isEmpty();
    assertTrue(hasProcessedVectors);
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 8 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class ComBatTest method test2WithMissingValues.

/**
 * Runs ComBat on a small expression matrix that contains missing values and checks the design
 * matrix, the standardized data and the final result against fixed reference values. The entry
 * at (7, 6) is expected to be NaN both after standardization and in the final result.
 *
 * @throws Exception if a test resource cannot be read or ComBat fails
 */
@Test
public void test2WithMissingValues() throws Exception {
    DoubleMatrixReader f = new DoubleMatrixReader();
    StringMatrixReader of = new StringMatrixReader();
    DoubleMatrix<String, String> testMatrix;
    StringMatrix<String, String> sampleInfo;
    // Scope both resource streams so they are closed even when parsing throws.
    try (java.io.InputStream matrixStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.madata.withmissing.small.txt");
        java.io.InputStream sampleInfoStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.metadata.small.txt")) {
        testMatrix = f.read(matrixStream);
        sampleInfo = of.read(sampleInfoStream);
    }
    @SuppressWarnings({ "unchecked", "rawtypes" }) ComBat<String, String> comBat = new ComBat(testMatrix, sampleInfo);
    // Spot-check the design matrix.
    DoubleMatrix2D X = comBat.getDesignMatrix();
    assertEquals(1, X.get(0, 0), 0.001);
    assertEquals(0, X.get(3, 0), 0.001);
    assertEquals(1, X.get(4, 2), 0.001);
    // Spot-check the standardized data.
    DoubleMatrix2D y = new DenseDoubleMatrix2D(testMatrix.asArray());
    DoubleMatrix2D sdata = comBat.standardize(y, X);
    assertEquals(-0.23640626, sdata.get(17, 1), 0.0001);
    assertEquals(0.51027241, sdata.get(8, 2), 0.001);
    assertEquals(0.2107944, sdata.get(0, 8), 0.001);
    assertEquals(0.23769649, sdata.get(3, 7), 0.001);
    assertEquals(Double.NaN, sdata.get(7, 6), 0.001);
    // Spot-check the corrected matrix; the asserted cells appear in the reference table below.
    DoubleMatrix2D finalResult = comBat.run();
    assertEquals(10.660466, finalResult.get(7, 0), 0.0001);
    assertEquals(11.733197, finalResult.get(7, 7), 0.0001);
    assertEquals(Double.NaN, finalResult.get(7, 6), 0.0001);
    assertEquals(6.802441, finalResult.get(10, 7), 0.0001);
    // Reference table for finalResult (row name followed by one value per sample column):
    // log.info( finalResult );
    // X08.1 X54.1 X36.1 X23.1 X17.1 X40.1 X45.1 X55.1 X11.1
    // 1553129_at 3.861661 3.656498 3.891722 3.969015 3.928164 3.859776 3.885422 3.831730 3.853814
    // 213447_at 6.233625 5.400615 5.583825 6.034642 6.457188 6.173610 5.322877 4.591996 6.655735
    // 242039_at 8.155451 8.487645 7.512280 7.043722 7.570154 7.928574 8.138381 8.538423 7.937447
    // 223394_at 7.794531 8.178473 8.285406 8.316963 7.845536 8.255656 8.604694 8.184320 7.311231
    // 227758_at 3.813320 3.474997 NA 3.663592 3.701014 NA 3.648964 3.618175 3.985569
    // 207696_at 3.576939 3.525421 3.561366 3.506479 3.516473 3.593750 3.628095 3.676431 3.599589
    // 241107_at 6.264194 5.926644 5.654168 5.730628 6.185137 5.587933 5.527347 5.895269 6.441413
    // 228980_at 10.660466 11.090106 10.769495 10.990729 10.616753 11.819747 NA 11.733197 10.516411
    // 204452_s_at 6.038281 5.403597 5.950596 6.443812 5.676120 5.238702 5.616082 5.290953 5.543041
    // 1562443_at 4.618687 3.961298 4.671874 4.512624 4.829666 4.138126 4.232039 4.048561 4.696936
    // 232018_at 6.221217 6.882512 6.093883 5.937127 5.987227 6.502522 6.940522 6.802441 5.673800
    // 1561877_at 3.793029 3.751057 3.719922 3.866485 4.070190 3.658865 3.465794 3.854070 3.878497
    // 221183_at 6.800233 5.559318 6.247321 6.566830 6.731457 5.701761 6.062595 5.097052 7.117171
    // 206162_x_at 5.273091 5.238142 5.023724 4.886765 5.162352 5.564269 5.573007 5.980072 5.558662
    // 214502_at 4.047844 NA 3.841319 4.006797 NA 4.504433 3.992359 4.192473 3.773261
    // 234099_at 7.628902 6.875036 7.101699 6.929775 7.202759 6.431563 6.622195 6.751740 7.740300
    // 237400_at 4.396190 NA 4.978136 4.775859 5.379108 5.809133 4.611809 4.853239 4.734252
    // 240254_at 4.062600 3.851718 4.274175 4.153745 4.030111 6.324506 4.089158 3.739869 4.426321
    // 209053_s_at 5.970077 6.378914 6.241240 6.450990 5.944027 6.702078 6.463590 6.372133 5.964286
}
Also used : StringMatrixReader(ubic.basecode.io.reader.StringMatrixReader) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) Test(org.junit.Test)

Example 9 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class ComBatTest method test3NoCovariate.

/*
 * Case where we only have batch, no other covariates.
 *
 * Runs ComBat with the "nocov" sample metadata and checks the design matrix, the standardized
 * data and the final result against fixed reference values.
 */
@Test
public void test3NoCovariate() throws Exception {
    DoubleMatrixReader f = new DoubleMatrixReader();
    StringMatrixReader of = new StringMatrixReader();
    DoubleMatrix<String, String> testMatrix;
    StringMatrix<String, String> sampleInfo;
    // Scope both resource streams so they are closed even when parsing throws.
    try (java.io.InputStream matrixStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.madata.small.txt");
        java.io.InputStream sampleInfoStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.metadata.nocov.small.txt")) {
        testMatrix = f.read(matrixStream);
        sampleInfo = of.read(sampleInfoStream);
    }
    @SuppressWarnings({ "unchecked", "rawtypes" }) ComBat<String, String> comBat = new ComBat(testMatrix, sampleInfo);
    // Spot-check the design matrix.
    DoubleMatrix2D X = comBat.getDesignMatrix();
    assertEquals(1, X.get(0, 0), 0.001);
    assertEquals(0, X.get(3, 0), 0.001);
    // Spot-check the standardized data.
    DoubleMatrix2D y = new DenseDoubleMatrix2D(testMatrix.asArray());
    DoubleMatrix2D sdata = comBat.standardize(y, X);
    assertEquals(-0.57397393, sdata.get(17, 1), 0.0001);
    assertEquals(1.10827459, sdata.get(8, 2), 0.001);
    assertEquals(0.45359207, sdata.get(0, 8), 0.001);
    assertEquals(0.15470664, sdata.get(3, 7), 0.001);
    // Spot-check the corrected matrix; the asserted cells appear in the reference table below.
    DoubleMatrix2D finalResult = comBat.run();
    assertEquals(10.678412, finalResult.get(7, 0), 0.0001);
    assertEquals(11.677158, finalResult.get(7, 7), 0.0001);
    assertEquals(6.735682, finalResult.get(10, 7), 0.0001);
    // Reference table for finalResult (row name followed by one value per sample column):
    // log.info( finalResult );
    // X08.1 X54.1 X36.1 X23.1 X17.1 X40.1 X45.1 X55.1 X11.1
    // 1553129_at 3.862883 3.666102 3.891911 3.968716 3.920277 3.854683 3.879281 3.837050 3.858994
    // 213447_at 6.233228 5.393208 5.601587 6.008113 6.456638 6.170673 5.312780 4.707229 6.568215
    // 242039_at 8.164096 8.502608 7.550607 7.146657 7.549429 7.914666 8.128463 8.437540 7.919430
    // 223394_at 7.785804 8.172181 8.258501 8.286628 7.847273 8.259993 8.611244 8.164748 7.386591
    // 227758_at 3.816278 3.475673 3.642967 3.670807 3.694009 3.641609 3.641609 3.633687 3.958491
    // 207696_at 3.580712 3.527869 3.565008 3.518815 3.510254 3.589519 3.624747 3.666631 3.601962
    // 241107_at 6.256698 5.922966 5.679003 5.749597 6.179683 5.589234 5.529333 5.900868 6.405107
    // 228980_at 10.678412 11.107177 10.773365 10.974208 10.592659 11.793201 10.935671 11.677158 10.572522
    // 204452_s_at 6.021874 5.366750 5.931940 6.337327 5.712018 5.260514 5.650047 5.355390 5.562588
    // 1562443_at 4.609301 3.964679 4.663442 4.516068 4.818594 4.140485 4.232574 4.084472 4.684495
    // 232018_at 6.226565 6.878310 6.097034 5.956156 5.989567 6.497420 6.929095 6.735682 5.721358
    // 1561877_at 3.789971 3.749341 3.728529 3.867675 4.058129 3.659954 3.473056 3.856079 3.879270
    // 221183_at 6.790431 5.543900 6.253127 6.541754 6.737387 5.703031 6.065498 5.210347 7.035212
    // 206162_x_at 5.294347 5.257304 5.090348 4.979353 5.118390 5.544392 5.553653 5.884388 5.542866
    // 214502_at 4.047479 3.865124 3.805045 3.963706 3.906569 4.478499 3.972879 4.154100 3.752158
    // 234099_at 7.596681 6.861572 7.109911 6.946028 7.208003 6.455995 6.641884 6.761660 7.703987
    // 237400_at 4.437312 5.820944 5.026344 4.825555 5.319276 5.731938 4.582957 4.979099 4.860988
    // 240254_at 4.106446 3.903017 4.238474 4.121097 4.000663 6.213980 4.057623 3.815918 4.484964
    // 209053_s_at 5.979671 6.378071 6.241459 6.440983 5.946471 6.685171 6.452771 6.374475 5.986512
}
Also used : StringMatrixReader(ubic.basecode.io.reader.StringMatrixReader) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) Test(org.junit.Test)

Example 10 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class ComBatTest method test1.

/**
 * Runs ComBat on the small example matrix with the full sample metadata and checks the design
 * matrix, the standardized data and the final result against fixed reference values.
 *
 * @throws Exception if a test resource cannot be read or ComBat fails
 */
@Test
public void test1() throws Exception {
    DoubleMatrixReader f = new DoubleMatrixReader();
    StringMatrixReader of = new StringMatrixReader();
    DoubleMatrix<String, String> testMatrix;
    StringMatrix<String, String> sampleInfo;
    // Scope both resource streams so they are closed even when parsing throws.
    try (java.io.InputStream matrixStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.madata.small.txt");
        java.io.InputStream sampleInfoStream = this.getClass().getResourceAsStream("/data/analysis/preprocess/batcheffects/example.metadata.small.txt")) {
        testMatrix = f.read(matrixStream);
        sampleInfo = of.read(sampleInfoStream);
    }
    @SuppressWarnings({ "unchecked", "rawtypes" }) ComBat<String, String> comBat = new ComBat(testMatrix, sampleInfo);
    // Spot-check the design matrix.
    DoubleMatrix2D X = comBat.getDesignMatrix();
    assertEquals(1, X.get(0, 0), 0.001);
    assertEquals(0, X.get(3, 0), 0.001);
    assertEquals(1, X.get(4, 2), 0.001);
    // Spot-check the standardized data.
    DoubleMatrix2D y = new DenseDoubleMatrix2D(testMatrix.asArray());
    DoubleMatrix2D sdata = comBat.standardize(y, X);
    assertEquals(-0.25074, sdata.get(17, 1), 0.0001);
    assertEquals(0.54122, sdata.get(8, 2), 0.001);
    assertEquals(0.22358, sdata.get(0, 8), 0.001);
    assertEquals(0.25211, sdata.get(3, 7), 0.001);
    // Spot-check the corrected matrix.
    DoubleMatrix2D finalResult = comBat.run();
    assertEquals(10.67558, finalResult.get(7, 0), 0.0001);
    assertEquals(11.68505, finalResult.get(7, 7), 0.0001);
    assertEquals(6.769583, finalResult.get(10, 7), 0.0001);
}
Also used : StringMatrixReader(ubic.basecode.io.reader.StringMatrixReader) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) Test(org.junit.Test)

Aggregations

DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)15 Test (org.junit.Test)11 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)6 StringMatrixReader (ubic.basecode.io.reader.StringMatrixReader)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 InputStream (java.io.InputStream)4 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)4 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)3 IOException (java.io.IOException)2 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)2 DataUpdater (ubic.gemma.core.loader.expression.geo.DataUpdater)2 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)2 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)2 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)2 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)2 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)2