Search in sources :

Example 11 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class DifferentialExpressionAnalyzerServiceTest method testAnalyzeAndDelete.

/**
 * Runs a differential expression analysis over all factors of the test experiment (plus their
 * interaction), checks the ACLs and result counts, writes the results to a data file, reads that file
 * back as a matrix, and finally deletes both the files and the analysis.
 */
@Test
public void testAnalyzeAndDelete() throws Exception {
    assert ee.getId() != null;

    // Configure: include every experimental factor and the interaction between them.
    DifferentialExpressionAnalysisConfig analysisConfig = new DifferentialExpressionAnalysisConfig();
    Collection<ExperimentalFactor> designFactors = ee.getExperimentalDesign().getExperimentalFactors();
    analysisConfig.setFactorsToInclude(designFactors);
    analysisConfig.addInteractionToInclude(designFactors);

    // Run.
    Collection<DifferentialExpressionAnalysis> analyses = differentialExpressionAnalyzerService
            .runDifferentialExpressionAnalyses(ee, analysisConfig);
    assertNotNull(analyses);
    assertTrue(!analyses.isEmpty());
    assertNotNull(analyses.iterator().next());

    // Security checks on the first (thawed) analysis.
    DifferentialExpressionAnalysis thawed = differentialExpressionAnalysisService
            .thawFully(analyses.iterator().next());
    aclTestUtils.checkHasAcl(thawed);
    aclTestUtils.checkLacksAces(thawed);
    aclTestUtils.checkHasAclParent(thawed, ee);

    // Every result set is expected to hold exactly 99 results.
    for (ExpressionAnalysisResultSet resultSet : thawed.getResultSets()) {
        assertTrue(!resultSet.getResults().isEmpty());
        assertEquals(99, resultSet.getResults().size());
    }

    // Avoid adding annotations for genes; it confuses the reader used below.
    for (ArrayDesign platform : expressionExperimentService.getArrayDesignsUsed(ee)) {
        this.arrayDesignAnnotationService.deleteExistingFiles(platform);
    }

    Collection<File> writtenFiles = expressionDataFileService.writeOrLocateDiffExpressionDataFiles(ee, true);
    assertEquals(1, writtenFiles.size());
    File resultFile = writtenFiles.iterator().next();
    assertTrue(resultFile.canRead());

    // Note that this reader generally won't work for experiment files because of the gene annotations.
    DoubleMatrixReader matrixReader = new DoubleMatrixReader();
    DoubleMatrix<String, String> matrix = matrixReader.read(resultFile.getAbsolutePath());
    assertEquals(99, matrix.rows());
    System.out.println(matrix.toString());
    assertEquals(9, matrix.columns());

    // Clean up: remove the files, then the analysis itself.
    expressionDataFileService.deleteAllFiles(ee);
    int removedCount = differentialExpressionAnalyzerService.deleteAnalyses(ee);
    assertTrue(removedCount > 0);
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) File(java.io.File) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 12 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class QuantileNormalizerTest method setUp.

/**
 * Loads the test matrix from the classpath resource {@code /data/testdata.txt} and creates a fresh
 * normalizer before each test.
 */
@Before
public void setUp() throws Exception {
    DoubleMatrixReader reader = new DoubleMatrixReader();
    // Close the classpath stream once the matrix has been read, so the handle is not leaked.
    try (java.io.InputStream testData = this.getClass().getResourceAsStream("/data/testdata.txt")) {
        tester = reader.read(testData);
    }
    assert tester != null;
    qn = new QuantileNormalizer<>();
    QuantileNormalizerTest.log.debug("Setup done");
}
Also used : DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) Before(org.junit.Before)

Example 13 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class DiffExTest method testCountData.

/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 * <p>
 * Loads GSE29006 via the GEO service, imports its design, attaches count data read from a flat file,
 * and then runs the analysis twice (without and with weights), checking the statistics reported for
 * probe ENSG00000000938 in each run.
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Start from a clean slate: drop any copy of the data set left over from an earlier run.
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // thaw again so that the addCountData() updates are visible on the entity
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            assertEquals(0.007055717, r.getPvalue(), 0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    // <----
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    // Track the probe here as well; otherwise the assertions below could be skipped without the test failing.
    boolean foundWithWeights = false;
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            foundWithWeights = true;
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
    assertTrue(foundWithWeights);
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysis(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 14 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class RNASeqDataAddCli method doWork.

/**
 * Either back-fills log2cpm values from existing count data for every selected experiment, or adds
 * count (and optionally RPKM) data read from files to a single experiment.
 *
 * @param args the command-line arguments, passed on to {@code processCommandLine}
 * @return the exception produced by command-line processing or by reading the data files, or
 *         {@code null} if the run completed
 * @throws IllegalArgumentException if more than one experiment was selected in the non-backfill mode
 */
@Override
protected Exception doWork(String[] args) {
    Exception exception = super.processCommandLine(args);
    if (exception != null)
        return exception;
    DataUpdater serv = this.getBean(DataUpdater.class);
    if (this.justbackfillLog2cpm) {
        // Backfill mode: process each experiment independently and record successes and failures.
        for (BioAssaySet bas : this.expressionExperiments) {
            try {
                ExpressionExperiment ee = (ExpressionExperiment) bas;
                Collection<QuantitationType> pqts = this.eeService.getPreferredQuantitationType(ee);
                if (pqts.size() > 1)
                    throw new IllegalArgumentException("Cannot process when there is more than one preferred QT");
                if (pqts.isEmpty())
                    throw new IllegalArgumentException("No preferred quantitation type for " + ee.getShortName());
                QuantitationType qt = pqts.iterator().next();
                if (!qt.getType().equals(StandardQuantitationType.COUNT)) {
                    AbstractCLI.log.warn("Preferred data is not counts for " + ee);
                    this.errorObjects.add(ee.getShortName() + ": Preferred data is not counts");
                    continue;
                }
                serv.log2cpmFromCounts(ee, qt);
                this.successObjects.add(ee);
            } catch (Exception e) {
                AbstractCLI.log.error(e, e);
                // The cast at the top of the try block is one of the failures that can land here, so
                // do not repeat it unconditionally: a second ClassCastException would abort the loop.
                String label = (bas instanceof ExpressionExperiment)
                        ? ((ExpressionExperiment) bas).getShortName()
                        : String.valueOf(bas);
                this.errorObjects.add(label + ": " + e.getMessage());
            }
        }
        this.summarizeProcessing();
        return null;
    }
    /*
         * Usual cases.
         */
    if (this.expressionExperiments.size() > 1) {
        throw new IllegalArgumentException("Sorry, can only process one experiment with this tool.");
    }
    ArrayDesign targetArrayDesign = this.locateArrayDesign(this.platformName);
    // At most one experiment remains at this point (enforced just above).
    ExpressionExperiment ee = (ExpressionExperiment) this.expressionExperiments.iterator().next();
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try {
        // Either matrix may stay null when the corresponding file was not supplied.
        DoubleMatrix<String, String> countMatrix = null;
        DoubleMatrix<String, String> rpkmMatrix = null;
        if (this.countFile != null) {
            countMatrix = reader.read(countFile);
        }
        if (this.rpkmFile != null) {
            rpkmMatrix = reader.read(rpkmFile);
        }
        serv.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, readLength, isPairedReads, allowMissingSamples);
    } catch (IOException e) {
        AbstractCLI.log.error("Failed while processing " + ee, e);
        return e;
    }
    return null;
}
Also used : BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) DataUpdater(ubic.gemma.core.loader.expression.geo.DataUpdater) IOException(java.io.IOException) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) IOException(java.io.IOException) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader)

Example 15 with DoubleMatrixReader

use of ubic.basecode.io.reader.DoubleMatrixReader in project Gemma by PavlidisLab.

the class ReplaceDataCli method doWork.

/**
 * Replaces the data of the preferred quantitation type of a single, single-platform experiment with
 * the matrix read from {@code file}.
 *
 * @param args the command-line arguments, passed on to {@code processCommandLine}
 * @return the exception produced by command-line processing or by reading the data file, or
 *         {@code null} if the run completed
 * @throws IllegalArgumentException if more than one experiment was selected, the experiment uses more
 *         than one platform, or it does not have exactly one preferred quantitation type
 */
@Override
protected Exception doWork(String[] args) {
    Exception exception = super.processCommandLine(args);
    if (exception != null) {
        return exception;
    }
    DataUpdater dataUpdater = this.getBean(DataUpdater.class);
    if (this.expressionExperiments.size() > 1) {
        throw new IllegalArgumentException("Sorry, This CLI can only deal with one experiment at a time.");
    }
    ExpressionExperiment ee = (ExpressionExperiment) this.expressionExperiments.iterator().next();
    Collection<ArrayDesign> arrayDesignsUsed = this.eeService.getArrayDesignsUsed(ee);
    if (arrayDesignsUsed.size() > 1) {
        throw new IllegalArgumentException("Sorry, can only process single-platform data sets with this tool.");
    }
    ArrayDesign targetArrayDesign = arrayDesignsUsed.iterator().next();
    Collection<QuantitationType> qts = eeService.getPreferredQuantitationType(ee);
    if (qts.size() > 1) {
        throw new IllegalArgumentException("Experiment must have just one preferred quantitation type to replace data for");
    }
    // An empty collection must be reported like a missing type; calling next() on it would throw
    // NoSuchElementException before the null check below could ever run.
    QuantitationType qt = qts.isEmpty() ? null : qts.iterator().next();
    if (qt == null) {
        throw new IllegalArgumentException("Experiment must have a preferred quantitation type to replace data for");
    }
    try {
        DoubleMatrixReader reader = new DoubleMatrixReader();
        DoubleMatrix<String, String> data = reader.read(file);
        dataUpdater.replaceData(ee, targetArrayDesign, qt, data);
    } catch (IOException e) {
        AbstractCLI.log.error("Failed while processing " + ee, e);
        return e;
    }
    return null;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) DataUpdater(ubic.gemma.core.loader.expression.geo.DataUpdater) IOException(java.io.IOException) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) IOException(java.io.IOException) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader)

Aggregations

DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)15 Test (org.junit.Test)11 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)6 DenseDoubleMatrix2D (cern.colt.matrix.impl.DenseDoubleMatrix2D)6 StringMatrixReader (ubic.basecode.io.reader.StringMatrixReader)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 InputStream (java.io.InputStream)4 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)4 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)3 IOException (java.io.IOException)2 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)2 DataUpdater (ubic.gemma.core.loader.expression.geo.DataUpdater)2 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)2 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)2 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)2 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)2 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)2