Search in sources :

Example 76 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataSVD method removeHighestComponents.

/**
 * Provide a reconstructed matrix removing the first N components (the most significant ones). If the matrix was
 * normalized first, removing the first component replicates the normalization approach taken by Nielsen et al.
 * (Lancet 359, 2002) and Alter et al. (PNAS 2000). Correction by ANOVA would yield similar results if the nuisance
 * variable is known.
 * <p>
 * The result is computed as U * S' * V<sup>T</sup>, where S' is a copy of S whose first
 * {@code numComponentsToRemove} diagonal entries have been set to zero.
 *
 * @param numComponentsToRemove The number of components to remove, starting from the largest eigenvalue. Values
 *                              of zero or less remove nothing.
 * @return the reconstructed matrix; values that were missing before are re-masked.
 * @throws IllegalArgumentException if more components are requested than S has diagonal entries
 */
public ExpressionDataDoubleMatrix removeHighestComponents(int numComponentsToRemove) {
    // Zero the leading diagonal entries on a copy of S rather than on the matrix returned by the decomposition.
    DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
    // Fail fast with a clear message instead of an index error from deep inside the matrix implementation.
    int availableComponents = Math.min(copy.rows(), copy.columns());
    if (numComponentsToRemove > availableComponents) {
        throw new IllegalArgumentException("Cannot remove " + numComponentsToRemove + " components; only " + availableComponents + " are available");
    }
    for (int i = 0; i < numComponentsToRemove; i++) {
        copy.set(i, i, 0.0);
    }
    // Move to Colt matrices for the multiplication.
    double[][] rawU = svd.getU().getRawMatrix();
    double[][] rawS = copy.getRawMatrix();
    double[][] rawV = svd.getV().getRawMatrix();
    DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
    DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
    DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
    Algebra a = new Algebra();
    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
    // Carry over the row and column labels of the original expression data.
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
    // re-mask the missing values: any cell flagged NaN in missingValueInfo becomes NaN again.
    for (int i = 0; i < reconstructed.rows(); i++) {
        for (int j = 0; j < reconstructed.columns(); j++) {
            if (Double.isNaN(this.missingValueInfo.get(i, j))) {
                reconstructed.set(i, j, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 77 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * Regress the data on the configured factors and return what is left over.
 *
 * @param matrix      the data on which to perform regression.
 * @param config      supplies the factors to include. Any interactions or subset configuration is ignored, and
 *                    so is the qValueThreshold. Data are <em>NOT</em> log transformed unless they come in that
 *                    way.
 * @param retainScale if true, the intercept is added back so the data retain the global mean
 * @return residuals from the regression; the input matrix itself is returned when no factors are configured.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    List<ExperimentalFactor> includedFactors = config.getFactorsToInclude();
    if (includedFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }

    // The setup stages below rely on code similar to doAnalysis.
    List<BioMaterial> orderedSamples = ExperimentalDesignUtils.getOrderedSamples(matrix, includedFactors);
    Map<ExperimentalFactor, FactorValue> baselines = ExperimentalDesignUtils.getBaselineConditions(orderedSamples, includedFactors);
    ObjectMatrix<String, String, Object> rawDesign = ExperimentalDesignUtils.buildDesignMatrix(includedFactors, orderedSamples, baselines);
    DesignMatrix design = new DesignMatrix(rawDesign, true);
    ExpressionDataDoubleMatrix reordered = new ExpressionDataDoubleMatrix(orderedSamples, matrix);
    DoubleMatrix<String, String> data = this.makeDataMatrix(rawDesign, reordered.getMatrix());

    // COUNT data get a weighted fit; everything else gets the plain fit.
    QuantitationType firstType = reordered.getQuantitationTypes().iterator().next();
    boolean countData = firstType.getScale().equals(ScaleType.COUNT);
    LeastSquaresFit fit;
    if (!countData) {
        fit = new LeastSquaresFit(design, data);
    } else {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed; column sums serve as the library sizes
        DoubleMatrix1D librarySize = MatrixStats.colSums(data);
        MeanVarianceEstimator estimator = new MeanVarianceEstimator(design, data, librarySize);
        fit = new LeastSquaresFit(design, data, estimator.getWeights());
    }

    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        // Row 0 of the coefficients is used as the intercept; add each row's value back onto its residuals.
        DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
        int rowCount = residuals.rows();
        for (int row = 0; row < rowCount; row++) {
            double offset = intercept.get(row);
            residuals.viewRow(row).assign(Functions.plus(offset));
        }
    }

    // Wrap the residuals with the same row/column labels as the reordered data.
    DoubleMatrix<CompositeSequence, BioMaterial> labelled = new DenseDoubleMatrix<>(residuals.toArray());
    labelled.setRowNames(reordered.getMatrix().getRowNames());
    labelled.setColumnNames(reordered.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(reordered, labelled);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 78 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class TTestAnalyzerTest method testOneSampleTtest.

/**
 * Runs the analyzer with a single factor reduced to one factor value and compares the p-values of a handful of
 * probes against fixed expected values. Returns without testing anything when no R connection is available.
 */
@Test
public void testOneSampleTtest() throws Exception {
    if (!connected) {
        log.warn("Could not establish R connection.  Skipping test ...");
        return;
    }

    this.configureVectors(super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt");
    this.configureMocks();

    Collection<ExperimentalFactor> factors = new HashSet<>();
    factors.add(super.experimentalFactorA_Area);

    // Drop the second factor value from the factor and from every sample; every sample gets the first one.
    Iterator<FactorValue> factorValues = experimentalFactorA_Area.getFactorValues().iterator();
    FactorValue kept = factorValues.next();
    FactorValue dropped = factorValues.next();
    experimentalFactorA_Area.getFactorValues().remove(dropped);
    for (BioMaterial sample : super.biomaterials) {
        sample.getFactorValues().remove(dropped);
        sample.getFactorValues().add(kept);
    }

    // must be for one-sample to make sense.
    quantitationType.setIsRatio(true);
    quantitationType.setScale(ScaleType.LOG2);

    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setFactorsToInclude(factors);

    // Only the first analysis and its first result set are inspected.
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(expressionExperiment, config);
    DifferentialExpressionAnalysis firstAnalysis = analyses.iterator().next();
    ExpressionAnalysisResultSet resultSet = firstAnalysis.getResultSets().iterator().next();

    assertEquals(null, resultSet.getBaselineGroup());
    assertEquals(BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS - 4, resultSet.getResults().size());

    // Every result needs a p-value; selected probes are checked against expected values.
    for (DifferentialExpressionAnalysisResult result : resultSet.getResults()) {
        CompositeSequence probe = result.getProbe();
        Double pvalue = result.getPvalue();
        log.debug("probe: " + probe + "; p-value: " + pvalue);
        assertNotNull(pvalue);
        switch(probe.getName()) {
            case "probe_0":
                assertEquals(0.03505, pvalue, 0.00001);
                break;
            case "probe_16":
                assertEquals(0.03476, pvalue, 0.0001);
                break;
            case "probe_17":
                assertEquals(0.03578, pvalue, 0.0001);
                break;
            case "probe_75":
                assertEquals(0.8897, pvalue, 0.0001);
                break;
            case "probe_94":
                assertEquals(0.002717, pvalue, 0.0001);
                break;
        }
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 79 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testReorder.

/**
 * Loads GSE404, attaches a two-level categorical factor (one level flagged as baseline) to the samples in
 * alternating fashion, asks the service to reorder the processed vectors by design, and then verifies the
 * sample order and a few data values of one vector.
 */
@Test
public void testReorder() throws Exception {
    // Remove any existing copy of the experiment before loading.
    ExpressionExperiment existing = eeService.findByShortName("GSE404");
    if (existing != null) {
        eeService.remove(existing);
    }

    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Fall back to the experiment carried by the exception.
        this.ee = (ExpressionExperiment) e.getData();
    }

    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Build the factor and its two values; "bar" is the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);

    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setExperimentalFactor(factor);
    fv1.setValue("foo");
    fv2.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Visit the assays in id order so the alternating assignment below is deterministic.
    Comparator<BioAssay> byId = new Comparator<BioAssay>() {

        @Override
        public int compare(BioAssay left, BioAssay right) {
            return left.getId().compareTo(right.getId());
        }
    };
    List<BioAssay> assaysById = new ArrayList<>(ee.getBioAssays());
    Collections.sort(assaysById, byId);

    // Samples that already carry a factor value are left alone and do not advance the counter.
    int assigned = 0;
    for (BioAssay assay : assaysById) {
        BioMaterial sample = assay.getSampleUsed();
        assert fv1.getId() != null;
        if (sample.getFactorValues().isEmpty()) {
            sample.getFactorValues().add(assigned % 2 == 0 ? fv1 : fv2);
            bioMaterialService.update(sample);
            assigned++;
        }
    }

    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    // Setup is complete; this is the call under test.
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    // Now check the vectors.
    Collection<ProcessedExpressionDataVector> reordered = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertTrue(reordered.size() > 0);

    boolean foundVector = false;
    for (ProcessedExpressionDataVector vector : reordered) {
        log.debug(vector.getDesignElement().getName() + " .........................");

        // Thaw to avoid a lazy-loading error: this test runs outside of a transaction, whereas all references
        // in the code under test run inside one.
        BioAssayDimension dimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(dimension);

        int position = 0;
        for (BioAssay assay : dimension.getBioAssays()) {
            BioMaterial sample = assay.getSampleUsed();
            assertEquals(1, sample.getFactorValues().size());
            FactorValue assignedValue = sample.getFactorValues().iterator().next();
            assertNotNull(assignedValue.getId());
            log.debug(assay.getId() + " " + assignedValue.getId() + " " + assignedValue);
            if (position < 10) {
                // first because it is baseline;
                assertEquals(fv2, assignedValue);
            }
            position++;
        }

        // Spot check the data of the design element named "40" only.
        if (!vector.getDesignElement().getName().equals("40")) {
            continue;
        }
        foundVector = true;
        ByteArrayConverter converter = new ByteArrayConverter();
        Double[] values = ArrayUtils.toObject(converter.byteArrayToDoubles(vector.getData()));
        assertEquals(20, values.length);
        assertEquals(-0.08, values[1], 0.001);
        assertEquals(0.45, values[10], 0.001);
        assertEquals(Double.NaN, values[19], 0.001);
    }
    assertTrue("test vector not found", foundVector);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 80 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class CharacteristicServiceTest method setup.

/**
 * Prepares the fixture: a persistent experiment with two characteristics, one characteristic on the sample of
 * its first bioassay, and one characteristic on the first factor value of its first experimental factor.
 */
@Before
public void setup() {
    // Experiment-level characteristics; both are kept in fields.
    ee = this.getTestPersistentBasicExpressionExperiment();
    ee.setCharacteristics(this.getTestPersistentCharacteristics(2));
    Characteristic[] experimentCharacteristics = ee.getCharacteristics().toArray(new Characteristic[0]);
    eeChar1 = experimentCharacteristics[0];
    eeChar2 = experimentCharacteristics[1];
    eeService.update(ee);

    // Sample-level characteristic on the sample used by the first bioassay.
    BioAssay[] assays = ee.getBioAssays().toArray(new BioAssay[0]);
    BioMaterial sample = assays[0].getSampleUsed();
    sample.setCharacteristics(this.getTestPersistentCharacteristics(1));
    bmService.update(sample);

    // Add the helper's experimental factors, then the helper's factor values for the first factor.
    for (ExperimentalFactor generated : testHelper.getExperimentalFactors(ee.getExperimentalDesign())) {
        eeService.addFactor(ee, generated);
    }
    ExperimentalFactor firstFactor = ee.getExperimentalDesign().getExperimentalFactors().iterator().next();
    for (FactorValue generated : testHelper.getFactorValues(firstFactor)) {
        eeService.addFactorValue(ee, generated);
    }

    // Factor-value-level characteristic on the first value of that factor.
    FactorValue firstValue = firstFactor.getFactorValues().iterator().next();
    firstValue.setCharacteristics(this.getTestPersistentCharacteristics(1));
    fvService.update(firstValue);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Before(org.junit.Before)

Aggregations

BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)132 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)67 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)22 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)19 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 HashSet (java.util.HashSet)13 Test (org.junit.Test)13 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)12 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)10 InputStream (java.io.InputStream)7 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)7 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)7 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 DoubleArrayList (cern.colt.list.DoubleArrayList)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)5 ArrayList (java.util.ArrayList)5