Search in sources :

Example 11 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.

/**
 * This is a self-contained test. That is, it does not depend on the setup in onSetUpInTransaction}. It tests
 * creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO). That is,
 * we have obtained information from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used
 * are GSM15697 and GSM15744. Specifically, we the Gemma objects that correspond to the GEO objects are:
 * DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at
 * BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"
 * BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"
 * BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials).
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();
    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);
    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);
    bioAssayDimension.setBioAssays(assays);
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);
    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");
    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);
    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);
    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);
    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    assertEquals(expressionDataMatrix.rows(), 2);
    assertEquals(expressionDataMatrix.columns(), 2);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 12 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class DataUpdaterTest method testAddData.

@Test
public void testAddData() throws Exception {
    /*
         * Load a regular data set that has no data. Platform is (basically) irrelevant.
         */
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> results = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // log.warn( "Test skipped because GSE37646 was not removed from the system prior to test" );
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    ee = experimentService.thawLite(ee);
    List<BioAssay> bioAssays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, bioAssays.size());
    List<BioMaterial> bms = new ArrayList<>();
    for (BioAssay ba : bioAssays) {
        bms.add(ba.getSampleUsed());
    }
    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    DoubleMatrix<CompositeSequence, BioMaterial> rawMatrix = new DenseDoubleMatrix<>(targetArrayDesign.getCompositeSequences().size(), bms.size());
    /*
         * make up some fake data on another platform, and match it to those samples
         */
    for (int i = 0; i < rawMatrix.rows(); i++) {
        for (int j = 0; j < rawMatrix.columns(); j++) {
            rawMatrix.set(i, j, (i + 1) * (j + 1) * Math.random() / 100.0);
        }
    }
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    rawMatrix.setRowNames(probes);
    rawMatrix.setColumnNames(bms);
    QuantitationType qt = this.makeQt(true);
    ExpressionDataDoubleMatrix data = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    assertNotNull(data.getBestBioAssayDimension());
    assertEquals(rawMatrix.columns(), data.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), data.getMatrix().rows());
    /*
         * Replace it.
         */
    ee = dataUpdater.replaceData(ee, targetArrayDesign, data);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
        assertTrue(v.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(31, v.getBioAssays().size());
    }
    /*
         * Test adding data (non-preferred)
         */
    qt = this.makeQt(false);
    ExpressionDataDoubleMatrix moreData = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, moreData);
    ee = experimentService.thaw(ee);
    try {
        // add preferred data twice.
        dataUpdater.addData(ee, targetArrayDesign, data);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException e) {
    // okay.
    }
    dataUpdater.deleteData(ee, qt);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ArrayList(java.util.ArrayList) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 13 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExperimentalDesignImportDuplicateValueTest method testParse.

/*
     * Note that this test will fail if you run it again on a dirty DB. Sorry!
     */
@Test
public final void testParse() throws Exception {
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/expdesign.import.testfull.txt")) {
        experimentalDesignImporter.importDesign(ee, is);
    }
    Collection<BioMaterial> bms = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        BioMaterial bm = ba.getSampleUsed();
        bms.add(bm);
    }
    checkResults(bms);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) InputStream(java.io.InputStream) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) HashSet(java.util.HashSet) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 14 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method run.

@Override
public Collection<DifferentialExpressionAnalysis> run(ExpressionExperiment expressionExperiment, ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config) {
    /*
         * I apologize for this being so complicated. Basically there are four phases:
         *
         * 1. Get the data matrix and factors
         *
         * 2. Determine baseline groups; build model and contrasts
         *
         * 3. Run the analysis
         *
         * 4. Postprocess the analysis
         *
         * By far the most complex is #2 -- it depends on which factors and what kind they are.
         */
    /*
         * Initialize our matrix and factor lists...
         */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    /*
         * FIXME this is the place to strip put the outliers.
         */
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(dmatrix, factors);
    // enforce ordering
    dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, dmatrix);
    /*
         * Do the analysis, by subsets if requested
         */
    Collection<DifferentialExpressionAnalysis> results = new HashSet<>();
    ExperimentalFactor subsetFactor = config.getSubsetFactor();
    if (subsetFactor != null) {
        if (factors.contains(subsetFactor)) {
            throw new IllegalStateException("Subset factor cannot also be included in the analysis [ Factor was: " + subsetFactor + "]");
        }
        Map<FactorValue, ExpressionDataDoubleMatrix> subsets = this.makeSubSets(config, dmatrix, samplesUsed, subsetFactor);
        LinearModelAnalyzer.log.info("Total number of subsets: " + subsets.size());
        /*
             * Now analyze each subset
             */
        for (FactorValue subsetFactorValue : subsets.keySet()) {
            LinearModelAnalyzer.log.info("Analyzing subset: " + subsetFactorValue);
            /*
                 * Checking for DE_Exclude characteristics, which should not be included in the analysis.
                 * As requested in issue #4458 (bugzilla)
                 */
            boolean include = true;
            for (Characteristic c : subsetFactorValue.getCharacteristics()) {
                if (LinearModelAnalyzer.EXCLUDE_CHARACTERISTICS_VALUES.contains(c.getValue())) {
                    include = false;
                    break;
                }
            }
            if (!include) {
                LinearModelAnalyzer.log.warn(LinearModelAnalyzer.EXCLUDE_WARNING);
                continue;
            }
            List<BioMaterial> bioMaterials = ExperimentalDesignUtils.getOrderedSamples(subsets.get(subsetFactorValue), factors);
            /*
                 * make a EESubSet
                 */
            ExpressionExperimentSubSet eeSubSet = ExpressionExperimentSubSet.Factory.newInstance();
            eeSubSet.setSourceExperiment(expressionExperiment);
            eeSubSet.setName("Subset for " + subsetFactorValue);
            Collection<BioAssay> bioAssays = new HashSet<>();
            for (BioMaterial bm : bioMaterials) {
                bioAssays.addAll(bm.getBioAssaysUsedIn());
            }
            eeSubSet.getBioAssays().addAll(bioAssays);
            Collection<ExperimentalFactor> subsetFactors = this.fixFactorsForSubset(subsets.get(subsetFactorValue), eeSubSet, factors);
            DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(factors, config, subsetFactorValue);
            if (subsetFactors.isEmpty()) {
                LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
                continue;
            }
            /*
                 * Run analysis on the subset.
                 */
            DifferentialExpressionAnalysis analysis = this.doAnalysis(eeSubSet, subsetConfig, subsets.get(subsetFactorValue), bioMaterials, new ArrayList<>(subsetFactors), subsetFactorValue);
            if (analysis == null) {
                LinearModelAnalyzer.log.warn("No analysis results were obtained for subset: " + subsetFactorValue);
                continue;
            }
            results.add(analysis);
        }
    } else {
        /*
             * Analyze the whole thing as one
             */
        DifferentialExpressionAnalysis analysis = this.doAnalysis(expressionExperiment, config, dmatrix, samplesUsed, factors, null);
        if (analysis == null) {
            LinearModelAnalyzer.log.warn("No analysis results were obtained");
        } else {
            results.add(analysis);
        }
    }
    return results;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) Characteristic(ubic.gemma.model.common.description.Characteristic) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 15 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method doAnalysis.

/**
 * @param bioAssaySet       source data, could be a SubSet
 * @param dmatrix           data
 * @param samplesUsed       analyzed
 * @param factors           included in the model
 * @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
 * @return analysis, or null if there was a problem.
 */
private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet, DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {
    // We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
    if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
        throw new UnsupportedOperationException("Ebayes cannot be used when there are values missing in the data");
    }
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor");
        return null;
    }
    if (samplesUsed.size() <= factors.size()) {
        LinearModelAnalyzer.log.error("Must have more samples than factors");
        return null;
    }
    final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor; they were all removed due to incomplete values");
        return null;
    }
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
    /*
         * Build our factor terms, with interactions handled specially
         */
    List<String[]> interactionFactorLists = new ArrayList<>();
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    config.setBaseLineFactorValues(baselineConditions);
    boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
    if (!oneSampleTTest) {
        this.buildModelFormula(config, label2Factors, interactionFactorLists);
    }
    // calculate library size before log transformation (FIXME we compute this twice)
    DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
    /*
         * FIXME: remove columns that are marked as outliers.
         */
    dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    if (LinearModelAnalyzer.log.isDebugEnabled())
        this.outputForDebugging(dmatrix, designMatrix);
    /*
         * PREPARATION FOR 'NATIVE' FITTING
         */
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists, baselineConditions);
    /*
         * Run the analysis NOTE this can be simplified if we strip out R code.
         */
    final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix, properDesignMatrix, librarySize, config);
    if (rawResults.size() == 0) {
        LinearModelAnalyzer.log.error("Got no results from the analysis");
        return null;
    }
    /*
         * Initialize data structures we need to hold results.
         */
    // this used to be a Set, but a List is much faster.
    Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
    Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();
    for (String factorName : label2Factors.keySet()) {
        resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(factorName, new ArrayList<Double>());
    }
    for (String[] fs : interactionFactorLists) {
        String intF = StringUtils.join(fs, ":");
        resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(intF, new ArrayList<Double>());
    }
    if (pvaluesForQvalue.isEmpty()) {
        LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
        return null;
    }
    /*
         * Create result objects for each model fit. Keeping things in order is important.
         */
    final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
    boolean warned = false;
    int notUsable = 0;
    int processed = 0;
    for (CompositeSequence el : namedMatrix.getRowNames()) {
        if (++processed % 15000 == 0) {
            LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
        }
        LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());
        if (LinearModelAnalyzer.log.isDebugEnabled())
            LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);
        if (lm == null) {
            if (!warned) {
                LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
                warned = true;
            }
            notUsable++;
            continue;
        }
        for (String factorName : label2Factors.keySet()) {
            if (!pvaluesForQvalue.containsKey(factorName)) {
                // was dropped.
                continue;
            }
            Double overallPValue;
            DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory.newInstance();
            probeAnalysisResult.setProbe(el);
            if (lm.getCoefficients() == null) {
                // probeAnalysisResult.setPvalue( null );
                // pvaluesForQvalue.get( factorName ).add( overallPValue );
                // resultLists.get( factorName ).add( probeAnalysisResult );
                notUsable++;
                continue;
            }
            Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);
            if (factorsForName.size() > 1) {
                /*
                     * Interactions
                     */
                if (factorsForName.size() > 2) {
                    LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
                    return null;
                }
                assert factorName.contains(":");
                String[] factorNames = StringUtils.split(factorName, ":");
                assert factorNames.length == factorsForName.size();
                overallPValue = lm.getInteractionEffectP(factorNames);
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
                    Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);
                    for (String term : interactionContrastPValues.keySet()) {
                        Double contrastPvalue = interactionContrastPValues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, interactionContrastTStats, interactionContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el + ", further warnings suppressed");
                    // will over count?
                    notUsable++;
                    continue;
                }
            } else {
                /*
                     * Main effect
                     */
                assert factorsForName.size() == 1;
                ExperimentalFactor experimentalFactor = factorsForName.iterator().next();
                if (interceptFactor != null && factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
                    overallPValue = lm.getInterceptP();
                } else {
                    overallPValue = lm.getMainEffectP(factorName);
                }
                /*
                     * Add contrasts unless overall pvalue is NaN
                     */
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
                    Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);
                    for (String term : mainEffectContrastPvalues.keySet()) {
                        Double contrastPvalue = mainEffectContrastPvalues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, mainEffectContrastTStats, mainEffectContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor + " on " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("ANOVA could not be done for " + experimentalFactor + " on " + el);
                    // will over count?
                    notUsable++;
                    continue;
                }
            }
            assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";
            probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
            pvaluesForQvalue.get(factorName).add(overallPValue);
            resultLists.get(factorName).add(probeAnalysisResult);
        }
    // over terms
    }
    if (notUsable > 0) {
        LinearModelAnalyzer.log.info(notUsable + " elements or results were not usable - model could not be fit, etc.");
    }
    this.getRanksAndQvalues(resultLists, pvaluesForQvalue);
    DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config, label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists, subsetFactorValue);
    LinearModelAnalyzer.log.info("Analysis processing phase done ...");
    return expressionAnalysis;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Transformer(org.apache.commons.collections.Transformer) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)132 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)67 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)22 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)19 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 HashSet (java.util.HashSet)13 Test (org.junit.Test)13 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)12 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)10 InputStream (java.io.InputStream)7 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)7 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)7 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 DoubleArrayList (cern.colt.list.DoubleArrayList)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)5 ArrayList (java.util.ArrayList)5