Search in sources :

Example 16 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class AffyProbeReaderTest method getProbeMatchingName.

private CompositeSequence getProbeMatchingName(String name) {
    Collection<CompositeSequence> keySet = apr.getKeySet();
    CompositeSequence cs = null;
    for (CompositeSequence compositeSequence : keySet) {
        if (compositeSequence.getName().equals(name)) {
            cs = compositeSequence;
            break;
        }
    }
    return cs;
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 17 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class TwoWayAnovaWithInteractionsAnalyzerTest method checkResults.

/**
 * @param resultSet the result set to check
 */
private void checkResults(ExpressionAnalysisResultSet resultSet) {
    Collection<ExperimentalFactor> factors = resultSet.getExperimentalFactors();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        CompositeSequence probe = r.getProbe();
        Double pvalue = r.getPvalue();
        pvalue = pvalue == null ? Double.NaN : pvalue;
        Double qvalue = r.getCorrectedPvalue();
        assertNotNull(probe);
        if (factors.size() == 1) {
            ExperimentalFactor f = factors.iterator().next();
            if (f.equals(super.experimentalFactorA_Area)) {
                assertEquals(factorValueA2, resultSet.getBaselineGroup());
                switch(probe.getName()) {
                    case "probe_98":
                        assertEquals(0.8769, pvalue, 0.001);
                        assertNotNull(qvalue);
                        break;
                    case "probe_10":
                        assertEquals(5.158e-10, pvalue, 1e-12);
                        break;
                    case "probe_4":
                        assertEquals(0.0048, pvalue, 0.0001);
                        break;
                }
            } else {
                assertEquals(factorValueB2, resultSet.getBaselineGroup());
                if (probe.getName().equals("probe_98")) {
                    assertEquals(0.6888, pvalue, 0.001);
                } else if (probe.getName().equals("probe_10")) {
                    assertEquals(0.07970, pvalue, 0.00001);
                }
            }
        } else {
            assertEquals(null, resultSet.getBaselineGroup());
            switch(probe.getName()) {
                case "probe_98":
                    assertEquals(0.7893, pvalue, 0.001);
                    break;
                case "probe_10":
                    assertEquals(0.04514, pvalue, 0.00001);
                    break;
                case "probe_4":
                    assertEquals(Double.NaN, pvalue, 0.00001);
                    break;
            }
        }
    }
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 18 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class TwoWayAnovaWithoutInteractionsAnalyzerTest method checkResults.

/**
 * @param resultSet the result set to check
 */
private void checkResults(ExpressionAnalysisResultSet resultSet) {
    Collection<ExperimentalFactor> factors = resultSet.getExperimentalFactors();
    assertEquals("Should not have an interaction term", 1, factors.size());
    ExperimentalFactor f = factors.iterator().next();
    boolean found = false;
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        CompositeSequence probe = r.getProbe();
        Double pvalue = r.getPvalue();
        if (f.equals(super.experimentalFactorB) && probe.getName().equals("probe_1")) {
            assertEquals(0.501040, pvalue, 0.001);
            found = true;
        }
        Collection<ContrastResult> contrasts = r.getContrasts();
        Double stat;
        if (contrasts.isEmpty()) {
            continue;
        }
        stat = contrasts.iterator().next().getTstat();
        assertNotNull(probe);
        if (f.equals(super.experimentalFactorA_Area)) {
            assertEquals(factorValueA2, resultSet.getBaselineGroup());
            switch(probe.getName()) {
                case // id=1001
                "probe_1":
                    assertEquals(0.001814, pvalue, 0.00001);
                    assertNotNull(stat);
                    assertEquals(-287.061, stat, 0.001);
                    found = true;
                    break;
                case // id 1097
                "probe_97":
                    assertEquals(0.3546, pvalue, 0.001);
                    break;
                case "probe_0":
                    assertEquals(1.36e-12, pvalue, 1e-10);
                    assertNotNull(stat);
                    assertEquals(-425.3, stat, 0.1);
                    break;
            }
        } else {
            assertEquals(factorValueB2, resultSet.getBaselineGroup());
            if (probe.getName().equals("probe_97")) {
                assertEquals(0.4449, pvalue, 0.001);
            }
        }
    }
    assertTrue("Did not find expected results for probe_1", found);
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult)

Example 19 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithIdentifiers.

@Test
public void testFetchAndLoadWithIdentifiers() throws Exception {
    String fastacmdExe = Settings.getString(SimpleFastaCmd.FASTA_CMD_ENV_VAR);
    if (fastacmdExe == null) {
        log.warn("No fastacmd executable is configured, skipping test");
        return;
    }
    File fi = new File(fastacmdExe);
    if (!fi.canRead()) {
        log.warn(fastacmdExe + " not found, skipping test");
        return;
    }
    GeoService geoService = this.getBean(GeoService.class);
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    @SuppressWarnings("unchecked") final Collection<ArrayDesign> ads = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false, true, true);
    result = ads.iterator().next();
    result = arrayDesignService.thaw(result);
    // have to specify taxon as this has two taxons in it
    try (InputStream f = this.getClass().getResourceAsStream("/data/loader/expression/arrayDesign/identifierTest.txt")) {
        Collection<BioSequence> res = app.processArrayDesign(result, f, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), taxon, true);
        assertNotNull(res);
        for (BioSequence sequence : res) {
            assertNotNull(sequence.getSequence());
        }
        for (CompositeSequence cs : result.getCompositeSequences()) {
            assert cs.getBiologicalCharacteristic() != null;
        }
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) GeoService(ubic.gemma.core.loader.expression.geo.service.GeoService) Collection(java.util.Collection) File(java.io.File) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 20 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method doAnalysis.

/**
 * @param bioAssaySet       source data, could be a SubSet
 * @param dmatrix           data
 * @param samplesUsed       analyzed
 * @param factors           included in the model
 * @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
 * @return analysis, or null if there was a problem.
 */
private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet, DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {
    // We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
    if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
        throw new UnsupportedOperationException("Ebayes cannot be used when there are values missing in the data");
    }
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor");
        return null;
    }
    if (samplesUsed.size() <= factors.size()) {
        LinearModelAnalyzer.log.error("Must have more samples than factors");
        return null;
    }
    final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor; they were all removed due to incomplete values");
        return null;
    }
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
    /*
         * Build our factor terms, with interactions handled specially
         */
    List<String[]> interactionFactorLists = new ArrayList<>();
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    config.setBaseLineFactorValues(baselineConditions);
    boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
    if (!oneSampleTTest) {
        this.buildModelFormula(config, label2Factors, interactionFactorLists);
    }
    // calculate library size before log transformation (FIXME we compute this twice)
    DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
    /*
         * FIXME: remove columns that are marked as outliers.
         */
    dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    if (LinearModelAnalyzer.log.isDebugEnabled())
        this.outputForDebugging(dmatrix, designMatrix);
    /*
         * PREPARATION FOR 'NATIVE' FITTING
         */
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists, baselineConditions);
    /*
         * Run the analysis NOTE this can be simplified if we strip out R code.
         */
    final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix, properDesignMatrix, librarySize, config);
    if (rawResults.size() == 0) {
        LinearModelAnalyzer.log.error("Got no results from the analysis");
        return null;
    }
    /*
         * Initialize data structures we need to hold results.
         */
    // this used to be a Set, but a List is much faster.
    Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
    Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();
    for (String factorName : label2Factors.keySet()) {
        resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(factorName, new ArrayList<Double>());
    }
    for (String[] fs : interactionFactorLists) {
        String intF = StringUtils.join(fs, ":");
        resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(intF, new ArrayList<Double>());
    }
    if (pvaluesForQvalue.isEmpty()) {
        LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
        return null;
    }
    /*
         * Create result objects for each model fit. Keeping things in order is important.
         */
    final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
    boolean warned = false;
    int notUsable = 0;
    int processed = 0;
    for (CompositeSequence el : namedMatrix.getRowNames()) {
        if (++processed % 15000 == 0) {
            LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
        }
        LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());
        if (LinearModelAnalyzer.log.isDebugEnabled())
            LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);
        if (lm == null) {
            if (!warned) {
                LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
                warned = true;
            }
            notUsable++;
            continue;
        }
        for (String factorName : label2Factors.keySet()) {
            if (!pvaluesForQvalue.containsKey(factorName)) {
                // was dropped.
                continue;
            }
            Double overallPValue;
            DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory.newInstance();
            probeAnalysisResult.setProbe(el);
            if (lm.getCoefficients() == null) {
                // probeAnalysisResult.setPvalue( null );
                // pvaluesForQvalue.get( factorName ).add( overallPValue );
                // resultLists.get( factorName ).add( probeAnalysisResult );
                notUsable++;
                continue;
            }
            Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);
            if (factorsForName.size() > 1) {
                /*
                     * Interactions
                     */
                if (factorsForName.size() > 2) {
                    LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
                    return null;
                }
                assert factorName.contains(":");
                String[] factorNames = StringUtils.split(factorName, ":");
                assert factorNames.length == factorsForName.size();
                overallPValue = lm.getInteractionEffectP(factorNames);
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
                    Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);
                    for (String term : interactionContrastPValues.keySet()) {
                        Double contrastPvalue = interactionContrastPValues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, interactionContrastTStats, interactionContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el + ", further warnings suppressed");
                    // will over count?
                    notUsable++;
                    continue;
                }
            } else {
                /*
                     * Main effect
                     */
                assert factorsForName.size() == 1;
                ExperimentalFactor experimentalFactor = factorsForName.iterator().next();
                if (interceptFactor != null && factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
                    overallPValue = lm.getInterceptP();
                } else {
                    overallPValue = lm.getMainEffectP(factorName);
                }
                /*
                     * Add contrasts unless overall pvalue is NaN
                     */
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
                    Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);
                    for (String term : mainEffectContrastPvalues.keySet()) {
                        Double contrastPvalue = mainEffectContrastPvalues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, mainEffectContrastTStats, mainEffectContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor + " on " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("ANOVA could not be done for " + experimentalFactor + " on " + el);
                    // will over count?
                    notUsable++;
                    continue;
                }
            }
            assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";
            probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
            pvaluesForQvalue.get(factorName).add(overallPValue);
            resultLists.get(factorName).add(probeAnalysisResult);
        }
    // over terms
    }
    if (notUsable > 0) {
        LinearModelAnalyzer.log.info(notUsable + " elements or results were not usable - model could not be fit, etc.");
    }
    this.getRanksAndQvalues(resultLists, pvaluesForQvalue);
    DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config, label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists, subsetFactorValue);
    LinearModelAnalyzer.log.info("Analysis processing phase done ...");
    return expressionAnalysis;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Transformer(org.apache.commons.collections.Transformer) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)206 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)43 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)40 Gene (ubic.gemma.model.genome.Gene)32 Test (org.junit.Test)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)19 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)18 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)18 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)18 StopWatch (org.apache.commons.lang3.time.StopWatch)17 HashSet (java.util.HashSet)15 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)15 ArrayList (java.util.ArrayList)14 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)14 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)13 Taxon (ubic.gemma.model.genome.Taxon)12 Collection (java.util.Collection)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11