Search in sources :

Example 1 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method fixFactorsForSubset.

/**
 * Remove factors which are no longer usable, based on the subset.
 */
private Collection<ExperimentalFactor> fixFactorsForSubset(ExpressionDataDoubleMatrix dmatrix, ExpressionExperimentSubSet eesubSet, List<ExperimentalFactor> factors) {
    List<ExperimentalFactor> result = new ArrayList<>();
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
    /*
         * Remove any constant factors, unless they are that intercept.
         */
    for (ExperimentalFactor f : factors) {
        if (f.getType().equals(FactorType.CONTINUOUS)) {
            result.add(f);
        } else if (interceptFactor != null && interceptFactor.equals(f)) {
            result.add(f);
        } else {
            Collection<FactorValue> levels = new HashSet<>();
            LinearModelAnalyzer.populateFactorValuesFromBASet(eesubSet, f, levels);
            if (levels.size() > 1) {
                result.add(f);
            } else {
                LinearModelAnalyzer.log.info("Dropping " + f + " from subset");
            }
        }
    }
    return result;
}
Also used : QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 2 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method doAnalysis.

/**
 * @param bioAssaySet       source data, could be a SubSet
 * @param dmatrix           data
 * @param samplesUsed       analyzed
 * @param factors           included in the model
 * @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
 * @return analysis, or null if there was a problem.
 */
private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet, DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {
    // We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
    if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
        throw new UnsupportedOperationException("Ebayes cannot be used when there are values missing in the data");
    }
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor");
        return null;
    }
    if (samplesUsed.size() <= factors.size()) {
        LinearModelAnalyzer.log.error("Must have more samples than factors");
        return null;
    }
    final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);
    if (factors.isEmpty()) {
        LinearModelAnalyzer.log.error("Must provide at least one factor; they were all removed due to incomplete values");
        return null;
    }
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
    /*
         * Build our factor terms, with interactions handled specially
         */
    List<String[]> interactionFactorLists = new ArrayList<>();
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    config.setBaseLineFactorValues(baselineConditions);
    boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
    if (!oneSampleTTest) {
        this.buildModelFormula(config, label2Factors, interactionFactorLists);
    }
    // calculate library size before log transformation (FIXME we compute this twice)
    DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
    /*
         * FIXME: remove columns that are marked as outliers.
         */
    dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    if (LinearModelAnalyzer.log.isDebugEnabled())
        this.outputForDebugging(dmatrix, designMatrix);
    /*
         * PREPARATION FOR 'NATIVE' FITTING
         */
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists, baselineConditions);
    /*
         * Run the analysis NOTE this can be simplified if we strip out R code.
         */
    final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix, properDesignMatrix, librarySize, config);
    if (rawResults.size() == 0) {
        LinearModelAnalyzer.log.error("Got no results from the analysis");
        return null;
    }
    /*
         * Initialize data structures we need to hold results.
         */
    // this used to be a Set, but a List is much faster.
    Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
    Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();
    for (String factorName : label2Factors.keySet()) {
        resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(factorName, new ArrayList<Double>());
    }
    for (String[] fs : interactionFactorLists) {
        String intF = StringUtils.join(fs, ":");
        resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
        pvaluesForQvalue.put(intF, new ArrayList<Double>());
    }
    if (pvaluesForQvalue.isEmpty()) {
        LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
        return null;
    }
    /*
         * Create result objects for each model fit. Keeping things in order is important.
         */
    final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
    boolean warned = false;
    int notUsable = 0;
    int processed = 0;
    for (CompositeSequence el : namedMatrix.getRowNames()) {
        if (++processed % 15000 == 0) {
            LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
        }
        LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());
        if (LinearModelAnalyzer.log.isDebugEnabled())
            LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);
        if (lm == null) {
            if (!warned) {
                LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
                warned = true;
            }
            notUsable++;
            continue;
        }
        for (String factorName : label2Factors.keySet()) {
            if (!pvaluesForQvalue.containsKey(factorName)) {
                // was dropped.
                continue;
            }
            Double overallPValue;
            DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory.newInstance();
            probeAnalysisResult.setProbe(el);
            if (lm.getCoefficients() == null) {
                // probeAnalysisResult.setPvalue( null );
                // pvaluesForQvalue.get( factorName ).add( overallPValue );
                // resultLists.get( factorName ).add( probeAnalysisResult );
                notUsable++;
                continue;
            }
            Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);
            if (factorsForName.size() > 1) {
                /*
                     * Interactions
                     */
                if (factorsForName.size() > 2) {
                    LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
                    return null;
                }
                assert factorName.contains(":");
                String[] factorNames = StringUtils.split(factorName, ":");
                assert factorNames.length == factorsForName.size();
                overallPValue = lm.getInteractionEffectP(factorNames);
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
                    Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);
                    for (String term : interactionContrastPValues.keySet()) {
                        Double contrastPvalue = interactionContrastPValues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, interactionContrastTStats, interactionContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el + ", further warnings suppressed");
                    // will over count?
                    notUsable++;
                    continue;
                }
            } else {
                /*
                     * Main effect
                     */
                assert factorsForName.size() == 1;
                ExperimentalFactor experimentalFactor = factorsForName.iterator().next();
                if (interceptFactor != null && factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
                    overallPValue = lm.getInterceptP();
                } else {
                    overallPValue = lm.getMainEffectP(factorName);
                }
                /*
                     * Add contrasts unless overall pvalue is NaN
                     */
                if (overallPValue != null && !Double.isNaN(overallPValue)) {
                    Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
                    Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
                    Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);
                    for (String term : mainEffectContrastPvalues.keySet()) {
                        Double contrastPvalue = mainEffectContrastPvalues.get(term);
                        this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, mainEffectContrastTStats, mainEffectContrastCoeffs);
                    }
                } else {
                    if (!warned) {
                        LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor + " on " + el + ", further warnings suppressed");
                        warned = true;
                    }
                    if (LinearModelAnalyzer.log.isDebugEnabled())
                        LinearModelAnalyzer.log.debug("ANOVA could not be done for " + experimentalFactor + " on " + el);
                    // will over count?
                    notUsable++;
                    continue;
                }
            }
            assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";
            probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
            pvaluesForQvalue.get(factorName).add(overallPValue);
            resultLists.get(factorName).add(probeAnalysisResult);
        }
    // over terms
    }
    if (notUsable > 0) {
        LinearModelAnalyzer.log.info(notUsable + " elements or results were not usable - model could not be fit, etc.");
    }
    this.getRanksAndQvalues(resultLists, pvaluesForQvalue);
    DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config, label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists, subsetFactorValue);
    LinearModelAnalyzer.log.info("Analysis processing phase done ...");
    return expressionAnalysis;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Transformer(org.apache.commons.collections.Transformer) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 3 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class MeanVarianceServiceImpl method create.

@Override
public MeanVarianceRelation create(ExpressionExperiment ee, boolean forceRecompute) {
    if (ee == null) {
        log.warn("Experiment is null");
        return null;
    }
    log.info("Starting mean-variance computation");
    MeanVarianceRelation mvr = ee.getMeanVarianceRelation();
    if (forceRecompute || mvr == null) {
        log.info("Recomputing mean-variance");
        ee = expressionExperimentService.thawLiter(ee);
        mvr = ee.getMeanVarianceRelation();
        ExpressionDataDoubleMatrix intensities = meanVarianceServiceHelper.getIntensities(ee);
        if (intensities == null) {
            throw new IllegalStateException("Could not locate intensity matrix for " + ee.getShortName());
        }
        Collection<QuantitationType> qtList = expressionExperimentService.getPreferredQuantitationType(ee);
        QuantitationType qt;
        if (qtList.size() == 0) {
            log.error("Did not find any preferred quantitation type. Mean-variance relation was not computed.");
        } else {
            qt = qtList.iterator().next();
            if (qtList.size() > 1) {
                // Really this should be an error condition.
                log.warn("Found more than one preferred quantitation type. Only the first preferred quantitation type (" + qt + ") will be used.");
            }
            try {
                intensities = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(qt, intensities);
            } catch (UnknownLogScaleException e) {
                log.warn("Problem log transforming data. Check that the appropriate log scale is used. Mean-variance will be computed as is.");
            }
            mvr = calculateMeanVariance(intensities, mvr);
            meanVarianceServiceHelper.createMeanVariance(ee, mvr);
        }
    }
    log.info("Mean-variance computation is complete");
    return mvr;
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) MeanVarianceRelation(ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 4 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method getVectors.

/**
 * Get the current set of vectors that need to be updated.
 *
 * @param expExp             ee
 * @param qts                - only used to check for problems.
 * @param allOldBioAssayDims old BA dims
 * @return map
 */
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
    Collection<RawExpressionDataVector> oldVectors = new HashSet<>();
    for (BioAssayDimension dim : allOldBioAssayDims) {
        oldVectors.addAll(rawExpressionDataVectorService.find(dim));
    }
    if (oldVectors.isEmpty()) {
        throw new IllegalStateException("No vectors");
    }
    rawExpressionDataVectorService.thaw(oldVectors);
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = new HashMap<>();
    Collection<QuantitationType> qtsToAdd = new HashSet<>();
    for (RawExpressionDataVector v : oldVectors) {
        QuantitationType qt = v.getQuantitationType();
        if (!qts.contains(qt)) {
            /*
                 * Guard against QTs that are broken. Sometimes the QTs for the EE don't include the ones that the DEDVs
                 * have, due to corruption.
                 */
            qtsToAdd.add(qt);
        }
        if (!qt2Vec.containsKey(qt)) {
            qt2Vec.put(qt, new HashSet<RawExpressionDataVector>());
        }
        qt2Vec.get(qt).add(v);
    }
    if (!qtsToAdd.isEmpty()) {
        expExp.getQuantitationTypes().addAll(qtsToAdd);
        VectorMergingServiceImpl.log.info("Adding " + qtsToAdd.size() + " missing quantitation types to experiment");
        expressionExperimentService.update(expExp);
    }
    return qt2Vec;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 5 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method print.

@SuppressWarnings("unused")
private void print(Collection<DesignElementDataVector> newVectors) {
    StringBuilder buf = new StringBuilder();
    ByteArrayConverter conv = new ByteArrayConverter();
    for (DesignElementDataVector vector : newVectors) {
        buf.append(vector.getDesignElement());
        QuantitationType qtype = vector.getQuantitationType();
        if (qtype.getRepresentation().equals(PrimitiveType.DOUBLE)) {
            double[] vals = conv.byteArrayToDoubles(vector.getData());
            for (double d : vals) {
                buf.append("\t").append(d);
            }
        } else if (qtype.getRepresentation().equals(PrimitiveType.INT)) {
            int[] vals = conv.byteArrayToInts(vector.getData());
            for (int i : vals) {
                buf.append("\t").append(i);
            }
        } else if (qtype.getRepresentation().equals(PrimitiveType.BOOLEAN)) {
            boolean[] vals = conv.byteArrayToBooleans(vector.getData());
            for (boolean d : vals) {
                buf.append("\t").append(d);
            }
        } else if (qtype.getRepresentation().equals(PrimitiveType.STRING)) {
            String[] vals = conv.byteArrayToStrings(vector.getData());
            for (String d : vals) {
                buf.append("\t").append(d);
            }
        }
        buf.append("\n");
    }
    VectorMergingServiceImpl.log.info("\n" + buf);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Aggregations

QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)74 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)30 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)20 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 Test (org.junit.Test)16 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)14 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)11 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)10 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)6 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)6 InputStream (java.io.InputStream)5 GZIPInputStream (java.util.zip.GZIPInputStream)5 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)4