Search in sources :

Example 41 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method mergeVectors.

@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    if (arrayDesigns.size() > 1) {
        throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
    }
    ee = expressionExperimentService.thaw(ee);
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
    VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");
    /*
         * Load all the bioassay dimensions, which will be merged.
         */
    Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        Collection<BioAssayDimension> oldBioAssayDims = bioAssayService.findBioAssayDimensions(ba);
        for (BioAssayDimension bioAssayDim : oldBioAssayDims) {
            if (bioAssayDim.getDescription().startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
                // not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
                continue;
            }
            allOldBioAssayDims.add(bioAssayDim);
        }
    }
    if (allOldBioAssayDims.size() == 0) {
        throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
    }
    if (allOldBioAssayDims.size() == 1) {
        VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
        return ee;
    }
    VectorMergingServiceImpl.log.info(allOldBioAssayDims.size() + " bioAssayDimensions to merge");
    List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
    BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
    int totalBioAssays = newBioAd.getBioAssays().size();
    assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);
    /*
         * This will run into problems if there are excess quantitation types
         */
    int numSuccessfulMergers = 0;
    for (QuantitationType type : qt2Vec.keySet()) {
        Collection<RawExpressionDataVector> oldVecs = qt2Vec.get(type);
        if (oldVecs.isEmpty()) {
            VectorMergingServiceImpl.log.warn("No vectors for " + type);
            continue;
        }
        Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
        if (deVMap == null) {
            VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
            continue;
        }
        VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors  for " + type);
        Collection<RawExpressionDataVector> newVectors = new HashSet<>();
        int numAllMissing = 0;
        int missingValuesForQt = 0;
        for (CompositeSequence de : deVMap.keySet()) {
            RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);
            Collection<RawExpressionDataVector> dedvs = deVMap.get(de);
            /*
                 * these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
                 * old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
                 * the values for that dimension with missing data. We go through the dimensions in the same order that
                 * we joined them up.
                 */
            List<Object> data = new ArrayList<>();
            int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
            missingValuesForQt += totalMissingInVector;
            if (totalMissingInVector == totalBioAssays) {
                numAllMissing++;
                // we don't save data that is all missing.
                continue;
            }
            if (data.size() != totalBioAssays) {
                throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
            }
            byte[] newDataAr = converter.toBytes(data.toArray());
            vector.setData(newDataAr);
            newVectors.add(vector);
        }
        // TRANSACTION
        vectorMergingHelperService.persist(ee, type, newVectors);
        if (numAllMissing > 0) {
            VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
        }
        if (missingValuesForQt > 0) {
            VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
        }
        VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
        rawExpressionDataVectorService.remove(oldVecs);
        ee.getRawExpressionDataVectors().removeAll(oldVecs);
        numSuccessfulMergers++;
    }
    if (numSuccessfulMergers == 0) {
        /*
             * Try to clean up
             */
        this.bioAssayDimensionService.remove(newBioAd);
        throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
    }
    expressionExperimentService.update(ee);
    // Several transactions
    this.cleanUp(ee, allOldBioAssayDims, newBioAd);
    // transaction
    this.audit(ee, "Vector merging performed, merged " + allOldBioAssayDims + " old bioassay dimensions for " + qts.size() + " quantitation types.");
    // several transactions
    try {
        preprocessorService.process(ee);
    } catch (PreprocessingException e) {
        VectorMergingServiceImpl.log.error("Error during postprocessing", e);
    }
    return ee;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 42 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method checkCorrectability.

@Override
public boolean checkCorrectability(ExpressionExperiment ee, boolean force) {
    for (QuantitationType qt : expressionExperimentService.getQuantitationTypes(ee)) {
        if (qt.getIsBatchCorrected()) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment already has a batch-corrected quantitation type: " + ee + ": " + qt);
            return false;
        }
    }
    ExperimentalFactor batch = this.getBatchFactor(ee);
    if (batch == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found: " + ee);
        return false;
    }
    String bConf = expressionExperimentService.getBatchConfound(ee);
    if (bConf != null && !force) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment can not be batch corrected: " + bConf);
        ExpressionExperimentBatchCorrectionServiceImpl.log.info("To force batch-correction of a confounded experiment, use the force option (note, that this option also allows outliers while batch correcting).");
        return false;
    }
    /*
         * Make sure we have at least two samples per batch. This generally won't happen if batches were defined by
         * Gemma.
         */
    Map<Long, Integer> batches = new HashMap<>();
    Set<BioMaterial> seen = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        BioMaterial bm = ba.getSampleUsed();
        if (seen.contains(bm))
            continue;
        seen.add(bm);
        for (FactorValue fv : bm.getFactorValues()) {
            if (fv.getExperimentalFactor().equals(batch)) {
                Long batchId = fv.getId();
                if (!batches.containsKey(batchId))
                    batches.put(batchId, 0);
                batches.put(batchId, batches.get(batchId) + 1);
            }
        }
    }
    /*
         * consider merging batches. - we already do this when we create the batch factor, so in general batches should
         * always have at least 2 samples
         */
    for (Long batchId : batches.keySet()) {
        if (batches.get(batchId) < 2) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.info("Batch with only one sample detected, correction not possible: " + ee + ", batchId=" + batchId);
            return false;
        }
    }
    return true;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 43 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method doComBat.

private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
    ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings(design);
    DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
    designU = this.orderMatrix(matrix, designU);
    ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
    boolean transformed = false;
    if (!(scale.equals(ScaleType.LOG2) || scale.equals(ScaleType.LOG10) || scale.equals(ScaleType.LOGBASEUNKNOWN) || scale.equals(ScaleType.LN))) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
        transformed = true;
        MatrixStats.logTransform(matrix);
    }
    /*
         * Process
         */
    ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(matrix, designU);
    // false: NONPARAMETRIC
    DoubleMatrix2D results = comBat.run(true);
    // note these plots always reflect the parametric setup.
    // TEMPORARY?
    comBat.plot(ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName()));
    /*
         * Postprocess. Results is a raw matrix/
         */
    DoubleMatrix<CompositeSequence, BioMaterial> correctedDataMatrix = new DenseDoubleMatrix<>(results.toArray());
    correctedDataMatrix.setRowNames(matrix.getRowNames());
    correctedDataMatrix.setColumnNames(matrix.getColNames());
    if (transformed) {
        MatrixStats.unLogTransform(correctedDataMatrix);
    }
    ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, correctedDataMatrix);
    assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;
    /*
         * It is easier if we make a new quantitationtype.
         */
    QuantitationType oldQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
    QuantitationType newQt = this.makeNewQuantitationType(oldQt);
    correctedExpressionDataMatrix.getQuantitationTypes().clear();
    correctedExpressionDataMatrix.getQuantitationTypes().add(newQt);
    // Sanity check...
    for (int i = 0; i < correctedExpressionDataMatrix.columns(); i++) {
        assert correctedExpressionDataMatrix.getBioMaterialForColumn(i).equals(originalDataMatrix.getBioMaterialForColumn(i));
    }
    return correctedExpressionDataMatrix;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 44 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method regressionResiduals.

/**
 * @param matrix      on which to perform regression.
 * @param config      containing configuration of factors to include. Any interactions or subset configuration is
 *                    ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
 *                    ignored)
 * @param retainScale if true, the data retain the global mean (intercept)
 * @return residuals from the regression.
 */
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
    if (config.getFactorsToInclude().isEmpty()) {
        LinearModelAnalyzer.log.warn("No factors");
        return matrix;
    }
    /*
         * Note that this method relies on similar code to doAnalysis, for the setup stages.
         */
    List<ExperimentalFactor> factors = config.getFactorsToInclude();
    List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
    Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
    ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
    DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
    ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
    DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
    DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
    // perform weighted least squares regression on COUNT data
    QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
    LeastSquaresFit fit;
    if (quantitationType.getScale().equals(ScaleType.COUNT)) {
        LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
        // note: data is not log transformed
        DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
        MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
    } else {
        fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
    }
    DoubleMatrix2D residuals = fit.getResiduals();
    if (retainScale) {
        DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
        for (int i = 0; i < residuals.rows(); i++) {
            residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
        }
    }
    DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
    f.setRowNames(dmatrix.getMatrix().getRowNames());
    f.setColumnNames(dmatrix.getMatrix().getColNames());
    return new ExpressionDataDoubleMatrix(dmatrix, f);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 45 with QuantitationType

use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.

the class PreprocessorServiceImpl method checkQuantitationType.

private void checkQuantitationType(ExpressionDataDoubleMatrix correctedData) {
    Collection<QuantitationType> qts = correctedData.getQuantitationTypes();
    assert !qts.isEmpty();
    if (qts.size() > 1) {
        throw new IllegalArgumentException("Only support a single quantitation type");
    }
    QuantitationType batchCorrectedQt = qts.iterator().next();
    assert batchCorrectedQt.getIsBatchCorrected();
}
Also used : QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Aggregations

QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)74 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)30 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)20 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 Test (org.junit.Test)16 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)14 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)11 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)10 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)6 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)6 InputStream (java.io.InputStream)5 GZIPInputStream (java.util.zip.GZIPInputStream)5 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)4