Search in sources :

Example 1 with ScaleType

use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.

In the class LoadSimpleExpressionDataCli, method configureQuantitationType.

/**
 * Populates the quantitation type settings of the given metadata from the supplied field values.
 * <p>
 * The name, description, standard type and scale are read from {@code fields} at the positions given by the
 * {@code Q_NAME_I}, {@code Q_DESCRIPTION_I}, {@code Q_TYPE_I} and {@code Q_SCALE_I} constants. The general type is
 * always set to {@code GeneralType.QUANTITATIVE}.
 *
 * @param fields   field values; must be long enough to be indexed by the constants listed above
 * @param metaData metadata object that receives the settings
 */
private void configureQuantitationType(String[] fields, SimpleExpressionExperimentMetaData metaData) {
    // Descriptive attributes.
    final String qtName = fields[LoadSimpleExpressionDataCli.Q_NAME_I];
    metaData.setQuantitationTypeName(qtName);
    final String qtDescription = fields[LoadSimpleExpressionDataCli.Q_DESCRIPTION_I];
    metaData.setQuantitationTypeDescription(qtDescription);

    // The general type is fixed, it is not read from the fields.
    metaData.setGeneralType(GeneralType.QUANTITATIVE);

    // Standard type and scale are given as strings and resolved to their typed values.
    final String standardTypeName = fields[LoadSimpleExpressionDataCli.Q_TYPE_I];
    metaData.setType(StandardQuantitationType.fromString(standardTypeName));
    final String scaleName = fields[LoadSimpleExpressionDataCli.Q_SCALE_I];
    metaData.setScale(ScaleType.fromString(scaleName));
}
Also used : ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType)

Example 2 with ScaleType

use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.

In the class ExpressionDataDoubleMatrixUtil, method filterAndLog2Transform.

/**
 * Brings the data onto a log2 scale if it is not already there, then filters out uninformative rows.
 * <p>
 * The scale is determined with {@code findScale}. Data on a natural-log or log10 scale is converted to log2, linear
 * data is log transformed, and count data is converted to log2 counts per million (log2cpm is what is stored as
 * the processed data, so the count branch is not expected to be reached any more). Any other scale is rejected.
 * <p>
 * Filtering happens after the transformation: zero-variance rows are removed first and, only when the matrix has
 * more than {@code COLUMNS_LIMIT} columns, rows with too few distinct values are removed as well.
 *
 * @param quantitationType QT
 * @param dmatrix          matrix
 * @return ee data double matrix, transformed and filtered
 * @throws UnknownLogScaleException if the scale is none of LOG2, LN, LOG10, LINEAR or COUNT
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    final ScaleType detectedScale = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, dmatrix.getMatrix());

    // Matrix carried through the remaining steps; only the count branch swaps in a new instance.
    ExpressionDataDoubleMatrix working = dmatrix;

    if (detectedScale.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
    } else if (detectedScale.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), Math.E);
    } else if (detectedScale.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), 10);
    } else if (detectedScale.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(dmatrix.getMatrix());
    } else if (detectedScale.equals(ScaleType.COUNT)) {
        // Since we store log2cpm this shouldn't be reached any more. Unlike the other branches, the result is a
        // new matrix rather than a modification of the existing one.
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D columnTotals = MatrixStats.colSums(dmatrix.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpmMatrix = MatrixStats.convertToLog2Cpm(dmatrix.getMatrix(), columnTotals);
        working = new ExpressionDataDoubleMatrix(dmatrix, cpmMatrix);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }

    // Filtering is done second because doing it first causes some kind of subtle problem (round off? never
    // really tracked down).
    //
    // Remove zero-variance rows, but also rows that have lots of equal values even if variance is non-zero. This
    // happens when data is "clipped" (e.g., all values under 10 set to 10).
    final int rowsBeforeVarianceFilter = working.rows();
    working = ExpressionExperimentFilter.zeroVarianceFilter(working);
    final int rowsAfterVarianceFilter = working.rows();
    if (rowsAfterVarianceFilter < rowsBeforeVarianceFilter) {
        int dropped = rowsBeforeVarianceFilter - rowsAfterVarianceFilter;
        ExpressionDataDoubleMatrixUtil.log.info(dropped + " rows removed due to low variance");
    }

    // The distinct-values filter is only applied when there are more than COLUMNS_LIMIT columns.
    if (working.columns() <= ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        return working;
    }

    working = ExpressionExperimentFilter.tooFewDistinctValues(working, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
    final int rowsAfterDistinctFilter = working.rows();
    if (rowsAfterDistinctFilter < rowsAfterVarianceFilter) {
        int dropped = rowsAfterVarianceFilter - rowsAfterDistinctFilter;
        ExpressionDataDoubleMatrixUtil.log.info(dropped + " rows removed due to too many identical values");
    }
    return working;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) UnknownLogScaleException(ubic.gemma.core.analysis.preprocess.UnknownLogScaleException)

Example 3 with ScaleType

use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.

In the class ExpressionExperimentFilter, method isLogTransformed.

/**
 * Decides whether the data should be treated as already log transformed.
 * <p>
 * The checks are applied in order: a quantitation type that declares a log scale (LN, LOG10, LOG2 or
 * LOGBASEUNKNOWN) settles it; otherwise data from a two-color array is assumed to be log transformed; otherwise
 * the values themselves are inspected and any value above 20 is taken as evidence that the data is not log
 * transformed. If nothing contradicts it, the data is assumed to be log transformed.
 *
 * @param eeDoubleMatrix the matrix
 * @return true if the data looks like it is already log transformed, false otherwise
 */
private boolean isLogTransformed(ExpressionDataDoubleMatrix eeDoubleMatrix) {
    // 1. Trust the quantitation types when any of them declares a log scale.
    for (QuantitationType quantitationType : eeDoubleMatrix.getQuantitationTypes()) {
        final ScaleType declaredScale = quantitationType.getScale();
        final boolean declaredLog = declaredScale.equals(ScaleType.LN)
                || declaredScale.equals(ScaleType.LOG10)
                || declaredScale.equals(ScaleType.LOG2)
                || declaredScale.equals(ScaleType.LOGBASEUNKNOWN);
        if (!declaredLog) {
            continue;
        }
        ExpressionExperimentFilter.log.info("Quantitationtype says the data is already log transformed");
        return true;
    }

    // 2. Two-color data is assumed to be log transformed.
    if (this.isTwoColor()) {
        ExpressionExperimentFilter.log.info("Data is from a two-color array, assuming it is log transformed");
        return true;
    }

    // 3. Fall back to the data: a single large value is enough to conclude it is not log transformed.
    for (int row = 0; row < eeDoubleMatrix.rows(); row++) {
        for (int col = 0; col < eeDoubleMatrix.columns(); col++) {
            final double value = eeDoubleMatrix.get(row, col);
            if (value > 20) {
                ExpressionExperimentFilter.log.info("Data has large values, doesn't look log transformed");
                return false;
            }
        }
    }

    ExpressionExperimentFilter.log.info("Data looks log-transformed, but not sure...assuming it is");
    return true;
}
Also used : ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 4 with ScaleType

use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.

In the class ExpressionExperimentBatchCorrectionServiceImpl, method doComBat.

/**
 * Runs ComBat on the given data matrix using the given design and returns the result as a new expression data
 * matrix carrying a newly made quantitation type.
 * <p>
 * If the scale of the first quantitation type of the input is not one of LOG2, LOG10, LOGBASEUNKNOWN or LN, the
 * data is log transformed before ComBat is run and the corrected values are un-log transformed afterwards.
 *
 * @param ee                 experiment; its id and short name are used to name the ComBat plots
 * @param originalDataMatrix data to correct
 * @param design             design matrix of factor values per biomaterial
 * @return the corrected data, built from {@code originalDataMatrix} and the ComBat results
 */
private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
    ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings(design);
    DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
    // presumably aligns the design rows with the columns of the data matrix -- confirm against orderMatrix
    designU = this.orderMatrix(matrix, designU);
    // Only the first quantitation type is consulted; this throws if the collection is empty.
    ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
    boolean transformed = false;
    if (!(scale.equals(ScaleType.LOG2) || scale.equals(ScaleType.LOG10) || scale.equals(ScaleType.LOGBASEUNKNOWN) || scale.equals(ScaleType.LN))) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
        transformed = true;
        // NOTE(review): this is applied directly to the matrix obtained from originalDataMatrix.getMatrix(). If
        // getMatrix() returns the backing matrix rather than a copy, the caller's data is left log transformed,
        // since only the corrected matrix is un-log transformed below -- confirm.
        MatrixStats.logTransform(matrix);
    }
    /*
     * Process
     */
    ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(matrix, designU);
    // The argument selects the parametric mode; passing false would select the non-parametric one.
    DoubleMatrix2D results = comBat.run(true);
    // note these plots always reflect the parametric setup.
    // TEMPORARY?
    comBat.plot(ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName()));
    /*
     * Postprocess. Results is a raw matrix, so the row and column names are copied over from the input matrix.
     */
    DoubleMatrix<CompositeSequence, BioMaterial> correctedDataMatrix = new DenseDoubleMatrix<>(results.toArray());
    correctedDataMatrix.setRowNames(matrix.getRowNames());
    correctedDataMatrix.setColumnNames(matrix.getColNames());
    // Undo the log transform applied above so the output is on the same scale as the input.
    if (transformed) {
        MatrixStats.unLogTransform(correctedDataMatrix);
    }
    ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, correctedDataMatrix);
    assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;
    /*
     * It is easier if we make a new quantitationtype: the existing one is replaced by one derived from it.
     */
    QuantitationType oldQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
    QuantitationType newQt = this.makeNewQuantitationType(oldQt);
    correctedExpressionDataMatrix.getQuantitationTypes().clear();
    correctedExpressionDataMatrix.getQuantitationTypes().add(newQt);
    // Sanity check: columns must still map to the same biomaterials as in the input. This is an assert, so it
    // only has an effect when assertions are enabled.
    for (int i = 0; i < correctedExpressionDataMatrix.columns(); i++) {
        assert correctedExpressionDataMatrix.getBioMaterialForColumn(i).equals(originalDataMatrix.getBioMaterialForColumn(i));
    }
    return correctedExpressionDataMatrix;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Aggregations

ScaleType (ubic.gemma.model.common.quantitationtype.ScaleType): 4; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 2; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 2; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 2; DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D): 1; DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D): 1; DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix): 1; UnknownLogScaleException (ubic.gemma.core.analysis.preprocess.UnknownLogScaleException): 1; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 1; StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType): 1