Search in sources :

Example 6 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method makeSubSets.

private Map<FactorValue, ExpressionDataDoubleMatrix> makeSubSets(DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, ExperimentalFactor subsetFactor) {
    if (subsetFactor.getType().equals(FactorType.CONTINUOUS)) {
        throw new IllegalArgumentException("You cannot subset on a continuous factor (has a Measurement)");
    }
    if (config.getFactorsToInclude().contains(subsetFactor)) {
        throw new IllegalArgumentException("You cannot analyze a factor and use it for subsetting at the same time.");
    }
    Map<FactorValue, List<BioMaterial>> subSetSamples = new HashMap<>();
    for (FactorValue fv : subsetFactor.getFactorValues()) {
        assert fv.getMeasurement() == null;
        subSetSamples.put(fv, new ArrayList<BioMaterial>());
    }
    for (BioMaterial sample : samplesUsed) {
        boolean ok = false;
        for (FactorValue fv : sample.getFactorValues()) {
            if (fv.getExperimentalFactor().equals(subsetFactor)) {
                subSetSamples.get(fv).add(sample);
                ok = true;
                break;
            }
        }
        if (!ok) {
            throw new IllegalArgumentException("Cannot subset on a factor unless each sample has a value for it. Missing value for: " + sample + " " + sample.getBioAssaysUsedIn());
        }
    }
    Map<FactorValue, ExpressionDataDoubleMatrix> subMatrices = new HashMap<>();
    for (FactorValue fv : subSetSamples.keySet()) {
        List<BioMaterial> samplesInSubset = subSetSamples.get(fv);
        if (samplesInSubset.isEmpty()) {
            throw new IllegalArgumentException("The subset was empty for fv: " + fv);
        }
        assert samplesInSubset.size() < samplesUsed.size();
        samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());
        ExpressionDataDoubleMatrix subMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, dmatrix);
        subMatrices.put(fv, subMatrix);
    }
    return subMatrices;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)

Example 7 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class MeanVarianceServiceImpl method create.

@Override
public MeanVarianceRelation create(ExpressionExperiment ee, boolean forceRecompute) {
    if (ee == null) {
        log.warn("Experiment is null");
        return null;
    }
    log.info("Starting mean-variance computation");
    MeanVarianceRelation mvr = ee.getMeanVarianceRelation();
    if (forceRecompute || mvr == null) {
        log.info("Recomputing mean-variance");
        ee = expressionExperimentService.thawLiter(ee);
        mvr = ee.getMeanVarianceRelation();
        ExpressionDataDoubleMatrix intensities = meanVarianceServiceHelper.getIntensities(ee);
        if (intensities == null) {
            throw new IllegalStateException("Could not locate intensity matrix for " + ee.getShortName());
        }
        Collection<QuantitationType> qtList = expressionExperimentService.getPreferredQuantitationType(ee);
        QuantitationType qt;
        if (qtList.size() == 0) {
            log.error("Did not find any preferred quantitation type. Mean-variance relation was not computed.");
        } else {
            qt = qtList.iterator().next();
            if (qtList.size() > 1) {
                // Really this should be an error condition.
                log.warn("Found more than one preferred quantitation type. Only the first preferred quantitation type (" + qt + ") will be used.");
            }
            try {
                intensities = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(qt, intensities);
            } catch (UnknownLogScaleException e) {
                log.warn("Problem log transforming data. Check that the appropriate log scale is used. Mean-variance will be computed as is.");
            }
            mvr = calculateMeanVariance(intensities, mvr);
            meanVarianceServiceHelper.createMeanVariance(ee, mvr);
        }
    }
    log.info("Mean-variance computation is complete");
    return mvr;
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) MeanVarianceRelation(ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 8 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class PreprocessorServiceImpl method batchCorrect.

@Override
public void batchCorrect(ExpressionExperiment ee, boolean force) throws PreprocessingException {
    String note = "ComBat batch correction";
    String detail = null;
    /*
         * This leaves the raw data alone; it updates the processed data.
         */
    this.checkArrayDesign(ee);
    ee = expressionExperimentService.thawLite(ee);
    this.checkCorrectable(ee, force);
    /*
         * If there are predicted outliers, but which we decide are okay, we just go ahead.
         */
    if (!force) {
        this.checkOutliers(ee);
    } else {
        note = "[Forced]" + note;
        detail = "Batch correction skipped outlier check.";
        PreprocessorServiceImpl.log.warn(detail);
    }
    try {
        Collection<ProcessedExpressionDataVector> vecs = this.getProcessedExpressionDataVectors(ee);
        // TODO log-transform if not already, update QT. See https://github.com/PavlidisLab/Gemma/issues/50
        ExpressionDataDoubleMatrix correctedData = this.getCorrectedData(ee, vecs);
        // Convert to vectors
        processedExpressionDataVectorService.createProcessedDataVectors(ee, correctedData.toProcessedDataVectors());
        AuditEventType eventType = BatchCorrectionEvent.Factory.newInstance();
        String bConf = expressionExperimentService.getBatchConfound(ee);
        if (bConf != null && force) {
            String add = "Batch correction forced over a detected confound: " + bConf;
            // noinspection ConstantConditions // That is simply false.
            detail = (detail == null) ? add : detail + "\n" + add;
        }
        auditTrailService.addUpdateEvent(ee, eventType, note, detail);
        this.removeInvalidatedData(ee);
        this.processExceptForVectorCreate(ee);
    } catch (Exception e) {
        throw new PreprocessingException(e);
    }
}
Also used : AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 9 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataSVD method equalize.

/**
 * Implements the method described in the SPELL paper, alternative interpretation as related by Q. Morris. Set all
 * components to have equal weight (set all singular values to 1)
 *
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix equalize() {
    DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
    for (int i = 0; i < copy.columns(); i++) {
        copy.set(i, i, 1.0);
    }
    double[][] rawU = svd.getU().getRawMatrix();
    double[][] rawS = copy.getRawMatrix();
    double[][] rawV = svd.getV().getRawMatrix();
    DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
    DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
    DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
    Algebra a = new Algebra();
    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
    // re-mask the missing values.
    for (int i = 0; i < reconstructed.rows(); i++) {
        for (int j = 0; j < reconstructed.columns(); j++) {
            if (Double.isNaN(this.missingValueInfo.get(i, j))) {
                reconstructed.set(i, j, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 10 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataSVD method winnow.

/**
 * Implements method described in Skillicorn et al., "Strategies for winnowing microarray data" (also section 3.5.5
 * of his book)
 *
 * @param thresholdQuantile Enter 0.5 for median. Value must be &gt; 0 and &lt; 1.
 * @return a filtered matrix
 */
public ExpressionDataDoubleMatrix winnow(double thresholdQuantile) {
    if (thresholdQuantile <= 0 || thresholdQuantile >= 1) {
        throw new IllegalArgumentException("Threshold quantile should be a value between 0 and 1 exclusive");
    }
    class NormCmp implements Comparable<NormCmp> {

        private Double norm;

        private int rowIndex;

        private NormCmp(int rowIndex, Double norm) {
            super();
            this.rowIndex = rowIndex;
            this.norm = norm;
        }

        @Override
        public int compareTo(NormCmp o) {
            return this.norm.compareTo(o.norm);
        }

        public int getRowIndex() {
            return rowIndex;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((norm == null) ? 0 : norm.hashCode());
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (obj == null)
                return false;
            if (this.getClass() != obj.getClass())
                return false;
            NormCmp other = (NormCmp) obj;
            if (norm == null) {
                return other.norm == null;
            } else
                return norm.equals(other.norm);
        }
    }
    // order rows by distance from the origin. This is proportional to the 1-norm.
    Algebra a = new Algebra();
    List<NormCmp> os = new ArrayList<>();
    for (int i = 0; i < this.expressionData.rows(); i++) {
        double[] row = this.getU().getRow(i);
        DoubleMatrix1D rom = new DenseDoubleMatrix1D(row);
        double norm1 = a.norm1(rom);
        os.add(new NormCmp(i, norm1));
    }
    Collections.sort(os);
    int quantileLimit = (int) Math.floor(this.expressionData.rows() * thresholdQuantile);
    quantileLimit = Math.max(0, quantileLimit);
    List<CompositeSequence> keepers = new ArrayList<>();
    for (int i = 0; i < quantileLimit; i++) {
        NormCmp x = os.get(i);
        CompositeSequence d = this.expressionData.getDesignElementForRow(x.getRowIndex());
        keepers.add(d);
    }
    // remove genes which are near the origin in SVD space. FIXME: make sure the missing values are still masked.
    return new ExpressionDataDoubleMatrix(this.expressionData, keepers);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3