Search in sources :

Example 16 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method run.

/**
 * Runs the differential expression analysis on one subset of an experiment.
 * <p>
 * The processed data matrix is fetched for the subset's source experiment and then sliced down to the samples
 * used by the subset's bioassays, ordered by the experimental design of the factors to include.
 *
 * @param subset the subset to analyze
 * @param config the analysis configuration; its subset factor and factors to include are read here
 * @return the analysis, or {@code null} when {@code fixFactorsForSubset} leaves no factors for this subset
 * @throws IllegalStateException if the samples carry more than one distinct value of the subset factor, or if
 *                               {@code doAnalysis} yields no result
 */
@Override
public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset, DifferentialExpressionAnalysisConfig config) {
    // Begin with the matrix of the complete source experiment; the subset is cut out of it further down.
    ExpressionDataDoubleMatrix fullMatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(subset.getSourceExperiment());
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    // Gather each sample once, even if several bioassays share it.
    Collection<BioMaterial> distinctSamples = new HashSet<>();
    for (BioAssay assay : subset.getBioAssays()) {
        distinctSamples.add(assay.getSampleUsed());
    }
    List<BioMaterial> samplesInSubset = new ArrayList<>(distinctSamples);

    // Every sample must agree on a single value of the subset factor.
    FactorValue subsetFactorValue = null;
    for (BioMaterial sample : samplesInSubset) {
        for (FactorValue candidate : sample.getFactorValues()) {
            if (!candidate.getExperimentalFactor().equals(subsetFactor)) {
                continue;
            }
            if (subsetFactorValue == null) {
                subsetFactorValue = candidate;
                continue;
            }
            if (!subsetFactorValue.equals(candidate)) {
                throw new IllegalStateException("This subset has more than one factor value for the supposed subset factor: " + candidate + " and " + subsetFactorValue);
            }
        }
    }

    samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());

    // Slice the full matrix down to the ordered subset samples.
    ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, fullMatrix);

    Collection<ExperimentalFactor> usableFactors = this.fixFactorsForSubset(fullMatrix, subset, config.getFactorsToInclude());
    if (usableFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
        return null;
    }

    DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(), config, subsetFactorValue);
    DifferentialExpressionAnalysis result = this.doAnalysis(subset, subsetConfig, subsetMatrix, samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);
    if (result != null) {
        return result;
    }
    throw new IllegalStateException("Subset could not be analyzed with config: " + config);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 17 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method makeSubSets.

/**
 * Partitions the data matrix into one sub-matrix per value of the subset factor.
 *
 * @param config       the analysis configuration; its factors to include drive the column ordering
 * @param dmatrix      the matrix to slice
 * @param samplesUsed  the samples to distribute over the subsets
 * @param subsetFactor the factor whose values define the subsets
 * @return a map from each value of the subset factor to the matrix of its samples
 * @throws IllegalArgumentException if the subset factor is continuous, is also one of the factors to include,
 *                                  if a sample has no value for it, or if a subset ends up empty
 */
private Map<FactorValue, ExpressionDataDoubleMatrix> makeSubSets(DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, ExperimentalFactor subsetFactor) {
    if (subsetFactor.getType().equals(FactorType.CONTINUOUS)) {
        throw new IllegalArgumentException("You cannot subset on a continuous factor (has a Measurement)");
    }
    if (config.getFactorsToInclude().contains(subsetFactor)) {
        throw new IllegalArgumentException("You cannot analyze a factor and use it for subsetting at the same time.");
    }

    // Start with one empty bucket for each value of the subset factor.
    Map<FactorValue, List<BioMaterial>> samplesByValue = new HashMap<>();
    for (FactorValue level : subsetFactor.getFactorValues()) {
        assert level.getMeasurement() == null;
        samplesByValue.put(level, new ArrayList<BioMaterial>());
    }

    // Drop every sample into the bucket of its first factor value that belongs to the subset factor.
    for (BioMaterial sample : samplesUsed) {
        FactorValue assigned = null;
        for (FactorValue candidate : sample.getFactorValues()) {
            if (candidate.getExperimentalFactor().equals(subsetFactor)) {
                assigned = candidate;
                break;
            }
        }
        if (assigned == null) {
            throw new IllegalArgumentException("Cannot subset on a factor unless each sample has a value for it. Missing value for: " + sample + " " + sample.getBioAssaysUsedIn());
        }
        samplesByValue.get(assigned).add(sample);
    }

    // Build the sliced matrix for each bucket, with columns ordered by the experimental design.
    Map<FactorValue, ExpressionDataDoubleMatrix> subMatrices = new HashMap<>();
    for (Map.Entry<FactorValue, List<BioMaterial>> bucket : samplesByValue.entrySet()) {
        FactorValue level = bucket.getKey();
        List<BioMaterial> members = bucket.getValue();
        if (members.isEmpty()) {
            throw new IllegalArgumentException("The subset was empty for fv: " + level);
        }
        assert members.size() < samplesUsed.size();
        List<BioMaterial> ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(members, config.getFactorsToInclude());
        subMatrices.put(level, new ExpressionDataDoubleMatrix(ordered, dmatrix));
    }
    return subMatrices;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)

Example 18 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataSVD method equalize.

/**
 * Rebuilds the expression matrix from the decomposition after giving every component the same weight, i.e.
 * with each singular value replaced by 1. This follows the method described in the SPELL paper, in the
 * alternative interpretation related by Q. Morris.
 *
 * @return the reconstructed matrix, with NaN restored at every position flagged as missing
 */
public ExpressionDataDoubleMatrix equalize() {
    // Work on a copy of S so the stored decomposition is left untouched.
    DoubleMatrix<Integer, Integer> unitS = svd.getS().copy();
    int diagonalLength = unitS.columns();
    for (int d = 0; d < diagonalLength; d++) {
        unitS.set(d, d, 1.0);
    }

    DoubleMatrix2D u = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D s = new DenseDoubleMatrix2D(unitS.getRawMatrix());
    DoubleMatrix2D v = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());

    // Reconstruct as U * S * V^T.
    Algebra algebra = new Algebra();
    DoubleMatrix2D us = algebra.mult(u, s);
    DoubleMatrix2D vTransposed = algebra.transpose(v);
    DoubleMatrix2D product = algebra.mult(us, vTransposed);

    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(product.toArray());
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());

    // Put the missing values back where missingValueInfo holds NaN.
    for (int row = 0; row < reconstructed.rows(); row++) {
        for (int col = 0; col < reconstructed.columns(); col++) {
            if (!Double.isNaN(this.missingValueInfo.get(row, col))) {
                continue;
            }
            reconstructed.set(row, col, Double.NaN);
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 19 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.

/**
 * Reorders the processed data vectors of an experiment so that their samples follow the experimental design.
 * <p>
 * A new bioassay dimension is created for each existing one, with the bioassays in design order, and the data
 * of every processed vector is permuted to match before the vector is pointed at the new dimension. Nothing is
 * done if the experiment has no experimental factors or no processed data vectors.
 *
 * @param eeId id of the expression experiment to process
 * @throws IllegalStateException if a bioassay's sample has no position in the design ordering, or if a data
 *                               column has no mapping to a new position
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        // Walk the bioassays in their current order; no cast to List is needed for that.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("No position in the design ordering for sample " + sam1 + " of bioassay " + bioAssay);
            }
            resorted.set(newIndex, bioAssay);
            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;
        double[] data = converter.byteArrayToDoubles(v.getData());
        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay at old position k was moved to
         * position indexes.get(k), so its value has to be written to that same position. (Reading from
         * data[indexes.get(k)] instead would apply the inverse permutation and detach values from their
         * samples.)
         */
        Double[] resortedData = new Double[data.length];
        for (int k = 0; k < data.length; k++) {
            Integer newIndex = indexes.get(k);
            if (newIndex == null) {
                throw new IllegalStateException("No new position known for data column " + k + " of vector " + v);
            }
            resortedData[newIndex] = data[k];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleArrayList(cern.colt.list.DoubleArrayList) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transactional(org.springframework.transaction.annotation.Transactional)

Example 20 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method checkAllBioAssayDimensionsMatch.

/**
 * Make sure we have only one ordering!!! If the sample matching is botched, there will be problems.
 * <p>
 * The sample sequence of the first dimension is taken as the reference; every other dimension must list the
 * same samples in the same order. Dimensions with a different number of bioassays are rejected as well, rather
 * than failing with an index error (longer) or slipping through unnoticed (shorter).
 *
 * @param dims the bioassay dimensions to compare
 * @throws IllegalStateException if any dimension's sample sequence differs from that of the first dimension
 */
private void checkAllBioAssayDimensionsMatch(Collection<BioAssayDimension> dims) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Data set has more than one bioassaydimension for its processed data vectors");
    List<BioMaterial> ordering = null;
    for (BioAssayDimension dim : dims) {
        List<BioMaterial> samples = new ArrayList<>();
        for (BioAssay ba : dim.getBioAssays()) {
            samples.add(ba.getSampleUsed());
        }
        if (ordering == null) {
            // The first dimension defines the reference ordering.
            ordering = samples;
            continue;
        }
        // List equality covers both the number of samples and their order.
        if (!ordering.equals(samples)) {
            throw new IllegalStateException("Two dimensions didn't have the same BioMaterial ordering for the same data set.");
        }
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)132 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)67 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)22 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)19 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 HashSet (java.util.HashSet)13 Test (org.junit.Test)13 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)12 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)10 InputStream (java.io.InputStream)7 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)7 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)7 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 DoubleArrayList (cern.colt.list.DoubleArrayList)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)5 ArrayList (java.util.ArrayList)5