Search in sources :

Example 1 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class PersistentDummyObjectHelper method getDesignElementDataVectors.

/**
 * Builds one raw expression data vector for every combination of quantitation type and composite sequence of the
 * given array design. All vectors share a single, freshly created bioassay dimension whose name is the
 * experiment's short name followed by an underscore and a random 20-character alphanumeric suffix.
 *
 * @param ee                the experiment every vector is attached to
 * @param quantitationTypes the quantitation types to create vectors for
 * @param bioAssays         the bioassays used to build the shared bioassay dimension
 * @param ad                the array design whose composite sequences become the design elements
 * @return the newly created vectors; these are non-persistent
 */
private Collection<RawExpressionDataVector> getDesignElementDataVectors(ExpressionExperiment ee, Collection<QuantitationType> quantitationTypes, List<BioAssay> bioAssays, ArrayDesign ad) {
    String dimensionName = ee.getShortName() + "_" + RandomStringUtils.randomAlphanumeric(20);
    BioAssayDimension sharedDimension = BioAssayDimension.Factory.newInstance(dimensionName, null, bioAssays);

    Collection<RawExpressionDataVector> result = new HashSet<>();
    for (QuantitationType qt : quantitationTypes) {
        for (CompositeSequence probe : ad.getCompositeSequences()) {
            RawExpressionDataVector dataVector = RawExpressionDataVector.Factory.newInstance();
            dataVector.setData(this.getDoubleData());
            dataVector.setDesignElement(probe);
            // Sanity check: every composite sequence is expected to know its array design.
            assert probe.getArrayDesign() != null;
            dataVector.setExpressionExperiment(ee);
            dataVector.setQuantitationType(qt);
            dataVector.setBioAssayDimension(sharedDimension);
            result.add(dataVector);
        }
    }
    return result;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 2 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.

/**
 * This is a self-contained test. That is, it does not depend on the setup in {@code onSetUpInTransaction}. It tests
 * creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO). That is,
 * we have obtained information from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used
 * are GSM15697 and GSM15744. Specifically, the Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials).</li>
 * </ul>
 * The resulting matrix is expected to have two rows (one per design element) and two columns (one per bioassay).
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // Quantitation type shared by both vectors: the preferred, double-valued "VALUE" column.
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Bioassay dimension with two assays, each backed by one biomaterial.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();
    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);
    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);
    bioAssayDimension.setBioAssays(assays);

    // Two data vectors, each holding one value per bioassay.
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);
    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // Design elements (probe sets) on a shared array design.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");
    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);
    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);

    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages accurate.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 3 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method combineBioAssayDimensions.

/**
 * Create a new one or use an existing one. (an existing one might be found if this process was started once before
 * and aborted partway through).
 * <p>
 * The bioassays of all the old dimensions are concatenated in the order given. If one of the old dimensions already
 * holds exactly that list of bioassays, in the same order, it is removed from {@code oldDims} and returned.
 * Otherwise a new dimension is built, created through the bioassay dimension service, and returned.
 *
 * @param oldDims in the sort order to be used. If one of these dimensions is reused, it is removed from this list.
 * @return BA dim
 * @throws IllegalStateException if the same bioassay occurs more than once across the old dimensions
 */
private BioAssayDimension combineBioAssayDimensions(List<BioAssayDimension> oldDims) {
    List<BioAssay> bioAssays = new ArrayList<>();
    for (BioAssayDimension bioAd : oldDims) {
        for (BioAssay bioAssay : bioAd.getBioAssays()) {
            if (bioAssays.contains(bioAssay)) {
                throw new IllegalStateException("Duplicate bioassay for biodimension: " + bioAssay + "; inspecting " + oldDims.size() + " BioAssayDimensions");
            }
            bioAssays.add(bioAssay);
        }
    }

    // first see if we already have an equivalent one.
    for (BioAssayDimension candidate : oldDims) {
        // size should be the same.
        List<BioAssay> assaysInExisting = candidate.getBioAssays();
        if (assaysInExisting.size() != bioAssays.size()) {
            continue;
        }

        // The flag is scoped to each candidate: a mismatch on an earlier candidate must not rule out later ones.
        boolean found = true;
        for (int i = 0; i < bioAssays.size(); i++) {
            if (!assaysInExisting.get(i).equals(bioAssays.get(i))) {
                found = false;
                break;
            }
        }
        if (!found) {
            continue;
        }

        VectorMergingServiceImpl.log.info("Already have a dimension created that fits the bill - removing it from the 'old' list.");
        // Removing while iterating is safe here only because we return immediately afterwards.
        oldDims.remove(candidate);
        return candidate;
    }

    // Nothing reusable: build the name and description from the names of the old dimensions.
    StringBuilder name = new StringBuilder();
    StringBuilder description = new StringBuilder(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX + " " + oldDims.size() + " dimensions: ");
    for (BioAssayDimension bioAd : oldDims) {
        name.append(bioAd.getName()).append(" ");
        description.append(bioAd.getName()).append(" ");
    }

    BioAssayDimension newBioAd = BioAssayDimension.Factory.newInstance();
    // The name is abbreviated to at most 255 characters; the description is left at full length.
    newBioAd.setName(StringUtils.abbreviate(name.toString(), 255));
    newBioAd.setDescription(description.toString());
    newBioAd.setBioAssays(bioAssays);
    newBioAd = bioAssayDimensionService.create(newBioAd);
    VectorMergingServiceImpl.log.info("Created new bioAssayDimension with " + newBioAd.getBioAssays().size() + " bioassays.");
    return newBioAd;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 4 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method getVectors.

/**
 * Collects the raw vectors attached to the given old bioassay dimensions and groups them by quantitation type.
 * <p>
 * Any quantitation type carried by a vector but absent from {@code qts} is added to the experiment's quantitation
 * types, and the experiment is then updated through the expression experiment service.
 *
 * @param expExp             the experiment; its quantitation types may be extended as described above
 * @param qts                only used to detect quantitation types that are missing
 * @param allOldBioAssayDims the old bioassay dimensions whose vectors are looked up
 * @return the vectors found, keyed by their quantitation type
 * @throws IllegalStateException if no vectors are found for any of the dimensions
 */
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
    Collection<RawExpressionDataVector> existingVectors = new HashSet<>();
    for (BioAssayDimension oldDimension : allOldBioAssayDims) {
        existingVectors.addAll(rawExpressionDataVectorService.find(oldDimension));
    }
    if (existingVectors.isEmpty()) {
        throw new IllegalStateException("No vectors");
    }
    rawExpressionDataVectorService.thaw(existingVectors);

    Map<QuantitationType, Collection<RawExpressionDataVector>> vectorsByType = new HashMap<>();
    Collection<QuantitationType> missingTypes = new HashSet<>();
    for (RawExpressionDataVector vector : existingVectors) {
        QuantitationType vectorType = vector.getQuantitationType();

        /*
         * Guard against QTs that are broken. Sometimes the QTs for the EE don't include the ones that the DEDVs
         * have, due to corruption.
         */
        if (!qts.contains(vectorType)) {
            missingTypes.add(vectorType);
        }

        Collection<RawExpressionDataVector> group = vectorsByType.get(vectorType);
        if (group == null) {
            group = new HashSet<RawExpressionDataVector>();
            vectorsByType.put(vectorType, group);
        }
        group.add(vector);
    }

    if (missingTypes.isEmpty()) {
        return vectorsByType;
    }

    expExp.getQuantitationTypes().addAll(missingTypes);
    VectorMergingServiceImpl.log.info("Adding " + missingTypes.size() + " missing quantitation types to experiment");
    expressionExperimentService.update(expExp);
    return vectorsByType;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 5 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method makeMergedData.

/**
 * Walks the old bioassay dimensions in order and appends data for each one to {@code mergedData}: the converted
 * contents of the vector belonging to that dimension when one exists among {@code dedvs}, or filler for missing
 * values otherwise. An old dimension equal to {@code newBioAd} is skipped.
 *
 * @param sortedOldDims the old bioassay dimensions, in the order to process them
 * @param newBioAd      the new bioassay dimension
 * @param type          the quantitation type; its representation drives conversion and filling
 * @param de            the design element all the vectors are expected to belong to
 * @param dedvs         the candidate vectors
 * @param mergedData    starts out empty, is initialized to the new data
 * @return number of values missing
 */
private int makeMergedData(List<BioAssayDimension> sortedOldDims, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de, Collection<RawExpressionDataVector> dedvs, List<Object> mergedData) {
    int missingCount = 0;
    PrimitiveType representation = type.getRepresentation();

    for (BioAssayDimension oldDim : sortedOldDims) {
        // The 'new' dimension may be one of the old ones being reused; it contributes nothing here.
        if (oldDim.equals(newBioAd)) {
            continue;
        }

        // Look for the vector that was measured on this old dimension.
        RawExpressionDataVector match = null;
        for (RawExpressionDataVector candidate : dedvs) {
            assert candidate.getDesignElement().equals(de);
            assert candidate.getQuantitationType().equals(type);
            if (candidate.getBioAssayDimension().equals(oldDim)) {
                match = candidate;
                break;
            }
        }

        if (match != null) {
            this.convertFromBytes(mergedData, representation, match);
        } else {
            missingCount += this.fillMissingValues(de, mergedData, oldDim, representation);
        }
    }
    return missingCount;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) PrimitiveType(ubic.gemma.model.common.quantitationtype.PrimitiveType)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2