Search in sources :

Example 6 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testMatrixConversion.

@Test
public void testMatrixConversion() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("")));
        Collection<?> results = geoService.fetchAndLoad("GSE8294", false, true, false);
        newee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        newee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
    }
    newee = expressionExperimentService.thaw(newee);
    // make sure we really thawRawAndProcessed them, so we can get the design element sequences.
    Collection<RawExpressionDataVector> vectors = newee.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(vectors);
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(vectors);
    ExpressionDataDoubleMatrix matrix = builder.getPreferredData();
    assertTrue(!Double.isNaN(matrix.get(10, 0)));
    assertEquals(66, matrix.rows());
    assertEquals(9, matrix.columns());
    /*
         * Additional tests for files and outlier marking.
         */
    processedDataVectorService.computeProcessedExpressionData(newee);
    File f1 = expressionDataFileService.writeOrLocateDataFile(expressionExperimentService.load(newee.getId()), true, true);
    assertNotNull(f1);
    assertTrue(f1.exists());
    expressionDataFileService.deleteAllFiles(newee);
    assertTrue(!f1.exists());
    /*
         * outlier removal.
         */
    BioAssay tba = newee.getBioAssays().iterator().next();
    Collection<BioAssay> ol = new HashSet<>();
    ol.add(tba);
    sampleRemoveService.markAsMissing(ol);
    assertTrue(tba.getIsOutlier());
    newee = expressionExperimentService.thaw(newee);
    Collection<ProcessedExpressionDataVector> vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    ExpressionDataMatrixBuilder matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    ExpressionDataDoubleMatrix data = matrixBuilder.getProcessedData();
    assertNotNull(data);
    assertTrue(Double.isNaN(data.getColumn(tba)[10]));
    sampleRemoveService.unmarkAsMissing(ol);
    newee = expressionExperimentService.load(newee.getId());
    newee = expressionExperimentService.thaw(newee);
    vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    data = matrixBuilder.getProcessedData();
    assertTrue(!tba.getIsOutlier());
    assertTrue(!Double.isNaN(data.getColumn(tba)[10]));
}
Also used : ExpressionDataMatrixBuilder(ubic.gemma.core.analysis.preprocess.ExpressionDataMatrixBuilder) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) File(java.io.File) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 7 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class DataUpdaterTest method testAddData.

@Test
public void testAddData() throws Exception {
    /*
         * Load a regular data set that has no data. Platform is (basically) irrelevant.
         */
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> results = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // log.warn( "Test skipped because GSE37646 was not removed from the system prior to test" );
        ee = (ExpressionExperiment) ((List<?>) e.getData()).get(0);
    }
    ee = experimentService.thawLite(ee);
    List<BioAssay> bioAssays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, bioAssays.size());
    List<BioMaterial> bms = new ArrayList<>();
    for (BioAssay ba : bioAssays) {
        bms.add(ba.getSampleUsed());
    }
    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    DoubleMatrix<CompositeSequence, BioMaterial> rawMatrix = new DenseDoubleMatrix<>(targetArrayDesign.getCompositeSequences().size(), bms.size());
    /*
         * make up some fake data on another platform, and match it to those samples
         */
    for (int i = 0; i < rawMatrix.rows(); i++) {
        for (int j = 0; j < rawMatrix.columns(); j++) {
            rawMatrix.set(i, j, (i + 1) * (j + 1) * Math.random() / 100.0);
        }
    }
    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    rawMatrix.setRowNames(probes);
    rawMatrix.setColumnNames(bms);
    QuantitationType qt = this.makeQt(true);
    ExpressionDataDoubleMatrix data = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    assertNotNull(data.getBestBioAssayDimension());
    assertEquals(rawMatrix.columns(), data.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), data.getMatrix().rows());
    /*
         * Replace it.
         */
    ee = dataUpdater.replaceData(ee, targetArrayDesign, data);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    ee = experimentService.thaw(ee);
    for (BioAssay ba : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, ba.getArrayDesignUsed());
    }
    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector v : ee.getRawExpressionDataVectors()) {
        assertTrue(v.getQuantitationType().getIsPreferred());
    }
    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    Collection<DoubleVectorValueObject> processedDataArrays = dataVectorService.getProcessedDataArrays(ee);
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(31, v.getBioAssays().size());
    }
    /*
         * Test adding data (non-preferred)
         */
    qt = this.makeQt(false);
    ExpressionDataDoubleMatrix moreData = new ExpressionDataDoubleMatrix(ee, qt, rawMatrix);
    ee = dataUpdater.addData(ee, targetArrayDesign, moreData);
    ee = experimentService.thaw(ee);
    try {
        // add preferred data twice.
        dataUpdater.addData(ee, targetArrayDesign, data);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException e) {
    // okay.
    }
    dataUpdater.deleteData(ee, qt);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ArrayList(java.util.ArrayList) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 8 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method getVectors.

/**
 * Get the current set of vectors that need to be updated.
 *
 * @param expExp             ee
 * @param qts                - only used to check for problems.
 * @param allOldBioAssayDims old BA dims
 * @return map
 */
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
    Collection<RawExpressionDataVector> oldVectors = new HashSet<>();
    for (BioAssayDimension dim : allOldBioAssayDims) {
        oldVectors.addAll(rawExpressionDataVectorService.find(dim));
    }
    if (oldVectors.isEmpty()) {
        throw new IllegalStateException("No vectors");
    }
    rawExpressionDataVectorService.thaw(oldVectors);
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = new HashMap<>();
    Collection<QuantitationType> qtsToAdd = new HashSet<>();
    for (RawExpressionDataVector v : oldVectors) {
        QuantitationType qt = v.getQuantitationType();
        if (!qts.contains(qt)) {
            /*
                 * Guard against QTs that are broken. Sometimes the QTs for the EE don't include the ones that the DEDVs
                 * have, due to corruption.
                 */
            qtsToAdd.add(qt);
        }
        if (!qt2Vec.containsKey(qt)) {
            qt2Vec.put(qt, new HashSet<RawExpressionDataVector>());
        }
        qt2Vec.get(qt).add(v);
    }
    if (!qtsToAdd.isEmpty()) {
        expExp.getQuantitationTypes().addAll(qtsToAdd);
        VectorMergingServiceImpl.log.info("Adding " + qtsToAdd.size() + " missing quantitation types to experiment");
        expressionExperimentService.update(expExp);
    }
    return qt2Vec;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 9 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method initializeNewVector.

/**
 * Make a (non-persistent) vector that has the right bioAssayDimension, designelement and quantitationtype.
 *
 * @param expExp   ee
 * @param newBioAd new BA dim
 * @param type     type
 * @param de       de
 * @return raw data vector
 */
private RawExpressionDataVector initializeNewVector(ExpressionExperiment expExp, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de) {
    RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
    vector.setBioAssayDimension(newBioAd);
    vector.setDesignElement(de);
    vector.setQuantitationType(type);
    vector.setExpressionExperiment(expExp);
    return vector;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)

Example 10 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method makeMergedData.

/**
 * @param sortedOldDims sorted old dims
 * @param newBioAd      new BA dims
 * @param type          type
 * @param de            de
 * @param dedvs         dedvs
 * @param mergedData    starts out empty, is initalized to the new data.
 * @return number of values missing
 */
private int makeMergedData(List<BioAssayDimension> sortedOldDims, BioAssayDimension newBioAd, QuantitationType type, CompositeSequence de, Collection<RawExpressionDataVector> dedvs, List<Object> mergedData) {
    int totalMissingInVector = 0;
    PrimitiveType representation = type.getRepresentation();
    for (BioAssayDimension oldDim : sortedOldDims) {
        // careful, the 'new' bioAssayDimension might be one of the old ones that we're reusing.
        if (oldDim.equals(newBioAd))
            continue;
        boolean found = false;
        for (RawExpressionDataVector oldV : dedvs) {
            assert oldV.getDesignElement().equals(de);
            assert oldV.getQuantitationType().equals(type);
            if (oldV.getBioAssayDimension().equals(oldDim)) {
                found = true;
                this.convertFromBytes(mergedData, representation, oldV);
                break;
            }
        }
        if (!found) {
            int missing = this.fillMissingValues(de, mergedData, oldDim, representation);
            totalMissingInVector += missing;
        }
    }
    return totalMissingInVector;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) PrimitiveType(ubic.gemma.model.common.quantitationtype.PrimitiveType)

Aggregations

RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)53 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)16 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)16 Test (org.junit.Test)15 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)9 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 InputStream (java.io.InputStream)6 Collection (java.util.Collection)6 HashSet (java.util.HashSet)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)6 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 Transactional (org.springframework.transaction.annotation.Transactional)4