Search in sources :

Example 6 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method svd.

/**
 * Runs a singular value decomposition over the processed expression data of an experiment, stores the
 * outcome as a principal component analysis and returns the result of the follow-up factor analysis.
 *
 * @param ee the experiment to analyze; must not be null (checked by assertion only)
 * @return the value object produced by {@code svdFactorAnalysis} for the updated PCA
 * @throws IllegalArgumentException if the experiment has no processed data vectors
 */
@Override
public SVDValueObject svd(ExpressionExperiment ee) {
    assert ee != null;

    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    if (processedVectors.isEmpty()) {
        throw new IllegalArgumentException("Experiment must have processed data already to do SVD");
    }
    processedExpressionDataVectorService.thaw(processedVectors);
    ExpressionDataDoubleMatrix dataMatrix = new ExpressionDataDoubleMatrix(processedVectors);

    SVDServiceHelperImpl.log.info("Starting SVD");
    ExpressionDataSVD decomposition = new ExpressionDataSVD(dataMatrix);
    SVDServiceHelperImpl.log.info("SVD done, postprocessing and storing results.");

    // Save the results: the V matrix and the best bioassay dimension are handed to the PCA update.
    PrincipalComponentAnalysis storedPca = this.updatePca(ee, decomposition, decomposition.getV(), dataMatrix.getBestBioAssayDimension());
    return this.svdFactorAnalysis(storedPca);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 7 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class GeoDatasetServiceTest method testFetchAndLoadGSE9048.

/**
 * Loads GSE9048 from the local test files and checks the number of quantitation types after import,
 * after computing missing values and after computing processed data, then checks that a data file
 * can be written or located and is readable and non-empty.
 */
@Test
public void testFetchAndLoadGSE9048() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> loaded = geoService.fetchAndLoad("GSE9048", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        log.info("Test skipped because GSE9048 was already loaded - clean the DB before running the test");
        return;
    }

    // Freshly imported experiment.
    ee = this.eeService.thawLite(eeService.load(ee.getId()));
    aclTestUtils.checkEEAcls(ee);
    assertEquals(16, eeService.getQuantitationTypes(ee).size());

    // 16 that were imported plus the detection call we added.
    twoChannelMissingValues.computeMissingValues(ee);
    ee = this.eeService.thawLite(eeService.load(ee.getId()));
    assertEquals(17, eeService.getQuantitationTypes(ee).size());

    // Computing the processed data is expected to add one more quantitation type.
    Collection<ProcessedExpressionDataVector> processed = processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    assertEquals(10, processed.size());
    ee = this.eeService.thawLite(eeService.load(ee.getId()));
    assertEquals(18, eeService.getQuantitationTypes(ee).size());

    File dataFile = dataFileService.writeOrLocateDataFile(ee, true, true);
    assertTrue(dataFile.canRead());
    assertTrue(dataFile.length() > 0);
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) File(java.io.File) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 8 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class GeoDatasetServiceTest method testFetchAndLoadGSE18707.

/**
 * Regression test for bug 2312 - qts getting dropped.
 */
@Test
public void testFetchAndLoadGSE18707() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> loaded = geoService.fetchAndLoad("GSE18707", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        log.info("Test skipped because GSE18707 was already loaded - clean the DB before running the test");
        return;
    }

    // Mouse430A_2.
    ee = eeService.findByShortName("GSE18707");
    aclTestUtils.checkEEAcls(ee);

    // Exactly one quantitation type is expected right after import.
    Collection<QuantitationType> importedTypes = eeService.getQuantitationTypes(ee);
    assertEquals(1, importedTypes.size());
    assertEquals("Processed Affymetrix Rosetta intensity values", importedTypes.iterator().next().getDescription());

    Collection<ProcessedExpressionDataVector> processed = processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    assertEquals(100, processed.size());

    // The original quantitation type must still be there alongside the new one.
    ee = eeService.findByShortName("GSE18707");
    assertEquals(2, eeService.getQuantitationTypes(ee).size());
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 9 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class BaseAnalyzerConfigurationTest method configureVectors.

/**
 * Populates {@code vectors} with one processed expression data vector per design element, using values
 * read from a matrix resource on the classpath, and attaches the vectors and their composite sequences
 * to the experiment and array design fixtures.
 *
 * @param bioMaterials the biomaterials of the design; its size determines how many columns of the
 *                     matrix are copied into each vector
 * @param resourcePath classpath location of the data matrix, or {@code null} to use the default
 *                     ANOVA test data
 * @throws Exception if the matrix cannot be read
 */
void configureVectors(List<BioMaterial> bioMaterials, String resourcePath) throws Exception {
    this.vectors = new HashSet<>();
    String path = resourcePath == null ? "/data/stat-tests/anova-test-data.txt" : resourcePath;

    DoubleMatrixReader r = new DoubleMatrixReader();
    DoubleMatrix<String, String> dataMatrix;
    // Close the resource stream once the matrix has been read; the original left it open.
    try (java.io.InputStream in = this.getClass().getResourceAsStream(path)) {
        dataMatrix = r.read(in);
    }

    Collection<CompositeSequence> compositeSequences = new HashSet<>();
    for (int i = 0; i < BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS; i++) {
        // Row i of the matrix supplies both the element name and the data values.
        CompositeSequence cs = CompositeSequence.Factory.newInstance();
        cs.setName(dataMatrix.getRowName(i));
        cs.setId(i + 1000L);
        cs.setArrayDesign(arrayDesign);

        ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
        vector.setBioAssayDimension(bioAssayDimension);
        vector.setQuantitationType(quantitationType);
        vector.setDesignElement(cs);
        vector.setId(i + 10000L);

        double[] dvals = new double[bioMaterials.size()];
        for (int j = 0; j < dvals.length; j++) {
            dvals[j] = dataMatrix.get(i, j);
        }
        vector.setData(bac.doubleArrayToBytes(dvals));

        vectors.add(vector);
        compositeSequences.add(cs);
    }
    expressionExperiment.setProcessedExpressionDataVectors(vectors);
    arrayDesign.setCompositeSequences(compositeSequences);
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) HashSet(java.util.HashSet)

Example 10 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionExperimentPlatformSwitchService method runOldAd.

/**
 * Processes the data vectors of an experiment that sit on one previously used platform ({@code oldAd}),
 * one quantitation type at a time, handing each vector to {@code processVector} and then removing and
 * re-adding the affected vectors on the experiment.
 * <p>
 * Does nothing when {@code oldAd} equals {@code arrayDesign}.
 *
 * @param ee                 the experiment whose vectors are processed
 * @param arrayDesign        the platform being switched to (per the log message below)
 * @param designElementMap   passed through to {@code processVector}
 * @param maxBAD             passed through to {@code processVector}
 * @param usedDesignElements cleared at the start of every quantitation type and passed through to
 *                           {@code processVector}
 * @param oldAd              the platform whose vectors are processed; it is thawed before use
 * @throws IllegalStateException if {@code oldAd} has no composite sequences while its technology type is
 *                               not {@code NONE}, or if a quantitation type has both raw and processed
 *                               vectors
 */
private void runOldAd(ExpressionExperiment ee, ArrayDesign arrayDesign, Map<BioSequence, Collection<CompositeSequence>> designElementMap, BioAssayDimension maxBAD, Map<CompositeSequence, Collection<BioAssayDimension>> usedDesignElements, ArrayDesign oldAd) {
    // Nothing to switch if the vectors are already on the target platform.
    if (oldAd.equals(arrayDesign))
        return;
    oldAd = arrayDesignService.thaw(oldAd);
    if (oldAd.getCompositeSequences().size() == 0 && !oldAd.getTechnologyType().equals(TechnologyType.NONE)) {
        /*
         * Bug 3451 - this is okay if it is a RNA-seq experiment etc. prior to data upload.
         */
        throw new IllegalStateException(oldAd + " has no elements");
    }
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee, oldAd);
    ExpressionExperimentPlatformSwitchService.log.info("Processing " + qts.size() + " quantitation types for vectors on " + oldAd);
    for (QuantitationType type : qts) {
        // use each design element only once per quantitation type + bioassaydimension per array design
        usedDesignElements.clear();
        Collection<RawExpressionDataVector> rawForQt = this.getRawVectorsForOneQuantitationType(oldAd, type);
        Collection<ProcessedExpressionDataVector> processedForQt = this.getProcessedVectorsForOneQuantitationType(oldAd, type);
        if (// neither raw nor processed vectors exist for this quantitation type
        (rawForQt == null || rawForQt.size() == 0) && (processedForQt == null || processedForQt.size() == 0)) {
            /*
             * This can happen when the quantitation types vary for the array designs.
             */
            ExpressionExperimentPlatformSwitchService.log.debug("No vectors for " + type + " on " + oldAd);
            continue;
        }
        // This check assures we do not mix raw and processed vectors further down the line
        if ((rawForQt != null && rawForQt.size() > 0) && (processedForQt != null && processedForQt.size() > 0)) {
            throw new IllegalStateException("Two types of vector for quantitationType " + type);
        }
        // After the two checks above, exactly one of the two collections is non-empty; merge them so
        // the loop below can treat both kinds uniformly.
        Collection<DesignElementDataVector> vectors = new HashSet<>();
        if (rawForQt != null) {
            vectors.addAll(rawForQt);
        }
        if (processedForQt != null) {
            vectors.addAll(processedForQt);
        }
        ExpressionExperimentPlatformSwitchService.log.info("Switching " + vectors.size() + " vectors for " + type + " from " + oldAd.getShortName() + " to " + arrayDesign.getShortName());
        int count = 0;
        // noinspection MismatchedQueryAndUpdateOfCollection // Only used for logging
        Collection<DesignElementDataVector> unMatched = new HashSet<>();
        for (DesignElementDataVector vector : vectors) {
            // NOTE(review): 'vectors' can hold processed vectors (added above), yet this assertion only
            // accepts RawExpressionDataVector; with assertions enabled it looks like it would fail for a
            // processed-only quantitation type - confirm the intended type check.
            assert RawExpressionDataVector.class.isAssignableFrom(vector.getClass()) : "Unexpected class: " + vector.getClass().getName();
            CompositeSequence oldDe = vector.getDesignElement();
            // Skip vectors whose design element already belongs to the target platform.
            if (oldDe.getArrayDesign().equals(arrayDesign)) {
                continue;
            }
            this.processVector(designElementMap, usedDesignElements, vector, maxBAD);
            // Progress report every 20000 processed vectors.
            if (++count % 20000 == 0) {
                ExpressionExperimentPlatformSwitchService.log.info("Found matches for " + count + " vectors for " + type);
            }
        }
        /*
         * This is bad.
         *
         * NOTE(review): nothing in this method ever adds to 'unMatched', so as written this check
         * cannot trigger - confirm whether the result of processVector was meant to feed it.
         */
        if (unMatched.size() > 0) {
            throw new IllegalStateException("There were " + unMatched.size() + " vectors that couldn't be matched to the new design for: " + type + ", example: " + unMatched.iterator().next());
        }
        // Force collection update: remove the vectors for this quantitation type from the experiment and
        // add them back; the assertions check that the removal shrank the collection and that the
        // re-add restored its original size.
        if (rawForQt != null && rawForQt.size() > 0) {
            int s = ee.getRawExpressionDataVectors().size();
            ee.getRawExpressionDataVectors().removeAll(rawForQt);
            assert s > ee.getRawExpressionDataVectors().size();
            ee.getRawExpressionDataVectors().addAll(rawForQt);
            assert s == ee.getRawExpressionDataVectors().size();
        } else if (processedForQt != null && processedForQt.size() > 0) {
            int s = ee.getProcessedExpressionDataVectors().size();
            ee.getProcessedExpressionDataVectors().removeAll(processedForQt);
            assert s > ee.getProcessedExpressionDataVectors().size();
            ee.getProcessedExpressionDataVectors().addAll(processedForQt);
            assert s == ee.getProcessedExpressionDataVectors().size();
        }
    }
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector): 26; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 10; BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 9; Test (org.junit.Test): 8; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 8; GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal): 8; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 7; AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException): 6; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 6; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 5; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 5; DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 5; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 5; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 5; HashSet (java.util.HashSet): 4; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 4; File (java.io.File): 3; StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType): 3; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 3; DoubleArrayList (cern.colt.list.DoubleArrayList): 2