Search in sources :

Example 21 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class MeanVarianceServiceTest method testServiceCreateCountData.

/**
 * Loads GSE29006 through the GEO service, attaches count and RPKM matrices read from test resources, creates
 * the mean-variance relation for the experiment and spot-checks the sorted means and variances.
 *
 * @throws Exception if loading the experiment or reading the test matrices fails
 */
@Test
public final void testServiceCreateCountData() throws Exception {
    // so it doesn't look for soft files
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    // Start from a clean slate: drop any copy of the experiment left behind by an earlier run.
    ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertNull(eeService.findByShortName("GSE29006"));
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure report shows where the duplicate was detected.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    qt = this.createOrUpdateQt(ScaleType.COUNT);
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
        InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();
        // we have to find the right generic platform to use.
        ArrayDesign targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // With the last flag set to false the call is expected to be rejected.
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException expected) {
            // Expected
        }
        // Same call with the last flag set to true must go through.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }
    ee = eeService.thaw(this.ee);
    assertNotNull(ee.getId());
    MeanVarianceRelation mvr = meanVarianceService.create(ee, true);
    // convert byte[] to array[]
    double[] means = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getMeans());
    double[] variances = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getVariances());
    // JUnit assertions rather than the 'assert' keyword, so the null checks run even when JVM assertions are off.
    assertNotNull(means);
    assertNotNull(variances);
    // warning: order may have changed, so compare on sorted copies
    Arrays.sort(means);
    Arrays.sort(variances);
    // check sizes
    int expectedMeanVarianceLength = 199;
    // NAs removed
    int expectedLowessLength = 197;
    assertEquals(expectedMeanVarianceLength, means.length);
    assertEquals(expectedMeanVarianceLength, variances.length);
    // Spot-check the smallest sorted values.
    int idx = 0;
    assertEquals(1.037011, means[idx], 0.0001);
    assertEquals(0.00023724336, variances[idx], 0.000001);
    // NOTE(review): this indexes by the lowess length (197), not the array length (199) - confirm that is intended.
    idx = expectedLowessLength - 1;
    assertEquals(15.23313, means[idx], 0.0001);
    assertEquals(4.84529, variances[idx], 0.0001);
}
Also used : InputStream(java.io.InputStream) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) MeanVarianceRelation(ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 22 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testReorder.

/**
 * Loads GSE404, attaches a two-level categorical factor to its samples in alternating fashion, calls
 * {@code reorderByDesign} and then checks each processed vector: every sample carries exactly one factor
 * value, the first ten bioassays carry the baseline value, and the data of design element "40" match the
 * expected spot values.
 *
 * @throws Exception if loading or processing the experiment fails
 */
@Test
public void testReorder() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE404");
    if (old != null) {
        eeService.remove(old);
    }
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The other tests using this exception read its payload as a collection of experiments; accept
        // both a collection and a bare experiment so this path cannot die on a ClassCastException.
        Object existing = e.getData();
        if (existing instanceof Collection) {
            existing = ((Collection<?>) existing).iterator().next();
        }
        this.ee = (ExpressionExperiment) existing;
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    // Build a categorical factor with two values; fv2 ("bar") is flagged as the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);
    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);
    // Walk the bioassays in id order so the alternating assignment below is reproducible.
    List<BioAssay> basInOrder = new ArrayList<>(ee.getBioAssays());
    Collections.sort(basInOrder, new Comparator<BioAssay>() {

        @Override
        public int compare(BioAssay o1, BioAssay o2) {
            return o1.getId().compareTo(o2.getId());
        }
    });
    int i = 0;
    for (BioAssay ba : basInOrder) {
        // bioAssayService.thawRawAndProcessed( ba );
        BioMaterial bm = ba.getSampleUsed();
        // JUnit assertion rather than the 'assert' keyword, so the check runs even when JVM assertions are off.
        assertNotNull(fv1.getId());
        // Samples that already carry a factor value are left alone and do not advance the alternation.
        if (!bm.getFactorValues().isEmpty()) {
            continue;
        }
        if (i % 2 == 0) {
            bm.getFactorValues().add(fv1);
            // log.info( bm + " " + bm.getId() + " => " + fv1 );
        } else {
            bm.getFactorValues().add(fv2);
            // log.info( bm + " " + bm.getId() + " => " + fv2 );
        }
        bioMaterialService.update(bm);
        i++;
    }
    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());
    /*
     * All that was setup. Now do the interesting bit
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());
    /*
     * Now check the vectors...
     */
    Collection<ProcessedExpressionDataVector> resortedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    // ExpressionDataDoubleMatrix newMat = new ExpressionDataDoubleMatrix( resortedVectors );
    // log.info( newMat );
    boolean foundVector = false;
    assertTrue(resortedVectors.size() > 0);
    for (ProcessedExpressionDataVector vector : resortedVectors) {
        i = 0;
        log.debug(vector.getDesignElement().getName() + " .........................");
        // thawing to avoid lazy error because we are outside of transaction in this test. All references in code run
        // inside a transaction
        BioAssayDimension bioAssayDimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(bioAssayDimension);
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        for (BioAssay ba : bioAssays) {
            BioMaterial bm = ba.getSampleUsed();
            assertEquals(1, bm.getFactorValues().size());
            FactorValue fv = bm.getFactorValues().iterator().next();
            assertNotNull(fv.getId());
            log.debug(ba.getId() + " " + fv.getId() + " " + fv);
            if (i < 10) {
                // first because it is baseline;
                assertEquals(fv2, fv);
            }
            i++;
        }
        /*
         * spot check the data, same place as before.
         */
        if (vector.getDesignElement().getName().equals("40")) {
            foundVector = true;
            ByteArrayConverter conv = new ByteArrayConverter();
            Double[] d = ArrayUtils.toObject(conv.byteArrayToDoubles(vector.getData()));
            assertEquals(20, d.length);
            assertEquals(-0.08, d[1], 0.001);
            assertEquals(0.45, d[10], 0.001);
            assertEquals(Double.NaN, d[19], 0.001);
        }
    }
    assertTrue("test vector not found", foundVector);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 23 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.

/**
 * Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
 * <p>
 * The test checks, first through an {@code ExpressionDataDoubleMatrix} and then through
 * {@code getProcessedDataArrays}, that the only NaN values are those of the ten expected probes in the column
 * of the sample with the gap.
 *
 * @throws Exception if loading or processing the experiment fails
 */
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
        Object loaded = geoService.fetchAndLoad("GSE482", false, true, false);
        // The unchecked cast is confined to this declaration instead of being suppressed for the whole method.
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) loaded;
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The payload is read through a wildcard collection, so only a checked element cast is needed here.
        this.ee = (ExpressionExperiment) ((Collection<?>) e.getData()).iterator().next();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.thaw(preferredVectors);
    ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
    assertEquals(10, mat.columns());
    // Compiled once here; String.matches would recompile the expression for every cell of the matrix.
    final java.util.regex.Pattern gapSampleName = java.util.regex.Pattern.compile("PGA-MurLungHyper-Norm-1a[ABC]v2-s2");
    boolean found = false;
    for (int i = 0; i < mat.rows(); i++) {
        Double[] row = mat.getRow(i);
        // debugging
        if (i == 0) {
            for (int j = 0; j < row.length; j++) {
                BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
                System.err.println(ba.getName());
            }
        }
        System.err.print(mat.getRowElement(i).getDesignElement().getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        CompositeSequence el = mat.getDesignElementForRow(i);
        for (int j = 0; j < row.length; j++) {
            BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
            // NaN is only acceptable for the expected probes in the column of the sample with the gap.
            if (gapSampleName.matcher(ba.getName()).matches() && isExpectedGapProbe(el.getName())) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);
    /*
     * Now do this through the processedExpressionDataVectorService
     */
    Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(30, da.size());
    found = false;
    boolean first = true;
    for (DoubleVectorValueObject v : da) {
        CompositeSequenceValueObject el = v.getDesignElement();
        double[] row = v.getData();
        // debugging
        if (first) {
            for (int j = 0; j < row.length; j++) {
                BioAssayValueObject ba = v.getBioAssays().get(j);
                System.err.println(ba.getName());
            }
            first = false;
        }
        System.err.print(el.getName() + "\t");
        for (double d : row) {
            System.err.print(String.format("%4.2f\t", d));
        }
        System.err.print("\n");
        assertEquals(10, row.length);
        for (int j = 0; j < row.length; j++) {
            assertNotNull(v.getBioAssays());
            BioAssayValueObject ba = v.getBioAssays().get(j);
            // On this path the gap column is recognised by its name prefix.
            if (ba.getName().startsWith("Missing bioassay for biomaterial") && isExpectedGapProbe(el.getName())) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);
}

/**
 * Tells whether the given probe name is one of the ten probes for which the gap test expects a missing value.
 *
 * @param probeName name of the design element; must not be null
 * @return true if the name is one of the ten expected probes
 */
private static boolean isExpectedGapProbe(String probeName) {
    switch (probeName) {
        case "100001_at":
        case "100002_at":
        case "100003_at":
        case "100004_at":
        case "100005_at":
        case "100006_at":
        case "100007_at":
        case "100009_r_at":
        case "100010_at":
        case "100011_at":
            return true;
        default:
            return false;
    }
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) CompositeSequenceValueObject(ubic.gemma.model.expression.designElement.CompositeSequenceValueObject) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 24 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentB.

/**
 * Loads GSE5949, computes its processed expression data and checks each resulting vector for its quantitation
 * type and rank fields. It also compares the vector count in the generated summary with the experiment's own
 * count, and checks that computing the processed data a second time leaves the number of quantitation types
 * unchanged.
 *
 * @throws Exception if loading or processing the experiment fails
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentB() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("GSE5949short")));
        ee = ((Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE5949", false, true, false)).iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // Reuse the experiment that is already in the system.
        Collection<ExpressionExperiment> existing = (Collection<ExpressionExperiment>) alreadyLoaded.getData();
        ee = existing.iterator().next();
    }
    ee = eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> vectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);

    // Reload so the checks below run against the stored state.
    ee = eeService.load(ee.getId());
    ee = eeService.thawLite(ee);
    final int quantitationTypeCount = ee.getQuantitationTypes().size();

    for (ProcessedExpressionDataVector vector : vectors) {
        assertTrue(vector.getQuantitationType().getIsMaskedPreferred());
        assertTrue(ee.getQuantitationTypes().contains(vector.getQuantitationType()));
        assertNotNull(vector.getRankByMean());
        assertNotNull(vector.getRankByMax());
    }

    assertNotNull(ee.getNumberOfDataVectors());
    ExpressionExperimentValueObject summary = expressionExperimentReportService.generateSummary(ee.getId());
    assertNotNull(summary);
    assertEquals(ee.getNumberOfDataVectors(), summary.getProcessedExpressionVectorCount());

    // repeat, make sure deleted old QTs.
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    ee = eeService.load(ee.getId());
    ee = eeService.thawLite(ee);
    assertEquals(quantitationTypeCount, ee.getQuantitationTypes().size());
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 25 with AlreadyExistsInSystemException

use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceTest method testComBatOnEE.

/**
 * Loads GSE18162, computes its processed expression data, imports the experimental design from a test
 * resource and checks that ComBat batch correction returns a matrix.
 *
 * @throws Exception if loading the experiment or importing the design fails
 */
@Test
public void testComBatOnEE() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse18162Short")));
    ExpressionExperiment newee;
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE18162", false, true, false);
        newee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Only iteration is needed, so cast to Collection rather than List: any collection type carried by
        // the exception is accepted instead of failing with a ClassCastException.
        newee = (ExpressionExperiment) ((Collection<?>) e.getData()).iterator().next();
    }
    assertNotNull(newee);
    newee = expressionExperimentService.thawLite(newee);
    processedExpressionDataVectorService.computeProcessedExpressionData(newee);
    // The design file is read from the same test data directory as the experiment.
    try (InputStream deis = this.getClass().getResourceAsStream("/data/loader/expression/geo/gse18162Short/design.txt")) {
        experimentalDesignImporter.importDesign(newee, deis);
    }
    ExpressionDataDoubleMatrix comBat = correctionService.comBat(newee);
    assertNotNull(comBat);
}
Also used : InputStream(java.io.InputStream) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) List(java.util.List) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)41 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)33 Test (org.junit.Test)29 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)29 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)10 Collection (java.util.Collection)9 HashSet (java.util.HashSet)8 Before (org.junit.Before)8 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)8 InputStream (java.io.InputStream)7 List (java.util.List)7 GeoDomainObjectGenerator (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)6 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)5 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)5 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)5 DoubleMatrixReader (ubic.basecode.io.reader.DoubleMatrixReader)4 File (java.io.File)3 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)3