Search in sources :

Example 16 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class ProcessedExpressionDataCreateServiceTest method testReorder.

/**
 * Loads the GSE404 test data, attaches a two-level categorical factor to the samples, asks the service to reorder
 * the processed vectors by design, and then verifies the sample order and a few data values.
 */
@Test
public void testReorder() throws Exception {
    // Remove any copy of the experiment that is already in the system.
    ExpressionExperiment stale = eeService.findByShortName("GSE404");
    if (stale != null) {
        eeService.remove(stale);
    }
    try {
        GeoDomainObjectGeneratorLocal generator = new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short"));
        geoService.setGeoDomainObjectGenerator(generator);
        @SuppressWarnings("unchecked")
        Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Fall back to the experiment carried by the exception.
        this.ee = (ExpressionExperiment) e.getData();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Build a categorical factor with two values; "bar" is flagged as the baseline.
    ExperimentalFactor design = ExperimentalFactor.Factory.newInstance();
    design.setType(FactorType.CATEGORICAL);
    design.setName(ee.getShortName() + " design");
    design.setExperimentalDesign(ee.getExperimentalDesign());
    design = eeService.addFactor(ee, design);
    FactorValue fooValue = FactorValue.Factory.newInstance();
    FactorValue baselineValue = FactorValue.Factory.newInstance();
    fooValue.setValue("foo");
    fooValue.setExperimentalFactor(design);
    baselineValue.setValue("bar");
    baselineValue.setIsBaseline(true);
    baselineValue.setExperimentalFactor(design);
    eeService.addFactorValue(ee, fooValue);
    eeService.addFactorValue(ee, baselineValue);

    // Walk the assays in id order and alternate the two factor values over samples that have none yet.
    List<BioAssay> assaysById = new ArrayList<>(ee.getBioAssays());
    Comparator<BioAssay> byId = new Comparator<BioAssay>() {

        @Override
        public int compare(BioAssay left, BioAssay right) {
            return left.getId().compareTo(right.getId());
        }
    };
    Collections.sort(assaysById, byId);
    int assigned = 0;
    for (BioAssay assay : assaysById) {
        BioMaterial sample = assay.getSampleUsed();
        assert fooValue.getId() != null;
        if (sample.getFactorValues().isEmpty()) {
            FactorValue chosen = (assigned % 2 == 0) ? fooValue : baselineValue;
            sample.getFactorValues().add(chosen);
            bioMaterialService.update(sample);
            assigned++;
        }
    }
    design = this.experimentalFactorService.load(design.getId());
    assertEquals(2, design.getFactorValues().size());

    /*
     * Setup is complete; now run the operation under test.
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Inspect the vectors that come back.
     */
    Collection<ProcessedExpressionDataVector> reordered = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    boolean sawProbe40 = false;
    assertTrue(reordered.size() > 0);
    for (ProcessedExpressionDataVector vec : reordered) {
        String probeName = vec.getDesignElement().getName();
        log.debug(probeName + " .........................");
        // Thawing to avoid a lazy-loading error: this test runs outside of a transaction, whereas all references
        // in the code under test run inside one.
        BioAssayDimension dimension = vec.getBioAssayDimension();
        bioAssayDimensionService.thawLite(dimension);
        int position = 0;
        for (BioAssay assay : dimension.getBioAssays()) {
            BioMaterial sample = assay.getSampleUsed();
            assertEquals(1, sample.getFactorValues().size());
            FactorValue assignedValue = sample.getFactorValues().iterator().next();
            assertNotNull(assignedValue.getId());
            log.debug(assay.getId() + " " + assignedValue.getId() + " " + assignedValue);
            if (position < 10) {
                // The baseline samples are expected to come first.
                assertEquals(baselineValue, assignedValue);
            }
            position++;
        }
        /*
         * Spot check the data for one known design element.
         */
        if (!probeName.equals("40")) {
            continue;
        }
        sawProbe40 = true;
        ByteArrayConverter converter = new ByteArrayConverter();
        Double[] values = ArrayUtils.toObject(converter.byteArrayToDoubles(vec.getData()));
        assertEquals(20, values.length);
        assertEquals(-0.08, values[1], 0.001);
        assertEquals(0.45, values[10], 0.001);
        assertEquals(Double.NaN, values[19], 0.001);
    }
    assertTrue("test vector not found", sawProbe40);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 17 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class LinkAnalysis method getCorrelationDistribution.

/**
 * Packages the histogram held by the metric matrix into a {@link CoexpCorrelationDistribution}: the number of bins
 * plus the bin counts converted to a byte array.
 *
 * @return the populated distribution object obtained from the factory
 */
public CoexpCorrelationDistribution getCorrelationDistribution() {
    DoubleArrayList histogram = this.metricMatrix.getHistogramArrayList();
    CoexpCorrelationDistribution distribution = CoexpCorrelationDistribution.Factory.newInstance();
    distribution.setNumBins(histogram.size());
    // The counts are stored in byte form, so convert the list to a primitive array first.
    double[] counts = MatrixUtil.fromList(histogram).toArray();
    distribution.setBinCounts(new ByteArrayConverter().doubleArrayToBytes(counts));
    return distribution;
}
Also used : CoexpCorrelationDistribution(ubic.gemma.model.analysis.expression.coexpression.CoexpCorrelationDistribution) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleArrayList(cern.colt.list.DoubleArrayList)

Example 18 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method diagnoseCorrelationDistribution.

/**
 * Check properties of the correlation distribution and reject the experiment if they look abnormal.
 * <p>
 * The bin counts are normalized so they sum to one and are then scanned cumulatively to obtain the density
 * accumulated up to the lower-tail bin, the density remaining after the upper-tail bin, the density of the bins in
 * the middle of the range, and the correlation at the median.
 *
 * @param ee       the experiment under analysis; handed to the exception when QC fails
 * @param corrDist the distribution whose bin counts are examined
 * @throws UnsuitableForAnalysisException if the median is further than 0.2 from zero, or if the two tails together
 *                                        hold more density than the middle
 */
private void diagnoseCorrelationDistribution(ExpressionExperiment ee, CoexpCorrelationDistribution corrDist) throws UnsuitableForAnalysisException {
    ByteArrayConverter bac = new ByteArrayConverter();
    double[] binCounts = bac.byteArrayToDoubles(corrDist.getBinCounts());
    int numBins = binCounts.length;
    DoubleMatrix1D histogram = new DenseDoubleMatrix1D(binCounts);
    // QC parameters; quantile, not correlation
    double lowerLimitofMiddle = 0.45;
    double upperLimitofMiddle = 0.55;
    double tailFraction = 0.1;
    // normalize
    histogram.assign(Functions.div(histogram.zSum()));

    /*
     * Perhaps these should be adjusted based on the sample size; for smaller data sets, more of the data is going
     * to be above 0.9 etc. But in practice this can't have a very big effect.
     */
    int lowerTailBin = (int) Math.floor(numBins * tailFraction);
    int upperTailBin = (int) Math.floor(numBins * (1.0 - tailFraction));
    int middleStartBin = (int) Math.floor(lowerLimitofMiddle * numBins);
    int middleEndBin = (int) Math.floor(upperLimitofMiddle * numBins);

    double lowerTailDensity = 0.0;
    double upperTailDensity = 0.0;
    double middleDensity = 0.0;
    double median = 0.0;
    boolean medianFound = false;
    // cumulative
    double s = 0.0;
    for (int bin = 0; bin < histogram.size(); bin++) {
        // cumulate
        s += histogram.get(bin);
        if (bin == lowerTailBin) {
            lowerTailDensity = s;
        } else if (bin == upperTailBin) {
            upperTailDensity = 1.0 - s;
        } else if (bin > middleStartBin && bin < middleEndBin) {
            middleDensity += histogram.get(bin);
        }
        // The median is the first bin at which the cumulative density reaches one half. This used to sit in an
        // else-if behind "s >= 0.2", which made it unreachable and left the median at 0.0 for every input.
        if (!medianFound && s >= 0.5) {
            median = this.binToCorrelation(bin, numBins);
            medianFound = true;
        }
    }

    if (median > 0.2 || median < -0.2) {
        throw new UnsuitableForAnalysisException(ee, "Correlation distribution fails QC: median far from center (" + median + ")");
    }
    if (lowerTailDensity + upperTailDensity > middleDensity) {
        throw new UnsuitableForAnalysisException(ee, "Correlation distribution fails QC: tails too heavy");
    }
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Example 19 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class DataUpdater method makeNewVectors.

/**
 * Build one raw expression data vector for each row of the matrix that has a design element.
 * <p>
 * Rows whose row element has no design element are skipped. All vectors share the bioassay dimension obtained by
 * passing the matrix's best dimension through {@code assayDimensionService.findOrCreate}.
 *
 * @param ee             experiment the vectors are associated with
 * @param targetPlatform platform every design element in the data must belong to
 * @param data           matrix supplying the rows and their design elements
 * @param qt             quantitation type set on every vector
 * @return the new vectors, one per row that was not skipped
 * @throws IllegalArgumentException if a row's design element belongs to a platform other than
 *                                  {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();
    for (int i = 0; i < data.rows(); i++) {
        // Validate the row before doing any conversion work, so skipped or rejected rows cost nothing.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }
        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    return vectors;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 20 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class GeoSuperSeriesLoadTest method testFetchAndLoadSuperSeriesB.

/*
 * See bug 2064. GSE14618 is a superseries of GSE14613 and GSE14615. This is actually even worse, because some
 * samples were run on both platforms. This is a situation we don't really want to handle completely.
 *
 * The test loads the superseries from local files, merges the two platforms it uses, switches the experiment to
 * the merged platform, merges the vectors, and then checks the vector count and the missing values of two probes.
 */
@Test
public void testFetchAndLoadSuperSeriesB() throws Exception {
    GeoDomainObjectGeneratorLocal localGenerator = new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse14618superser"));
    geoService.setGeoDomainObjectGenerator(localGenerator);
    ee = ees.findByShortName("GSE14618");
    this.tearDown();
    // noinspection unchecked
    Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE14618", false, true, false, true, false);
    assertEquals(1, loaded.size());
    ee = loaded.iterator().next();
    ee = ees.findByShortName("GSE14618");
    ee = ees.thawLite(ee);
    Collection<QuantitationType> quantitationTypes = ee.getQuantitationTypes();
    assertEquals(1, quantitationTypes.size());

    // Merge the second platform into the first, giving the merged platform random names.
    Object[] platforms = ees.getArrayDesignsUsed(ee).toArray();
    Collection<ArrayDesign> toMergeIn = new HashSet<>();
    toMergeIn.add((ArrayDesign) platforms[1]);
    ArrayDesign primary = (ArrayDesign) platforms[0];
    String mergedName = RandomStringUtils.randomAlphabetic(5);
    String mergedShortName = RandomStringUtils.randomAlphabetic(5);
    ArrayDesign merged = adms.merge(primary, toMergeIn, mergedName, mergedShortName, false);
    ee = eepss.switchExperimentToArrayDesign(ee, merged);
    vms.mergeVectors(ee);

    ee = ees.load(ee.getId());
    ee = ees.findByShortName("GSE14618");
    ee = ees.thaw(ee);
    assertEquals(40, ee.getProcessedExpressionDataVectors().size());

    boolean sawFirstProbe = false;
    boolean sawSecondProbe = false;
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector vec : ee.getProcessedExpressionDataVectors()) {
        double[] values = converter.byteArrayToDoubles(vec.getData());
        assertEquals(92, values.length);

        String probe = vec.getDesignElement().getName();
        boolean isFirstProbe = probe.equals("117_at");
        boolean isSecondProbe = !isFirstProbe && probe.equals("1552279_a_at");
        if (!isFirstProbe && !isSecondProbe) {
            continue;
        }

        // Count the missing (NaN) values of the probe we are checking.
        int missing = 0;
        for (double value : values) {
            if (Double.isNaN(value)) {
                missing++;
            }
        }
        if (isFirstProbe) {
            sawFirstProbe = true;
            assertEquals("Should have been no missing values", 0, missing);
        } else {
            sawSecondProbe = true;
            assertEquals("Wrong number of missing values", 42, missing);
        }
    }
    assertTrue("Didn't find first test probe expected.", sawFirstProbe);
    assertTrue("Didn't find second test probe expected.", sawSecondProbe);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) Collection(java.util.Collection) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) HashSet(java.util.HashSet) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)32 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)11 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)11 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)10 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)9 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 Test (org.junit.Test)4 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)4 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)4 DoubleArrayList (cern.colt.list.DoubleArrayList)3 Transactional (org.springframework.transaction.annotation.Transactional)3 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 InputStream (java.io.InputStream)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 XYSeries (org.jfree.data.xy.XYSeries)2