Search in sources :

Example 1 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.

/**
 * Self-contained test: it does not depend on the setup done in {@code onSetUpInTransaction}. It builds an
 * {@link ExpressionDataDoubleMatrix} from values taken from the Gene Expression Omnibus (GEO) series GSE994.
 * The probe sets used are 218120_s_at and 121_at, and the samples used are GSM15697 and GSM15744.
 * <p>
 * The Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials)</li>
 * </ul>
 * The resulting matrix is expected to have two rows (one per vector) and two columns (one per bioassay).
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // Quantitation type shared by both vectors: plain, preferred, double-valued "VALUE" column.
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Two bioassays, each backed by one biomaterial, in a single dimension.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();
    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);
    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);
    bioAssayDimension.setBioAssays(assays);

    // One raw vector per probe set; each holds one value per bioassay.
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);
    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // Design elements (probes) on a shared array design, each with a biosequence attached.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");
    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);
    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    // LinkedHashSet keeps the insertion order of the vectors.
    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);

    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // JUnit convention is assertEquals(expected, actual); the reverse yields misleading failure messages.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 2 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method print.

/**
 * Debugging aid: logs one line per vector, consisting of the vector's design element followed by its
 * tab-separated values. The values are decoded according to the representation of the vector's
 * quantitation type (double, int, boolean or string); for any other representation only the design
 * element is written.
 *
 * @param newVectors the vectors to dump to the log
 */
@SuppressWarnings("unused")
private void print(Collection<DesignElementDataVector> newVectors) {
    final StringBuilder out = new StringBuilder();
    final ByteArrayConverter byteConverter = new ByteArrayConverter();
    for (DesignElementDataVector dataVector : newVectors) {
        out.append(dataVector.getDesignElement());
        final PrimitiveType representation = dataVector.getQuantitationType().getRepresentation();
        if (representation.equals(PrimitiveType.DOUBLE)) {
            for (double value : byteConverter.byteArrayToDoubles(dataVector.getData())) {
                out.append('\t').append(value);
            }
        } else if (representation.equals(PrimitiveType.INT)) {
            for (int value : byteConverter.byteArrayToInts(dataVector.getData())) {
                out.append('\t').append(value);
            }
        } else if (representation.equals(PrimitiveType.BOOLEAN)) {
            for (boolean value : byteConverter.byteArrayToBooleans(dataVector.getData())) {
                out.append('\t').append(value);
            }
        } else if (representation.equals(PrimitiveType.STRING)) {
            for (String value : byteConverter.byteArrayToStrings(dataVector.getData())) {
                out.append('\t').append(value);
            }
        }
        out.append('\n');
    }
    VectorMergingServiceImpl.log.info("\n" + out);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 3 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.

/**
 * Reorders the processed data vectors of an experiment so that their columns follow the ordering given by
 * the experimental design.
 * <p>
 * For every bioassay dimension of the experiment a new, reordered dimension is created and persisted; each
 * processed vector then has its values permuted to match and is pointed at the new dimension. Nothing is
 * done (apart from logging) when the experiment has no experimental factors or no processed vectors.
 *
 * @param eeId id of the expression experiment to reorder
 * @throws IllegalStateException if a bioassay's sample is not part of the design-based ordering, or if a
 *                               data column has no known position in the new ordering
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    // The first dimension supplies the starting sample order.
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list with placeholders so positions can be set in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        // Iterate with a running index instead of casting the collection to a List.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("No position in the design-based ordering for the sample used by bioassay " + bioAssay);
            }
            resorted.set(newIndex, bioAssay);
            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;
        double[] data = converter.byteArrayToDoubles(v.getData());
        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay that was at position 'from'
         * now sits at position indexes.get(from), so its value has to move to that same position. (Reading
         * data[indexes.get(k)] into slot k would apply the inverse permutation, which only agrees with the
         * bioassay order when the permutation happens to be its own inverse.)
         */
        Double[] resortedData = new Double[data.length];
        for (int from = 0; from < data.length; from++) {
            Integer to = indexes.get(from);
            if (to == null) {
                throw new IllegalStateException("No new position known for data column " + from + " of a vector with " + data.length + " values");
            }
            resortedData[to] = data[from];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleArrayList(cern.colt.list.DoubleArrayList) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transactional(org.springframework.transaction.annotation.Transactional)

Example 4 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method print.

/**
 * Debug code: writes the given vectors to standard error as a tab-separated table. The header row lists
 * the bioassays of the first vector's dimension; each following row is a design element followed by the
 * vector's data decoded as booleans.
 *
 * @param calls the vectors to print; the first one supplies the bioassay dimension for the header
 */
@SuppressWarnings("unused")
private void print(Collection<RawExpressionDataVector> calls) {
    final ByteArrayConverter converter = new ByteArrayConverter();
    final BioAssayDimension dimension = calls.iterator().next().getBioAssayDimension();

    // Header row.
    System.err.print("\n");
    for (BioAssay assay : dimension.getBioAssays()) {
        System.err.print("\t" + assay);
    }
    System.err.print("\n");

    // One row per vector.
    for (DesignElementDataVector call : calls) {
        System.err.print(call.getDesignElement());
        for (boolean flag : converter.byteArrayToBooleans(call.getData())) {
            System.err.print("\t" + flag);
        }
        System.err.print("\n");
    }
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 5 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValue.

/**
 * Parses the GSE2221 SOFT file from the classpath, converts and persists the resulting experiment,
 * computes missing values for it, and spot-checks the result: 500 vectors are expected, and for the
 * bioassay named "expression array ME-TMZ" probe "26" must be {@code false} and probe "27" {@code true}.
 *
 * @throws Exception if reading or parsing the test resource fails
 */
@Test
public void testMissingValue() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null)
        eeService.remove(old);
    GeoFamilyParser parser = new GeoFamilyParser();
    InputStream resource = this.getClass().getResourceAsStream("/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz");
    // Report a missing fixture clearly instead of as an NPE inside GZIPInputStream.
    assertNotNull("Test resource GSE2221_family.soft.gz was not found on the classpath", resource);
    // Closing the gzip stream also closes the underlying resource stream.
    try (InputStream is = new GZIPInputStream(resource)) {
        parser.parse(is);
    }
    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));
    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();
    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean foundA = false;
    boolean foundB = false;
    for (DesignElementDataVector vector : calls) {
        String probeName = vector.getDesignElement().getName();
        boolean isProbe26 = probeName.equals("26");
        boolean isProbe27 = probeName.equals("27");
        if (!isProbe26 && !isProbe27) {
            continue;
        }
        boolean[] row = bac.byteArrayToBooleans(vector.getData());
        // The position of a bioassay in the dimension is used as the column index into the row.
        int i = 0;
        for (BioAssay bas : dim.getBioAssays()) {
            if (bas.getName().equals("expression array ME-TMZ")) {
                if (isProbe26) {
                    assertTrue(!row[i]);
                    foundA = true;
                } else {
                    assertTrue(row[i]);
                    foundB = true;
                }
            }
            i++;
        }
    }
    // Checked separately so a failure says which probe was never encountered.
    assertTrue("Probe 26 was not checked for sample ME-TMZ", foundA);
    assertTrue("Probe 27 was not checked for sample ME-TMZ", foundB);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) GZIPInputStream(java.util.zip.GZIPInputStream) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Aggregations

ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)32 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)11 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)11 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)10 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)9 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 Test (org.junit.Test)4 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)4 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)4 DoubleArrayList (cern.colt.list.DoubleArrayList)3 Transactional (org.springframework.transaction.annotation.Transactional)3 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 InputStream (java.io.InputStream)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 XYSeries (org.jfree.data.xy.XYSeries)2