Search in sources :

Example 11 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method print.

/**
 * Debugging aid: dumps the given vectors to standard error as a tab-delimited table.
 * <p>
 * The first printed row lists the bioassays of the BioAssayDimension taken from the first vector in
 * {@code calls}; each following row shows a vector's design element followed by its data decoded as booleans.
 *
 * @param calls vectors to print; the first element is read unconditionally, so an empty collection makes
 *              {@code iterator().next()} fail
 */
@SuppressWarnings("unused")
private void print(Collection<RawExpressionDataVector> calls) {
    ByteArrayConverter converter = new ByteArrayConverter();
    // The header row comes from the first vector's dimension only.
    BioAssayDimension headerDimension = calls.iterator().next().getBioAssayDimension();

    System.err.print("\n");
    for (BioAssay assay : headerDimension.getBioAssays()) {
        System.err.print("\t" + assay);
    }
    System.err.print("\n");

    for (DesignElementDataVector vector : calls) {
        System.err.print(vector.getDesignElement());
        boolean[] flags = converter.byteArrayToBooleans(vector.getData());
        for (int col = 0; col < flags.length; col++) {
            System.err.print("\t" + flags[col]);
        }
        System.err.print("\n");
    }
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 12 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesTest method testMissingValue.

/**
 * Parses the bundled GSE2221 SOFT file, converts and persists the resulting experiment, computes missing-value
 * calls for it and spot-checks two design elements.
 * <p>
 * Expectations: 500 vectors are returned, and for the bioassay named "expression array ME-TMZ" the call for
 * design element "27" is {@code true} while the call for "26" is {@code false}.
 *
 * @throws Exception if reading, parsing or persisting the test data fails
 */
@Test
public void testMissingValue() throws Exception {
    // Remove any copy left behind by an earlier run so the experiment is loaded fresh.
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null) {
        eeService.remove(old);
    }

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources closes both the resource stream and the gzip wrapper, even if parsing throws.
    try (InputStream raw = this.getClass().getResourceAsStream("/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz");
            InputStream is = new GZIPInputStream(raw)) {
        parser.parse(is);
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();

    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean foundA = false;
    boolean foundB = false;
    for (DesignElementDataVector vector : calls) {
        String elementName = vector.getDesignElement().getName();
        boolean expectFalse = elementName.equals("26");
        boolean expectTrue = elementName.equals("27");
        if (!expectFalse && !expectTrue) {
            continue;
        }
        boolean[] row = bac.byteArrayToBooleans(vector.getData());
        // Position i in the decoded row is matched to the i-th bioassay of the dimension.
        int i = 0;
        for (BioAssay bas : dim.getBioAssays()) {
            if (bas.getName().equals("expression array ME-TMZ")) {
                if (expectFalse) {
                    assertTrue(!row[i]);
                    foundA = true;
                } else {
                    assertTrue(row[i]);
                    foundB = true;
                }
            }
            i++;
        }
    }
    // Both probes must actually have been seen, otherwise the checks above were vacuous.
    assertTrue(foundA && foundB);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) GeoSeries(ubic.gemma.core.loader.expression.geo.model.GeoSeries) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 13 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSortTest method testOrderByExperimentalDesignB.

/**
 * Builds a synthetic design of five factors and 100 bioassays with randomly assigned factor values, wraps it in
 * an {@code EmptyExpressionMatrix} and checks that {@code orderByExperimentalDesign} returns all 100
 * biomaterials.
 */
@Test
public void testOrderByExperimentalDesignB() {
    BioAssayDimension bad = BioAssayDimension.Factory.newInstance();

    /*
     * Five factors. Factor4 is a measurement.
     */
    Collection<ExperimentalFactor> factors = new HashSet<>();
    for (int factorIdx = 0; factorIdx < 5; factorIdx++) {
        boolean measurementFactor = factorIdx == 4;

        ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
        factor.setType(FactorType.CATEGORICAL);
        factor.setName("factor" + factorIdx);
        if (measurementFactor) {
            factor.setName("mfact" + factorIdx);
        }
        factor.setId((long) factorIdx);

        for (int valueIdx = 0; valueIdx < 3; valueIdx++) {
            // Same product is used for both the label and the id of the factor value.
            int labelNumber = (valueIdx + 1) * (factorIdx + 1);

            FactorValue value = FactorValue.Factory.newInstance();
            value.setValue("fv" + labelNumber);
            value.setId((long) labelNumber);
            value.setExperimentalFactor(factor);
            factor.getFactorValues().add(value);

            // The last value of every non-measurement factor is relabelled as the control group.
            if (valueIdx == 2 && !measurementFactor) {
                value.setValue("control_group");
            }

            if (measurementFactor) {
                factor.setType(FactorType.CONTINUOUS);
                Measurement measurement = Measurement.Factory.newInstance();
                measurement.setId((long) valueIdx * (factorIdx + 1));
                measurement.setValue(valueIdx + ".00");
                measurement.setRepresentation(PrimitiveType.DOUBLE);
                value.setMeasurement(measurement);
            }
        }
        factors.add(factor);
    }

    Random random = new Random();
    for (int assayIdx = 0; assayIdx < 100; assayIdx++) {
        BioAssay assay = BioAssay.Factory.newInstance();
        assay.setName("ba" + assayIdx);
        assay.setId((long) assayIdx);
        bad.getBioAssays().add(assay);

        BioMaterial material = BioMaterial.Factory.newInstance();
        material.setId((long) assayIdx);
        material.setName("bm" + assayIdx);
        assay.setSampleUsed(material);

        for (ExperimentalFactor factor : factors) {
            /*
             * Note: if we use 4, then some of the biomaterials will not have a factorvalue for each factor. This
             * is realistic. Use 3 to fill it in completely.
             */
            int stepsLeft = random.nextInt(4);
            FactorValue chosen = null;
            // Walk the factor's values and take the one at the drawn position, if there is one.
            for (FactorValue candidate : factor.getFactorValues()) {
                if (stepsLeft == 0) {
                    chosen = candidate;
                    break;
                }
                stepsLeft--;
            }
            if (chosen != null) {
                material.getFactorValues().add(chosen);
            }
        }
    }

    EmptyExpressionMatrix mat = new EmptyExpressionMatrix(bad);
    assertEquals(100, mat.columns());

    List<BioMaterial> ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(mat);
    assertEquals(100, ordered.size());
}
Also used : Measurement(ubic.gemma.model.common.measurement.Measurement) BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) Random(java.util.Random) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) HashSet(java.util.HashSet) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 14 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class BaseExpressionDataMatrix method getBestBioAssayDimension.

/**
 * Picks the BioAssayDimension that should represent this matrix.
 * <p>
 * With a single distinct dimension, that dimension is returned. With several, the one with the most bioassays
 * is chosen, and it is then verified that its bioassays use every biomaterial that appears in any of the
 * dimensions.
 *
 * @return the only dimension, or the largest one when it covers all biomaterials
 * @throws IllegalStateException if there are several dimensions and the largest one does not include all
 *                               biomaterials used across them
 */
@Override
public BioAssayDimension getBestBioAssayDimension() {
    // De-duplicate: the same dimension may be registered under several keys.
    Collection<BioAssayDimension> dims = new HashSet<>(this.bioAssayDimensions.values());
    // NOTE(review): fails with NoSuchElementException if no dimensions are registered, as before.
    BioAssayDimension b = dims.iterator().next();
    if (dims.size() > 1) {
        /*
         * Special complication if there is more than one BioAssayDimension
         */
        int s = -1;
        Collection<BioMaterial> allBioMaterials = new HashSet<>();
        // find the largest BioAssayDimension
        for (BioAssayDimension bioAssayDimension : dims) {
            if (bioAssayDimension.getBioAssays().size() > s) {
                s = bioAssayDimension.getBioAssays().size();
                b = bioAssayDimension;
            }
            // Collect samples from EVERY dimension (not just the current best), so the check below
            // can detect samples that the chosen dimension is missing.
            for (BioAssay ba : bioAssayDimension.getBioAssays()) {
                allBioMaterials.add(ba.getSampleUsed());
            }
        }

        Collection<BioMaterial> coveredBioMaterials = new HashSet<>();
        for (BioAssay ba : b.getBioAssays()) {
            coveredBioMaterials.add(ba.getSampleUsed());
        }
        if (!coveredBioMaterials.containsAll(allBioMaterials)) {
            /*
             * In rare cases none of the usual ones has all the samples.
             *
             * This can also happen if the data are not sample-matched or vector-merged
             */
            throw new IllegalStateException("Could not find an appropriate BioAssayDimension to represent the data matrix; data might need to be matched or merged");
        }
    }
    return b;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 15 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class BaseExpressionDataMatrix method setUpColumnElements.

/**
 * Assigns a matrix column to every biomaterial and records which bioassays belong to each column.
 * <p>
 * Note: current versions of Gemma require a single BioAssayDimension, which makes this code more complex than
 * strictly necessary. When an experiment has multiple BioAssayDimensions (due to multiple arrays), the vectors
 * are merged (e.g., needed in the last case shown below). The issue of having multiple "BioMaterials" per
 * "BioAssay" still exists, however.
 * <p>
 * The method copes with bioassay dimensions that differ in size and that need not even overlap in the
 * biomaterials used. With a single BioAssayDimension this reduces to associating each column with a bioassay
 * (though we are forced to use an integer under the hood).
 * <p>
 * In the diagrams below "-" is a biomaterial and "*" is a bioassay. Each row of "*" represents samples run on a
 * different microarray design (a different bio assay material). The examples assume a single biomaterial
 * dimension.
 * <pre>
 * ---------------
 * *****              -- only a few samples run on this platform
 *  **********        -- ditto
 *            ****    -- these samples were not run on any of the other platforms .
 * </pre>
 * <p>
 * A simpler case:
 * </p>
 * <pre>
 * ---------------
 * ***************
 * ***********
 * *******
 * </pre>
 * <p>
 * A more typical and easy case (one microarray design used):
 * </p>
 * <pre>
 * ----------------
 * ****************
 * </pre>
 * <p>
 * Every sample run on two different array designs:
 * </p>
 * <pre>
 * ----------------
 * ****************
 * ****************
 * </pre>
 * <p>
 * Every sample run on a different array design:
 * </p>
 * <pre>
 * -----------------------
 * ******
 *       *********
 *                ********
 * </pre>
 * <p>
 * Since the bioassay dimensions may overlap only partly or not at all, the matrix dimensions cannot be assumed
 * to be those of the longest BioAssayDimension. Later processing fixes this possible lack of overlap by sample
 * matching or vector merging; this class has to deal with the general case.
 * </p>
 *
 * @return the number of columns, i.e. the number of entries in the column-to-biomaterial map
 */
int setUpColumnElements() {
    BaseExpressionDataMatrix.log.debug("Setting up column elements");
    assert this.bioAssayDimensions != null && this.bioAssayDimensions.size() > 0 : "No bioAssayDimensions defined";

    // Group the assays of all dimensions by biomaterial; insertion order determines column order.
    Map<BioMaterial, Collection<BioAssay>> bioMaterialMap = new LinkedHashMap<>();
    for (BioAssayDimension dimension : this.bioAssayDimensions.values()) {
        List<BioAssay> bioAssays = dimension.getBioAssays();
        BaseExpressionDataMatrix.log.debug("Processing: " + dimension + " with " + bioAssays.size() + " assays");
        this.getBioMaterialGroupsForAssays(bioMaterialMap, bioAssays);
    }
    if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
        BaseExpressionDataMatrix.log.debug(bioMaterialMap.size() + " biomaterialGroups (correspond to columns)");
    }

    int column = 0;
    for (Map.Entry<BioMaterial, Collection<BioAssay>> group : bioMaterialMap.entrySet()) {
        BioMaterial bioMaterial = group.getKey();
        if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
            BaseExpressionDataMatrix.log.debug("Column " + column + " **--->>>> " + bioMaterial);
        }

        for (BioAssay assay : group.getValue()) {
            final int targetColumn;
            if (this.columnBioMaterialMap.containsKey(bioMaterial)) {
                // Biomaterial already has a column: reuse it for this assay.
                targetColumn = columnBioMaterialMap.get(bioMaterial);
                if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
                    BaseExpressionDataMatrix.log.debug(assay + " --> column " + targetColumn);
                }
            } else {
                // First assay seen for this biomaterial: claim the current column in both directions.
                targetColumn = column;
                if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
                    BaseExpressionDataMatrix.log.debug(bioMaterial + " --> column " + column);
                    BaseExpressionDataMatrix.log.debug(assay + " --> column " + column);
                }
                this.columnBioMaterialMap.put(bioMaterial, column);
                columnBioMaterialMapByInteger.put(column, bioMaterial);
            }

            this.columnAssayMap.put(assay, targetColumn);
            if (columnBioAssayMapByInteger.get(targetColumn) == null) {
                columnBioAssayMapByInteger.put(targetColumn, new HashSet<BioAssay>());
            }
            columnBioAssayMapByInteger.get(targetColumn).add(assay);
        }
        column++;
    }

    if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
        for (BioAssay o : this.columnAssayMap.keySet()) {
            BaseExpressionDataMatrix.log.debug(o + " " + this.columnAssayMap.get(o));
        }
    }

    assert bioMaterialMap.size() == columnBioMaterialMapByInteger.keySet().size();
    return columnBioMaterialMapByInteger.keySet().size();
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2