Search in sources :

Example 26 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class BaseAnalyzerConfigurationTest method setup.

/**
 * Builds the in-memory fixture used by the analyzer tests.
 * <p>
 * First checks whether R is usable: {@code connected} is set to {@code true} when R is not configured
 * ({@code gemma.linearmodels.useR} is false) or when a connection to the configured Rserve host could be
 * opened. Any failure during that check is logged and setup continues.
 * <p>
 * Then creates one array design, one expression experiment, two categorical factors ("area" and "treat") with
 * two factor values each, eight biomaterials (two replicates for each of the four factor value combinations),
 * one bioassay per biomaterial, the experimental design, a preferred log2 quantitation type and a bioassay
 * dimension, and finally calls {@code configureVectors(biomaterials, null)}.
 *
 * @throws Exception declared for the benefit of the calls made here; failures of the R check itself are caught
 */
@Before
public void setup() throws Exception {
    try {
        if (Settings.getBoolean("gemma.linearmodels.useR")) {
            rc = RConnectionFactory.getRConnection(Settings.getString("gemma.rserve.hostname", "localhost"));
            if (rc != null && rc.isConnected()) {
                connected = true;
                /*
                 * We have to disconnect right away for test to work under Windows, where only one connection is
                 * allowed at a time. The classes under test will get their own connections.
                 */
                if (rc instanceof RServeClient) {
                    ((RServeClient) rc).disconnect();
                }
            }
        } else {
            // not using R
            connected = true;
        }
    } catch (Exception e) {
        // Setup deliberately continues; pass the exception along so the stack trace is not lost.
        log.warn(e.getMessage(), e);
    }

    /* array designs */
    arrayDesign = ArrayDesign.Factory.newInstance();
    arrayDesign.setTechnologyType(TechnologyType.ONECOLOR);
    arrayDesign.setId(1L);
    arrayDesign.setName("MG-U74Test_" + RandomStringUtils.randomAlphanumeric(12));
    arrayDesign.setPrimaryTaxon(this.getTaxon("mouse"));

    expressionExperiment = ExpressionExperiment.Factory.newInstance();
    expressionExperiment.setName("analysistest_" + RandomStringUtils.randomAlphanumeric(12));
    expressionExperiment.setId(100009L);
    expressionExperiment.setShortName(RandomStringUtils.randomAlphanumeric(12));

    /* experimental factor "area" */
    experimentalFactorA_Area = ExperimentalFactor.Factory.newInstance();
    experimentalFactorA_Area.setName("area");
    experimentalFactorA_Area.setType(FactorType.CATEGORICAL);
    experimentalFactorA_Area.setId(5001L);
    factorValueA1 = newFixtureFactorValue(1001L, "cerebellum", experimentalFactorA_Area);
    factorValueA2 = newFixtureFactorValue(1002L, "amygdala", experimentalFactorA_Area);
    // "amygdala" is the baseline level of the "area" factor
    factorValueA2.setIsBaseline(true);
    Collection<FactorValue> factorValuesA = new HashSet<>();
    factorValuesA.add(factorValueA1);
    factorValuesA.add(factorValueA2);
    experimentalFactorA_Area.getFactorValues().addAll(factorValuesA);

    /* experimental factor "treat" */
    experimentalFactorB = ExperimentalFactor.Factory.newInstance();
    experimentalFactorB.setName("treat");
    experimentalFactorB.setId(5002L);
    experimentalFactorB.setType(FactorType.CATEGORICAL);
    FactorValue factorValueB1 = newFixtureFactorValue(1003L, "pcp", experimentalFactorB);
    factorValueB2 = newFixtureFactorValue(1004L, "control_group", experimentalFactorB);
    Collection<FactorValue> factorValuesB = new HashSet<>();
    factorValuesB.add(factorValueB1);
    factorValuesB.add(factorValueB2);
    experimentalFactorB.getFactorValues().addAll(factorValuesB);

    /* set up the biomaterials: 2 replicates for each area x treat combination */
    BioMaterial biomaterial0a = newFixtureBioMaterial("0a", 100000L, factorValueA1, factorValueB1);
    BioMaterial biomaterial0b = newFixtureBioMaterial("0b", 100001L, factorValueA1, factorValueB1);
    BioMaterial biomaterial1a = newFixtureBioMaterial("1a", 100002L, factorValueA1, factorValueB2);
    BioMaterial biomaterial1b = newFixtureBioMaterial("1b", 100003L, factorValueA1, factorValueB2);
    BioMaterial biomaterial2a = newFixtureBioMaterial("2a", 100004L, factorValueA2, factorValueB1);
    BioMaterial biomaterial2b = newFixtureBioMaterial("2b", 100005L, factorValueA2, factorValueB1);
    BioMaterial biomaterial3a = newFixtureBioMaterial("3a", 100006L, factorValueA2, factorValueB2);
    BioMaterial biomaterial3b = newFixtureBioMaterial("3b", 100007L, factorValueA2, factorValueB2);
    biomaterials = new ArrayList<>();
    biomaterials.add(biomaterial0a);
    biomaterials.add(biomaterial0b);
    biomaterials.add(biomaterial1a);
    biomaterials.add(biomaterial1b);
    biomaterials.add(biomaterial2a);
    biomaterials.add(biomaterial2b);
    biomaterials.add(biomaterial3a);
    biomaterials.add(biomaterial3b);

    /* set up the bioassays: one per biomaterial, all on the same array design */
    bioAssay0a = newFixtureBioAssay("bioassay 0a", biomaterial0a, arrayDesign);
    bioAssay0b = newFixtureBioAssay("bioassay 0b", biomaterial0b, arrayDesign);
    bioAssay1a = newFixtureBioAssay("bioassay 1a", biomaterial1a, arrayDesign);
    bioAssay1b = newFixtureBioAssay("bioassay 1b", biomaterial1b, arrayDesign);
    bioAssay2a = newFixtureBioAssay("bioassay 2a", biomaterial2a, arrayDesign);
    bioAssay2b = newFixtureBioAssay("bioassay 2b", biomaterial2b, arrayDesign);
    bioAssay3a = newFixtureBioAssay("bioassay 3a", biomaterial3a, arrayDesign);
    bioAssay3b = newFixtureBioAssay("bioassay 3b", biomaterial3b, arrayDesign);
    bioAssays = new ArrayList<>();
    bioAssays.add(bioAssay0a);
    bioAssays.add(bioAssay0b);
    bioAssays.add(bioAssay1a);
    bioAssays.add(bioAssay1b);
    bioAssays.add(bioAssay2a);
    bioAssays.add(bioAssay2b);
    bioAssays.add(bioAssay3a);
    bioAssays.add(bioAssay3b);
    expressionExperiment.setBioAssays(bioAssays);

    /* experimental design: links both factors to the experiment */
    experimentalFactors = new ArrayList<>();
    experimentalFactors.add(experimentalFactorA_Area);
    experimentalFactors.add(experimentalFactorB);
    experimentalDesign = ExperimentalDesign.Factory.newInstance();
    experimentalDesign.setName("experimental design");
    experimentalDesign.setExperimentalFactors(experimentalFactors);
    expressionExperiment.setExperimentalDesign(experimentalDesign);
    experimentalFactorA_Area.setExperimentalDesign(experimentalDesign);
    experimentalFactorB.setExperimentalDesign(experimentalDesign);

    /* quantitation type: preferred, log2-scaled amounts stored as doubles */
    quantitationType = QuantitationType.Factory.newInstance();
    quantitationType.setName("quantitation type");
    quantitationType.setRepresentation(PrimitiveType.DOUBLE);
    quantitationType.setType(StandardQuantitationType.AMOUNT);
    quantitationType.setIsPreferred(true);
    quantitationType.setIsMaskedPreferred(false);
    quantitationType.setIsBackground(false);
    quantitationType.setScale(ScaleType.LOG2);
    quantitationType.setIsNormalized(false);
    quantitationType.setIsBackgroundSubtracted(false);
    quantitationType.setIsRatio(false);
    expressionExperiment.getQuantitationTypes().add(quantitationType);

    bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("test bioassay dimension");
    bioAssayDimension.setBioAssays(bioAssays);

    this.configureVectors(biomaterials, null);
}

/**
 * Creates a factor value with the given id and value, a single characteristic carrying the same value, and a
 * reference to the owning factor. The factor's own collection of values is not touched.
 */
private static FactorValue newFixtureFactorValue(long id, String value, ExperimentalFactor factor) {
    FactorValue factorValue = FactorValue.Factory.newInstance();
    factorValue.setId(id);
    factorValue.setValue(value);
    Characteristic characteristic = Characteristic.Factory.newInstance();
    characteristic.setValue(factorValue.getValue());
    Collection<Characteristic> characteristics = new HashSet<>();
    characteristics.add(characteristic);
    factorValue.setCharacteristics(characteristics);
    factorValue.setExperimentalFactor(factor);
    return factorValue;
}

/**
 * Creates a biomaterial with the given name and id that carries exactly the two supplied factor values.
 */
private static BioMaterial newFixtureBioMaterial(String name, long id, FactorValue areaValue, FactorValue treatmentValue) {
    BioMaterial bioMaterial = BioMaterial.Factory.newInstance();
    bioMaterial.setName(name);
    bioMaterial.setId(id);
    Collection<FactorValue> factorValues = new HashSet<>();
    factorValues.add(areaValue);
    factorValues.add(treatmentValue);
    bioMaterial.getFactorValues().addAll(factorValues);
    return bioMaterial;
}

/**
 * Creates a bioassay for the given sample and array design and registers it in the sample's
 * {@code getBioAssaysUsedIn()} collection so the association is navigable in both directions.
 */
private static BioAssay newFixtureBioAssay(String name, BioMaterial sample, ArrayDesign platform) {
    BioAssay bioAssay = BioAssay.Factory.newInstance();
    bioAssay.setName(name);
    bioAssay.setSampleUsed(sample);
    bioAssay.setArrayDesignUsed(platform);
    sample.getBioAssaysUsedIn().add(bioAssay);
    return bioAssay;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Characteristic(ubic.gemma.model.common.description.Characteristic) RServeClient(ubic.basecode.util.r.RServeClient) HashSet(java.util.HashSet) Before(org.junit.Before)

Example 27 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataMatrixColumnSortTest method testOrderByExperimentalDesignB.

/**
 * Orders the columns of an empty 100-sample matrix by experimental design and checks that no column is lost.
 * Samples get randomly chosen factor values for five factors (four categorical, one continuous), and some
 * samples have no value for some factors.
 */
@Test
public void testOrderByExperimentalDesignB() {
    BioAssayDimension bad = BioAssayDimension.Factory.newInstance();
    /*
     * Five factors. Factor4 is a measurement.
     */
    Collection<ExperimentalFactor> factors = new HashSet<>();
    for (int i = 0; i < 5; i++) {
        boolean continuous = i == 4;
        ExperimentalFactor ef = ExperimentalFactor.Factory.newInstance();
        // Decide the type and name once, instead of overwriting them while the values are being built.
        ef.setType(continuous ? FactorType.CONTINUOUS : FactorType.CATEGORICAL);
        ef.setName((continuous ? "mfact" : "factor") + i);
        ef.setId((long) i);
        for (int j = 0; j < 3; j++) {
            FactorValue fv = FactorValue.Factory.newInstance();
            // NOTE(review): (j + 1) * (i + 1) is not unique across factors (e.g. 2 is produced for both
            // factor0 and factor1) - confirm that colliding factor value ids are intended.
            fv.setId((long) (j + 1) * (i + 1));
            fv.setExperimentalFactor(ef);
            if (continuous) {
                fv.setValue("fv" + (j + 1) * (i + 1));
                Measurement m = Measurement.Factory.newInstance();
                m.setId((long) j * (i + 1));
                m.setValue(j + ".00");
                m.setRepresentation(PrimitiveType.DOUBLE);
                fv.setMeasurement(m);
            } else {
                // the last value of every categorical factor is the control group
                fv.setValue(j == 2 ? "control_group" : "fv" + (j + 1) * (i + 1));
            }
            // Add only once the value is fully configured, so it is never mutated while inside the collection.
            ef.getFactorValues().add(fv);
        }
        factors.add(ef);
    }
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        BioAssay ba = BioAssay.Factory.newInstance();
        ba.setName("ba" + i);
        ba.setId((long) i);
        bad.getBioAssays().add(ba);
        BioMaterial bm = BioMaterial.Factory.newInstance();
        bm.setId((long) i);
        bm.setName("bm" + i);
        ba.setSampleUsed(bm);
        for (ExperimentalFactor ef : factors) {
            /*
             * Note: if we use 4, then some of the biomaterials will not have a factorvalue for each factor. This is
             * realistic. Use 3 to fill it in completely.
             */
            int k = random.nextInt(4);
            // Pick the k-th factor value in iteration order; k == 3 is past the end and leaves toUse null.
            int m = 0;
            FactorValue toUse = null;
            for (FactorValue fv : ef.getFactorValues()) {
                if (m == k) {
                    toUse = fv;
                    break;
                }
                m++;
            }
            if (toUse != null) {
                bm.getFactorValues().add(toUse);
            }
        }
    }
    EmptyExpressionMatrix mat = new EmptyExpressionMatrix(bad);
    assertEquals(100, mat.columns());
    List<BioMaterial> ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(mat);
    assertEquals(100, ordered.size());
}
Also used : Measurement(ubic.gemma.model.common.measurement.Measurement) BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) Random(java.util.Random) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) HashSet(java.util.HashSet) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 28 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExperimentalDesignUtils method sampleInfoMatrix.

/**
 * Builds the experimental design matrix: one row per sample and one column per factor, where each cell holds
 * whatever {@code extractFactorValueForSample} yields for that sample/factor pair.
 *
 * @param factors     the factors; they become the column names, in list order
 * @param samplesUsed the samples; they become the row names, in list order
 * @param baselines   handed unchanged to {@code extractFactorValueForSample} for every cell
 * @return Experimental design matrix
 */
public static ObjectMatrix<BioMaterial, ExperimentalFactor, Object> sampleInfoMatrix(List<ExperimentalFactor> factors, List<BioMaterial> samplesUsed, Map<ExperimentalFactor, FactorValue> baselines) {
    final int sampleCount = samplesUsed.size();
    final int factorCount = factors.size();
    ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design = new ObjectMatrixImpl<>(sampleCount, factorCount);
    design.setColumnNames(factors);
    // Fill row by row; indices follow the order of the two input lists.
    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) {
        BioMaterial sample = samplesUsed.get(sampleIndex);
        for (int factorIndex = 0; factorIndex < factorCount; factorIndex++) {
            Object cell = ExperimentalDesignUtils.extractFactorValueForSample(baselines, sample, factors.get(factorIndex));
            design.set(sampleIndex, factorIndex, cell);
        }
    }
    // Row names are assigned only after every cell has been filled.
    design.setRowNames(samplesUsed);
    return design;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) ExperimentalFactorValueObject(ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject) ObjectMatrixImpl(ubic.basecode.dataStructure.matrix.ObjectMatrixImpl)

Example 29 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.

/**
 * Brings the data onto a log2 scale when it is not there already, then filters out rows that should not go into
 * the analysis. Count data is converted to log2CPM (but log2cpm is what gets stored as the processed data, so
 * that is what would generally be passed in).
 *
 * @param quantitationType QT, used together with the data to determine the current scale
 * @param dmatrix          matrix
 * @return ee data double matrix, on a log2 scale and with the filtered rows removed
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ScaleType detectedScale = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, dmatrix.getMatrix());
    ExpressionDataDoubleMatrix result = rescaleToLog2(detectedScale, dmatrix);

    /*
     * Filtering comes after the transformation: doing it the other way round caused a subtle problem that was
     * never fully tracked down (possibly round-off).
     *
     * First drop zero-variance rows. Afterwards also drop rows with many equal values even though their variance
     * is non-zero, which happens when data is "clipped" (e.g., all values under 10 set to 10).
     */
    int rowsBeforeFilter = result.rows();
    result = ExpressionExperimentFilter.zeroVarianceFilter(result);
    int removed = rowsBeforeFilter - result.rows();
    if (removed > 0) {
        ExpressionDataDoubleMatrixUtil.log.info(removed + " rows removed due to low variance");
    }

    rowsBeforeFilter = result.rows();
    // The distinct-values filter is applied only when there are more columns than COLUMNS_LIMIT.
    if (result.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        result = ExpressionExperimentFilter.tooFewDistinctValues(result, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        removed = rowsBeforeFilter - result.rows();
        if (removed > 0) {
            ExpressionDataDoubleMatrixUtil.log.info(removed + " rows removed due to too many identical values");
        }
    }
    return result;
}

/**
 * Converts the data to a log2 scale according to the scale it is currently on. For ln, log10 and linear data the
 * same matrix object is returned after calling the respective MatrixStats conversion (whose return value is not
 * used, so those presumably work in place); for count data a new matrix holding log2 counts per million is
 * returned.
 *
 * @throws UnknownLogScaleException if the scale is none of LOG2, LN, LOG10, LINEAR or COUNT
 */
private static ExpressionDataDoubleMatrix rescaleToLog2(ScaleType scale, ExpressionDataDoubleMatrix data) {
    if (scale.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
        return data;
    }
    if (scale.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(data.getMatrix(), Math.E);
        return data;
    }
    if (scale.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(data.getMatrix(), 10);
        return data;
    }
    if (scale.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(data.getMatrix());
        return data;
    }
    if (scale.equals(ScaleType.COUNT)) {
        /*
         * Since we store log2cpm this shouldn't be reached any more. We don't do it in place.
         */
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D librarySize = MatrixStats.colSums(data.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> log2cpm = MatrixStats.convertToLog2Cpm(data.getMatrix(), librarySize);
        return new ExpressionDataDoubleMatrix(data, log2cpm);
    }
    throw new UnknownLogScaleException("Can't figure out what scale the data are on");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) UnknownLogScaleException(ubic.gemma.core.analysis.preprocess.UnknownLogScaleException)

Example 30 with BioMaterial

use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.

the class ExpressionDataWriterUtils method constructBioAssayName.

/**
 * Builds the bioassay name for one column of a matrix, e.g. for use as a column label when the data is written
 * to a file. Looks up the column's biomaterial and bioassays and delegates to the overload that takes them.
 *
 * @param matrix           matrix
 * @param assayColumnIndex The column index in the matrix.
 * @return BA name
 */
public static String constructBioAssayName(ExpressionDataMatrix<?> matrix, int assayColumnIndex) {
    final BioMaterial sample = matrix.getBioMaterialForColumn(assayColumnIndex);
    final Collection<BioAssay> assays = matrix.getBioAssaysForColumn(assayColumnIndex);
    return constructBioAssayName(sample, assays);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)132 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)67 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)27 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)22 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)19 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 HashSet (java.util.HashSet)13 Test (org.junit.Test)13 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)12 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)10 InputStream (java.io.InputStream)7 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)7 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)7 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 DoubleArrayList (cern.colt.list.DoubleArrayList)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)5 ArrayList (java.util.ArrayList)5