use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class BaseAnalyzerConfigurationTest method setup.
/**
 * Builds the in-memory fixture shared by the analyzer tests: a probe for R availability, one array design, one
 * expression experiment, a 2x2 categorical design ("area" x "treat") with two replicate biomaterials per cell
 * (eight biomaterials, eight bioassays), a quantitation type and a bioassay dimension. Finishes by delegating
 * to {@code configureVectors(biomaterials, null)}.
 *
 * @throws Exception declared for the benefit of the helpers called here; failures while probing R are caught
 *                   and logged rather than propagated
 */
@Before
public void setup() throws Exception {
    try {
        if (Settings.getBoolean("gemma.linearmodels.useR")) {
            rc = RConnectionFactory.getRConnection(Settings.getString("gemma.rserve.hostname", "localhost"));
            if (rc != null && rc.isConnected()) {
                connected = true;
                /*
                 * We have to disconnect right away for test to work under Windows, where only one connection is
                 * allowed at a time. The classes under test will get their own connections.
                 */
                // rc is already known to be non-null and connected here; only the type check is needed.
                if (rc instanceof RServeClient) {
                    ((RServeClient) rc).disconnect();
                }
            }
        } else {
            // not using R
            connected = true;
        }
    } catch (Exception e) {
        // Best effort: a failed probe only logs a warning; 'connected' is left untouched in that case.
        log.warn(e.getMessage());
    }

    /* array designs */
    arrayDesign = ArrayDesign.Factory.newInstance();
    arrayDesign.setTechnologyType(TechnologyType.ONECOLOR);
    arrayDesign.setId(1L);
    arrayDesign.setName("MG-U74Test_" + RandomStringUtils.randomAlphanumeric(12));
    arrayDesign.setPrimaryTaxon(this.getTaxon("mouse"));

    /* expression experiment (random suffixes keep the names distinct between runs) */
    expressionExperiment = ExpressionExperiment.Factory.newInstance();
    expressionExperiment.setName("analysistest_" + RandomStringUtils.randomAlphanumeric(12));
    expressionExperiment.setId(100009L);
    expressionExperiment.setShortName(RandomStringUtils.randomAlphanumeric(12));

    /* experimental factor "area": levels "cerebellum" (A1) and "amygdala" (A2, flagged as baseline) */
    experimentalFactorA_Area = ExperimentalFactor.Factory.newInstance();
    experimentalFactorA_Area.setName("area");
    experimentalFactorA_Area.setType(FactorType.CATEGORICAL);
    experimentalFactorA_Area.setId(5001L);
    Collection<FactorValue> factorValuesA = new HashSet<>();

    factorValueA1 = FactorValue.Factory.newInstance();
    factorValueA1.setId(1001L);
    factorValueA1.setValue("cerebellum");
    Characteristic characteristicA1 = Characteristic.Factory.newInstance();
    characteristicA1.setValue(factorValueA1.getValue());
    Collection<Characteristic> characteristicsA1 = new HashSet<>();
    characteristicsA1.add(characteristicA1);
    factorValueA1.setCharacteristics(characteristicsA1);
    factorValueA1.setExperimentalFactor(experimentalFactorA_Area);

    factorValueA2 = FactorValue.Factory.newInstance();
    factorValueA2.setIsBaseline(true);
    factorValueA2.setValue("amygdala");
    factorValueA2.setId(1002L);
    Characteristic characteristicA2 = Characteristic.Factory.newInstance();
    characteristicA2.setValue(factorValueA2.getValue());
    Collection<Characteristic> characteristicsA2 = new HashSet<>();
    characteristicsA2.add(characteristicA2);
    factorValueA2.setCharacteristics(characteristicsA2);
    factorValueA2.setExperimentalFactor(experimentalFactorA_Area);

    factorValuesA.add(factorValueA1);
    factorValuesA.add(factorValueA2);
    experimentalFactorA_Area.getFactorValues().addAll(factorValuesA);

    /* experimental factor "treat": levels "pcp" (B1) and "control_group" (B2) */
    experimentalFactorB = ExperimentalFactor.Factory.newInstance();
    experimentalFactorB.setName("treat");
    experimentalFactorB.setId(5002L);
    experimentalFactorB.setType(FactorType.CATEGORICAL);
    Collection<FactorValue> factorValuesB = new HashSet<>();

    FactorValue factorValueB1 = FactorValue.Factory.newInstance();
    factorValueB1.setValue("pcp");
    factorValueB1.setId(1003L);
    Characteristic characteristicB1 = Characteristic.Factory.newInstance();
    characteristicB1.setValue(factorValueB1.getValue());
    Collection<Characteristic> characteristicsB1 = new HashSet<>();
    characteristicsB1.add(characteristicB1);
    factorValueB1.setCharacteristics(characteristicsB1);
    factorValueB1.setExperimentalFactor(experimentalFactorB);

    factorValueB2 = FactorValue.Factory.newInstance();
    factorValueB2.setValue("control_group");
    factorValueB2.setId(1004L);
    Characteristic characteristicB2 = Characteristic.Factory.newInstance();
    characteristicB2.setValue(factorValueB2.getValue());
    Collection<Characteristic> characteristicsB2 = new HashSet<>();
    characteristicsB2.add(characteristicB2);
    factorValueB2.setCharacteristics(characteristicsB2);
    factorValueB2.setExperimentalFactor(experimentalFactorB);

    factorValuesB.add(factorValueB1);
    factorValuesB.add(factorValueB2);
    experimentalFactorB.getFactorValues().addAll(factorValuesB);

    /* set up the biomaterials: group N holds replicates Na and Nb sharing one (area, treat) combination */
    biomaterials = new ArrayList<>();

    // 2 replicates: cerebellum + pcp
    BioMaterial biomaterial0a = BioMaterial.Factory.newInstance();
    biomaterial0a.setName("0a");
    Collection<FactorValue> factorValuesForBioMaterial0 = new HashSet<>();
    factorValuesForBioMaterial0.add(factorValueA1);
    factorValuesForBioMaterial0.add(factorValueB1);
    biomaterial0a.getFactorValues().addAll(factorValuesForBioMaterial0);
    BioMaterial biomaterial0b = BioMaterial.Factory.newInstance();
    biomaterial0b.setName("0b");
    biomaterial0b.getFactorValues().addAll(factorValuesForBioMaterial0);

    // 2 replicates: cerebellum + control_group
    BioMaterial biomaterial1a = BioMaterial.Factory.newInstance();
    biomaterial1a.setName("1a");
    Collection<FactorValue> factorValuesForBioMaterial1 = new HashSet<>();
    factorValuesForBioMaterial1.add(factorValueA1);
    factorValuesForBioMaterial1.add(factorValueB2);
    biomaterial1a.getFactorValues().addAll(factorValuesForBioMaterial1);
    BioMaterial biomaterial1b = BioMaterial.Factory.newInstance();
    biomaterial1b.setName("1b");
    biomaterial1b.getFactorValues().addAll(factorValuesForBioMaterial1);

    // 2 replicates: amygdala + pcp
    BioMaterial biomaterial2a = BioMaterial.Factory.newInstance();
    biomaterial2a.setName("2a");
    Collection<FactorValue> factorValuesForBioMaterial2 = new HashSet<>();
    factorValuesForBioMaterial2.add(factorValueA2);
    factorValuesForBioMaterial2.add(factorValueB1);
    biomaterial2a.getFactorValues().addAll(factorValuesForBioMaterial2);
    BioMaterial biomaterial2b = BioMaterial.Factory.newInstance();
    biomaterial2b.setName("2b");
    biomaterial2b.getFactorValues().addAll(factorValuesForBioMaterial2);

    // 2 replicates: amygdala + control_group
    BioMaterial biomaterial3a = BioMaterial.Factory.newInstance();
    biomaterial3a.setName("3a");
    Collection<FactorValue> factorValuesForBioMaterial3 = new HashSet<>();
    factorValuesForBioMaterial3.add(factorValueA2);
    factorValuesForBioMaterial3.add(factorValueB2);
    biomaterial3a.getFactorValues().addAll(factorValuesForBioMaterial3);
    BioMaterial biomaterial3b = BioMaterial.Factory.newInstance();
    biomaterial3b.setName("3b");
    biomaterial3b.getFactorValues().addAll(factorValuesForBioMaterial3);

    biomaterial0a.setId(100000L);
    biomaterial0b.setId(100001L);
    biomaterial1a.setId(100002L);
    biomaterial1b.setId(100003L);
    biomaterial2a.setId(100004L);
    biomaterial2b.setId(100005L);
    biomaterial3a.setId(100006L);
    biomaterial3b.setId(100007L);

    biomaterials.add(biomaterial0a);
    biomaterials.add(biomaterial0b);
    biomaterials.add(biomaterial1a);
    biomaterials.add(biomaterial1b);
    biomaterials.add(biomaterial2a);
    biomaterials.add(biomaterial2b);
    biomaterials.add(biomaterial3a);
    biomaterials.add(biomaterial3b);

    /* set up the bioassays: one per biomaterial, all on the same array design */
    bioAssay0a = BioAssay.Factory.newInstance();
    bioAssay0a.setName("bioassay 0a");
    bioAssay0a.setSampleUsed(biomaterial0a);
    bioAssay0a.setArrayDesignUsed(arrayDesign);

    bioAssay0b = BioAssay.Factory.newInstance();
    bioAssay0b.setName("bioassay 0b");
    bioAssay0b.setSampleUsed(biomaterial0b);
    bioAssay0b.setArrayDesignUsed(arrayDesign);

    bioAssay1a = BioAssay.Factory.newInstance();
    bioAssay1a.setName("bioassay 1a");
    bioAssay1a.setSampleUsed(biomaterial1a);
    bioAssay1a.setArrayDesignUsed(arrayDesign);

    bioAssay1b = BioAssay.Factory.newInstance();
    bioAssay1b.setName("bioassay 1b");
    bioAssay1b.setSampleUsed(biomaterial1b);
    bioAssay1b.setArrayDesignUsed(arrayDesign);

    bioAssay2a = BioAssay.Factory.newInstance();
    bioAssay2a.setName("bioassay 2a");
    bioAssay2a.setSampleUsed(biomaterial2a);
    bioAssay2a.setArrayDesignUsed(arrayDesign);

    bioAssay2b = BioAssay.Factory.newInstance();
    bioAssay2b.setName("bioassay 2b");
    bioAssay2b.setSampleUsed(biomaterial2b);
    bioAssay2b.setArrayDesignUsed(arrayDesign);

    bioAssay3a = BioAssay.Factory.newInstance();
    bioAssay3a.setName("bioassay 3a");
    bioAssay3a.setSampleUsed(biomaterial3a);
    bioAssay3a.setArrayDesignUsed(arrayDesign);

    bioAssay3b = BioAssay.Factory.newInstance();
    bioAssay3b.setName("bioassay 3b");
    bioAssay3b.setSampleUsed(biomaterial3b);
    bioAssay3b.setArrayDesignUsed(arrayDesign);

    bioAssays = new ArrayList<>();
    bioAssays.add(bioAssay0a);
    bioAssays.add(bioAssay0b);
    bioAssays.add(bioAssay1a);
    bioAssays.add(bioAssay1b);
    bioAssays.add(bioAssay2a);
    bioAssays.add(bioAssay2b);
    bioAssays.add(bioAssay3a);
    bioAssays.add(bioAssay3b);

    // Back-references from each biomaterial to the bioassay that uses it.
    biomaterial0a.getBioAssaysUsedIn().add(bioAssay0a);
    biomaterial0b.getBioAssaysUsedIn().add(bioAssay0b);
    biomaterial1a.getBioAssaysUsedIn().add(bioAssay1a);
    biomaterial1b.getBioAssaysUsedIn().add(bioAssay1b);
    biomaterial2a.getBioAssaysUsedIn().add(bioAssay2a);
    biomaterial2b.getBioAssaysUsedIn().add(bioAssay2b);
    biomaterial3a.getBioAssaysUsedIn().add(bioAssay3a);
    biomaterial3b.getBioAssaysUsedIn().add(bioAssay3b);

    expressionExperiment.setBioAssays(bioAssays);

    /* experimental design tying both factors to the experiment */
    experimentalFactors = new ArrayList<>();
    experimentalFactors.add(experimentalFactorA_Area);
    experimentalFactors.add(experimentalFactorB);

    experimentalDesign = ExperimentalDesign.Factory.newInstance();
    experimentalDesign.setName("experimental design");
    experimentalDesign.setExperimentalFactors(experimentalFactors);
    expressionExperiment.setExperimentalDesign(experimentalDesign);
    experimentalFactorA_Area.setExperimentalDesign(experimentalDesign);
    experimentalFactorB.setExperimentalDesign(experimentalDesign);

    /* quantitation type: preferred, log2-scale, double-valued amounts */
    quantitationType = QuantitationType.Factory.newInstance();
    quantitationType.setName("quantitation type");
    quantitationType.setRepresentation(PrimitiveType.DOUBLE);
    quantitationType.setType(StandardQuantitationType.AMOUNT);
    quantitationType.setIsPreferred(true);
    quantitationType.setIsMaskedPreferred(false);
    quantitationType.setIsBackground(false);
    quantitationType.setScale(ScaleType.LOG2);
    quantitationType.setIsNormalized(false);
    quantitationType.setIsBackgroundSubtracted(false);
    quantitationType.setIsRatio(false);
    expressionExperiment.getQuantitationTypes().add(quantitationType);

    /* bioassay dimension over all eight bioassays */
    bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("test bioassay dimension");
    bioAssayDimension.setBioAssays(bioAssays);

    this.configureVectors(biomaterials, null);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExpressionDataMatrixColumnSortTest method testOrderByExperimentalDesignB.
/**
 * Builds 100 bioassays whose biomaterials carry randomly chosen (and sometimes missing) factor values across
 * five factors, then checks that ordering by experimental design keeps all 100 samples.
 */
@Test
public void testOrderByExperimentalDesignB() {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();

    /*
     * Five factors with three values each. The factor at index 4 is a measurement: it is switched to
     * CONTINUOUS and each of its values carries a Measurement.
     */
    Collection<ExperimentalFactor> factors = new HashSet<>();
    for (int i = 0; i < 5; i++) {
        boolean measurementFactor = i == 4;

        ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
        factor.setType(FactorType.CATEGORICAL);
        factor.setName((measurementFactor ? "mfact" : "factor") + i);
        factor.setId((long) i);

        for (int j = 0; j < 3; j++) {
            int product = (j + 1) * (i + 1);

            FactorValue value = FactorValue.Factory.newInstance();
            value.setValue("fv" + product);
            value.setId((long) product);
            value.setExperimentalFactor(factor);
            factor.getFactorValues().add(value);

            if (measurementFactor) {
                factor.setType(FactorType.CONTINUOUS);
                Measurement measurement = Measurement.Factory.newInstance();
                measurement.setId((long) j * (i + 1));
                measurement.setValue(j + ".00");
                measurement.setRepresentation(PrimitiveType.DOUBLE);
                value.setMeasurement(measurement);
            } else if (j == 2) {
                // The third value of every categorical factor is renamed to the control group.
                value.setValue("control_group");
            }
        }
        factors.add(factor);
    }

    Random rng = new Random();
    for (int i = 0; i < 100; i++) {
        BioMaterial sample = BioMaterial.Factory.newInstance();
        sample.setId((long) i);
        sample.setName("bm" + i);

        BioAssay assay = BioAssay.Factory.newInstance();
        assay.setName("ba" + i);
        assay.setId((long) i);
        dimension.getBioAssays().add(assay);
        assay.setSampleUsed(sample);

        for (ExperimentalFactor factor : factors) {
            /*
             * Note: with a bound of 4, the draw can land past the three available values, so some
             * biomaterials end up without a factor value for this factor. This is realistic. Use 3 to fill
             * it in completely.
             */
            int remaining = rng.nextInt(4);
            for (FactorValue candidate : factor.getFactorValues()) {
                if (remaining == 0) {
                    sample.getFactorValues().add(candidate);
                    break;
                }
                remaining--;
            }
        }
    }

    EmptyExpressionMatrix matrix = new EmptyExpressionMatrix(dimension);
    assertEquals(100, matrix.columns());

    List<BioMaterial> ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(matrix);
    assertEquals(100, ordered.size());
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExperimentalDesignUtils method sampleInfoMatrix.
/**
 * Builds the experimental design matrix: one row per sample, one column per factor. Each cell holds whatever
 * {@code extractFactorValueForSample} yields for that sample/factor pair.
 *
 * @param factors     factors, used as the column names in the given order
 * @param samplesUsed samples, used as the row names in the given order
 * @param baselines   per-factor baseline values, passed through to {@code extractFactorValueForSample}
 * @return Experimental design matrix
 */
public static ObjectMatrix<BioMaterial, ExperimentalFactor, Object> sampleInfoMatrix(List<ExperimentalFactor> factors, List<BioMaterial> samplesUsed, Map<ExperimentalFactor, FactorValue> baselines) {
    final int sampleCount = samplesUsed.size();
    final int factorCount = factors.size();

    ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design = new ObjectMatrixImpl<>(sampleCount, factorCount);
    design.setColumnNames(factors);

    for (int r = 0; r < sampleCount; r++) {
        BioMaterial sample = samplesUsed.get(r);
        for (int c = 0; c < factorCount; c++) {
            design.set(r, c, ExperimentalDesignUtils.extractFactorValueForSample(baselines, sample, factors.get(c)));
        }
    }

    // Row names are attached only after every cell has been filled.
    design.setRowNames(samplesUsed);
    return design;
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.
/**
 * Puts the data on a log2 scale when it is not already, then filters rows before analysis. Count data is
 * converted to log2CPM (but we store log2cpm as the processed data, so that is what would generally be used).
 * <p>
 * For ln, log10 and linear input the conversion result is not reassigned, so those calls are relied upon to
 * modify the matrix held by {@code dmatrix}; only the count branch builds a new matrix object.
 *
 * @param quantitationType QT, consulted when working out the current scale
 * @param dmatrix          matrix
 * @return ee data double matrix after conversion and row filtering
 * @throws UnknownLogScaleException if the detected scale is none of LOG2, LN, LOG10, LINEAR or COUNT
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    final ScaleType detectedScale = findScale(quantitationType, dmatrix.getMatrix());

    if (detectedScale.equals(ScaleType.LOG2)) {
        log.info("Data is already on a log2 scale");
    } else if (detectedScale.equals(ScaleType.LN)) {
        log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), Math.E);
    } else if (detectedScale.equals(ScaleType.LOG10)) {
        log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(dmatrix.getMatrix(), 10);
    } else if (detectedScale.equals(ScaleType.LINEAR)) {
        log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(dmatrix.getMatrix());
    } else if (detectedScale.equals(ScaleType.COUNT)) {
        /*
         * Since we store log2cpm this shouldn't be reached any more. We don't do it in place.
         */
        log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D columnTotals = MatrixStats.colSums(dmatrix.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpm = MatrixStats.convertToLog2Cpm(dmatrix.getMatrix(), columnTotals);
        dmatrix = new ExpressionDataDoubleMatrix(dmatrix, cpm);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }

    /*
     * Filtering happens after the scale conversion because doing it first causes some kind of subtle problem
     * (round off? it was never really tracked down).
     *
     * Remove zero-variance rows, but also rows that have lots of equal values even if variance is non-zero.
     * This happens when data is "clipped" (e.g., all values under 10 set to 10).
     */
    final int rowsBeforeVarianceFilter = dmatrix.rows();
    dmatrix = ExpressionExperimentFilter.zeroVarianceFilter(dmatrix);
    final int droppedForVariance = rowsBeforeVarianceFilter - dmatrix.rows();
    if (droppedForVariance > 0) {
        log.info(droppedForVariance + " rows removed due to low variance");
    }

    // The distinct-values filter is only applied when there are more columns than COLUMNS_LIMIT.
    final int rowsBeforeDistinctFilter = dmatrix.rows();
    if (dmatrix.columns() > COLUMNS_LIMIT) {
        dmatrix = ExpressionExperimentFilter.tooFewDistinctValues(dmatrix, VALUES_LIMIT);
        final int droppedForRepeats = rowsBeforeDistinctFilter - dmatrix.rows();
        if (droppedForRepeats > 0) {
            log.info(droppedForRepeats + " rows removed due to too many identical values");
        }
    }

    return dmatrix;
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExpressionDataWriterUtils method constructBioAssayName.
/**
 * Constructs a bioassay name for one column of a matrix. This is useful when writing out data to a file.
 * Looks up the column's biomaterial and bioassays and hands them to the
 * {@code constructBioAssayName(BioMaterial, Collection<BioAssay>)} overload.
 *
 * @param matrix           matrix
 * @param assayColumnIndex The column index in the matrix.
 * @return BA name
 */
public static String constructBioAssayName(ExpressionDataMatrix<?> matrix, int assayColumnIndex) {
    return constructBioAssayName(
            matrix.getBioMaterialForColumn(assayColumnIndex),
            matrix.getBioAssaysForColumn(assayColumnIndex));
}
Aggregations