use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class GeoDatasetServiceTest method testFetchAndLoadMultiChipPerSeriesShort.
/**
 * Loads series GSE674 from the local "shortTest" fixture directory and checks the resulting experiment: the
 * number of factor values on the first experimental factor, the dimensions of the preferred data matrix, and
 * two individual matrix values.
 * <p>
 * If the load reports that the data already exists in the system, the test logs a message and returns without
 * asserting anything.
 */
@Test
public void testFetchAndLoadMultiChipPerSeriesShort() throws Exception {
    geoService.setGeoDomainObjectGenerator(
            new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("shortTest")));

    /*
     * HG-U133A. GDS473 is for the other chip (B). Series is GSE674. see
     * http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gds&term=GSE674[Accession]&cmd=search
     */
    ExpressionExperiment experiment;
    try {
        Collection<?> loaded = geoService.fetchAndLoad("GSE674", false, true, false);
        experiment = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        log.info("Skipping test, data already exists in db");
        return;
    }
    assertNotNull(experiment);
    experiment = eeService.thaw(experiment);

    /*
     * Test for bug 468 (merging of subsets across GDS's): the first factor must have 2 values; otherwise get 4.
     */
    ExperimentalFactor firstFactor = experiment.getExperimentalDesign().getExperimentalFactors().iterator().next();
    int factorValueCount = firstFactor.getFactorValues().size();
    assertEquals(2, factorValueCount);

    // Thaw the raw vectors before building the matrix from them.
    Collection<RawExpressionDataVector> rawVectors = experiment.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(rawVectors);
    ExpressionDataMatrix<Double> preferred = new ExpressionDataMatrixBuilder(rawVectors).getPreferredData();
    assertNotNull(preferred);
    assertEquals(31, preferred.rows());
    assertEquals(15, preferred.columns());

    // GSM10363 = D1-U133B
    this.testMatrixValue(experiment, preferred, "200000_s_at", "GSM10363", 5722.0);
    // GSM10380 = C7-U133A
    this.testMatrixValue(experiment, preferred, "1007_s_at", "GSM10380", 1272.0);
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class GeoDatasetServiceTest method testMatrixValue.
/**
 * Asserts that {@code matrix} holds {@code expectedValue} (within 0.00001) for the given probe and sample.
 * <p>
 * The probe and the sample are located by scanning the experiment's raw expression data vectors: the probe is
 * matched on the design element's name and the sample on the accession string of a bioassay in the vector's
 * bioassay dimension. If several entries match, the last one encountered is used. The test fails with a message
 * naming whichever of the two could not be found.
 *
 * @param exp           experiment whose raw vectors are searched
 * @param matrix        matrix to read the value from
 * @param probeToTest   name of the design element (probe) to look up
 * @param sampleToTest  accession of the bioassay (sample) to look up
 * @param expectedValue value expected at (probe, sample)
 */
private void testMatrixValue(ExpressionExperiment exp, ExpressionDataMatrix<Double> matrix, String probeToTest, String sampleToTest, double expectedValue) {
    CompositeSequence soughtDesignElement = null;
    BioAssay soughtBioAssay = null;
    Collection<RawExpressionDataVector> vectors = exp.getRawExpressionDataVectors();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence de = vector.getDesignElement();
        // Literal-first equals so a design element without a name is skipped rather than throwing.
        if (probeToTest.equals(de.getName())) {
            soughtDesignElement = de;
        }
        BioAssayDimension bad = vector.getBioAssayDimension();
        for (BioAssay ba : bad.getBioAssays()) {
            // A bioassay without an accession cannot be the one we are looking for.
            if (ba.getAccession() != null && sampleToTest.equals(ba.getAccession().getAccession())) {
                soughtBioAssay = ba;
            }
        }
    }
    // Report the two lookups separately so the failure says which one is actually missing.
    if (soughtDesignElement == null) {
        fail("didn't find probe " + probeToTest + " (while looking for sample " + sampleToTest + ")");
    }
    if (soughtBioAssay == null) {
        fail("didn't find sample " + sampleToTest + " (while looking for probe " + probeToTest + ")");
    }
    Double actualValue = matrix.get(soughtDesignElement, soughtBioAssay);
    assertNotNull("No value for " + soughtBioAssay, actualValue);
    assertEquals(expectedValue, actualValue, 0.00001);
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesTest method testMissingValueGSE56.
/**
 * GSE56 is corrupt: there is no Channel 1 signal value in the data file.
 * <p>
 * Parses the GSE56 SOFT fixture, converts and persists the resulting experiment, and checks that
 * {@code computeMissingValues} returns 10 vectors for it. Any previously stored experiment with short name
 * "GSE56" is removed first.
 */
@Test
public void testMissingValueGSE56() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE56");
    if (old != null) {
        eeService.remove(old);
    }

    String softFile = "/data/loader/expression/geo/GSE56Short/GSE56_family.soft.gz";
    GeoFamilyParser parser = new GeoFamilyParser();
    // Close both the classpath stream and the gzip wrapper once parsing is done.
    try (InputStream raw = this.getClass().getResourceAsStream(softFile)) {
        // getResourceAsStream returns null for a missing resource; fail clearly instead of with an NPE.
        assertNotNull("Test resource not found: " + softFile, raw);
        try (InputStream is = new GZIPInputStream(raw)) {
            parser.parse(is);
        }
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE56");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesTest method testMissingValue.
/**
 * Parses the GSE2221 SOFT fixture, converts and persists the resulting experiment, computes missing-value
 * calls for it and spot checks two of them.
 * <p>
 * Expects 500 vectors. For sample "expression array ME-TMZ", the call for probe "26" must be false and the call
 * for probe "27" must be true. Any previously stored experiment with short name "GSE2221" is removed first.
 */
@Test
public void testMissingValue() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null) {
        eeService.remove(old);
    }

    String softFile = "/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz";
    GeoFamilyParser parser = new GeoFamilyParser();
    // Close both the classpath stream and the gzip wrapper once parsing is done.
    try (InputStream raw = this.getClass().getResourceAsStream(softFile)) {
        // getResourceAsStream returns null for a missing resource; fail clearly instead of with an NPE.
        assertNotNull("Test resource not found: " + softFile, raw);
        try (InputStream is = new GZIPInputStream(raw)) {
            parser.parse(is);
        }
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());

    // Column order is taken from the bioassay dimension of the first returned vector.
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();

    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    String sample = "expression array ME-TMZ";
    boolean foundA = this.checkMissingValueCall(calls, dim, "26", sample, false);
    boolean foundB = this.checkMissingValueCall(calls, dim, "27", sample, true);
    assertTrue("No call found for probe 26 in sample " + sample, foundA);
    assertTrue("No call found for probe 27 in sample " + sample, foundB);
}

/**
 * Asserts the call stored for one probe in one sample, for every vector/bioassay pair that matches.
 *
 * @param calls      vectors whose data decodes to one boolean per bioassay
 * @param dim        bioassay dimension that defines the column order used to index each decoded row
 * @param probeName  name of the design element to check
 * @param sampleName name of the bioassay to check
 * @param expected   value the call must have
 * @return true if at least one matching probe/sample pair was found and checked
 */
private boolean checkMissingValueCall(Collection<RawExpressionDataVector> calls, BioAssayDimension dim, String probeName, String sampleName, boolean expected) {
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean found = false;
    for (DesignElementDataVector vector : calls) {
        if (!vector.getDesignElement().getName().equals(probeName)) {
            continue;
        }
        boolean[] row = bac.byteArrayToBooleans(vector.getData());
        int i = 0;
        for (BioAssay bas : dim.getBioAssays()) {
            if (bas.getName().equals(sampleName)) {
                assertTrue("Call for probe " + probeName + " in sample " + sampleName + " should be " + expected,
                        row[i] == expected);
                found = true;
            }
            i++;
        }
    }
    return found;
}
use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesTest method testMissingValueGSE5091.
/**
 * Parses the GSE5091 SOFT fixture, converts and persists the resulting experiment, and checks that
 * {@code computeMissingValues} returns 10 vectors for it. Any previously stored experiment with short name
 * "GSE5091" is removed first.
 * <p>
 * NOTE(review): the previous comment here described GSE56 ("no Channel 1 signal value in the data file") and
 * looks copy-pasted from the GSE56 test; the reason GSE5091 is covered separately is not stated in this
 * method - confirm and document.
 */
@Test
public void testMissingValueGSE5091() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE5091");
    if (old != null) {
        eeService.remove(old);
    }

    String softFile = "/data/loader/expression/geo/GSE5091Short/GSE5091_family.soft.gz";
    GeoFamilyParser parser = new GeoFamilyParser();
    // Close both the classpath stream and the gzip wrapper once parsing is done.
    try (InputStream raw = this.getClass().getResourceAsStream(softFile)) {
        // getResourceAsStream returns null for a missing resource; fail clearly instead of with an NPE.
        assertNotNull("Test resource not found: " + softFile, raw);
        try (InputStream is = new GZIPInputStream(raw)) {
            parser.parse(is);
        }
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE5091");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    // Unlike the sibling tests, this one re-fetches the converter bean before converting.
    gc = this.getBean(GeoConverter.class);
    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(10, calls.size());
}
Aggregations