use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.
/**
 * This is a self-contained test; that is, it does not depend on the setup in {@code onSetUpInTransaction}. It
 * tests creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO),
 * obtained from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used are GSM15697 and
 * GSM15744. Specifically, the Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials)</li>
 * </ul>
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // Quantitation type shared by both vectors: plain, preferred, double-valued "VALUE".
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Two bioassays (columns), each with its own biomaterial.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();

    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);

    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);

    bioAssayDimension.setBioAssays(assays);

    // Two data vectors (rows), one value per bioassay.
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);

    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // Design elements (probes) on a single array design.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");

    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);

    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);

    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // Expected value goes first so that a failure reports "expected 2 but was N" the right way round.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method print.
/**
 * Debugging aid: writes the given vectors to the log as a tab-delimited table, one line per vector. Each line
 * starts with the vector's design element, followed by the decoded values when the quantitation type's
 * representation is DOUBLE, INT, BOOLEAN or STRING. For any other representation only the design element is
 * written.
 *
 * @param newVectors the vectors to dump
 */
@SuppressWarnings("unused")
private void print(Collection<DesignElementDataVector> newVectors) {
    ByteArrayConverter converter = new ByteArrayConverter();
    StringBuilder table = new StringBuilder();

    for (DesignElementDataVector dedv : newVectors) {
        table.append(dedv.getDesignElement());

        // The representation decides how the raw bytes of this vector are decoded.
        PrimitiveType representation = dedv.getQuantitationType().getRepresentation();

        if (representation.equals(PrimitiveType.DOUBLE)) {
            double[] decoded = converter.byteArrayToDoubles(dedv.getData());
            for (int j = 0; j < decoded.length; j++) {
                table.append("\t").append(decoded[j]);
            }
        } else if (representation.equals(PrimitiveType.INT)) {
            int[] decoded = converter.byteArrayToInts(dedv.getData());
            for (int j = 0; j < decoded.length; j++) {
                table.append("\t").append(decoded[j]);
            }
        } else if (representation.equals(PrimitiveType.BOOLEAN)) {
            boolean[] decoded = converter.byteArrayToBooleans(dedv.getData());
            for (int j = 0; j < decoded.length; j++) {
                table.append("\t").append(decoded[j]);
            }
        } else if (representation.equals(PrimitiveType.STRING)) {
            String[] decoded = converter.byteArrayToStrings(dedv.getData());
            for (int j = 0; j < decoded.length; j++) {
                table.append("\t").append(decoded[j]);
            }
        }

        table.append("\n");
    }

    VectorMergingServiceImpl.log.info("\n" + table);
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.
/**
 * Reorders the processed expression data vectors of an experiment so that the samples follow the ordering
 * derived from the experimental design.
 * <p>
 * For every existing {@link BioAssayDimension} of the experiment a new dimension holding the re-sorted bioassays
 * is created. Each processed vector then has its values permuted the same way and is attached to the matching
 * new dimension. Nothing is changed when the experiment has no experimental factors or no processed data.
 *
 * @param eeId id of the experiment whose processed data should be reordered
 * @throws IllegalStateException if the experiment has processed data but no bioassay dimensions, if a bioassay's
 *                               sample has no position in the design-based ordering, or if a vector has a column
 *                               for which no valid new position is known
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);

    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }

    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }

    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.isEmpty()) {
        throw new IllegalStateException(ee.getShortName() + " has processed data but no bioassay dimensions");
    }
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }

    // Any dimension can serve as the starting order.
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }

    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());

    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }

    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        List<BioAssay> bioAssays = (List<BioAssay>) bioAssayDimension.getBioAssays();
        assert bioAssays != null;

        /*
         * Initialize the new bioassay list with placeholders so positions can be set in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }

        for (int oldIndex = 0; oldIndex < bioAssays.size(); oldIndex++) {
            BioAssay bioAssay = bioAssays.get(oldIndex);
            BioMaterial sam1 = bioAssay.getSampleUsed();

            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("No position in the design-based ordering for the sample used by bioassay " + bioAssay);
            }

            resorted.set(newIndex, bioAssay);

            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
        }

        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;

        double[] data = converter.byteArrayToDoubles(v.getData());

        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay at oldIndex was moved to
         * indexes.get(oldIndex) above, so its value has to move to that same position; reading
         * data[indexes.get(k)] into position k would apply the inverse permutation instead.
         */
        double[] resortedData = new double[data.length];
        for (int oldIndex = 0; oldIndex < data.length; oldIndex++) {
            Integer newIndex = indexes.get(oldIndex);
            if (newIndex == null || newIndex >= data.length) {
                throw new IllegalStateException("No valid new position for column " + oldIndex + " of a vector with " + data.length + " values");
            }
            resortedData[newIndex] = data[oldIndex];
        }

        v.setData(converter.doubleArrayToBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }

    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesTest method print.
/**
 * Debug code: dumps the given vectors to standard error as a tab-delimited table. The header line lists the
 * bioassays of the first vector's dimension; each following line holds a vector's design element and its data
 * decoded as booleans.
 *
 * @param calls the vectors to dump; the first one supplies the bioassay dimension for the header
 */
@SuppressWarnings("unused")
private void print(Collection<RawExpressionDataVector> calls) {
    BioAssayDimension dimension = calls.iterator().next().getBioAssayDimension();
    ByteArrayConverter converter = new ByteArrayConverter();

    // Header: one tab-prefixed entry per bioassay.
    System.err.print("\n");
    for (BioAssay assay : dimension.getBioAssays()) {
        System.err.print("\t" + assay);
    }
    System.err.print("\n");

    // Body: design element followed by its decoded boolean values.
    for (DesignElementDataVector call : calls) {
        System.err.print(call.getDesignElement());
        boolean[] flags = converter.byteArrayToBooleans(call.getData());
        for (int col = 0; col < flags.length; col++) {
            System.err.print("\t" + flags[col]);
        }
        System.err.print("\n");
    }
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesTest method testMissingValue.
/**
 * Parses the GSE2221 test series, converts and persists it, computes missing-value calls and spot checks the
 * calls of probes "26" and "27" for the bioassay named "expression array ME-TMZ".
 */
@Test
public void testMissingValue() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE2221");
    if (old != null) {
        eeService.remove(old);
    }

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources: both the classpath stream and the gzip wrapper are closed even if parsing fails.
    try (InputStream raw = this.getClass().getResourceAsStream("/data/loader/expression/geo/shortGenePix/GSE2221_family.soft.gz");
            InputStream is = new GZIPInputStream(raw)) {
        parser.parse(is);
    }

    GeoSeries series = ((GeoParseResult) parser.getResults().iterator().next()).getSeriesMap().get("GSE2221");
    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    Object result = this.gc.convert(series);
    assertNotNull(result);
    ExpressionExperiment expExp = (ExpressionExperiment) ((Collection<?>) result).iterator().next();
    expExp = persisterHelper.persist(expExp, persisterHelper.prepare(expExp));

    Collection<RawExpressionDataVector> calls = tcmv.computeMissingValues(expExp, 2.0, new ArrayList<Double>());
    assertEquals(500, calls.size());
    BioAssayDimension dim = calls.iterator().next().getBioAssayDimension();

    // Spot check the results. For sample ME-TMZ, ID #27 should be 'true' and 26 should be false.
    ByteArrayConverter bac = new ByteArrayConverter();
    boolean foundA = false;
    boolean foundB = false;
    for (DesignElementDataVector vector : calls) {
        String probeName = vector.getDesignElement().getName();
        if ("26".equals(probeName)) {
            if (this.spotCheckMeTmzCall(vector, dim, bac, false)) {
                foundA = true;
            }
        } else if ("27".equals(probeName)) {
            if (this.spotCheckMeTmzCall(vector, dim, bac, true)) {
                foundB = true;
            }
        }
    }

    // Both probes must actually have been seen, otherwise the checks above proved nothing.
    assertTrue(foundA && foundB);
}

/**
 * Asserts that the call stored in {@code vector} for every bioassay named "expression array ME-TMZ" equals
 * {@code expected}.
 *
 * @param vector   vector whose data is decoded as booleans
 * @param dim      dimension whose bioassay order gives the column positions
 * @param bac      converter used to decode the vector data
 * @param expected the call expected for the ME-TMZ bioassay
 * @return true if at least one bioassay with that name was found in {@code dim}
 */
private boolean spotCheckMeTmzCall(DesignElementDataVector vector, BioAssayDimension dim, ByteArrayConverter bac, boolean expected) {
    boolean[] row = bac.byteArrayToBooleans(vector.getData());
    boolean found = false;
    int i = 0;
    for (BioAssay bas : dim.getBioAssays()) {
        if (bas.getName().equals("expression array ME-TMZ")) {
            assertTrue(row[i] == expected);
            found = true;
        }
        i++;
    }
    return found;
}
Aggregations