use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testReorder.
/**
 * Loads the GSE404 test data set, attaches a two-level categorical factor (one level flagged as baseline) to its
 * samples, asks the service to reorder the processed vectors by design, and then checks both the resulting sample
 * order and a few data values.
 */
@Test
public void testReorder() throws Exception {
    // Remove any copy of the experiment that is already stored.
    ExpressionExperiment existing = eeService.findByShortName("GSE404");
    if (existing != null) {
        eeService.remove(existing);
    }

    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked")
        Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The exception carries the experiment that was already present.
        this.ee = (ExpressionExperiment) e.getData();
    }

    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Build a categorical factor with two values; fv2 is the one flagged as baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);

    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Walk the assays in ascending id order so the alternating assignment below is deterministic.
    List<BioAssay> assaysById = new ArrayList<>(ee.getBioAssays());
    Collections.sort(assaysById, new Comparator<BioAssay>() {
        @Override
        public int compare(BioAssay left, BioAssay right) {
            return left.getId().compareTo(right.getId());
        }
    });

    int assignedCount = 0;
    for (BioAssay assay : assaysById) {
        BioMaterial sample = assay.getSampleUsed();
        assert fv1.getId() != null;
        if (!sample.getFactorValues().isEmpty()) {
            // Sample already has a factor value; leave it alone and do not advance the counter.
            continue;
        }
        // Alternate fv1 / fv2 across the samples that still need a value.
        FactorValue chosen = (assignedCount % 2 == 0) ? fv1 : fv2;
        sample.getFactorValues().add(chosen);
        bioMaterialService.update(sample);
        assignedCount++;
    }

    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    /*
     * Setup is complete; this is the call under test.
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Inspect the vectors after reordering.
     */
    Collection<ProcessedExpressionDataVector> resortedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    boolean foundVector = false;
    assertTrue(resortedVectors.size() > 0);

    for (ProcessedExpressionDataVector vector : resortedVectors) {
        log.debug(vector.getDesignElement().getName() + " .........................");

        // Thawing to avoid a lazy-loading error: this test runs outside of a transaction, whereas the production
        // code paths that touch these references run inside one.
        BioAssayDimension dimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(dimension);

        int position = 0;
        for (BioAssay assay : dimension.getBioAssays()) {
            BioMaterial sample = assay.getSampleUsed();
            assertEquals(1, sample.getFactorValues().size());
            FactorValue fv = sample.getFactorValues().iterator().next();
            assertNotNull(fv.getId());
            log.debug(assay.getId() + " " + fv.getId() + " " + fv);
            if (position < 10) {
                // The baseline value (fv2) is expected on the first ten samples.
                assertEquals(fv2, fv);
            }
            position++;
        }

        /*
         * Spot check the data of the vector whose design element is named "40".
         */
        if (vector.getDesignElement().getName().equals("40")) {
            foundVector = true;
            ByteArrayConverter converter = new ByteArrayConverter();
            Double[] values = ArrayUtils.toObject(converter.byteArrayToDoubles(vector.getData()));
            assertEquals(20, values.length);
            assertEquals(-0.08, values[1], 0.001);
            assertEquals(0.45, values[10], 0.001);
            assertEquals(Double.NaN, values[19], 0.001);
        }
    }

    assertTrue("test vector not found", foundVector);
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class LinkAnalysis method getCorrelationDistribution.
/**
 * Wraps the histogram held by the metric matrix in a new {@link CoexpCorrelationDistribution}.
 *
 * @return a distribution whose number of bins is the histogram size and whose bin counts are the histogram values
 *         encoded as bytes
 */
public CoexpCorrelationDistribution getCorrelationDistribution() {
    CoexpCorrelationDistribution distribution = CoexpCorrelationDistribution.Factory.newInstance();

    DoubleArrayList histogram = this.metricMatrix.getHistogramArrayList();
    distribution.setNumBins(histogram.size());

    // The counts are stored on the entity as a byte array rather than as doubles.
    double[] counts = MatrixUtil.fromList(histogram).toArray();
    distribution.setBinCounts(new ByteArrayConverter().doubleArrayToBytes(counts));

    return distribution;
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class LinkAnalysisServiceImpl method diagnoseCorrelationDistribution.
/**
 * Checks properties of the correlation distribution and rejects the experiment if they look wrong.
 * <p>
 * The bin counts are decoded, normalized so they sum to 1, and scanned cumulatively. Two checks are then applied:
 * <ol>
 * <li>the median, taken as the value {@code binToCorrelation} reports for the first bin at which the cumulative
 * density reaches 0.5, must lie within [-0.2, 0.2];</li>
 * <li>the combined density of the lower and upper tails must not exceed the density of the middle bins.</li>
 * </ol>
 *
 * @param ee       the experiment the distribution was computed for; passed on to the exception on failure
 * @param corrDist the distribution whose bin counts are examined
 * @throws UnsuitableForAnalysisException if either check fails
 */
private void diagnoseCorrelationDistribution(ExpressionExperiment ee, CoexpCorrelationDistribution corrDist) throws UnsuitableForAnalysisException {
    ByteArrayConverter bac = new ByteArrayConverter();
    double[] binCounts = bac.byteArrayToDoubles(corrDist.getBinCounts());
    int numBins = binCounts.length;
    DoubleMatrix1D histogram = new DenseDoubleMatrix1D(binCounts);

    // QC parameters; quantile, not correlation
    double lowerLimitofMiddle = 0.45;
    double upperLimitofMiddle = 0.55;
    double tailFraction = 0.1;

    // normalize
    histogram.assign(Functions.div(histogram.zSum()));

    // These bin indices do not change during the scan, so compute them once.
    int lowerTailBin = (int) Math.floor(numBins * tailFraction);
    int upperTailBin = (int) Math.floor(numBins * (1.0 - tailFraction));
    int middleStartBin = (int) Math.floor(lowerLimitofMiddle * numBins);
    int middleEndBin = (int) Math.floor(upperLimitofMiddle * numBins);

    double lowerTailDensity = 0.0;
    double upperTailDensity = 0.0;
    double middleDensity = 0.0;
    double median = 0.0;
    boolean medianFound = false;

    // cumulative density up to and including the current bin
    double s = 0.0;
    for (int bin = 0; bin < histogram.size(); bin++) {
        s += histogram.get(bin);

        /*
         * Perhaps these should be adjusted based on the sample size; for smaller data sets, more of the data is
         * going to be above 0.9 etc. But in practice this can't have a very big effect.
         */
        if (bin == lowerTailBin) {
            lowerTailDensity = s;
        } else if (bin == upperTailBin) {
            upperTailDensity = 1.0 - s;
        } else if (bin > middleStartBin && bin < middleEndBin) {
            middleDensity += histogram.get(bin);
        }

        // Record the median exactly once, at the first bin where the cumulative density reaches one half. This
        // must be an independent test: chaining it behind a lower threshold (e.g. "s >= 0.2") would make it
        // unreachable and leave the median at its initial value.
        if (!medianFound && s >= 0.5) {
            median = this.binToCorrelation(bin, numBins);
            medianFound = true;
        }
    }

    String message = "";
    boolean bad = false;
    if (median > 0.2 || median < -0.2) {
        bad = true;
        message = "Correlation distribution fails QC: median far from center (" + median + ")";
    } else if (lowerTailDensity + upperTailDensity > middleDensity) {
        bad = true;
        message = "Correlation distribution fails QC: tails too heavy";
    }

    if (bad) {
        throw new UnsuitableForAnalysisException(ee, message);
    }
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class DataUpdater method makeNewVectors.
/**
 * Builds one raw data vector per matrix row that has a design element.
 * <p>
 * Rows whose design element is {@code null} are skipped. Each vector produced carries the row's values encoded as
 * bytes, the row's design element, the given quantitation type and experiment, and the bioassay dimension returned
 * by {@code assayDimensionService.findOrCreate} for the matrix's best dimension.
 *
 * @param ee             experiment set on every vector
 * @param targetPlatform platform every row's design element must belong to
 * @param data           matrix supplying the rows, row elements and bioassay dimension
 * @param qt             quantitation type set on every vector
 * @return the new vectors; empty if no row has a design element
 * @throws IllegalArgumentException if a row's design element belongs to a platform other than
 *                                  {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();

    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();

    for (int i = 0; i < data.rows(); i++) {
        // Validate the row before doing any conversion or allocation for it.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }

        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    return vectors;
}
use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.
the class GeoSuperSeriesLoadTest method testFetchAndLoadSuperSeriesB.
/*
 * See bug 2064. GSE14618 is a superseries of GSE14613 and GSE14615. This is actually even worse, because some
 * samples were run on both platforms. This is a situation we don't really want to handle completely.
 *
 * The test loads the superseries, merges the two platforms it uses, switches the experiment to the merged
 * platform, merges the vectors, and then checks the vector count, the vector length and the number of missing
 * values for two specific probes.
 */
@Test
public void testFetchAndLoadSuperSeriesB() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse14618superser")));

    // Point the fixture field at any stored copy before invoking tearDown().
    ee = ees.findByShortName("GSE14618");
    this.tearDown();

    // noinspection unchecked
    Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE14618", false, true, false, true, false);
    assertEquals(1, loaded.size());
    ee = loaded.iterator().next();
    ee = ees.findByShortName("GSE14618");
    ee = ees.thawLite(ee);

    assertEquals(1, ee.getQuantitationTypes().size());

    // Merge the second platform used by the experiment into the first one.
    Object[] platforms = ees.getArrayDesignsUsed(ee).toArray();
    Collection<ArrayDesign> toMergeIn = new HashSet<>();
    toMergeIn.add((ArrayDesign) platforms[1]);
    ArrayDesign primary = (ArrayDesign) platforms[0];
    ArrayDesign merged = adms.merge(primary, toMergeIn, RandomStringUtils.randomAlphabetic(5), RandomStringUtils.randomAlphabetic(5), false);

    ee = eepss.switchExperimentToArrayDesign(ee, merged);
    vms.mergeVectors(ee);

    ee = ees.load(ee.getId());
    ee = ees.findByShortName("GSE14618");
    ee = ees.thaw(ee);

    assertEquals(40, ee.getProcessedExpressionDataVectors().size());

    boolean sawFirstProbe = false;
    boolean sawSecondProbe = false;
    ByteArrayConverter converter = new ByteArrayConverter();

    for (ProcessedExpressionDataVector vector : ee.getProcessedExpressionDataVectors()) {
        double[] values = converter.byteArrayToDoubles(vector.getData());
        assertEquals(92, values.length);

        String probeName = vector.getDesignElement().getName();
        boolean isFirstProbe = probeName.equals("117_at");
        boolean isSecondProbe = !isFirstProbe && probeName.equals("1552279_a_at");
        if (!isFirstProbe && !isSecondProbe) {
            continue;
        }

        // Only the two probes of interest get their missing values counted.
        int missing = 0;
        for (double value : values) {
            if (Double.isNaN(value)) {
                missing++;
            }
        }

        if (isFirstProbe) {
            sawFirstProbe = true;
            assertEquals("Should have been no missing values", 0, missing);
        } else {
            sawSecondProbe = true;
            assertEquals("Wrong number of missing values", 42, missing);
        }
    }

    assertTrue("Didn't find first test probe expected.", sawFirstProbe);
    assertTrue("Didn't find second test probe expected.", sawSecondProbe);
}
Aggregations