use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesImpl method computeMissingValues.
/**
 * Loads the data vectors of an experiment, builds the per-channel matrices and delegates to the
 * matrix-based {@code computeMissingValues} overload.
 * <p>
 * Raw vectors for the "useful" quantitation types are tried first; processed vectors are only
 * loaded (and thawed) when no raw vectors were found.
 *
 * @param ee                          the experiment; it is re-assigned to the result of {@code thawLite}
 * @param signalToNoiseThreshold      passed through unchanged to the matrix-based overload
 * @param extraMissingValueIndicators optional (may be {@code null}) additional values; only its size is
 *                                    logged here, the collection itself is passed through unchanged
 * @return a new {@link HashSet} holding the vectors returned by the matrix-based overload
 */
@Override
public Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment ee, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    ee = expressionExperimentService.thawLite(ee);
    Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(ee);

    StopWatch timer = new StopWatch();
    timer.start();
    TwoChannelMissingValuesImpl.log.info("Loading vectors ...");
    Collection<RawExpressionDataVector> rawVectors = rawExpressionDataVectorService.find(usefulQuantitationTypes);
    Collection<ProcessedExpressionDataVector> procVectors = new HashSet<>();
    if (rawVectors.isEmpty()) {
        // No raw data available: fall back to the processed vectors.
        procVectors = processedExpressionDataVectorService.find(usefulQuantitationTypes);
        processedExpressionDataVectorService.thaw(procVectors);
    } else {
        rawExpressionDataVectorService.thaw(rawVectors);
    }
    timer.stop();
    this.logTimeInfo(timer, procVectors.size() + rawVectors.size());

    Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(rawVectors.isEmpty() ? procVectors : rawVectors);
    // Use the class logger rather than stdout so the message honours the logging configuration.
    TwoChannelMissingValuesImpl.log.info("Building matrix with vectors that I just thawed");
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(builderVectors);
    Collection<BioAssayDimension> dims = builder.getBioAssayDimensions();

    /*
     * Note we have to do this one array design at a time, because we are producing DesignElementDataVectors which
     * must be associated with the correct BioAssayDimension.
     */
    TwoChannelMissingValuesImpl.log.info("Study has " + dims.size() + " bioassaydimensions");
    if (extraMissingValueIndicators != null && !extraMissingValueIndicators.isEmpty()) {
        TwoChannelMissingValuesImpl.log.info("There are " + extraMissingValueIndicators.size() + " manually-set missing value indicators");
    }

    ExpressionDataDoubleMatrix preferredData = builder.getPreferredData();
    ExpressionDataDoubleMatrix bkgDataA = builder.getBackgroundChannelA();
    ExpressionDataDoubleMatrix bkgDataB = builder.getBackgroundChannelB();
    ExpressionDataDoubleMatrix signalDataA = builder.getSignalChannelA();
    ExpressionDataDoubleMatrix signalDataB = builder.getSignalChannelB();

    if (builder.isAnyMissing()) {
        // Report which of the channel matrices already contain missing values.
        warnIfMissingValues(builder, bkgDataA, "bkgDataA");
        warnIfMissingValues(builder, bkgDataB, "bkgDataB");
        warnIfMissingValues(builder, signalDataA, "signalDataA");
        warnIfMissingValues(builder, signalDataB, "signalDataB");
    }

    Collection<RawExpressionDataVector> dimRes = this.computeMissingValues(ee, preferredData, signalDataA, signalDataB, bkgDataA, bkgDataB, signalToNoiseThreshold, extraMissingValueIndicators);
    return new HashSet<>(dimRes);
}

/**
 * Logs a single warning if the builder reports missing values for any quantitation type of the matrix.
 *
 * @param builder the builder that is asked for the number of missing values per quantitation type
 * @param matrix  the channel matrix to inspect; nothing is logged when it is {@code null}
 * @param label   name of the matrix as it should appear in the warning
 */
private static void warnIfMissingValues(ExpressionDataMatrixBuilder builder, ExpressionDataDoubleMatrix matrix, String label) {
    if (matrix == null) {
        return;
    }
    for (QuantitationType qt : matrix.getQuantitationTypes()) {
        if (builder.getNumMissingValues(qt) > 0) {
            TwoChannelMissingValuesImpl.log.warn("Missing values in " + label);
            return; // one warning per matrix is enough
        }
    }
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testReorder.
/**
 * Loads GSE404 from local test files, attaches a two-level categorical factor to its samples
 * (alternating by bioassay id order), calls {@code reorderByDesign} and then checks that, for every
 * processed vector, the first ten bioassays carry the baseline factor value and that the data of
 * design element "40" has the expected values at the spot-checked positions.
 */
@Test
public void testReorder() throws Exception {
    ExpressionExperiment old = eeService.findByShortName("GSE404");
    if (old != null) {
        eeService.remove(old);
    }
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse404Short")));
        @SuppressWarnings("unchecked") Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE404", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The other tests in this class unwrap getData() as a collection of experiments; accept
        // either a collection or a single experiment here so this path cannot fail on the cast.
        Object existing = e.getData();
        if (existing instanceof Collection) {
            existing = ((Collection<?>) existing).iterator().next();
        }
        this.ee = (ExpressionExperiment) existing;
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);

    // Set up a categorical factor with two values; fv2 ("bar") is flagged as the baseline.
    ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance();
    factor.setType(FactorType.CATEGORICAL);
    factor.setName(ee.getShortName() + " design");
    factor.setExperimentalDesign(ee.getExperimentalDesign());
    factor = eeService.addFactor(ee, factor);
    FactorValue fv1 = FactorValue.Factory.newInstance();
    FactorValue fv2 = FactorValue.Factory.newInstance();
    fv1.setValue("foo");
    fv1.setExperimentalFactor(factor);
    fv2.setValue("bar");
    fv2.setIsBaseline(true);
    fv2.setExperimentalFactor(factor);
    eeService.addFactorValue(ee, fv1);
    eeService.addFactorValue(ee, fv2);

    // Walk the bioassays in id order so the alternating assignment below is deterministic.
    List<BioAssay> basInOrder = new ArrayList<>(ee.getBioAssays());
    Collections.sort(basInOrder, new Comparator<BioAssay>() {
        @Override
        public int compare(BioAssay o1, BioAssay o2) {
            return o1.getId().compareTo(o2.getId());
        }
    });
    int i = 0;
    for (BioAssay ba : basInOrder) {
        BioMaterial bm = ba.getSampleUsed();
        // Plain Java assert: only evaluated when the JVM runs with assertions enabled (-ea).
        assert fv1.getId() != null;
        if (!bm.getFactorValues().isEmpty()) {
            // Sample already has a factor value; skip it without advancing the counter.
            continue;
        }
        if (i % 2 == 0) {
            bm.getFactorValues().add(fv1);
        } else {
            bm.getFactorValues().add(fv2);
        }
        bioMaterialService.update(bm);
        i++;
    }
    factor = this.experimentalFactorService.load(factor.getId());
    assertEquals(2, factor.getFactorValues().size());

    /*
     * All that was setup. Now do the interesting bit
     */
    processedExpressionDataVectorService.reorderByDesign(ee.getId());

    /*
     * Now check the vectors...
     */
    Collection<ProcessedExpressionDataVector> resortedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    boolean foundVector = false;
    assertTrue(resortedVectors.size() > 0);
    for (ProcessedExpressionDataVector vector : resortedVectors) {
        i = 0;
        log.debug(vector.getDesignElement().getName() + " .........................");
        // Thawing to avoid a lazy-loading error because we are outside of a transaction in this test. All
        // references in code run inside a transaction.
        BioAssayDimension bioAssayDimension = vector.getBioAssayDimension();
        bioAssayDimensionService.thawLite(bioAssayDimension);
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        for (BioAssay ba : bioAssays) {
            BioMaterial bm = ba.getSampleUsed();
            assertEquals(1, bm.getFactorValues().size());
            FactorValue fv = bm.getFactorValues().iterator().next();
            assertNotNull(fv.getId());
            log.debug(ba.getId() + " " + fv.getId() + " " + fv);
            if (i < 10) {
                // first because it is baseline;
                assertEquals(fv2, fv);
            }
            i++;
        }

        /*
         * spot check the data, same place as before.
         */
        if (vector.getDesignElement().getName().equals("40")) {
            foundVector = true;
            ByteArrayConverter conv = new ByteArrayConverter();
            Double[] d = ArrayUtils.toObject(conv.byteArrayToDoubles(vector.getData()));
            assertEquals(20, d.length);
            assertEquals(-0.08, d[1], 0.001);
            assertEquals(0.45, d[10], 0.001);
            assertEquals(Double.NaN, d[19], 0.001);
        }
    }
    assertTrue("test vector not found", foundVector);
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.
/**
 * Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
 * <p>
 * The test checks the processed data twice, once as an {@code ExpressionDataDoubleMatrix} built from the
 * processed vectors and once through {@code getProcessedDataArrays}: NaN is expected exactly where the
 * affected sample meets one of the elements accepted by {@link #isExpectedGapElement(String)}, and nowhere
 * else.
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
        Collection<ExpressionExperiment> results = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE482", false, true, false);
        this.ee = results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        this.ee = ((Collection<ExpressionExperiment>) e.getData()).iterator().next();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.thaw(preferredVectors);

    ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
    assertEquals(10, mat.columns());
    boolean found = false;
    for (int i = 0; i < mat.rows(); i++) {
        Double[] row = mat.getRow(i);

        // Debugging aid: column names once, then one tab-separated line per row.
        if (log.isDebugEnabled()) {
            if (i == 0) {
                for (int j = 0; j < row.length; j++) {
                    log.debug(mat.getBioAssaysForColumn(j).iterator().next().getName());
                }
            }
            StringBuilder line = new StringBuilder(mat.getRowElement(i).getDesignElement().getName()).append('\t');
            for (double d : row) {
                line.append(String.format("%4.2f\t", d));
            }
            log.debug(line.toString());
        }

        CompositeSequence el = mat.getDesignElementForRow(i);
        for (int j = 0; j < row.length; j++) {
            BioAssay ba = mat.getBioAssaysForColumn(j).iterator().next();
            if (ba.getName().matches("PGA-MurLungHyper-Norm-1a[ABC]v2-s2") && isExpectedGapElement(el.getName())) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);

    /*
     * Now do this through the processedExpressionDataVectorService
     */
    Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(30, da.size());
    found = false;
    boolean first = true;
    for (DoubleVectorValueObject v : da) {
        CompositeSequenceValueObject el = v.getDesignElement();
        double[] row = v.getData();
        // Checked once per vector, before the bioassay list is dereferenced anywhere below.
        assertNotNull(v.getBioAssays());

        // Debugging aid, same layout as above.
        if (log.isDebugEnabled()) {
            if (first) {
                for (int j = 0; j < row.length; j++) {
                    log.debug(v.getBioAssays().get(j).getName());
                }
            }
            StringBuilder line = new StringBuilder(el.getName()).append('\t');
            for (double d : row) {
                line.append(String.format("%4.2f\t", d));
            }
            log.debug(line.toString());
        }
        first = false;

        assertEquals(10, row.length);
        for (int j = 0; j < row.length; j++) {
            BioAssayValueObject ba = v.getBioAssays().get(j);
            if (ba.getName().startsWith("Missing bioassay for biomaterial") && isExpectedGapElement(el.getName())) {
                assertEquals(Double.NaN, row[j], 0.0001);
                found = true;
            } else {
                assertTrue("Got unexpected NA value for " + ba.getName() + " for " + el.getName(), !Double.isNaN(row[j]));
            }
        }
    }
    assertTrue(found);
}

/**
 * Tells whether the named element is one of the ten for which this test expects a NaN in the column of
 * the sample that lacks an assay (presumably the GPL81 elements present in the test data -- confirm
 * against the gse482short fixture).
 *
 * @param elementName name of the design element; must not be {@code null}
 * @return {@code true} if the name is in the expected-gap list
 */
private static boolean isExpectedGapElement(String elementName) {
    // Note: 100008_at is intentionally absent, exactly as in the original inline conditions.
    String[] gapElements = { "100001_at", "100002_at", "100003_at", "100004_at", "100005_at", "100006_at", "100007_at", "100009_r_at", "100010_at", "100011_at" };
    for (String candidate : gapElements) {
        if (elementName.equals(candidate)) {
            return true;
        }
    }
    return false;
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentB.
/**
 * Loads GSE5949 from local test files, computes processed expression data and verifies that every
 * processed vector has a masked-preferred quantitation type belonging to the experiment plus both rank
 * values, that the summary report agrees on the vector count, and that computing the processed data a
 * second time leaves the number of quantitation types unchanged.
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentB() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("GSE5949short")));
        Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE5949", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        this.ee = ((Collection<ExpressionExperiment>) alreadyLoaded.getData()).iterator().next();
    }

    // First pass: create the processed vectors and fetch them back.
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> processedVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);

    // Reload so the checks below see the persisted state of the experiment.
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    int qtCountAfterFirstRun = ee.getQuantitationTypes().size();

    for (ProcessedExpressionDataVector vector : processedVectors) {
        assertTrue(vector.getQuantitationType().getIsMaskedPreferred());
        assertTrue(ee.getQuantitationTypes().contains(vector.getQuantitationType()));
        assertNotNull(vector.getRankByMean());
        assertNotNull(vector.getRankByMax());
    }

    assertNotNull(ee.getNumberOfDataVectors());
    ExpressionExperimentValueObject summary = expressionExperimentReportService.generateSummary(ee.getId());
    assertNotNull(summary);
    assertEquals(ee.getNumberOfDataVectors(), summary.getProcessedExpressionVectorCount());

    // Second pass: recomputing must not leave stale quantitation types behind.
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    assertEquals(qtCountAfterFirstRun, ee.getQuantitationTypes().size());
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class VectorMergingServiceTest method test.
/**
 * Loads GSE3443 (seven platforms) from local test files, merges the platforms into one, switches the
 * experiment to the merged platform, merges the vectors and checks the element, sequence, quantitation
 * type and vector counts at each step.
 */
@Test
public final void test() throws Exception {
    // Need a persistent experiment that uses multiple array designs. Then merge the designs, switch the
    // vectors, and merge the vectors. GSE3443 uses GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937 and
    // GPL2938. Example of a sequence appearing on more than one platform: N57553
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge")));
    Collection<?> loaded = geoService.fetchAndLoad("GSE3443", false, false, false);
    ee = (ExpressionExperiment) loaded.iterator().next();
    ee = this.eeService.thawLite(ee);

    Collection<ArrayDesign> platforms = eeService.getArrayDesignsUsed(ee);
    assertEquals(7, platforms.size());

    // Check number of sequences across all platforms. This is how many elements we need on the new platform,
    // plus extras for duplicated sequences (e.g. elements that don't have a sequence...)
    Collection<ArrayDesign> thawedPlatforms = new HashSet<>();
    Set<BioSequence> originalSequences = new HashSet<>();
    for (ArrayDesign platform : platforms) {
        ArrayDesign thawedPlatform = arrayDesignService.thaw(platform);
        thawedPlatforms.add(thawedPlatform);
        for (CompositeSequence element : thawedPlatform.getCompositeSequences()) {
            log.info(element + " " + element.getBiologicalCharacteristic());
            originalSequences.add(element.getBiologicalCharacteristic());
        }
    }
    assertEquals(63, originalSequences.size());

    // Check total size of elements across all 7 platforms.
    int elementCount = 0;
    for (ArrayDesign thawedPlatform : thawedPlatforms) {
        elementCount += thawedPlatform.getCompositeSequences().size();
    }
    assertEquals(140, elementCount);

    ArrayDesign firstPlatform = thawedPlatforms.iterator().next();
    // NOTE(review): this removes from the un-thawed collection, which is not read again below; the
    // collection passed to merge() still contains firstPlatform -- confirm that this is intended.
    platforms.remove(firstPlatform);
    assertEquals(null, firstPlatform.getMergedInto());

    String mergedName = "testMerge" + RandomStringUtils.randomAlphabetic(5);
    String mergedShortName = "merged" + RandomStringUtils.randomAlphabetic(5);
    mergedAA = arrayDesignMergeService.merge(firstPlatform, thawedPlatforms, mergedName, mergedShortName, false);
    assertEquals(72, mergedAA.getCompositeSequences().size());

    Set<BioSequence> mergedSequences = new HashSet<>();
    for (CompositeSequence element : mergedAA.getCompositeSequences()) {
        mergedSequences.add(element.getBiologicalCharacteristic());
    }
    assertEquals(63, mergedSequences.size());
    // just to make this explicit. The new array design has to contain all the old sequences.
    assertEquals(originalSequences.size(), mergedSequences.size());

    ee = eeService.thaw(ee);
    assertEquals(1828, ee.getRawExpressionDataVectors().size());
    ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
    ee = eeService.thaw(ee);

    // check we actually got switched over.
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(mergedAA, assay.getArrayDesignUsed());
    }
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertEquals(mergedAA, rawVector.getDesignElement().getArrayDesign());
    }
    assertEquals(15, ee.getQuantitationTypes().size());
    assertEquals(1828, ee.getRawExpressionDataVectors().size());

    ee = vectorMergingService.mergeVectors(ee);

    // check we got the right processed data
    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertEquals(72, processedVectors.size());
    ee = eeService.thaw(ee);
    Collection<DoubleVectorValueObject> limitedArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee, 50);
    assertEquals(50, limitedArrays.size());
}
Aggregations