use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class SVDServiceHelperImpl method svd.
/**
 * Runs a singular value decomposition on the processed expression data of an experiment, passes
 * the outcome to {@code updatePca} and returns the result of {@code svdFactorAnalysis} for it.
 *
 * @param ee the experiment to analyse; asserted to be non-null
 * @return the value object produced by {@code svdFactorAnalysis} for the updated PCA
 * @throws IllegalArgumentException if the experiment has no processed data vectors
 */
@Override
public SVDValueObject svd(ExpressionExperiment ee) {
    assert ee != null;

    Collection<ProcessedExpressionDataVector> processedVectors =
            processedExpressionDataVectorService.getProcessedDataVectors(ee);
    if (processedVectors.isEmpty()) {
        throw new IllegalArgumentException("Experiment must have processed data already to do SVD");
    }
    processedExpressionDataVectorService.thaw(processedVectors);

    ExpressionDataDoubleMatrix expressionMatrix = new ExpressionDataDoubleMatrix(processedVectors);

    SVDServiceHelperImpl.log.info("Starting SVD");
    ExpressionDataSVD decomposition = new ExpressionDataSVD(expressionMatrix);
    SVDServiceHelperImpl.log.info("SVD done, postprocessing and storing results.");

    // Save the results: the V matrix and the matrix's best bioassay dimension go into the PCA update.
    DoubleMatrix<Integer, BioMaterial> vMatrix = decomposition.getV();
    BioAssayDimension bestDimension = expressionMatrix.getBestBioAssayDimension();
    return this.svdFactorAnalysis(this.updatePca(ee, decomposition, vMatrix, bestDimension));
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class GeoDatasetServiceTest method testFetchAndLoadGSE9048.
/**
 * Loads GSE9048 from the local test files and checks the number of quantitation types at three
 * points: after import (16), after computing missing values (17) and after computing processed
 * data (18). Finally checks that a readable, non-empty data file can be written or located.
 * Returns early when the data set is already in the system.
 */
@Test
public void testFetchAndLoadGSE9048() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> loaded = geoService.fetchAndLoad("GSE9048", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        log.info("Test skipped because GSE9048 was already loaded - clean the DB before running the test");
        return;
    }

    // Checkpoint 1: freshly imported experiment.
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    aclTestUtils.checkEEAcls(ee);
    Collection<QuantitationType> importedQts = eeService.getQuantitationTypes(ee);
    assertEquals(16, importedQts.size());

    // Checkpoint 2: the 16 that were imported plus the detection call added by computeMissingValues.
    twoChannelMissingValues.computeMissingValues(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    Collection<QuantitationType> qtsAfterMissingValues = eeService.getQuantitationTypes(ee);
    assertEquals(17, qtsAfterMissingValues.size());

    // Checkpoint 3: computing processed data yields 10 vectors and one more quantitation type.
    Collection<ProcessedExpressionDataVector> processedVectors =
            processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    assertEquals(10, processedVectors.size());
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    Collection<QuantitationType> qtsAfterProcessing = eeService.getQuantitationTypes(ee);
    assertEquals(18, qtsAfterProcessing.size());

    // The data file must exist, be readable and contain something.
    File dataFile = dataFileService.writeOrLocateDataFile(ee, true, true);
    assertTrue(dataFile.canRead());
    assertTrue(dataFile.length() > 0);
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class GeoDatasetServiceTest method testFetchAndLoadGSE18707.
/**
 * For bug 2312 - qts getting dropped.
 * <p>
 * Loads GSE18707 from the local test files, checks that exactly one quantitation type with the
 * expected description was imported, computes processed data (100 vectors expected) and checks
 * that the experiment then has two quantitation types. Returns early when the data set is
 * already in the system.
 */
@Test
public void testFetchAndLoadGSE18707() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> loaded = geoService.fetchAndLoad("GSE18707", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        log.info("Test skipped because GSE18707 was already loaded - clean the DB before running the test");
        return;
    }

    // Mouse430A_2.
    ee = eeService.findByShortName("GSE18707");
    aclTestUtils.checkEEAcls(ee);

    // Exactly one quantitation type right after import, carrying the expected description.
    Collection<QuantitationType> importedQts = eeService.getQuantitationTypes(ee);
    assertEquals(1, importedQts.size());
    assertEquals("Processed Affymetrix Rosetta intensity values",
            importedQts.iterator().next().getDescription());

    Collection<ProcessedExpressionDataVector> processedVectors =
            processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    assertEquals(100, processedVectors.size());

    // After processing, re-fetch the experiment: both quantitation types must be present.
    ee = eeService.findByShortName("GSE18707");
    Collection<QuantitationType> qtsAfterProcessing = eeService.getQuantitationTypes(ee);
    assertEquals(2, qtsAfterProcessing.size());
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class BaseAnalyzerConfigurationTest method configureVectors.
/**
 * Populates the test fixture's processed expression data vectors from a matrix resource on the
 * classpath.
 * <p>
 * One vector and one composite sequence are created per design element (up to
 * {@code NUM_DESIGN_ELEMENTS}); each vector holds the first {@code bioMaterials.size()} values of
 * the corresponding matrix row, converted to bytes. The resulting vectors are attached to
 * {@code expressionExperiment} and the composite sequences to {@code arrayDesign}.
 *
 * @param bioMaterials the biomaterials of the design; only its size is used here, as the number
 *                     of values read per row
 * @param resourcePath classpath location of the data matrix, or {@code null} to use
 *                     {@code /data/stat-tests/anova-test-data.txt}
 * @throws IllegalArgumentException if the resource cannot be found on the classpath
 * @throws Exception                if reading the matrix fails
 */
void configureVectors(List<BioMaterial> bioMaterials, String resourcePath) throws Exception {
    this.vectors = new HashSet<>();

    String path = resourcePath == null ? "/data/stat-tests/anova-test-data.txt" : resourcePath;

    DoubleMatrix<String, String> dataMatrix;
    // Close the stream once the matrix has been read; the original left it open.
    try (java.io.InputStream in = this.getClass().getResourceAsStream(path)) {
        if (in == null) {
            // getResourceAsStream signals a missing resource with null; fail with the offending path.
            throw new IllegalArgumentException("Test data resource not found on classpath: " + path);
        }
        dataMatrix = new DoubleMatrixReader().read(in);
    }

    Collection<CompositeSequence> compositeSequences = new HashSet<>();
    for (int i = 0; i < BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS; i++) {
        // The design element takes its name from the matrix row and an id offset by 1000.
        CompositeSequence cs = CompositeSequence.Factory.newInstance();
        cs.setName(dataMatrix.getRowName(i));
        cs.setId(i + 1000L);
        cs.setArrayDesign(arrayDesign);

        ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
        vector.setBioAssayDimension(bioAssayDimension);
        vector.setQuantitationType(quantitationType);
        vector.setDesignElement(cs);
        vector.setId(i + 10000L);

        // Copy one value per biomaterial from row i of the matrix.
        double[] dvals = new double[bioMaterials.size()];
        for (int j = 0; j < dvals.length; j++) {
            dvals[j] = dataMatrix.get(i, j);
        }
        vector.setData(bac.doubleArrayToBytes(dvals));

        vectors.add(vector);
        compositeSequences.add(cs);
    }

    expressionExperiment.setProcessedExpressionDataVectors(vectors);
    arrayDesign.setCompositeSequences(compositeSequences);
}
use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
the class ExpressionExperimentPlatformSwitchService method runOldAd.
/**
 * Processes the data vectors that an experiment has on one of its previous platforms
 * ({@code oldAd}) so they can be switched to {@code arrayDesign}.
 * <p>
 * Does nothing when {@code oldAd} equals {@code arrayDesign}. Otherwise, for every quantitation
 * type the experiment has on {@code oldAd}, the raw or processed vectors are fetched, each vector
 * whose design element is not already on {@code arrayDesign} is handed to {@code processVector},
 * and the vectors are then removed from and re-added to the experiment's collection.
 *
 * @param ee                 the experiment being switched
 * @param arrayDesign        the platform being switched to
 * @param designElementMap   passed through to {@code processVector}
 * @param maxBAD             passed through to {@code processVector}
 * @param usedDesignElements passed through to {@code processVector}; cleared at the start of each
 *                           quantitation type
 * @param oldAd              the platform being switched from
 * @throws IllegalStateException if {@code oldAd} has no elements while its technology type is not
 *                               {@code NONE}, or if a quantitation type has both raw and
 *                               processed vectors
 */
private void runOldAd(ExpressionExperiment ee, ArrayDesign arrayDesign,
        Map<BioSequence, Collection<CompositeSequence>> designElementMap, BioAssayDimension maxBAD,
        Map<CompositeSequence, Collection<BioAssayDimension>> usedDesignElements, ArrayDesign oldAd) {
    if (oldAd.equals(arrayDesign)) {
        return;
    }

    oldAd = arrayDesignService.thaw(oldAd);

    boolean platformHasNoElements = oldAd.getCompositeSequences().isEmpty();
    if (platformHasNoElements && !oldAd.getTechnologyType().equals(TechnologyType.NONE)) {
        // Bug 3451 - this is okay if it is a RNA-seq experiment etc. prior to data upload.
        throw new IllegalStateException(oldAd + " has no elements");
    }

    Collection<QuantitationType> quantitationTypes = expressionExperimentService.getQuantitationTypes(ee, oldAd);
    ExpressionExperimentPlatformSwitchService.log.info("Processing " + quantitationTypes.size() + " quantitation types for vectors on " + oldAd);

    for (QuantitationType type : quantitationTypes) {
        // Use each design element only once per quantitation type + bioassaydimension per array design.
        usedDesignElements.clear();

        Collection<RawExpressionDataVector> rawForQt = this.getRawVectorsForOneQuantitationType(oldAd, type);
        Collection<ProcessedExpressionDataVector> processedForQt = this.getProcessedVectorsForOneQuantitationType(oldAd, type);
        boolean hasRaw = rawForQt != null && !rawForQt.isEmpty();
        boolean hasProcessed = processedForQt != null && !processedForQt.isEmpty();

        if (!hasRaw && !hasProcessed) {
            // This can happen when the quantitation types vary for the array designs.
            ExpressionExperimentPlatformSwitchService.log.debug("No vectors for " + type + " on " + oldAd);
            continue;
        }

        // This check assures we do not mix raw and processed vectors further down the line.
        if (hasRaw && hasProcessed) {
            throw new IllegalStateException("Two types of vector for quantitationType " + type);
        }

        Collection<DesignElementDataVector> toSwitch = new HashSet<>();
        if (rawForQt != null) {
            toSwitch.addAll(rawForQt);
        }
        if (processedForQt != null) {
            toSwitch.addAll(processedForQt);
        }
        ExpressionExperimentPlatformSwitchService.log.info("Switching " + toSwitch.size() + " vectors for " + type + " from " + oldAd.getShortName() + " to " + arrayDesign.getShortName());

        int handled = 0;
        // NOTE(review): nothing in this method ever adds to this set, so the check after the loop
        // cannot fire as written - confirm whether processVector was meant to populate it.
        // noinspection MismatchedQueryAndUpdateOfCollection // Only used for logging
        Collection<DesignElementDataVector> unmatched = new HashSet<>();
        for (DesignElementDataVector vector : toSwitch) {
            // NOTE(review): processed vectors are added to this set too; if
            // ProcessedExpressionDataVector is not a subtype of RawExpressionDataVector this
            // assertion fails for them when assertions are enabled - confirm.
            assert RawExpressionDataVector.class.isAssignableFrom(vector.getClass()) : "Unexpected class: " + vector.getClass().getName();

            CompositeSequence currentElement = vector.getDesignElement();
            if (!currentElement.getArrayDesign().equals(arrayDesign)) {
                // Only vectors not already on the target platform need processing.
                this.processVector(designElementMap, usedDesignElements, vector, maxBAD);
                handled++;
                if (handled % 20000 == 0) {
                    ExpressionExperimentPlatformSwitchService.log.info("Found matches for " + handled + " vectors for " + type);
                }
            }
        }

        // This is bad.
        if (unmatched.size() > 0) {
            throw new IllegalStateException("There were " + unmatched.size() + " vectors that couldn't be matched to the new design for: " + type + ", example: " + unmatched.iterator().next());
        }

        // Force collection update: take the vectors out of the experiment's collection and put
        // them back; the assertions check that the size dropped and was then restored.
        if (hasRaw) {
            int sizeBefore = ee.getRawExpressionDataVectors().size();
            ee.getRawExpressionDataVectors().removeAll(rawForQt);
            assert sizeBefore > ee.getRawExpressionDataVectors().size();
            ee.getRawExpressionDataVectors().addAll(rawForQt);
            assert sizeBefore == ee.getRawExpressionDataVectors().size();
        } else if (hasProcessed) {
            int sizeBefore = ee.getProcessedExpressionDataVectors().size();
            ee.getProcessedExpressionDataVectors().removeAll(processedForQt);
            assert sizeBefore > ee.getProcessedExpressionDataVectors().size();
            ee.getProcessedExpressionDataVectors().addAll(processedForQt);
            assert sizeBefore == ee.getProcessedExpressionDataVectors().size();
        }
    }
}
Aggregations