Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
Shown below: the method execute of the class ProcessedExpressionDataVectorCreateTaskImpl.
/**
 * Makes sure processed expression data vectors are available for the experiment carried by the task
 * command, then asks the coexpression matrix service to create its matrix for that experiment.
 * <p>
 * When the command is flagged as "correlation matrix only", existing processed vectors are reused and
 * are only computed if none are found; otherwise the vectors are always computed.
 *
 * @return a task result built from the command and the number of processed vectors obtained
 */
@Override
public TaskResult execute() {
    ExpressionExperiment experiment = taskCommand.getExpressionExperiment();

    Collection<ProcessedExpressionDataVector> vectors = null;
    boolean mustCompute = true;

    if (taskCommand.isCorrelationMatrixOnly()) {
        // This mode exists only so that the correlation matrix computation does not need a task of its
        // own: look for existing vectors first and compute them only when there are none.
        vectors = processedExpressionDataVectorService.getProcessedDataVectors(experiment);
        mustCompute = vectors.isEmpty();
    }

    if (mustCompute) {
        vectors = processedExpressionDataVectorService.computeProcessedExpressionData(experiment);
    }

    coexpressionMatrixService.create(experiment);

    int vectorCount = vectors.size();
    return new TaskResult(taskCommand, vectorCount);
}
Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
Shown below: the method testMatrixConversion of the class ExpressionDataDoubleMatrixTest.
/**
 * Loads GEO series GSE8294 through the GEO service (using a local generator rooted at the test file
 * base path), builds the preferred-data matrix from its raw vectors and checks its dimensions, and then
 * exercises data file creation/deletion and marking/unmarking of a sample as missing.
 * <p>
 * The steps depend on each other through persisted state, so their order matters.
 */
@Test
public void testMatrixConversion() throws Exception {
    try {
        // Use a local domain-object generator rooted at the test file base path.
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("")));
        // NOTE(review): the meaning of the three boolean flags is not visible here; see fetchAndLoad.
        Collection<?> results = geoService.fetchAndLoad("GSE8294", false, true, false);
        newee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The experiment is already in the system: take the first entry from the exception's data.
        newee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
    }
    newee = expressionExperimentService.thaw(newee);
    // Make sure the raw vectors are really thawed, so we can get the design element sequences.
    Collection<RawExpressionDataVector> vectors = newee.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(vectors);
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(vectors);
    ExpressionDataDoubleMatrix matrix = builder.getPreferredData();
    // The element at (10, 0) must hold an actual value, and the matrix must be 66 rows by 9 columns.
    assertTrue(!Double.isNaN(matrix.get(10, 0)));
    assertEquals(66, matrix.rows());
    assertEquals(9, matrix.columns());
    /*
     * Additional tests for files and outlier marking: after computing processed data, a data file can
     * be written or located, and deleting all files for the experiment removes it.
     */
    processedDataVectorService.computeProcessedExpressionData(newee);
    File f1 = expressionDataFileService.writeOrLocateDataFile(expressionExperimentService.load(newee.getId()), true, true);
    assertNotNull(f1);
    assertTrue(f1.exists());
    expressionDataFileService.deleteAllFiles(newee);
    assertTrue(!f1.exists());
    /*
     * Outlier removal: mark the first bioassay as missing and check that it is flagged as an outlier
     * and that its processed data value at index 10 is NaN.
     */
    BioAssay tba = newee.getBioAssays().iterator().next();
    Collection<BioAssay> ol = new HashSet<>();
    ol.add(tba);
    sampleRemoveService.markAsMissing(ol);
    assertTrue(tba.getIsOutlier());
    // Re-thaw the experiment to pick up its processed vectors after the marking.
    newee = expressionExperimentService.thaw(newee);
    Collection<ProcessedExpressionDataVector> vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    ExpressionDataMatrixBuilder matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    ExpressionDataDoubleMatrix data = matrixBuilder.getProcessedData();
    assertNotNull(data);
    assertTrue(Double.isNaN(data.getColumn(tba)[10]));
    // Reverse the marking, reload the experiment, and check the flag is cleared and the value is back.
    sampleRemoveService.unmarkAsMissing(ol);
    newee = expressionExperimentService.load(newee.getId());
    newee = expressionExperimentService.thaw(newee);
    vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    data = matrixBuilder.getProcessedData();
    assertTrue(!tba.getIsOutlier());
    assertTrue(!Double.isNaN(data.getColumn(tba)[10]));
}
Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
Shown below: the method batchCorrect of the class PreprocessorServiceImpl.
/**
 * Applies batch correction to the processed data of an experiment and records an audit event for it.
 * The raw data is left alone; only the processed data vectors are replaced.
 *
 * @param ee    the experiment to correct; it is re-read via {@code thawLite} before use
 * @param force when {@code true} the outlier check is skipped and the audit note is prefixed with
 *              {@code [Forced]}; the flag is also handed to {@code checkCorrectable}
 * @throws PreprocessingException declared by the method; any exception raised while correcting,
 *                                storing or post-processing the data is wrapped in one
 */
@Override
public void batchCorrect(ExpressionExperiment ee, boolean force) throws PreprocessingException {
    this.checkArrayDesign(ee);
    ee = expressionExperimentService.thawLite(ee);
    this.checkCorrectable(ee, force);

    final String baseNote = "ComBat batch correction";
    String note = baseNote;
    String detail = null;

    if (force) {
        // Predicted outliers that were judged acceptable do not block a forced run.
        detail = "Batch correction skipped outlier check.";
        note = "[Forced]" + baseNote;
        PreprocessorServiceImpl.log.warn(detail);
    } else {
        this.checkOutliers(ee);
    }

    try {
        Collection<ProcessedExpressionDataVector> currentVectors = this.getProcessedExpressionDataVectors(ee);

        // TODO log-transform if not already, update QT. See https://github.com/PavlidisLab/Gemma/issues/50
        ExpressionDataDoubleMatrix corrected = this.getCorrectedData(ee, currentVectors);

        // Turn the corrected matrix back into vectors and store them for the experiment.
        processedExpressionDataVectorService.createProcessedDataVectors(ee, corrected.toProcessedDataVectors());

        AuditEventType eventType = BatchCorrectionEvent.Factory.newInstance();
        String confound = expressionExperimentService.getBatchConfound(ee);

        if (force && confound != null) {
            String confoundDetail = "Batch correction forced over a detected confound: " + confound;
            // detail is always set on the forced path above, so the null branch is purely defensive.
            if (detail == null) {
                detail = confoundDetail;
            } else {
                detail = detail + "\n" + confoundDetail;
            }
        }

        auditTrailService.addUpdateEvent(ee, eventType, note, detail);
        this.removeInvalidatedData(ee);
        this.processExceptForVectorCreate(ee);
    } catch (Exception e) {
        throw new PreprocessingException(e);
    }
}
Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
Shown below: the method reorderByDesign of the class ProcessedExpressionDataVectorCreateHelperServiceImpl.
/**
 * Reorders the processed data vectors of an experiment so that their columns follow the order given by
 * the experimental design.
 * <p>
 * For every bioassay dimension of the experiment a new, reordered dimension is created; each processed
 * vector then has its values permuted to match and is pointed at the new dimension. Nothing is done
 * (apart from logging) when the experiment has no experimental factors or no processed vectors.
 *
 * @param eeId id of the experiment, loaded through {@code expressionExperimentDao}
 * @throws IllegalStateException if a bioassay uses a sample that is absent from the design-based
 *                               ordering, or a data column has no recorded new position
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);

    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }

    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }

    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }

    // The samples of the first dimension are the starting point for the design-based ordering.
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }

    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());

    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }

    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;

        /*
         * Initialize the new bioassay list with placeholders so positions can be set in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }

        // Walk the bioassays in their current order, tracking the position ourselves so that no cast
        // of the collection to List is needed.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("Sample of bioassay " + bioAssay + " is not part of the design-based ordering");
            }
            resorted.set(newIndex, bioAssay);

            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }

        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;

        double[] data = converter.byteArrayToDoubles(v.getData());

        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay that was at oldPos now
         * sits at indexes.get(oldPos), so its value has to move to that same position. (Reading from
         * data[indexes.get(k)] instead would apply the inverse permutation and mismatch values and
         * bioassays whenever the permutation is not its own inverse.)
         */
        Double[] resortedData = new Double[data.length];
        for (int oldPos = 0; oldPos < data.length; oldPos++) {
            Integer newPos = indexes.get(oldPos);
            if (newPos == null) {
                throw new IllegalStateException("No new position recorded for data column " + oldPos + " of " + ee.getShortName());
            }
            resortedData[newPos] = data[oldPos];
        }

        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }

    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.
Shown below: the method comBat of the class ExpressionExperimentBatchCorrectionServiceImpl.
/**
 * Runs batch correction on the processed data of an experiment by building a matrix from its thawed
 * processed vectors and delegating to the matrix-based {@code comBat} overload.
 *
 * @param ee the experiment whose processed data should be corrected
 * @return the result of the matrix-based {@code comBat}, or {@code null} (after logging a warning)
 *         when no batch factor is found for the experiment
 */
@Override
public ExpressionDataDoubleMatrix comBat(ExpressionExperiment ee) {
    // Is there a batch factor to correct for at all?
    ExperimentalFactor batchFactor = this.getBatchFactor(ee);

    if (batchFactor != null) {
        // Extract the data: fetch the processed vectors, thaw them, and wrap them in a matrix.
        Collection<ProcessedExpressionDataVector> vectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
        processedExpressionDataVectorService.thaw(vectors);
        return this.comBat(new ExpressionDataDoubleMatrix(vectors));
    }

    ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found");
    return null;
}
Aggregations