use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method mergeVectors.
@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
if (arrayDesigns.size() > 1) {
throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
}
ee = expressionExperimentService.thaw(ee);
Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");
/*
* Load all the bioassay dimensions, which will be merged.
*/
Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
for (BioAssay ba : ee.getBioAssays()) {
Collection<BioAssayDimension> oldBioAssayDims = bioAssayService.findBioAssayDimensions(ba);
for (BioAssayDimension bioAssayDim : oldBioAssayDims) {
if (bioAssayDim.getDescription().startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
// not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
continue;
}
allOldBioAssayDims.add(bioAssayDim);
}
}
if (allOldBioAssayDims.size() == 0) {
throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
}
if (allOldBioAssayDims.size() == 1) {
VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
return ee;
}
VectorMergingServiceImpl.log.info(allOldBioAssayDims.size() + " bioAssayDimensions to merge");
List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
int totalBioAssays = newBioAd.getBioAssays().size();
assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;
Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);
/*
* This will run into problems if there are excess quantitation types
*/
int numSuccessfulMergers = 0;
for (QuantitationType type : qt2Vec.keySet()) {
Collection<RawExpressionDataVector> oldVecs = qt2Vec.get(type);
if (oldVecs.isEmpty()) {
VectorMergingServiceImpl.log.warn("No vectors for " + type);
continue;
}
Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
if (deVMap == null) {
VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
continue;
}
VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors for " + type);
Collection<RawExpressionDataVector> newVectors = new HashSet<>();
int numAllMissing = 0;
int missingValuesForQt = 0;
for (CompositeSequence de : deVMap.keySet()) {
RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);
Collection<RawExpressionDataVector> dedvs = deVMap.get(de);
/*
* these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
* old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
* the values for that dimension with missing data. We go through the dimensions in the same order that
* we joined them up.
*/
List<Object> data = new ArrayList<>();
int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
missingValuesForQt += totalMissingInVector;
if (totalMissingInVector == totalBioAssays) {
numAllMissing++;
// we don't save data that is all missing.
continue;
}
if (data.size() != totalBioAssays) {
throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
}
byte[] newDataAr = converter.toBytes(data.toArray());
vector.setData(newDataAr);
newVectors.add(vector);
}
// TRANSACTION
vectorMergingHelperService.persist(ee, type, newVectors);
if (numAllMissing > 0) {
VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
}
if (missingValuesForQt > 0) {
VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
}
VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
rawExpressionDataVectorService.remove(oldVecs);
ee.getRawExpressionDataVectors().removeAll(oldVecs);
numSuccessfulMergers++;
}
if (numSuccessfulMergers == 0) {
/*
* Try to clean up
*/
this.bioAssayDimensionService.remove(newBioAd);
throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
}
expressionExperimentService.update(ee);
// Several transactions
this.cleanUp(ee, allOldBioAssayDims, newBioAd);
// transaction
this.audit(ee, "Vector merging performed, merged " + allOldBioAssayDims + " old bioassay dimensions for " + qts.size() + " quantitation types.");
// several transactions
try {
preprocessorService.process(ee);
} catch (PreprocessingException e) {
VectorMergingServiceImpl.log.error("Error during postprocessing", e);
}
return ee;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method checkCorrectability.
@Override
public boolean checkCorrectability(ExpressionExperiment ee, boolean force) {
for (QuantitationType qt : expressionExperimentService.getQuantitationTypes(ee)) {
if (qt.getIsBatchCorrected()) {
ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment already has a batch-corrected quantitation type: " + ee + ": " + qt);
return false;
}
}
ExperimentalFactor batch = this.getBatchFactor(ee);
if (batch == null) {
ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found: " + ee);
return false;
}
String bConf = expressionExperimentService.getBatchConfound(ee);
if (bConf != null && !force) {
ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment can not be batch corrected: " + bConf);
ExpressionExperimentBatchCorrectionServiceImpl.log.info("To force batch-correction of a confounded experiment, use the force option (note, that this option also allows outliers while batch correcting).");
return false;
}
/*
* Make sure we have at least two samples per batch. This generally won't happen if batches were defined by
* Gemma.
*/
Map<Long, Integer> batches = new HashMap<>();
Set<BioMaterial> seen = new HashSet<>();
for (BioAssay ba : ee.getBioAssays()) {
BioMaterial bm = ba.getSampleUsed();
if (seen.contains(bm))
continue;
seen.add(bm);
for (FactorValue fv : bm.getFactorValues()) {
if (fv.getExperimentalFactor().equals(batch)) {
Long batchId = fv.getId();
if (!batches.containsKey(batchId))
batches.put(batchId, 0);
batches.put(batchId, batches.get(batchId) + 1);
}
}
}
/*
* consider merging batches. - we already do this when we create the batch factor, so in general batches should
* always have at least 2 samples
*/
for (Long batchId : batches.keySet()) {
if (batches.get(batchId) < 2) {
ExpressionExperimentBatchCorrectionServiceImpl.log.info("Batch with only one sample detected, correction not possible: " + ee + ", batchId=" + batchId);
return false;
}
}
return true;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method doComBat.
private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings(design);
DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
designU = this.orderMatrix(matrix, designU);
ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
boolean transformed = false;
if (!(scale.equals(ScaleType.LOG2) || scale.equals(ScaleType.LOG10) || scale.equals(ScaleType.LOGBASEUNKNOWN) || scale.equals(ScaleType.LN))) {
ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
transformed = true;
MatrixStats.logTransform(matrix);
}
/*
* Process
*/
ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(matrix, designU);
// false: NONPARAMETRIC
DoubleMatrix2D results = comBat.run(true);
// note these plots always reflect the parametric setup.
// TEMPORARY?
comBat.plot(ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName()));
/*
* Postprocess. Results is a raw matrix/
*/
DoubleMatrix<CompositeSequence, BioMaterial> correctedDataMatrix = new DenseDoubleMatrix<>(results.toArray());
correctedDataMatrix.setRowNames(matrix.getRowNames());
correctedDataMatrix.setColumnNames(matrix.getColNames());
if (transformed) {
MatrixStats.unLogTransform(correctedDataMatrix);
}
ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, correctedDataMatrix);
assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;
/*
* It is easier if we make a new quantitationtype.
*/
QuantitationType oldQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
QuantitationType newQt = this.makeNewQuantitationType(oldQt);
correctedExpressionDataMatrix.getQuantitationTypes().clear();
correctedExpressionDataMatrix.getQuantitationTypes().add(newQt);
// Sanity check...
for (int i = 0; i < correctedExpressionDataMatrix.columns(); i++) {
assert correctedExpressionDataMatrix.getBioMaterialForColumn(i).equals(originalDataMatrix.getBioMaterialForColumn(i));
}
return correctedExpressionDataMatrix;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method regressionResiduals.
/**
* @param matrix on which to perform regression.
* @param config containing configuration of factors to include. Any interactions or subset configuration is
* ignored. Data are <em>NOT</em> log transformed unless they come in that way. (the qValueThreshold will be
* ignored)
* @param retainScale if true, the data retain the global mean (intercept)
* @return residuals from the regression.
*/
@Override
public ExpressionDataDoubleMatrix regressionResiduals(ExpressionDataDoubleMatrix matrix, DifferentialExpressionAnalysisConfig config, boolean retainScale) {
if (config.getFactorsToInclude().isEmpty()) {
LinearModelAnalyzer.log.warn("No factors");
return matrix;
}
/*
* Note that this method relies on similar code to doAnalysis, for the setup stages.
*/
List<ExperimentalFactor> factors = config.getFactorsToInclude();
List<BioMaterial> samplesUsed = ExperimentalDesignUtils.getOrderedSamples(matrix, factors);
Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
DesignMatrix properDesignMatrix = new DesignMatrix(designMatrix, true);
ExpressionDataDoubleMatrix dmatrix = new ExpressionDataDoubleMatrix(samplesUsed, matrix);
DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
// perform weighted least squares regression on COUNT data
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
LeastSquaresFit fit;
if (quantitationType.getScale().equals(ScaleType.COUNT)) {
LinearModelAnalyzer.log.info("Calculating residuals of weighted least squares regression on COUNT data");
// note: data is not log transformed
DoubleMatrix1D librarySize = MatrixStats.colSums(sNamedMatrix);
MeanVarianceEstimator mv = new MeanVarianceEstimator(properDesignMatrix, sNamedMatrix, librarySize);
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix, mv.getWeights());
} else {
fit = new LeastSquaresFit(properDesignMatrix, sNamedMatrix);
}
DoubleMatrix2D residuals = fit.getResiduals();
if (retainScale) {
DoubleMatrix1D intercept = fit.getCoefficients().viewRow(0);
for (int i = 0; i < residuals.rows(); i++) {
residuals.viewRow(i).assign(Functions.plus(intercept.get(i)));
}
}
DoubleMatrix<CompositeSequence, BioMaterial> f = new DenseDoubleMatrix<>(residuals.toArray());
f.setRowNames(dmatrix.getMatrix().getRowNames());
f.setColumnNames(dmatrix.getMatrix().getColNames());
return new ExpressionDataDoubleMatrix(dmatrix, f);
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class PreprocessorServiceImpl method checkQuantitationType.
private void checkQuantitationType(ExpressionDataDoubleMatrix correctedData) {
Collection<QuantitationType> qts = correctedData.getQuantitationTypes();
assert !qts.isEmpty();
if (qts.size() > 1) {
throw new IllegalArgumentException("Only support a single quantitation type");
}
QuantitationType batchCorrectedQt = qts.iterator().next();
assert batchCorrectedQt.getIsBatchCorrected();
}
Aggregations