use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.
the class LoadSimpleExpressionDataCli method configureQuantitationType.
/**
 * Fills in the quantitation type portion of the metadata from the supplied field values.
 * <p>
 * The name, description, standard type and scale are taken from the positions given by the {@code Q_NAME_I},
 * {@code Q_DESCRIPTION_I}, {@code Q_TYPE_I} and {@code Q_SCALE_I} constants; the general type is always set to
 * {@code GeneralType.QUANTITATIVE}.
 *
 * @param fields   field values; must be long enough to contain all of the indexed positions
 * @param metaData metadata object that receives the settings
 */
private void configureQuantitationType(String[] fields, SimpleExpressionExperimentMetaData metaData) {
    // Free-text attributes are copied over as-is.
    metaData.setQuantitationTypeName(fields[LoadSimpleExpressionDataCli.Q_NAME_I]);
    metaData.setQuantitationTypeDescription(fields[LoadSimpleExpressionDataCli.Q_DESCRIPTION_I]);

    // The general type is not read from the input.
    metaData.setGeneralType(GeneralType.QUANTITATIVE);

    // Typed attributes are resolved from their string form.
    metaData.setType(StandardQuantitationType.fromString(fields[LoadSimpleExpressionDataCli.Q_TYPE_I]));
    metaData.setScale(ScaleType.fromString(fields[LoadSimpleExpressionDataCli.Q_SCALE_I]));
}
use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.
/**
 * Puts the data on a log2 scale if it is not already, then filters out uninformative rows.
 * <p>
 * The scale is obtained from {@code findScale}. Data reported as ln or log10 is converted to log2, linear data is
 * log transformed, and count data is converted to log2 counts per million (log2cpm is what is stored as the
 * processed data, so the count branch should not normally be reached). Any other scale is rejected.
 * <p>
 * Filtering is done after the transformation: zero-variance rows are removed and, when the matrix has more than
 * {@code COLUMNS_LIMIT} columns, rows with too few distinct values are removed as well.
 *
 * @param quantitationType QT
 * @param dmatrix          matrix
 * @return ee data double matrix, transformed and filtered
 * @throws UnknownLogScaleException if the scale is not one of the handled scales
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ExpressionDataDoubleMatrix working = dmatrix;
    ScaleType detectedScale = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, working.getMatrix());

    if (detectedScale.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
    } else if (detectedScale.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(working.getMatrix(), Math.E);
    } else if (detectedScale.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(working.getMatrix(), 10);
    } else if (detectedScale.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(working.getMatrix());
    } else if (detectedScale.equals(ScaleType.COUNT)) {
        // Not expected to be reached now that log2cpm is stored. Unlike the branches above, the result is a new
        // matrix object rather than an in-place change.
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D columnTotals = MatrixStats.colSums(working.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpm = MatrixStats.convertToLog2Cpm(working.getMatrix(), columnTotals);
        working = new ExpressionDataDoubleMatrix(working, cpm);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }

    // Filtering deliberately comes after the transformation; doing it first was observed to cause a subtle
    // problem (possibly round-off) that was never tracked down.
    //
    // Besides zero-variance rows, rows with many equal values are removed even when their variance is non-zero;
    // this happens with "clipped" data (e.g., all values under 10 set to 10).
    final int rowsBeforeVarianceFilter = working.rows();
    working = ExpressionExperimentFilter.zeroVarianceFilter(working);
    final int rowsAfterVarianceFilter = working.rows();
    final int removedForVariance = rowsBeforeVarianceFilter - rowsAfterVarianceFilter;
    if (removedForVariance > 0) {
        ExpressionDataDoubleMatrixUtil.log.info(removedForVariance + " rows removed due to low variance");
    }

    if (working.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        working = ExpressionExperimentFilter.tooFewDistinctValues(working, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        final int removedForIdenticalValues = rowsAfterVarianceFilter - working.rows();
        if (removedForIdenticalValues > 0) {
            ExpressionDataDoubleMatrixUtil.log.info(removedForIdenticalValues + " rows removed due to too many identical values");
        }
    }

    return working;
}
use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.
the class ExpressionExperimentFilter method isLogTransformed.
/**
 * Decides whether the data should be treated as log transformed.
 * <p>
 * Checked in order: (1) any quantitation type declaring a log scale (ln, log10, log2 or log of unknown base);
 * (2) whether the data is two-color, in which case it is assumed to be log transformed; (3) the values
 * themselves, where any value above 20 is taken as evidence of untransformed data. If none of these is
 * conclusive the data is assumed to be log transformed.
 *
 * @param eeDoubleMatrix the matrix
 * @return true if the data looks like it is already log transformed, false otherwise
 */
private boolean isLogTransformed(ExpressionDataDoubleMatrix eeDoubleMatrix) {
    // 1. Trust the quantitation type when it declares a log scale.
    for (QuantitationType quantitationType : eeDoubleMatrix.getQuantitationTypes()) {
        ScaleType declaredScale = quantitationType.getScale();
        boolean declaredAsLog = declaredScale.equals(ScaleType.LN)
                || declaredScale.equals(ScaleType.LOG10)
                || declaredScale.equals(ScaleType.LOG2)
                || declaredScale.equals(ScaleType.LOGBASEUNKNOWN);
        if (declaredAsLog) {
            ExpressionExperimentFilter.log.info("Quantitationtype says the data is already log transformed");
            return true;
        }
    }

    // 2. Two-color data is assumed to be log transformed.
    if (this.isTwoColor()) {
        ExpressionExperimentFilter.log.info("Data is from a two-color array, assuming it is log transformed");
        return true;
    }

    // 3. Fall back on the data: a single value above 20 is enough to conclude it is not log transformed.
    for (int row = 0; row < eeDoubleMatrix.rows(); row++) {
        for (int col = 0; col < eeDoubleMatrix.columns(); col++) {
            if (eeDoubleMatrix.get(row, col) > 20) {
                ExpressionExperimentFilter.log.info("Data has large values, doesn't look log transformed");
                return false;
            }
        }
    }

    ExpressionExperimentFilter.log.info("Data looks log-transformed, but not sure...assuming it is");
    return true;
}
use of ubic.gemma.model.common.quantitationtype.ScaleType in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method doComBat.
/**
 * Runs ComBat on the given data and wraps the corrected values in a new expression data matrix.
 * <p>
 * If the scale of the first quantitation type is not one of the log scales, the data is log transformed before
 * ComBat is run and the transformation is undone on the corrected values afterwards. The returned matrix carries
 * a single, newly made quantitation type in place of the original one.
 *
 * @param ee                 experiment; its id and short name are used to name the diagnostic plots
 * @param originalDataMatrix data to correct
 * @param design             design matrix of factor values per biomaterial
 * @return matrix holding the batch-corrected data
 */
private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
    ObjectMatrix<BioMaterial, String, Object> stringDesign = this.convertFactorValuesToStrings(design);
    DoubleMatrix<CompositeSequence, BioMaterial> dataMatrix = originalDataMatrix.getMatrix();
    ObjectMatrix<BioMaterial, String, Object> orderedDesign = this.orderMatrix(dataMatrix, stringDesign);

    // Only the first quantitation type is consulted to decide whether the data is already on a log scale.
    ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
    final boolean alreadyLogScale = scale.equals(ScaleType.LOG2)
            || scale.equals(ScaleType.LOG10)
            || scale.equals(ScaleType.LOGBASEUNKNOWN)
            || scale.equals(ScaleType.LN);
    if (!alreadyLogScale) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
        MatrixStats.logTransform(dataMatrix);
    }

    // Process. Passing false instead would select the non-parametric mode.
    ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(dataMatrix, orderedDesign);
    DoubleMatrix2D rawResults = comBat.run(true);

    // Note these plots always reflect the parametric setup. Plotting here may be temporary.
    String plotName = ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName());
    comBat.plot(plotName);

    // Postprocess. The results are a raw matrix, so row and column names are carried over from the input.
    DoubleMatrix<CompositeSequence, BioMaterial> corrected = new DenseDoubleMatrix<>(rawResults.toArray());
    corrected.setRowNames(dataMatrix.getRowNames());
    corrected.setColumnNames(dataMatrix.getColNames());
    if (!alreadyLogScale) {
        // Undo the log transform that was applied before running ComBat.
        MatrixStats.unLogTransform(corrected);
    }

    ExpressionDataDoubleMatrix correctedExpressionDataMatrix = new ExpressionDataDoubleMatrix(originalDataMatrix, corrected);
    assert correctedExpressionDataMatrix.getQuantitationTypes().size() == 1;

    // It is easier to make a new quantitation type than to reuse the old one.
    QuantitationType previousQt = correctedExpressionDataMatrix.getQuantitationTypes().iterator().next();
    QuantitationType replacementQt = this.makeNewQuantitationType(previousQt);
    correctedExpressionDataMatrix.getQuantitationTypes().clear();
    correctedExpressionDataMatrix.getQuantitationTypes().add(replacementQt);

    // Sanity check: columns must still correspond to the same biomaterials as in the input.
    for (int column = 0; column < correctedExpressionDataMatrix.columns(); column++) {
        assert correctedExpressionDataMatrix.getBioMaterialForColumn(column).equals(originalDataMatrix.getBioMaterialForColumn(column));
    }

    return correctedExpressionDataMatrix;
}
Aggregations