use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixUtil method subtractMatrices.
/**
 * Subtracts matrix b from matrix a in place; afterwards a holds (a - b).
 * <p>
 * The two matrices must have the same number of columns. Values are fetched by design element and by the first
 * bioassay of each column of a, not by position in b. The iteration is driven by the rows of a: when b has no
 * row for a design element of a, that row of a is left unchanged and a warning is logged.
 *
 * @param a matrix a; receives the result
 * @param b matrix b; the values to subtract
 * @throws IllegalArgumentException if the two matrices have different column counts.
 */
public static void subtractMatrices(ExpressionDataDoubleMatrix a, ExpressionDataDoubleMatrix b) {
    final int columnCount = a.columns();
    if (columnCount != b.columns()) {
        throw new IllegalArgumentException("Unequal column counts: " + columnCount + " != " + b.columns());
    }

    for (ExpressionDataMatrixRowElement rowElement : a.getRowElements()) {
        final int rowIndex = rowElement.getIndex();
        final CompositeSequence designElement = rowElement.getDesignElement();

        if (b.getRow(designElement) != null) {
            for (int col = 0; col < columnCount; col++) {
                // The first bioassay of the column in 'a' is the key used to look up the value in both matrices.
                BioAssay columnAssay = a.getBioAssaysForColumn(col).iterator().next();
                double minuend = a.get(designElement, columnAssay);
                double subtrahend = b.get(designElement, columnAssay);
                a.set(rowIndex, col, minuend - subtrahend);
            }
        } else {
            // Nothing to subtract for this row; leave the values of 'a' as they are.
            ExpressionDataDoubleMatrixUtil.log.warn("Matrix 'b' is missing a row for " + designElement + ", it will not be subtracted");
        }
    }
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.
/**
 * Brings the data onto a log2 scale if it is not there already, then applies the row filters needed before
 * analysis.
 * <p>
 * Natural-log, log10 and linear data are converted on the underlying matrix of the given object. Count data is
 * converted to log2 counts per million and wrapped in a new matrix object instead of being converted in place
 * (log2cpm is what gets stored as processed data, so this branch is not expected to be hit in practice).
 *
 * @param quantitationType QT, used together with the data to work out the current scale
 * @param dmatrix          matrix
 * @return the transformed and filtered matrix; this may be a different object than the one passed in
 * @throws UnknownLogScaleException if the scale of the data is not one of the handled scale types
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ExpressionDataDoubleMatrix working = dmatrix;
    ScaleType scaleType = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, working.getMatrix());

    if (scaleType.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
    } else if (scaleType.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(working.getMatrix(), Math.E);
    } else if (scaleType.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(working.getMatrix(), 10);
    } else if (scaleType.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(working.getMatrix());
    } else if (scaleType.equals(ScaleType.COUNT)) {
        // Not done in place: the log2cpm values go into a new matrix object built from the old one.
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D columnTotals = MatrixStats.colSums(working.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> countsPerMillion = MatrixStats.convertToLog2Cpm(working.getMatrix(), columnTotals);
        working = new ExpressionDataDoubleMatrix(working, countsPerMillion);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }

    /*
     * Filtering deliberately comes after the transformation: doing it first was observed to cause a subtle
     * problem (possibly round-off) that was never tracked down.
     *
     * First drop zero-variance rows.
     */
    int rowsBeforeVarianceFilter = working.rows();
    working = ExpressionExperimentFilter.zeroVarianceFilter(working);
    int droppedForVariance = rowsBeforeVarianceFilter - working.rows();
    if (droppedForVariance > 0) {
        ExpressionDataDoubleMatrixUtil.log.info(droppedForVariance + " rows removed due to low variance");
    }

    /*
     * Then, only for matrices with more columns than the limit, drop rows that have many equal values even
     * though their variance is non-zero. This happens when data is "clipped" (e.g., all values under 10 set to 10).
     */
    if (working.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        int rowsBeforeDistinctFilter = working.rows();
        working = ExpressionExperimentFilter.tooFewDistinctValues(working, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        int droppedForIdenticalValues = rowsBeforeDistinctFilter - working.rows();
        if (droppedForIdenticalValues > 0) {
            ExpressionDataDoubleMatrixUtil.log.info(droppedForIdenticalValues + " rows removed due to too many identical values");
        }
    }

    return working;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixUtil method scalarDivideMatrix.
/**
 * Divides every value of the matrix by a scalar, writing the quotients back into the same matrix.
 *
 * @param matrix   matrix; modified in place
 * @param dividend the number every value is divided by (despite its name, this is the divisor)
 * @throws IllegalArgumentException if dividend == 0.
 */
public static void scalarDivideMatrix(ExpressionDataDoubleMatrix matrix, double dividend) {
    if (dividend == 0) {
        throw new IllegalArgumentException("Can't divide by zero");
    }

    final int columnCount = matrix.columns();
    for (ExpressionDataMatrixRowElement rowElement : matrix.getRowElements()) {
        final CompositeSequence designElement = rowElement.getDesignElement();
        for (int col = 0; col < columnCount; col++) {
            // Cells are addressed by design element and the first bioassay of the column.
            BioAssay columnAssay = matrix.getBioAssaysForColumn(col).iterator().next();
            double current = matrix.get(designElement, columnAssay);
            matrix.set(designElement, columnAssay, current / dividend);
        }
    }
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GenericGenelistDesignGenerator method getExistingGeneMap.
/**
 * For gene symbols. Builds a map from gene to the platform element that already represents it.
 * <p>
 * When an element is associated with more than one gene (possibly left over from an earlier run), the gene
 * whose official symbol equals the element name is used; if several match, the last one encountered wins.
 * Elements for which no gene can be chosen (no genes at all, or several genes none of which matches the element
 * name) are skipped, so the returned map never contains a null key.
 *
 * @param arrayDesign the platform whose existing elements are examined
 * @return map of gene to existing element; empty if the platform has no elements
 */
private Map<Gene, CompositeSequence> getExistingGeneMap(ArrayDesign arrayDesign) {
    Map<Gene, CompositeSequence> existingElements = new HashMap<>();
    if (arrayDesign.getCompositeSequences().isEmpty()) {
        return existingElements;
    }

    AbstractCLI.log.info("Loading genes for existing platform ...");
    Map<CompositeSequence, Collection<Gene>> geneMap = compositeSequenceService.getGenes(arrayDesign.getCompositeSequences());
    AbstractCLI.log.info("Platform has genes already for " + geneMap.size() + "/" + arrayDesign.getCompositeSequences().size() + " elements.");

    for (Map.Entry<CompositeSequence, Collection<Gene>> entry : geneMap.entrySet()) {
        CompositeSequence cs = entry.getKey();
        Collection<Gene> genes = entry.getValue();

        // An element without genes has nothing to contribute; iterator().next() would throw on it.
        if (genes == null || genes.isEmpty()) {
            continue;
        }

        Gene g = null;
        if (genes.size() > 1) {
            /*
             * Two genes with the same symbol, but might be a mistake from an earlier run.
             */
            AbstractCLI.log.warn("More than one gene for: " + cs + ": " + StringUtils.join(genes, ";"));
            for (Gene cg : genes) {
                // Guard against genes without an official symbol.
                if (cg.getOfficialSymbol() != null && cg.getOfficialSymbol().equals(cs.getName())) {
                    g = cg;
                }
            }
        } else {
            g = genes.iterator().next();
        }

        // Without this check an unresolved element would be stored under a null key,
        // and later unresolved elements would overwrite it.
        if (g == null) {
            AbstractCLI.log.warn("Could not pick a gene for: " + cs + ", it will be skipped");
            continue;
        }
        existingElements.put(g, cs);
    }
    return existingElements;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class BaseExpressionDataMatrix method selectVectors.
/**
 * Selects all the vectors passed in (uses them to initialize the data).
 * <p>
 * The vectors are first sorted by design element; each vector is then assigned the next row index in that
 * order. Along the way the bioassay dimension of each vector is recorded for its design element, the
 * expression experiment is taken from the first vector if it has not been set yet, and the quantitation type of
 * the vectors is added to this matrix's quantitation types.
 *
 * @param vectors the vectors to use; all must share one quantitation type
 * @throws IllegalArgumentException if the vectors come from more than one quantitation type
 */
void selectVectors(Collection<? extends DesignElementDataVector> vectors) {
    QuantitationType quantitationType = null;
    int i = 0;
    List<DesignElementDataVector> sorted = this.sortVectorsByDesignElement(vectors);
    for (DesignElementDataVector vector : sorted) {
        if (this.expressionExperiment == null) {
            this.expressionExperiment = vector.getExpressionExperiment();
        }
        QuantitationType vectorQuantitationType = vector.getQuantitationType();
        CompositeSequence designElement = vector.getDesignElement();
        this.bioAssayDimensions.put(designElement, vector.getBioAssayDimension());

        if (quantitationType == null) {
            quantitationType = vectorQuantitationType;
            this.getQuantitationTypes().add(vectorQuantitationType);
        } else if (!quantitationType.equals(vectorQuantitationType)) {
            // Compare with equals() rather than by reference, so that two distinct instances representing the
            // same quantitation type are not rejected.
            // NOTE(review): relies on QuantitationType.equals expressing logical identity - confirm.
            throw new IllegalArgumentException("Cannot pass vectors from more than one quantitation type");
        }

        this.addToRowMaps(i, designElement);
        i++;
    }
}
Aggregations