Search in sources :

Example 46 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixUtil method subtractMatrices.

/**
 * Subtracts matrix b from matrix a, element by element. The result (a - b) is written back into a.
 * <p>
 * Ideally the two matrices are conformant, but rows are sometimes missing for some quantitation types, so this
 * method tolerates that case: values are looked up by design element and bioassay rather than by position, so the
 * rows and columns need not be in the same order. Any row of a whose design element has no row in b is left
 * unchanged and a warning is logged for it.
 * <p>
 * For each column, the first bioassay returned for that column of a is used as the lookup key in both matrices.
 *
 * @param a matrix a; modified in place
 * @param b matrix b; the values to subtract
 * @throws IllegalArgumentException if the two matrices have different column counts.
 */
public static void subtractMatrices(ExpressionDataDoubleMatrix a, ExpressionDataDoubleMatrix b) {
    if (a.columns() != b.columns()) {
        throw new IllegalArgumentException("Unequal column counts: " + a.columns() + " != " + b.columns());
    }
    final int columnCount = a.columns();
    for (ExpressionDataMatrixRowElement rowElement : a.getRowElements()) {
        int rowIndex = rowElement.getIndex();
        CompositeSequence designElement = rowElement.getDesignElement();

        // Rows that exist only in 'a' cannot be subtracted; leave them as they are.
        if (b.getRow(designElement) == null) {
            ExpressionDataDoubleMatrixUtil.log.warn("Matrix 'b' is missing a row for " + designElement + ", it will not be subtracted");
            continue;
        }

        for (int column = 0; column < columnCount; column++) {
            BioAssay bioAssay = a.getBioAssaysForColumn(column).iterator().next();
            double minuend = a.get(designElement, bioAssay);
            double subtrahend = b.get(designElement, bioAssay);
            a.set(rowIndex, column, minuend - subtrahend);
        }
    }
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 47 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixUtil method filterAndLog2Transform.

/**
 * Puts the data on a log2 scale if it is not already, then applies the filtering required prior to analysis.
 * <p>
 * The scale is determined by {@code findScale}. Natural-log and log10 data are converted to log2, linear data are
 * log transformed, and count data are converted to log2 counts per million (log2cpm is what is stored as the
 * processed data, so the count branch is not expected to be reached in normal use).
 * <p>
 * Afterwards low-variance rows are removed, and, for matrices with more than {@code COLUMNS_LIMIT} columns, rows
 * with too few distinct values are removed as well.
 *
 * @param quantitationType QT
 * @param dmatrix          matrix
 * @return the transformed and filtered matrix
 * @throws UnknownLogScaleException if the scale of the data is not one of the handled scale types.
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ExpressionDataDoubleMatrix result = dmatrix;
    ScaleType detectedScale = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, result.getMatrix());

    if (detectedScale.equals(ScaleType.LOG2)) {
        ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
    } else if (detectedScale.equals(ScaleType.LN)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
        MatrixStats.convertToLog2(result.getMatrix(), Math.E);
    } else if (detectedScale.equals(ScaleType.LOG10)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
        MatrixStats.convertToLog2(result.getMatrix(), 10);
    } else if (detectedScale.equals(ScaleType.LINEAR)) {
        ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
        MatrixStats.logTransform(result.getMatrix());
    } else if (detectedScale.equals(ScaleType.COUNT)) {
        // Since we store log2cpm this shouldn't be reached any more. Unlike the other branches, this one builds
        // a new matrix instead of converting in place.
        ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
        DoubleMatrix1D librarySizes = MatrixStats.colSums(result.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> log2CountsPerMillion = MatrixStats.convertToLog2Cpm(result.getMatrix(), librarySizes);
        result = new ExpressionDataDoubleMatrix(result, log2CountsPerMillion);
    } else {
        throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }

    // Filtering is done after the transformation because doing it first causes some kind of subtle problem
    // (round off? it was never really tracked down).
    //
    // Remove zero-variance rows, but also rows that have lots of equal values even if variance is non-zero. This
    // happens when data is "clipped" (e.g., all values under 10 set to 10).
    int rowsBeforeVarianceFilter = result.rows();
    result = ExpressionExperimentFilter.zeroVarianceFilter(result);
    int rowsAfterVarianceFilter = result.rows();
    if (rowsAfterVarianceFilter < rowsBeforeVarianceFilter) {
        ExpressionDataDoubleMatrixUtil.log.info((rowsBeforeVarianceFilter - rowsAfterVarianceFilter) + " rows removed due to low variance");
    }

    if (result.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        result = ExpressionExperimentFilter.tooFewDistinctValues(result, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        int rowsAfterDistinctFilter = result.rows();
        if (rowsAfterDistinctFilter < rowsAfterVarianceFilter) {
            ExpressionDataDoubleMatrixUtil.log.info((rowsAfterVarianceFilter - rowsAfterDistinctFilter) + " rows removed due to too many identical values");
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ScaleType(ubic.gemma.model.common.quantitationtype.ScaleType) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) UnknownLogScaleException(ubic.gemma.core.analysis.preprocess.UnknownLogScaleException)

Example 48 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixUtil method scalarDivideMatrix.

/**
 * Divides every value of the matrix by a scalar, in place.
 * <p>
 * Note that the parameter is named {@code dividend} for historical reasons, but it is the value that each matrix
 * entry is divided by (i.e. the divisor). For each column, the first bioassay returned for that column is used to
 * address the cell.
 *
 * @param matrix   matrix; modified in place
 * @param dividend the value to divide each entry by
 * @throws IllegalArgumentException if dividend == 0.
 */
public static void scalarDivideMatrix(ExpressionDataDoubleMatrix matrix, double dividend) {
    if (dividend == 0) {
        throw new IllegalArgumentException("Can't divide by zero");
    }
    final int columnCount = matrix.columns();
    for (ExpressionDataMatrixRowElement rowElement : matrix.getRowElements()) {
        CompositeSequence designElement = rowElement.getDesignElement();
        for (int column = 0; column < columnCount; column++) {
            BioAssay bioAssay = matrix.getBioAssaysForColumn(column).iterator().next();
            double currentValue = matrix.get(designElement, bioAssay);
            matrix.set(designElement, bioAssay, currentValue / dividend);
        }
    }
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 49 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GenericGenelistDesignGenerator method getExistingGeneMap.

/**
 * For gene symbols. Builds a map from gene to the platform element that already represents it.
 * <p>
 * Each element of the platform that has exactly one gene is mapped from that gene. When an element has more than
 * one gene (two genes with the same symbol, possibly a mistake from an earlier run), the gene whose official symbol
 * equals the element's name is used; if several match, the last one encountered wins. Elements for which no gene
 * can be chosen are skipped rather than being stored under a null key.
 *
 * @param arrayDesign the platform whose existing elements should be examined
 * @return map of gene to the existing element for that gene; empty if the platform has no elements
 */
private Map<Gene, CompositeSequence> getExistingGeneMap(ArrayDesign arrayDesign) {
    Map<Gene, CompositeSequence> existingElements = new HashMap<>();
    if (arrayDesign.getCompositeSequences().isEmpty()) {
        return existingElements;
    }
    AbstractCLI.log.info("Loading genes for existing platform ...");
    Map<CompositeSequence, Collection<Gene>> geneMap = compositeSequenceService.getGenes(arrayDesign.getCompositeSequences());
    AbstractCLI.log.info("Platform has genes already for " + geneMap.size() + "/" + arrayDesign.getCompositeSequences().size() + " elements.");
    for (Map.Entry<CompositeSequence, Collection<Gene>> entry : geneMap.entrySet()) {
        CompositeSequence cs = entry.getKey();
        Collection<Gene> genes = entry.getValue();

        // Nothing to map for this element; calling iterator().next() on an empty collection would throw.
        if (genes == null || genes.isEmpty()) {
            continue;
        }

        Gene g = null;
        if (genes.size() > 1) {
            /*
             * Two genes with the same symbol, but might be a mistake from an earlier run.
             */
            AbstractCLI.log.warn("More than one gene for: " + cs + ": " + StringUtils.join(genes, ";"));
            String elementName = cs.getName();
            for (Gene cg : genes) {
                // Compare from the element name side so a gene lacking an official symbol cannot cause an NPE.
                if (elementName != null && elementName.equals(cg.getOfficialSymbol())) {
                    g = cg;
                }
            }
        } else {
            g = genes.iterator().next();
        }

        // Without this guard an unmatched element would be stored under a null key, and each further unmatched
        // element would silently overwrite the previous one.
        if (g == null) {
            AbstractCLI.log.warn("No gene with a symbol matching the element name for: " + cs + ", it will be skipped");
            continue;
        }
        existingElements.put(g, cs);
    }
    return existingElements;
}
Also used : Gene(ubic.gemma.model.genome.Gene) HashMap(java.util.HashMap) Collection(java.util.Collection) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 50 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class BaseExpressionDataMatrix method selectVectors.

/**
 * Selects all the vectors passed in (uses them to initialize the data).
 * <p>
 * The vectors are first sorted by design element. Then, for each vector in that order, its bioassay dimension is
 * recorded under its design element and the design element is registered in the row maps at the next row index.
 * The expression experiment is taken from the first vector if it has not been set yet. The quantitation type of
 * the first vector is added to this matrix's quantitation types.
 *
 * @param vectors the vectors to initialize from
 * @throws IllegalArgumentException if a vector's quantitation type is not the same instance as the first one seen.
 */
void selectVectors(Collection<? extends DesignElementDataVector> vectors) {
    List<DesignElementDataVector> orderedVectors = this.sortVectorsByDesignElement(vectors);
    QuantitationType expectedQuantitationType = null;
    int rowIndex = 0;
    for (DesignElementDataVector vector : orderedVectors) {
        if (this.expressionExperiment == null) {
            this.expressionExperiment = vector.getExpressionExperiment();
        }

        QuantitationType currentQuantitationType = vector.getQuantitationType();
        CompositeSequence designElement = vector.getDesignElement();
        this.bioAssayDimensions.put(designElement, vector.getBioAssayDimension());

        if (expectedQuantitationType == null) {
            // First quantitation type encountered: remember it and register it with this matrix.
            expectedQuantitationType = currentQuantitationType;
            this.getQuantitationTypes().add(currentQuantitationType);
        } else if (expectedQuantitationType != currentQuantitationType) {
            // Identity comparison: every vector must carry the very same quantitation type instance.
            throw new IllegalArgumentException("Cannot pass vectors from more than one quantitation type");
        }

        this.addToRowMaps(rowIndex++, designElement);
    }
}
Also used : DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 206, ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 43, BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 40, Gene (ubic.gemma.model.genome.Gene): 32, Test (org.junit.Test): 30, BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 19, ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 18, BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 18, DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 18, RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 18, StopWatch (org.apache.commons.lang3.time.StopWatch): 17, HashSet (java.util.HashSet): 15, BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 15, CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject): 15, ArrayList (java.util.ArrayList): 14, QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 14, BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 13, Taxon (ubic.gemma.model.genome.Taxon): 12, Collection (java.util.Collection): 11, ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11