Search in sources :

Example 96 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataStringMatrix method createMatrix.

/**
 * Builds a string matrix from the given data vectors.
 * <p>
 * The matrix has one row per entry of {@code rowDesignElementMapByInteger} and {@code maxSize} columns.
 * Columns are named by their index and every cell starts out as the empty string. Each vector's byte data is
 * then decoded into strings, and each string is stored under the row index looked up for the vector's design
 * element and the column looked up for the corresponding bioassay.
 *
 * @param vectors the vectors whose data populate the matrix
 * @param maxSize the number of columns to allocate
 * @return the populated matrix
 */
private StringMatrix<Integer, Integer> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    StringMatrix<Integer, Integer> result = new StringMatrix<>(rowCount, maxSize);

    // Columns are named after their own index.
    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(col);
    }

    // Every cell starts as "" so positions no vector writes to hold an empty string.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, "");
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence probe = vector.getDesignElement();
        assert probe != null : "No designelement for " + vector;

        Integer rowKey = this.rowElementMap.get(probe);
        assert rowKey != null;
        result.addRowName(rowKey);

        String[] values = converter.byteArrayToStrings(vector.getData());
        Collection<BioAssay> assays = vector.getBioAssayDimension().getBioAssays();
        assert assays.size() == values.length : "Expected " + values.length + " got " + assays.size();

        // The k-th decoded value belongs to the k-th bioassay in iteration order.
        int k = 0;
        for (BioAssay assay : assays) {
            Integer columnKey = this.columnAssayMap.get(assay);
            assert columnKey != null;
            result.setByKeys(rowKey, columnKey, values[k]);
            k++;
        }
    }

    ExpressionDataStringMatrix.log.debug("Created a " + result.rows() + " x " + result.columns() + " matrix");
    return result;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) StringMatrix(ubic.basecode.dataStructure.matrix.StringMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 97 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class MatrixWriter method writeWithStringifiedGeneAnnotations.

/**
 * Writes the matrix as tab-delimited text, one line per matrix row, using gene annotations that are already
 * formatted as strings. The whole output is assembled in memory, written to {@code writer} in one call, and
 * the writer is flushed (but not closed).
 *
 * @param writer          the writer to use
 * @param matrix          the matrix
 * @param geneAnnotations Map of composite sequences to an array of delimited strings: [probe name,genes symbol,
 *                        gene Name] -- these include the "|" to indicate multiple genes, and originate in the platform annotation
 *                        files.
 * @param writeHeader     whether to write the header before the data rows
 * @param writeSequence   whether to write sequence
 * @param writeGeneInfo   whether to write gene info
 * @param orderByDesign   if true, the columns are in the order defined by
 *                        ExpressionDataMatrixColumnSort.orderByExperimentalDesign
 * @throws IOException when the write failed
 */
// Possible external use
@SuppressWarnings({ "unused", "WeakerAccess" })
public void writeWithStringifiedGeneAnnotations(Writer writer, ExpressionDataMatrix<?> matrix, Map<CompositeSequence, String[]> geneAnnotations, boolean writeHeader, boolean writeSequence, boolean writeGeneInfo, boolean orderByDesign) throws IOException {
    int rows = matrix.rows();
    List<BioMaterial> orderedBioMaterials = this.getBioMaterialsInRequestedOrder(matrix, orderByDesign);
    // Position of the final data column; it is the same for every row, so compute it once.
    int lastColumnPosition = orderedBioMaterials.size() - 1;
    // StringBuffer is kept because the helper methods called below receive this buffer.
    StringBuffer buf = new StringBuffer();
    if (writeHeader) {
        this.writeHeader(orderedBioMaterials, matrix, geneAnnotations, writeSequence, writeGeneInfo, buf);
    }
    for (int j = 0; j < rows; j++) {
        CompositeSequence probeForRow = matrix.getDesignElementForRow(j);
        buf.append(probeForRow.getName()).append("\t");
        this.writeSequence(writeSequence, buf, probeForRow);
        if (writeGeneInfo) {
            this.addGeneInfoFromStrings(buf, probeForRow, geneAnnotations);
        }
        // Track the position explicitly rather than calling List.indexOf per cell: indexOf is a linear scan
        // and returns the first equal element, which misplaces separators if two entries are equal.
        int position = 0;
        for (BioMaterial bioMaterial : orderedBioMaterials) {
            int i = matrix.getColumnIndex(bioMaterial);
            Object val = matrix.get(j, i);
            buf.append(val);
            // Don't want line to contain a trailing unnecessary tab
            if (position < lastColumnPosition) {
                buf.append("\t");
            }
            position++;
        }
        buf.append("\n");
    }
    writer.write(buf.toString());
    writer.flush();
    Log.debug("Done writing");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 98 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class MatrixWriter method write.

/**
 * Writes the matrix as tab-delimited text, one line per matrix row. The whole output is assembled in memory,
 * written to {@code writer} in one call, and the writer is flushed (but not closed).
 * <p>
 * Cell formatting: {@code null} and NaN values are written as empty cells, other {@code Double} values are
 * formatted with {@code %.3g} using a fixed locale (so the decimal separator is always "."), and any other
 * value is written via its string representation.
 *
 * @param writer          the writer to use
 * @param matrix          the matrix
 * @param geneAnnotations Map of composite sequences to the genes associated with them
 * @param writeHeader     whether to write the header before the data rows
 * @param writeSequence   whether to write sequence
 * @param writeGeneInfo   whether to write gene info
 * @param orderByDesign   if true, the columns are in the order defined by
 *                        ExpressionDataMatrixColumnSort.orderByExperimentalDesign
 * @throws IOException when the write failed
 */
// Possible external use
@SuppressWarnings({ "unused", "WeakerAccess" })
public void write(Writer writer, ExpressionDataMatrix<?> matrix, Map<CompositeSequence, Collection<Gene>> geneAnnotations, boolean writeHeader, boolean writeSequence, boolean writeGeneInfo, boolean orderByDesign) throws IOException {
    int rows = matrix.rows();
    List<BioMaterial> bioMaterials = this.getBioMaterialsInRequestedOrder(matrix, orderByDesign);
    // StringBuffer is kept because the helper methods called below receive this buffer.
    StringBuffer buf = new StringBuffer();
    if (writeHeader) {
        this.writeHeader(bioMaterials, matrix, geneAnnotations, writeSequence, writeGeneInfo, buf);
    }
    for (int j = 0; j < rows; j++) {
        CompositeSequence probeForRow = matrix.getDesignElementForRow(j);
        buf.append(probeForRow.getName()).append("\t");
        this.writeSequence(writeSequence, buf, probeForRow);
        if (writeGeneInfo) {
            this.addGeneInfo(buf, probeForRow, geneAnnotations);
        }
        // print the data.
        for (BioMaterial bioMaterial : bioMaterials) {
            buf.append("\t");
            int i = matrix.getColumnIndex(bioMaterial);
            Object val = matrix.get(j, i);
            if (val == null) {
                // Missing value: leave the cell empty.
                continue;
            }
            if (val instanceof Double) {
                Double number = (Double) val;
                if (!number.isNaN()) {
                    // Explicit locale: with the default locale the decimal separator could be "," on some
                    // systems, which would make the output file locale-dependent.
                    buf.append(String.format(java.util.Locale.ROOT, "%.3g", number));
                }
                // NaN is written as an empty cell.
            } else {
                buf.append(val);
            }
        }
        buf.append("\n");
    }
    writer.write(buf.toString());
    writer.flush();
    Log.debug("Done writing");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 99 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class DataUpdater method log2cpmFromCounts.

/**
 * Back-fills log2cpm data for an experiment for which only counts are available. This is not expected to be
 * used routinely, because new experiments get log2cpm computed when they are loaded.
 * <p>
 * The given quantitation type is marked as non-preferred before the log2cpm data is added. If any step inside
 * the conversion fails, the exception is logged (not rethrown) and the quantitation type is marked as
 * preferred again.
 *
 * @param ee the experiment to update
 * @param qt the quantitation type used to look up the raw count vectors
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
    ee = experimentService.thawLite(ee);

    // Fetch the count data from the raw vectors, not from data that has been normalized etc. The caller
    // should make sure this quantitation type is currently preferred, so this isn't done twice by accident.
    Collection<RawExpressionDataVector> rawCountVectors = rawExpressionDataVectorService.find(qt);
    ExpressionDataDoubleMatrix rawCounts = new ExpressionDataDoubleMatrix(rawCountVectors);

    try {
        // The count quantitation type stops being the preferred one.
        qt.setIsPreferred(false);
        qtService.update(qt);

        // Thaw again so the updated quantitation type is attached.
        ee = experimentService.thawLite(ee);

        QuantitationType cpmQuantitationType = this.makelog2cpmQt();
        DoubleMatrix1D columnTotals = MatrixStats.colSums(rawCounts.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpmValues = MatrixStats.convertToLog2Cpm(rawCounts.getMatrix(), columnTotals);
        ExpressionDataDoubleMatrix cpmData = new ExpressionDataDoubleMatrix(ee, cpmQuantitationType, cpmValues);
        assert cpmData.getQuantitationTypes().iterator().next().getIsPreferred();

        Collection<ArrayDesign> usedPlatforms = experimentService.getArrayDesignsUsed(ee);
        if (usedPlatforms.size() > 1) {
            throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
        }

        this.addData(ee, usedPlatforms.iterator().next(), cpmData);
    } catch (Exception e) {
        DataUpdater.log.error(e, e);
        // Attempt to recover by restoring the preferred flag on the count quantitation type.
        qt.setIsPreferred(true);
        qtService.update(qt);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ConfigurationException(org.apache.commons.configuration.ConfigurationException) PreprocessingException(ubic.gemma.core.analysis.preprocess.PreprocessingException) IOException(java.io.IOException)

Example 100 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class DifferentialExpressionResultDaoImpl method thaw.

/**
 * Initializes the given result together with its probe and the factor value of each of its contrasts, using
 * the current Hibernate session.
 * <p>
 * Contrasts that have no factor value are skipped rather than causing a {@link NullPointerException}.
 *
 * @param result the result to initialize
 */
@Override
public void thaw(final DifferentialExpressionAnalysisResult result) {
    Session session = this.getSessionFactory().getCurrentSession();
    // Lock with LockOptions.NONE before initializing; presumably this associates the instance with the
    // current session without taking a database lock -- confirm against the Hibernate version in use.
    session.buildLockRequest(LockOptions.NONE).lock(result);
    Hibernate.initialize(result);
    Hibernate.initialize(result.getProbe());
    for (ContrastResult contrast : result.getContrasts()) {
        FactorValue f = contrast.getFactorValue();
        if (f == null) {
            // Nothing to initialize for this contrast; calling a getter on it would throw.
            continue;
        }
        Hibernate.initialize(f);
        // Return value is deliberately ignored; presumably the call is only there to touch the entity.
        // noinspection ResultOfMethodCallIgnored
        f.getIsBaseline();
    }
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)206 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)43 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)40 Gene (ubic.gemma.model.genome.Gene)32 Test (org.junit.Test)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)19 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)18 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)18 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)18 StopWatch (org.apache.commons.lang3.time.StopWatch)17 HashSet (java.util.HashSet)15 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)15 ArrayList (java.util.ArrayList)14 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)14 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)13 Taxon (ubic.gemma.model.genome.Taxon)12 Collection (java.util.Collection)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11