Search in sources :

Example 21 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class LinearModelAnalyzer method getProbeToGeneMap.

/**
 * Gathers the probes referenced by the supplied analysis results and fetches the genes for them. Needed to compute
 * the number of genes tested/detected.
 *
 * @param resultLists analysis results grouped under string keys; only the map's values are examined
 * @return whatever {@code compositeSequenceService.getGenes} yields for the collected probes, or a new empty map
 *         when no probe was collected
 */
private Map<CompositeSequence, Collection<Gene>> getProbeToGeneMap(Map<String, ? extends Collection<DifferentialExpressionAnalysisResult>> resultLists) {
    // Only the key set of this map is consumed below; keying by probe collapses repeated probes into one entry.
    Map<CompositeSequence, Collection<Gene>> probes = new HashMap<>();
    for (Collection<DifferentialExpressionAnalysisResult> analysisResults : resultLists.values()) {
        for (DifferentialExpressionAnalysisResult analysisResult : analysisResults) {
            probes.put(analysisResult.getProbe(), new HashSet<Gene>());
        }
    }
    if (!probes.isEmpty()) {
        return compositeSequenceService.getGenes(probes.keySet());
    }
    // Nothing to look up (testing environment, etc.), so the service is not called.
    return new HashMap<>();
}
Also used : Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 22 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataSVD method equalize.

/**
 * Implements the method described in the SPELL paper, alternative interpretation as related by Q. Morris: every
 * component is given equal weight by setting all singular values to 1 before multiplying the decomposition back
 * together.
 *
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix equalize() {
    // Overwrite the diagonal on a copy of S with ones.
    DoubleMatrix<Integer, Integer> unitS = svd.getS().copy();
    for (int k = 0; k < unitS.columns(); k++) {
        unitS.set(k, k, 1.0);
    }
    DoubleMatrix2D u = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D s = new DenseDoubleMatrix2D(unitS.getRawMatrix());
    DoubleMatrix2D v = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());
    // Reconstruct as (U * S) * V^T.
    Algebra algebra = new Algebra();
    DoubleMatrix2D us = algebra.mult(u, s);
    DoubleMatrix2D product = algebra.mult(us, algebra.transpose(v));
    DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(product.toArray());
    reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
    reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
    // Cells flagged NaN in missingValueInfo are set back to NaN in the reconstruction.
    for (int row = 0; row < reconstructed.rows(); row++) {
        for (int col = 0; col < reconstructed.columns(); col++) {
            boolean wasMissing = Double.isNaN(this.missingValueInfo.get(row, col));
            if (wasMissing) {
                reconstructed.set(row, col, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DenseDoubleMatrix2D(cern.colt.matrix.impl.DenseDoubleMatrix2D) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 23 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataSVD method winnow.

/**
 * Implements method described in Skillicorn et al., "Strategies for winnowing microarray data" (also section 3.5.5
 * of his book). Rows are ranked by the 1-norm of their row of U, and the rows with the smallest norms (nearest the
 * origin in SVD space) are removed.
 *
 * @param thresholdQuantile fraction of the rows, counted from the smallest norm upwards, to discard. Enter 0.5 for
 *                          median. Value must be &gt; 0 and &lt; 1.
 * @return a filtered matrix holding only the rows ranked at or above the threshold quantile
 * @throws IllegalArgumentException if thresholdQuantile is NaN or not strictly between 0 and 1
 */
public ExpressionDataDoubleMatrix winnow(double thresholdQuantile) {
    // Phrased as a negated range test so that NaN, which fails every comparison, is rejected too.
    if (!(thresholdQuantile > 0 && thresholdQuantile < 1)) {
        throw new IllegalArgumentException("Threshold quantile should be a value between 0 and 1 exclusive");
    }
    // Pairs a row index with that row's norm so that rows can be sorted by norm.
    class NormCmp implements Comparable<NormCmp> {

        private final double norm;

        private final int rowIndex;

        private NormCmp(int rowIndex, double norm) {
            super();
            this.rowIndex = rowIndex;
            this.norm = norm;
        }

        @Override
        public int compareTo(NormCmp o) {
            return Double.compare(this.norm, o.norm);
        }

        public int getRowIndex() {
            return rowIndex;
        }

        @Override
        public int hashCode() {
            return 31 + Double.valueOf(norm).hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || this.getClass() != obj.getClass()) {
                return false;
            }
            // Same notion of equality as compareTo, keeping the two consistent.
            return Double.compare(norm, ((NormCmp) obj).norm) == 0;
        }
    }
    // order rows by distance from the origin, measured as the 1-norm of the corresponding row of U.
    Algebra a = new Algebra();
    int numRows = this.expressionData.rows();
    List<NormCmp> os = new ArrayList<>(numRows);
    for (int i = 0; i < numRows; i++) {
        DoubleMatrix1D rom = new DenseDoubleMatrix1D(this.getU().getRow(i));
        os.add(new NormCmp(i, a.norm1(rom)));
    }
    // Ascending: rows nearest the origin come first.
    Collections.sort(os);
    // Cannot be negative or exceed numRows because thresholdQuantile is strictly inside (0, 1).
    int quantileLimit = (int) Math.floor(numRows * thresholdQuantile);
    // Keep the rows from the quantile cut-off upwards, i.e. drop the quantileLimit rows with the smallest norms.
    List<CompositeSequence> keepers = new ArrayList<>(numRows - quantileLimit);
    for (int i = quantileLimit; i < os.size(); i++) {
        keepers.add(this.expressionData.getDesignElementForRow(os.get(i).getRowIndex()));
    }
    // remove genes which are near the origin in SVD space. FIXME: make sure the missing values are still masked.
    return new ExpressionDataDoubleMatrix(this.expressionData, keepers);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Algebra(cern.colt.matrix.linalg.Algebra) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) DenseDoubleMatrix1D(cern.colt.matrix.impl.DenseDoubleMatrix1D)

Example 24 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class DatabaseViewGeneratorImpl method formatDiffExResult.

/**
 * Renders one differential expression result as tab-separated text, one line per contrast.
 *
 * @param ee                  experiment whose id and short name start each line
 * @param probeAnalysisResult result supplying the probe and its contrasts
 * @param factorName          written verbatim into each line
 * @param factorURI           written verbatim into each line
 * @param baselineDescription written verbatim into each line
 * @return the formatted lines (empty if there are no contrasts), or null when the probe does not map to exactly
 *         one gene or that gene has no NCBI gene id
 */
private String formatDiffExResult(ExpressionExperiment ee, DifferentialExpressionAnalysisResult probeAnalysisResult, String factorName, String factorURI, String baselineDescription) {
    CompositeSequence probe = probeAnalysisResult.getProbe();
    Collection<Gene> mappedGenes = compositeSequenceService.getGenes(probe);
    // Only probes that map to exactly one gene are reported.
    if (mappedGenes.size() != 1) {
        return null;
    }
    Gene gene = mappedGenes.iterator().next();
    if (gene.getNcbiGeneId() == null) {
        return null;
    }
    final String lineFormat = "%d\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n";
    StringBuilder lines = new StringBuilder();
    for (ContrastResult contrast : probeAnalysisResult.getContrasts()) {
        FactorValue factorValue = contrast.getFactorValue();
        // "-" for a negative log fold change, "+" otherwise.
        String direction;
        if (contrast.getLogFoldChange() < 0) {
            direction = "-";
        } else {
            direction = "+";
        }
        String factorValueDescription = ExperimentalDesignUtils.prettyString(factorValue);
        String line = String.format(lineFormat, ee.getId(), ee.getShortName(), gene.getNcbiGeneId().toString(), gene.getId(), factorName, factorURI, baselineDescription, factorValueDescription, direction);
        lines.append(line);
    }
    return lines.toString();
}
Also used : FactorValue(ubic.gemma.model.expression.experiment.FactorValue) Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ContrastResult(ubic.gemma.model.analysis.expression.diff.ContrastResult)

Example 25 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ArrayDesignAnnotationServiceImpl method populateProbeNameToIdMap.

/**
 * Walks the composite sequences of the array design, giving each probe id an empty gene set in {@code results} and
 * recording its name in {@code probeNameToId}. A probe name that is already present is logged as a duplicate and
 * then overwritten with the current probe's id.
 *
 * @param arrayDesign   source of the composite sequences
 * @param results       receives a new empty gene collection per probe id
 * @param probeNameToId receives the probe name to probe id mapping
 */
private static void populateProbeNameToIdMap(ArrayDesign arrayDesign, Map<Long, Collection<Gene>> results, Map<String, Long> probeNameToId) {
    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        Long probeId = probe.getId();
        results.put(probeId, new HashSet<Gene>());

        String probeName = probe.getName();
        boolean seenBefore = probeNameToId.containsKey(probeName);
        if (seenBefore) {
            ArrayDesignAnnotationServiceImpl.log.warn("Duplicate probe name: " + probeName);
        }
        probeNameToId.put(probeName, probeId);
    }
}
Also used : Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 206; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 43; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 40; Gene (ubic.gemma.model.genome.Gene): 32; Test (org.junit.Test): 30; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 19; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 18; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 18; DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 18; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 18; StopWatch (org.apache.commons.lang3.time.StopWatch): 17; HashSet (java.util.HashSet): 15; BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 15; CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject): 15; ArrayList (java.util.ArrayList): 14; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 14; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 13; Taxon (ubic.gemma.model.genome.Taxon): 12; Collection (java.util.Collection): 11; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11