Search in sources :

Example 81 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class DifferentialExpressionAnalyzerServiceImpl method extendResultSet.

/**
 * Copies results from a temporary result set into an existing one. A result from {@code temprs} is added to
 * {@code oldrs} only when {@code oldrs} has no result for the same probe yet.
 *
 * @param oldrs  the result set to extend; its id must be non-null (checked by assertion)
 * @param temprs the result set whose results are merged into {@code oldrs}
 */
private void extendResultSet(ExpressionAnalysisResultSet oldrs, ExpressionAnalysisResultSet temprs) {
    assert oldrs.getId() != null;

    // Index the existing results by probe so probes that already have a result can be detected.
    Map<CompositeSequence, DifferentialExpressionAnalysisResult> p2der = new HashMap<>();
    for (DifferentialExpressionAnalysisResult der : oldrs.getResults()) {
        p2der.put(der.getProbe(), der);
    }

    Collection<DifferentialExpressionAnalysisResult> toAdd = new ArrayList<>();
    for (DifferentialExpressionAnalysisResult newr : temprs.getResults()) {
        if (!p2der.containsKey(newr.getProbe())) {
            toAdd.add(newr);
        }
        // Every incoming result is re-pointed at the old result set, whether or not it ends up being added.
        newr.setResultSet(oldrs);
    }

    if (toAdd.isEmpty()) {
        DifferentialExpressionAnalyzerServiceImpl.log.warn("Somewhat surprisingly, no new results were added");
        // Nothing to copy: addAll() on an empty collection reports "unchanged", so the assertions below
        // must not run in this case.
        return;
    }

    DifferentialExpressionAnalyzerServiceImpl.log.info(toAdd.size() + " transient results added to the old analysis result set: " + oldrs.getId());
    boolean added = oldrs.getResults().addAll(toAdd);
    assert added;
    assert oldrs.getResults().size() >= toAdd.size();
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 82 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class QuantitationTypeData method isTwoColor.

/**
 * Scans the vectors for one whose platform technology type is neither ONECOLOR nor NONE and whose quantitation
 * type is a ratio flagged as preferred (or masked-preferred).
 *
 * @return true as soon as such a vector is found, false if none of the vectors qualifies
 */
private boolean isTwoColor() {
    for (DesignElementDataVector vector : vectors) {
        TechnologyType platformType = vector.getDesignElement().getArrayDesign().getTechnologyType();
        boolean oneColorOrNone = platformType.equals(TechnologyType.ONECOLOR) || platformType.equals(TechnologyType.NONE);
        if (!oneColorOrNone) {
            QuantitationType quantType = vector.getQuantitationType();
            boolean preferred = quantType.getIsPreferred() || quantType.getIsMaskedPreferred();
            if (preferred && quantType.getIsRatio()) {
                return true;
            }
        }
    }
    return false;
}
Also used : TechnologyType(ubic.gemma.model.expression.arrayDesign.TechnologyType) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 83 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class LinkAnalysis method getGenesTested.

/**
 * Get the genes that were tested, according to the rows that are currently in the dataMatrix (so call this after
 * filtering!). Rows with no entry in the probe-to-gene map are skipped; a warning is logged for them until the
 * warning limit is exceeded, after which further warnings are suppressed.
 *
 * @return the union of the gene sets mapped to the rows of the data matrix
 */
public Set<Gene> getGenesTested() {
    Set<Gene> tested = new HashSet<>();
    for (CompositeSequence probe : dataMatrix.getRowNames()) {
        Set<Gene> mapped = this.probeToGeneMap.get(probe);
        if (mapped != null) {
            tested.addAll(mapped);
            continue;
        }
        // No mapping for this probe: warn, unless the warning budget has already been used up.
        if (numWarnings > LinkAnalysis.MAX_WARNINGS) {
            continue;
        }
        LinkAnalysis.log.warn("No genes for: " + probe);
        numWarnings++;
        if (numWarnings > LinkAnalysis.MAX_WARNINGS) {
            LinkAnalysis.log.warn("Further warnings will be suppressed");
        }
    }
    return tested;
}
Also used : Gene(ubic.gemma.model.genome.Gene) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 84 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method writeLinks.

/**
 * Write links as text.
 * <p>
 * Output layout: the link analysis configuration, the filter configuration, three {@code #}-prefixed summary
 * lines (total links, printed links, links skipped due to high node degree), then one tab-separated line per
 * printed link: probe 1 id, probe 2 id, gene symbols for probe 1, gene symbols for probe 2 (each joined with
 * {@code |}) and the link weight formatted with at most four fraction digits.
 * <p>
 * A link is skipped when either probe exceeds the configured probe degree threshold (if positive), when either
 * probe has no genes in the probe-to-gene map, or when both probes yield the same gene string. If subsetting is
 * in effect, retained links are additionally sampled at random.
 *
 * @param la           the link analysis providing the links, probes, probe-to-gene map and configuration
 * @param filterConfig the filter configuration; its string form is written to the output header
 * @param wr           the writer to receive the output; it is flushed but not closed
 * @throws IOException if writing to {@code wr} fails
 */
private void writeLinks(final LinkAnalysis la, FilterConfig filterConfig, Writer wr) throws IOException {
    Map<CompositeSequence, Set<Gene>> probeToGeneMap = la.getProbeToGeneMap();
    ObjectArrayList links = la.getKeep();
    double subsetSize = la.getConfig().getSubsetSize();
    // Lines are buffered because the summary header (which includes the printed count) precedes them.
    List<String> buf = new ArrayList<>();
    if (la.getConfig().isSubset() && links.size() > subsetSize) {
        la.getConfig().setSubsetUsed(true);
    }
    wr.write(la.getConfig().toString());
    wr.write(filterConfig.toString());
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);
    Integer probeDegreeThreshold = la.getConfig().getProbeDegreeThreshold();
    int keptLinksCount = 0;
    Random generator = new Random();
    // Probability with which a retained link is printed when subsetting is in effect.
    double fraction = subsetSize / links.size();
    int skippedDueToDegree = 0;
    for (int i = 0, n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null) {
            continue;
        }
        Link m = (Link) val;
        Double w = m.getWeight();
        int x = m.getx();
        int y = m.gety();
        if (probeDegreeThreshold > 0 && (la.getProbeDegree(x) > probeDegreeThreshold || la.getProbeDegree(y) > probeDegreeThreshold)) {
            skippedDueToDegree++;
            continue;
        }
        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);
        Set<Gene> g1 = probeToGeneMap.get(p1);
        Set<Gene> g2 = probeToGeneMap.get(p2);
        // A probe with no mapped genes may be absent from the map altogether; treat that the same as an
        // empty gene set instead of failing with a NullPointerException.
        if (g1 == null || g2 == null || g1.isEmpty() || g2.isEmpty()) {
            continue;
        }
        List<String> genes1 = new ArrayList<>();
        for (Gene cluster : g1) {
            genes1.add(cluster.getOfficialSymbol());
        }
        List<String> genes2 = new ArrayList<>();
        for (Gene cluster : g2) {
            genes2.add(cluster.getOfficialSymbol());
        }
        String gene1String = StringUtils.join(genes1.iterator(), "|");
        String gene2String = StringUtils.join(genes2.iterator(), "|");
        if (gene1String.equals(gene2String)) {
            continue;
        }
        // Counted before random subsetting, so this is the total number of retained links.
        if (++keptLinksCount % 50000 == 0) {
            LinkAnalysisServiceImpl.log.info(keptLinksCount + " links retained");
        }
        if (la.getConfig().isSubsetUsed()) {
            double rand = generator.nextDouble();
            if (rand > fraction) {
                continue;
            }
        }
        buf.add(p1.getId() + "\t" + p2.getId() + "\t" + gene1String + "\t" + gene2String + "\t" + nf.format(w) + "\n");
    }
    wr.write("# totalLinks:" + keptLinksCount + "\n");
    wr.write("# printedLinks:" + buf.size() + "\n");
    wr.write("# skippedDueToHighNodeDegree:" + skippedDueToDegree + "\n");
    for (String line : buf) {
        wr.write(line);
    }
    if (la.getConfig().isSubsetUsed()) {
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links kept, " + buf.size() + " links printed");
    } else {
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links printed (some may have been filtered)");
    }
    wr.flush();
}
Also used : ObjectArrayList(cern.colt.list.ObjectArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Gene(ubic.gemma.model.genome.Gene) Link(ubic.basecode.dataStructure.Link) NumberFormat(java.text.NumberFormat)

Example 85 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method getProbe2GeneMap.

/**
 * Fills in the probe2gene map for the linkAnalysis. Note that the collection DOES NOT contain probes that have NO
 * genes mapped.
 *
 * @param la             the link analysis whose probe-to-gene map is set
 * @param dataVectors    the vectors whose design elements are candidate probes
 * @param eeDoubleMatrix only probes that still have a row in this matrix are considered (presumably so that
 *                       probes from removed vectors are not used)
 * @throws IllegalStateException if no probe ends up mapped to at least one gene
 */
private void getProbe2GeneMap(LinkAnalysis la, Collection<ProcessedExpressionDataVector> dataVectors, ExpressionDataDoubleMatrix eeDoubleMatrix) {
    Collection<CompositeSequence> probesForVectors = new HashSet<>();
    for (DesignElementDataVector v : dataVectors) {
        CompositeSequence cs = v.getDesignElement();
        if (eeDoubleMatrix.getRow(cs) != null) {
            probesForVectors.add(cs);
        }
    }
    Map<CompositeSequence, Collection<BioSequence2GeneProduct>> specificityData = csService.getGenesWithSpecificity(probesForVectors);
    assert !specificityData.isEmpty();

    /*
     * Convert the specificity data into a probe -> genes map.
     */
    Map<CompositeSequence, Set<Gene>> probeToGeneMap = new HashMap<>();
    for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : specificityData.entrySet()) {
        CompositeSequence cs = entry.getKey();
        Set<Gene> genes = probeToGeneMap.get(cs);
        if (genes == null) {
            genes = new HashSet<Gene>();
            probeToGeneMap.put(cs, genes);
        }
        for (BioSequence2GeneProduct bioSequence2GeneProduct : entry.getValue()) {
            genes.add(bioSequence2GeneProduct.getGeneProduct().getGene());
        }
    }

    /*
     * Remove the probes that have no mapping, remembering one of them for the error message below.
     */
    int startingSize = probeToGeneMap.size();
    int numRemoved = 0;
    CompositeSequence exampleUnmapped = null;
    for (Iterator<Map.Entry<CompositeSequence, Set<Gene>>> it = probeToGeneMap.entrySet().iterator(); it.hasNext(); ) {
        Map.Entry<CompositeSequence, Set<Gene>> entry = it.next();
        if (entry.getValue().isEmpty()) {
            if (exampleUnmapped == null) {
                exampleUnmapped = entry.getKey();
            }
            it.remove();
            numRemoved++;
        }
    }
    if (numRemoved > 0) {
        LinkAnalysisServiceImpl.log.info(numRemoved + "/" + startingSize + " elements had no genes mapped and were removed.");
    }

    if (probeToGeneMap.isEmpty()) {
        // The map is empty here, so the example must come from elsewhere: prefer a probe that was just
        // removed, otherwise fall back to any probe that was looked up.
        if (exampleUnmapped == null && !probesForVectors.isEmpty()) {
            exampleUnmapped = probesForVectors.iterator().next();
        }
        throw new IllegalStateException("No probes are mapped to genes; example=" + exampleUnmapped);
    }
    la.setProbeToGeneMap(probeToGeneMap);
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) Gene(ubic.gemma.model.genome.Gene) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)206 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)43 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)40 Gene (ubic.gemma.model.genome.Gene)32 Test (org.junit.Test)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)19 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)18 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)18 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)18 StopWatch (org.apache.commons.lang3.time.StopWatch)17 HashSet (java.util.HashSet)15 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)15 ArrayList (java.util.ArrayList)14 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)14 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)13 Taxon (ubic.gemma.model.genome.Taxon)12 Collection (java.util.Collection)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11