Search in sources :

Example 1 with Link

use of ubic.basecode.dataStructure.Link in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method writeLinks.

/**
 * Write links as text.
 */
private void writeLinks(final LinkAnalysis la, FilterConfig filterConfig, Writer wr) throws IOException {
    Map<CompositeSequence, Set<Gene>> probeToGeneMap = la.getProbeToGeneMap();
    ObjectArrayList links = la.getKeep();
    double subsetSize = la.getConfig().getSubsetSize();
    List<String> buf = new ArrayList<>();
    if (la.getConfig().isSubset() && links.size() > subsetSize) {
        la.getConfig().setSubsetUsed(true);
    }
    wr.write(la.getConfig().toString());
    wr.write(filterConfig.toString());
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);
    Integer probeDegreeThreshold = la.getConfig().getProbeDegreeThreshold();
    int i = 0;
    int keptLinksCount = 0;
    Random generator = new Random();
    double rand;
    double fraction = subsetSize / links.size();
    int skippedDueToDegree = 0;
    for (int n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null)
            continue;
        Link m = (Link) val;
        Double w = m.getWeight();
        int x = m.getx();
        int y = m.gety();
        if (probeDegreeThreshold > 0 && (la.getProbeDegree(x) > probeDegreeThreshold || la.getProbeDegree(y) > probeDegreeThreshold)) {
            skippedDueToDegree++;
            continue;
        }
        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);
        Set<Gene> g1 = probeToGeneMap.get(p1);
        Set<Gene> g2 = probeToGeneMap.get(p2);
        List<String> genes1 = new ArrayList<>();
        for (Gene cluster : g1) {
            String t = cluster.getOfficialSymbol();
            genes1.add(t);
        }
        List<String> genes2 = new ArrayList<>();
        for (Gene cluster : g2) {
            String t = cluster.getOfficialSymbol();
            genes2.add(t);
        }
        if (genes2.size() == 0 || genes1.size() == 0) {
            continue;
        }
        String gene1String = StringUtils.join(genes1.iterator(), "|");
        String gene2String = StringUtils.join(genes2.iterator(), "|");
        if (gene1String.equals(gene2String)) {
            continue;
        }
        if (++keptLinksCount % 50000 == 0) {
            LinkAnalysisServiceImpl.log.info(keptLinksCount + " links retained");
        }
        if (la.getConfig().isSubsetUsed()) {
            rand = generator.nextDouble();
            if (rand > fraction)
                continue;
        }
        buf.add(p1.getId() + "\t" + p2.getId() + "\t" + gene1String + "\t" + gene2String + "\t" + nf.format(w) + // save links
        "\n");
    // wr.write( p1.getId() + "\t" + p2.getId() + "\t" + gene1String + "\t" + gene2String + "\t" + nf.format( w
    // ) + "\n" );
    }
    wr.write("# totalLinks:" + keptLinksCount + "\n");
    wr.write("# printedLinks:" + buf.size() + "\n");
    wr.write("# skippedDueToHighNodeDegree:" + skippedDueToDegree + "\n");
    for (String line : buf) {
        // write links to file
        wr.write(line);
    }
    if (la.getConfig().isSubsetUsed()) {
        // subset option activated
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links kept, " + buf.size() + " links printed");
    // wr.write("# Amount of links before subsetting/after subsetting: " + links.size() + "/" + numPrinted +
    // "\n" );
    } else {
        LinkAnalysisServiceImpl.log.info("Done, " + keptLinksCount + "/" + links.size() + " links printed (some may have been filtered)");
    }
    wr.flush();
}
Also used : ObjectArrayList(cern.colt.list.ObjectArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ObjectArrayList(cern.colt.list.ObjectArrayList) Gene(ubic.gemma.model.genome.Gene) Link(ubic.basecode.dataStructure.Link) NumberFormat(java.text.NumberFormat)

Example 2 with Link

use of ubic.basecode.dataStructure.Link in project Gemma by PavlidisLab.

the class AbstractMatrixRowPairAnalysis method keepCorrellation.

/**
 * Decide whether to keep the correlation. The correlation must be greater or equal to the set thresholds.
 */
void keepCorrellation(int i, int j, double correl, int numused) {
    if (keepers == null) {
        return;
    }
    if (Double.isNaN(correl))
        return;
    if (omitNegativeCorrelationLinks && correl < 0.0) {
        return;
    }
    double acorrel = Math.abs(correl);
    double c;
    if (useAbsoluteValue) {
        c = acorrel;
    } else {
        c = correl;
    }
    if (upperTailThreshold != 0.0 && c >= upperTailThreshold && (!this.usePvalueThreshold || this.correctedPvalue(i, j, correl, numused) <= this.pValueThreshold)) {
        keepers.add(new Link(i, j, correl));
    } else if (!useAbsoluteValue && lowerTailThreshold != 0.0 && c <= lowerTailThreshold && (!this.usePvalueThreshold || this.correctedPvalue(i, j, correl, numused) <= this.pValueThreshold)) {
        keepers.add(new Link(i, j, correl));
    }
}
Also used : Link(ubic.basecode.dataStructure.Link)

Example 3 with Link

use of ubic.basecode.dataStructure.Link in project Gemma by PavlidisLab.

the class LinkAnalysis method computeProbeDegrees.

/**
 * Populates a map of probe index (in matrix) -> how many links. FIXME do at gene level
 */
private void computeProbeDegrees() {
    this.probeDegreeMap = new HashMap<>();
    for (Integer i = 0; i < metricMatrix.size(); i++) {
        probeDegreeMap.put(i, 0);
    }
    for (int i = 0; i < keep.size(); i++) {
        Link l = (Link) keep.get(i);
        Integer x = l.getx();
        Integer y = l.gety();
        probeDegreeMap.put(x, probeDegreeMap.get(x) + 1);
        probeDegreeMap.put(y, probeDegreeMap.get(y) + 1);
    }
}
Also used : Link(ubic.basecode.dataStructure.Link)

Example 4 with Link

use of ubic.basecode.dataStructure.Link in project Gemma by PavlidisLab.

the class LinkAnalysisPersisterImpl method saveLinks.

/**
 * @return how many links were saved
 */
private int saveLinks(LinkAnalysis la, ObjectArrayList links) {
    LinkCreator c = this.getLinkCreator(la);
    int selfLinksSkipped = 0;
    int duplicateLinksSkipped = 0;
    Set<Gene> genesWithLinks = new HashSet<>();
    Set<NonPersistentNonOrderedCoexpLink> linksForDb = new HashSet<>();
    for (int i = 0, n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null)
            continue;
        Link m = (Link) val;
        Double w = m.getWeight();
        int x = m.getx();
        int y = m.gety();
        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);
        /*
             * we have to deal with all the possible genes pairs, if probes map to more than one pair. A single pair of
             * probes could result in more than one link. This assumes that preprocessing of the data allowed retention
             * of probes that map to more than one gene.
             */
        for (Gene g1 : la.getProbeToGeneMap().get(p1)) {
            boolean g1HasLinks = false;
            for (Gene g2 : la.getProbeToGeneMap().get(p2)) {
                if (g1.equals(g2)) {
                    selfLinksSkipped++;
                    continue;
                }
                NonPersistentNonOrderedCoexpLink link = new NonPersistentNonOrderedCoexpLink(this.initCoexp(w, c, g1, g2));
                if (linksForDb.contains(link)) {
                    /*
                         * This happens if there is more than one probe retained for a gene (or both genes) and the
                         * coexpression shows up more than once (different pair of probes, same genes).
                         */
                    if (LinkAnalysisPersisterImpl.log.isDebugEnabled())
                        LinkAnalysisPersisterImpl.log.debug("Skipping duplicate: " + link);
                    duplicateLinksSkipped++;
                    continue;
                /*
                         * FIXME what do we do when a pair of genes is both positively and negatively correlated in the
                         * same experiment? Currently they are both kept, but if we go to a completely gene-based
                         * analysis we wouldn't do that, so it's an inconsistency;
                         */
                }
                if (LinkAnalysisPersisterImpl.log.isDebugEnabled()) {
                    LinkAnalysisPersisterImpl.log.debug("Adding : " + link);
                }
                linksForDb.add(link);
                g1HasLinks = true;
                genesWithLinks.add(g2);
            }
            if (g1HasLinks)
                genesWithLinks.add(g1);
        }
        if (i > 0 && i % 200000 == 0) {
            LinkAnalysisPersisterImpl.log.info(i + " links checked");
        }
    }
    if (selfLinksSkipped > 0) {
        LinkAnalysisPersisterImpl.log.info(selfLinksSkipped + " self-links skipped");
    }
    if (duplicateLinksSkipped > 0) {
        LinkAnalysisPersisterImpl.log.info(duplicateLinksSkipped + " duplicate links skipped (likely cause: more than one probe supporting the same link)");
    }
    if (linksForDb.isEmpty()) {
        throw new RuntimeException("No links left!");
    }
    LinkAnalysisPersisterImpl.log.info(linksForDb.size() + " links ready for saving to db");
    if (!la.getGenesTested().containsAll(genesWithLinks))
        throw new AssertionError();
    /*
         * Do the actual database writing. It's a good idea to do this part in one (big) transaction. Note that even if
         * there are no links, we still update the "genes tested" information.
         */
    this.gene2GeneCoexpressionService.createOrUpdate(la.getExpressionExperiment(), new ArrayList<>(linksForDb), c, la.getGenesTested());
    /*
         * Update the meta-data about the analysis
         */
    CoexpressionAnalysis analysisObj = la.getAnalysisObj();
    assert analysisObj.getId() != null;
    analysisObj.setNumberOfElementsAnalyzed(la.getGenesTested().size());
    analysisObj.setNumberOfLinks(linksForDb.size());
    coexpressionAnalysisService.update(analysisObj);
    return linksForDb.size();
/*
         * Updating node degree cannot be done here, since we need to know the support. We have to do that
         * "periodically" if we want it available in summary form.
         */
}
Also used : LinkCreator(ubic.gemma.persistence.service.association.coexpression.LinkCreator) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) CoexpressionAnalysis(ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis) Gene(ubic.gemma.model.genome.Gene) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) NonPersistentNonOrderedCoexpLink(ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink) Link(ubic.basecode.dataStructure.Link)

Aggregations

Link (ubic.basecode.dataStructure.Link)4 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)2 Gene (ubic.gemma.model.genome.Gene)2 ObjectArrayList (cern.colt.list.ObjectArrayList)1 NumberFormat (java.text.NumberFormat)1 CoexpressionAnalysis (ubic.gemma.model.analysis.expression.coexpression.CoexpressionAnalysis)1 LinkCreator (ubic.gemma.persistence.service.association.coexpression.LinkCreator)1 NonPersistentNonOrderedCoexpLink (ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink)1