Search in sources :

Example 6 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class DataGroup method getHiCReadsForChromosome.

public HiCHitCollection getHiCReadsForChromosome(Chromosome c) {
    HiCHitCollection collection = new HiCHitCollection(c.name());
    for (int i = 0; i < dataSets.length; i++) {
        if (dataSets[i] instanceof HiCDataStore) {
            HiCHitCollection thisCollection = ((HiCDataStore) dataSets[i]).getHiCReadsForChromosome(c);
            collection.addCollection(thisCollection);
        }
    }
    // TODO: Do an incremental add to this collection so that we don't have
    // to re-sort it each time.  This is nastily inefficient
    collection.sortCollection();
    return collection;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)

Example 7 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HeatmapMatrix method run.

public void run() {
    // First calculate chromosome offsets so we can access the
    // relevant probes more quickly when we come to adding other
    // end data.
    Hashtable<String, Integer> offsets = new Hashtable<String, Integer>();
    offsets.put(probes[0].probe.chromosome().name(), 0);
    for (int i = 1; i < probes.length; i++) {
        if (probes[i].probe.chromosome() != probes[i - 1].probe.chromosome()) {
            offsets.put(probes[i].probe.chromosome().name(), i);
        }
    }
    // We also need an initial list of total counts for all of our probes
    // so we can do the O/E calculations.  We can incorporate into this
    // the ability to correct for linkage.
    int[] totalCisCounts = new int[probes.length];
    int[] totalTransCounts = new int[probes.length];
    getTotalCounts(totalCisCounts, totalTransCounts);
    if (cancel) {
        Enumeration<ProgressListener> en2 = listeners.elements();
        while (en2.hasMoreElements()) {
            en2.nextElement().progressCancelled();
        }
        return;
    }
    // We'll make up an ever expanding list of interactions which we
    // care about
    Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
    // We'll need to correct for the number of tests we perform, but we're also able
    // to skip a load of tests based on the initial filters we supplied (non-statistical ones)
    // so we're going to keep a record of how many valid tests we actually need to correct for.
    // 
    // The worse case scenario is that we do every possible comparison (but only one way round).
    // After some testing this proved to be a bad idea.  We skipped so many tests where the
    // interaction wasn't observed that we ended up hugely under-correcting our final p-values
    // and making a load of false positive predictions.  We can do this more correctly later,
    // but for now we're either going to correct for every possible test, or for just every
    // possible cis test if we're excluding trans hits.
    long numberOfTestsPerformed = 0;
    if (initialMaxDistance > 0) {
        // We're just counting cis interactions
        int currentChrCount = 1;
        Chromosome currentChr = probes[0].probe.chromosome();
        for (int p = 1; p < probes.length; p++) {
            if (probes[p].probe.chromosome() == currentChr) {
                ++currentChrCount;
            } else {
                numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
                currentChrCount = 1;
                currentChr = probes[p].probe.chromosome();
            }
        }
        numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
    } else {
        numberOfTestsPerformed = (probes.length * ((long) probes.length - 1)) / 2;
    }
    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressUpdated("Processed " + p + " probes", p, probes.length);
            }
            if (cancel) {
                Enumeration<ProgressListener> en2 = listeners.elements();
                while (en2.hasMoreElements()) {
                    en2.nextElement().progressCancelled();
                }
                return;
            }
        }
        // System.err.println("Getting interactions for "+probes[p].probe);
        // We temporarily store the interactions with this probe which means we
        // can make a decision about which ones we're keeping as we go along which
        // drastically reduces the amount we need to store.
        // This is going to be the data structure which holds the information
        // on the pairs of probes which have any interactions.  The key is the
        // index position of the pair (x+(y*no of probes), and the value is the
        // number of times this interaction was seen
        Hashtable<Long, Integer> interactionCounts = new Hashtable<Long, Integer>();
        HiCHitCollection hiCHits = dataSet.getHiCReadsForProbe(probes[p].probe);
        String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
        // System.err.println("Found hits on "+chromosomeNames.length+" chromosomes");
        CHROM: for (int c = 0; c < chromosomeNames.length; c++) {
            // Skip all trans reads if there is a max distance set.
            if (initialMaxDistance > 0) {
                if (!probes[p].probe.chromosome().name().equals(chromosomeNames[c])) {
                    continue;
                }
            }
            long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
            SequenceRead.sort(hitReads);
            // We're going to start when we hit the probes for this chromosome
            if (!offsets.containsKey(chromosomeNames[c])) {
                // System.err.println("No probes on chr "+chromosomeNames[c]+" skipping");
                continue;
            }
            int lastIndex = offsets.get(chromosomeNames[c]);
            READ: for (int o = 0; o < hitReads.length; o++) {
                if (cancel) {
                    Enumeration<ProgressListener> en = listeners.elements();
                    while (en.hasMoreElements()) {
                        en.nextElement().progressCancelled();
                    }
                    return;
                }
                // Check against the relevant reads
                int startIndex = lastIndex;
                for (int x = startIndex; x < probes.length; x++) {
                    // Check that we're on the right chromosome
                    if (!probes[x].probe.chromosome().name().equals(chromosomeNames[c])) {
                        // System.err.println("Stopping searching at position "+x+" since we changed to chr "+probes[x].probe.chromosome());
                        continue CHROM;
                    }
                    if (probes[x].probe.start() > SequenceRead.end(hitReads[o])) {
                        // to the next read.
                        continue READ;
                    }
                    if (SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
                        // We overlap with this probe
                        // System.err.println("Found hit to probe "+probes[x].probe);
                        // For probe list probes we could have the same probe several times
                        // so we can add the read to all of the probes which are the same
                        // from this point.
                        // We can skip over interactions where the matched index is less than our index
                        // since we'll duplicate this later anyway.
                        long interactionKey = p + (x * (long) probes.length);
                        if (!interactionCounts.containsKey(interactionKey)) {
                            if (probes[p].index < probes[x].index) {
                                interactionCounts.put(interactionKey, 0);
                            }
                        // else {
                        // System.err.println("Skipping earlier probe hit");
                        // }
                        }
                        // Add one to our counts for this interaction
                        if (probes[p].index < probes[x].index) {
                            interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
                        }
                        // Since we found our first hit here we can start looking from here next time.
                        lastIndex = x;
                        // this one.
                        for (x = x + 1; x < probes.length; x++) {
                            if (probes[x].probe.chromosome() != probes[x - 1].probe.chromosome()) {
                                // We've reached the end for this chromosome
                                break;
                            }
                            if (probes[x].probe == probes[x - 1].probe || SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
                                if (probes[p].index >= probes[x].index)
                                    continue;
                                interactionKey = p + (x * (long) probes.length);
                                if (!interactionCounts.containsKey(interactionKey)) {
                                    if (interactionCounts.size() >= MAX_INTERACTIONS) {
                                        continue READ;
                                    }
                                    interactionCounts.put(interactionKey, 0);
                                }
                                interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
                            } else {
                                break;
                            }
                        }
                        continue READ;
                    }
                // End if overlaps
                }
            // End check each probe
            }
        // End each hit read
        }
        // End each hit chromosome
        // We can now go through the interactions we saw and decide if we
        // want to keep any of them.
        HiCInteractionStrengthCalculator strengthCalc = new HiCInteractionStrengthCalculator(dataSet, correctLinkage);
        Enumeration<Long> en = interactionCounts.keys();
        while (en.hasMoreElements()) {
            long index = en.nextElement();
            int absoluteValue = interactionCounts.get(index);
            if (absoluteValue < initialMinAbsolute)
                continue;
            ProbeWithIndex probe1 = probes[(int) (index % probes.length)];
            ProbeWithIndex probe2 = probes[(int) (index / probes.length)];
            // We calculate the obs/exp based on the total pair count
            // and the relative counts at each end of the interaction
            // Do the interaction strength calculation
            strengthCalc.calculateInteraction(absoluteValue, totalCisCounts[probe1.index], totalTransCounts[probe1.index], totalCisCounts[probe2.index], totalTransCounts[probe2.index], probe1.probe, probe2.probe);
            // We're not counting this here any more.  We work out theoretical numbers at the top instead
            // ++numberOfTestsPerformed;
            float obsExp = (float) strengthCalc.obsExp();
            float pValue = (float) strengthCalc.rawPValue();
            // interaction objects we have to create.
            if (obsExp < initialMinStrength)
                continue;
            // This isn't the final p-value check, but if the raw pvalue fails then the corrected value is never going to pass.
            if (initialMaxSignificance < 1 && pValue > initialMaxSignificance)
                continue;
            InteractionProbePair interaction = new InteractionProbePair(probe1.probe, probe1.index, probe2.probe, probe2.index, obsExp, absoluteValue);
            interaction.setSignificance(pValue);
            // We check against the current list of filters
            if (passesCurrentFilters(interaction)) {
                // See if the strength of any of the interactions is bigger than our current max
                if (obsExp > maxValue)
                    maxValue = obsExp;
                if (filteredInteractions.size() >= MAX_INTERACTIONS) {
                    Enumeration<ProgressListener> en2 = listeners.elements();
                    while (en2.hasMoreElements()) {
                        en2.nextElement().progressWarningReceived(new SeqMonkException("More than " + MAX_INTERACTIONS + " interactions passed the filters. Showing as many as I can"));
                    }
                } else {
                    filteredInteractions.add(interaction);
                }
            }
        }
    }
    // Put the interactions which worked into an Array
    interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
    // System.err.println("Found a set of "+interactions.length+" initially filtered interactions");
    // Apply multiple testing correction
    Arrays.sort(interactions, new Comparator<InteractionProbePair>() {

        public int compare(InteractionProbePair o1, InteractionProbePair o2) {
            return Float.compare(o1.signficance(), o2.signficance());
        }
    });
    for (int i = 0; i < interactions.length; i++) {
        interactions[i].setSignificance(interactions[i].signficance() * ((float) (numberOfTestsPerformed) / (i + 1)));
        // We can't allow corrected p-values to end up in a different order to the uncorrected ones
        if (i > 0 && interactions[i].signficance() < interactions[i - 1].signficance()) {
            interactions[i].setSignificance(interactions[i - 1].signficance());
        }
    }
    // Now re-filter the interactions to get those which finally pass the p-value filter
    filteredInteractions.clear();
    for (int i = 0; i < interactions.length; i++) {
        if (initialMaxSignificance >= 1 || interactions[i].signficance() < initialMaxSignificance) {
            filteredInteractions.add(interactions[i]);
        } else {
            break;
        }
    }
    // Put the interactions which worked into an Array
    interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
    // System.err.println("Found a set of "+interactions.length+" interactions after multiple testing correction");
    // We've processed all of the reads and the matrix is complete.  We can therefore
    // draw the plot
    Enumeration<ProgressListener> en2 = listeners.elements();
    while (en2.hasMoreElements()) {
        en2.nextElement().progressComplete("heatmap", null);
    }
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 8 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class CisTransScatterPlotPanel method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // We need to find the cis trans counts, and get a non-overlapping
    // set of probes
    Probe[] probes = probeList.getAllProbes();
    // If there aren't any probes there's no point going any further
    if (probes.length == 0)
        return;
    minValueX = 0;
    minValueY = 0;
    maxValueX = 0;
    maxValueY = 0;
    Arrays.sort(probes);
    xData = new int[probes.length];
    yData = new int[probes.length];
    for (int p = 0; p < probes.length; p++) {
        HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
        String[] chromosomes = hiCHits.getChromosomeNamesWithHits();
        for (int c = 0; c < chromosomes.length; c++) {
            if (hiCHits.getSourceChromosomeName().equals(chromosomes[c])) {
                xData[p] += hiCHits.getSourcePositionsForChromosome(chromosomes[c]).length;
            } else {
                yData[p] += hiCHits.getSourcePositionsForChromosome(chromosomes[c]).length;
            }
        }
        if (xData[p] < minValueX)
            minValueX = xData[p];
        if (yData[p] < minValueY)
            minValueY = yData[p];
        if (xData[p] > maxValueX)
            maxValueX = xData[p];
        if (yData[p] > maxValueY)
            maxValueY = yData[p];
    }
    if (commonScale) {
        minValueX = Math.min(minValueX, minValueY);
        minValueY = minValueX;
        maxValueX = Math.max(maxValueX, maxValueY);
        maxValueY = maxValueX;
    }
    readyToDraw = true;
    repaint();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 9 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class ReplicateSet method getHiCReadsForProbe.

public HiCHitCollection getHiCReadsForProbe(Probe p) {
    HiCHitCollection collection = new HiCHitCollection(p.chromosome().name());
    for (int i = 0; i < dataStores.length; i++) {
        if (dataStores[i] instanceof HiCDataStore) {
            HiCHitCollection thisCollection = ((HiCDataStore) dataStores[i]).getHiCReadsForProbe(p);
            collection.addCollection(thisCollection);
        }
    }
    collection.sortCollection();
    return collection;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)

Example 10 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class SeqMonkDataWriter method printPairedDataSet.

private boolean printPairedDataSet(PairedDataSet set, PrintStream p, int index, int indexTotal) throws IOException {
    p.println(set.getTotalReadCount() * 2 + "\t" + set.name());
    // Go through one chromosome at a time.
    Chromosome[] chrs = data.genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        HiCHitCollection hiCHits = set.getHiCReadsForChromosome(chrs[c]);
        // Work out how many of these reads we're actually going to output
        int validReadCount = 0;
        for (int c2 = 0; c2 < chrs.length; c2++) {
            validReadCount += hiCHits.getSourcePositionsForChromosome(chrs[c2].name()).length;
        }
        p.println(chrs[c].name() + "\t" + validReadCount);
        for (int c2 = 0; c2 < chrs.length; c2++) {
            long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chrs[c2].name());
            long[] hitReads = hiCHits.getHitPositionsForChromosome(chrs[c2].name());
            for (int j = 0; j < sourceReads.length; j++) {
                if (cancel) {
                    cancelled(p);
                    return false;
                }
                // TODO: Fix the progress bar
                if ((j % (1 + (validReadCount / 10))) == 0) {
                    Enumeration<ProgressListener> e2 = listeners.elements();
                    while (e2.hasMoreElements()) {
                        e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
                    }
                }
                p.println(sourceReads[j] + "\t" + chrs[c2].name() + "\t" + hitReads[j]);
            }
        }
    }
    // Print a blank line after the last chromosome
    p.println("");
    return true;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Aggregations

HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)21 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)6 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)6 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)5 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)3 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)3 Vector (java.util.Vector)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 Hashtable (java.util.Hashtable)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)1 HiCInteractionStrengthCalculator (uk.ac.babraham.SeqMonk.DataTypes.Interaction.HiCInteractionStrengthCalculator)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)1