Search in sources :

Example 1 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class VisibleStoresParser method processHiCDataStore.

private DataSet processHiCDataStore(DataStore store) throws SeqMonkException {
    int extendBy = prefs.extendReads();
    boolean reverse = prefs.reverseReads();
    boolean removeStrand = prefs.removeStrandInfo();
    PairedDataSet newData = new PairedDataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
    // Now process the data
    Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
        // We make the call to get exportable reads so we don't duplicate reads
        // when we export things
        HiCHitCollection hitCollection = ((HiCDataStore) store).getExportableReadsForChromosome(chrs[c]);
        String[] localChromosomes = hitCollection.getChromosomeNamesWithHits();
        for (int c2 = 0; c2 < localChromosomes.length; c2++) {
            Chromosome localChromosome = SeqMonkApplication.getInstance().dataCollection().genome().getChromosome(localChromosomes[c2]).chromosome();
            long[] sourceReads = hitCollection.getSourcePositionsForChromosome(localChromosomes[c2]);
            long[] hitReads = hitCollection.getHitPositionsForChromosome(localChromosomes[c2]);
            for (int r = 0; r < sourceReads.length; r++) {
                if (cancel) {
                    progressCancelled();
                    return null;
                }
                if (downsample && downsampleProbabilty < 1) {
                    if (Math.random() > downsampleProbabilty) {
                        continue;
                    }
                }
                if ((!(reverse || removeStrand)) && extendBy == 0 && (!filterByFeature)) {
                    // Just add them as they are
                    newData.addData(chrs[c], sourceReads[r]);
                    newData.addData(localChromosome, hitReads[r]);
                }
                Feature[] features = null;
                if (filterByFeature) {
                    features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
                    Arrays.sort(features);
                }
                int currentFeaturePostion = 0;
                if (filterByFeature) {
                    // See if we're comparing against the right feature
                    while (SequenceRead.start(sourceReads[r]) > features[currentFeaturePostion].location().end() && currentFeaturePostion < (features.length - 1)) {
                        currentFeaturePostion++;
                    }
                    // Test to see if we overlap
                    if (SequenceRead.overlaps(sourceReads[r], features[currentFeaturePostion].location().packedPosition())) {
                        if (excludeFeature)
                            continue;
                    } else {
                        if (!excludeFeature)
                            continue;
                    }
                }
                int sourceStart = SequenceRead.start(sourceReads[r]);
                int sourceEend = SequenceRead.end(sourceReads[r]);
                int sourceStrand = SequenceRead.strand(sourceReads[r]);
                int hitStart = SequenceRead.start(sourceReads[r]);
                int hitEend = SequenceRead.end(hitReads[r]);
                int hitStrand = SequenceRead.strand(hitReads[r]);
                if (reverse) {
                    if (sourceStrand == Location.FORWARD) {
                        sourceStrand = Location.REVERSE;
                    } else if (sourceStrand == Location.REVERSE) {
                        sourceStrand = Location.FORWARD;
                    }
                    if (hitStrand == Location.FORWARD) {
                        hitStrand = Location.REVERSE;
                    } else if (hitStrand == Location.REVERSE) {
                        hitStrand = Location.FORWARD;
                    }
                }
                if (removeStrand) {
                    sourceStrand = Location.UNKNOWN;
                    hitStrand = Location.UNKNOWN;
                }
                if (extendBy > 0) {
                    if (sourceStrand == Location.FORWARD) {
                        sourceEend += extendBy;
                    } else if (sourceStrand == Location.REVERSE) {
                        sourceStart -= extendBy;
                    }
                    if (hitStrand == Location.FORWARD) {
                        hitEend += extendBy;
                    } else if (hitStrand == Location.REVERSE) {
                        hitStart -= extendBy;
                    }
                }
                // We also don't allow readings which are beyond the end of the chromosome
                if (sourceEend > chrs[c].length()) {
                    int overrun = sourceEend - chrs[c].length();
                    progressWarningReceived(new SeqMonkException("Reading position " + sourceEend + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                if (hitEend > localChromosome.length()) {
                    int overrun = hitEend - SeqMonkApplication.getInstance().dataCollection().genome().getChromosome(localChromosomes[c2]).chromosome().length();
                    progressWarningReceived(new SeqMonkException("Reading position " + hitEend + " was " + overrun + "bp beyond the end of chr" + localChromosome.name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                // We can now make the new readings
                long sourceRead = SequenceRead.packPosition(sourceStart, sourceEend, sourceStrand);
                long hitRead = SequenceRead.packPosition(hitStart, hitEend, hitStrand);
                if (!prefs.isHiC()) {
                    // HiC additions are deferred until we know the other end is OK too.
                    newData.addData(chrs[c], sourceRead);
                    newData.addData(localChromosome, hitRead);
                }
            }
        }
    }
    return newData;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 2 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class PairedDataSet method getHiCReadsForProbe.

public HiCHitCollection getHiCReadsForProbe(Probe p) {
    HiCHitCollection hitCollection = new HiCHitCollection(p.chromosome().name());
    HiCHitCollection lastCachedHits = updateCacheToChromosome(p.chromosome());
    if (lastChromosomeHitNames.length != lastIndices.length) {
        throw new IllegalArgumentException("Chr names " + lastChromosomeHitNames.length + " but indices " + lastIndices.length);
    }
    for (int c = 0; c < lastChromosomeHitNames.length; c++) {
        long[] sourceReads = lastCachedHits.getSourcePositionsForChromosome(lastChromosomeHitNames[c]);
        long[] hitReads = lastCachedHits.getHitPositionsForChromosome(lastChromosomeHitNames[c]);
        if (sourceReads.length != hitReads.length) {
            throw new IllegalStateException("Source reads and hit reads had different lengths for chr " + lastChromosomeHitNames[c] + " " + sourceReads.length + " vs " + hitReads.length + " current values are " + lastCachedHits.getSourcePositionsForChromosome(lastChromosomeHitNames[c]).length + " and " + lastCachedHits.getSourcePositionsForChromosome(lastChromosomeHitNames[c]).length);
        }
        if (sourceReads.length == 0)
            continue;
        int startPos = lastIndices[c];
        if (startPos < 0) {
            // System.err.println("Started looking for reads at position "+startPos+" (below 0)");
            // Shouldn't happen, but better safe than sorry
            startPos = 0;
        }
        if (startPos >= sourceReads.length) {
            startPos = sourceReads.length - 1;
        }
        // We can now backtrack from here to find the first possible hit
        boolean setCache = false;
        // System.err.println("Starting chr "+chromosomes[c]+" at index "+startPos+" position "+SequenceRead.midPoint(sourceReads[startPos]));
        for (int i = startPos - 1; i >= 0; i--) {
            // Reads come in order, so we can stop when we've seen enough.
            if (SequenceRead.end(sourceReads[i]) < p.start() - 5000) {
                // System.err.println("Finished backtracking at index "+i+" position "+SequenceRead.midPoint(sourceReads[i]));
                break;
            }
            if (SequenceRead.overlaps(sourceReads[i], p.packedPosition())) {
                // They overlap.
                // ++hitCount;
                hitCollection.addHit(lastChromosomeHitNames[c], sourceReads[i], hitReads[i]);
                if (i != startPos - 1 && !setCache) {
                    // This is the last read so set the cache position
                    lastIndices[c] = i;
                    setCache = true;
                }
            }
        }
        for (int i = startPos; i < sourceReads.length; i++) {
            // Reads come in order, so we can stop when we've seen enough.
            if (SequenceRead.start(sourceReads[i]) > p.end()) {
                // System.err.println("Finished forward tracking at index "+i+" position "+SequenceRead.midPoint(sourceReads[i]));
                break;
            }
            if (SequenceRead.overlaps(sourceReads[i], p.packedPosition())) {
                // They overlap.
                // ++hitCount;
                hitCollection.addHit(lastChromosomeHitNames[c], sourceReads[i], hitReads[i]);
                lastChromosome = p.chromosome();
                lastIndices[c] = i;
            }
        }
    }
    return hitCollection;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)

Example 3 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class PairedDataSet method getHiCReadsForChromosome.

public synchronized HiCHitCollection getHiCReadsForChromosome(Chromosome c) {
    if (!isFinalised)
        finalise();
    if (readData.containsKey(c)) {
        if (readData.get(c).hitCollection != null) {
            // We're not caching, so just give them back the reads
            return readData.get(c).hitCollection;
        } else {
            // Check if we've cached this data
            if (lastCachedHits != null && lastCachedHits.getSourceChromosomeName() == c.name()) {
                return lastCachedHits;
            }
            // Signal that we're accessing the cache so the cache icon can blink!
            SeqMonkApplication.getInstance().cacheUsed();
            // temp file
            try {
                ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(readData.get(c).tempFile)));
                lastCachedHits = (HiCHitCollection) ois.readObject();
                ois.close();
                lastChromosomeHitNames = lastCachedHits.getChromosomeNamesWithHits();
                lastIndices = new int[lastChromosomeHitNames.length];
                return lastCachedHits;
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }
    } else {
        lastChromosomeHitNames = new String[0];
        lastCachedHits = new HiCHitCollection(c.name());
        lastIndices = new int[0];
        return lastCachedHits;
    }
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) ObjectInputStream(java.io.ObjectInputStream)

Example 4 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HiCReadCountQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    float[] corrections = new float[data.length];
    if (correctTotal) {
        float largest = 0;
        if (correctPerMillion) {
            largest = 1000000;
        }
        for (int d = 0; d < data.length; d++) {
            // if (correctOnlyInProbes) {
            // corrections[d] = getTotalCountInProbes(data[d],probes);
            // }
            // else {
            corrections[d] = ((DataStore) data[d]).getTotalReadCount();
            // }
            if (d == 0 && !correctPerMillion) {
                largest = corrections[d];
            } else {
                if (!correctPerMillion && corrections[d] > largest) {
                    largest = corrections[d];
                }
            }
        }
        // We correct everything by the largest count
        for (int d = 0; d < corrections.length; d++) {
            corrections[d] = largest / corrections[d];
        }
    }
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        double lengthCorrection = 1;
        if (correctLength) {
            // We assume a 'normal' probe length of 1kb
            lengthCorrection = (double) 1000 / probes[p].length();
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            double count = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (SequenceRead.overlaps(hitReads[r], probes[p].packedPosition()) && SequenceRead.compare(hitReads[r], sourceReads[r]) > 0) {
                        continue;
                    }
                    ++count;
                }
            }
            if (logTransform && count == 0) {
                count = 0.9;
            }
            if (correctTotal) {
                count *= corrections[d];
            }
            if (correctLength) {
                count *= lengthCorrection;
            }
            if (logTransform) {
                count = (float) Math.log(count) / log2;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], (float) count);
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 5 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class ChromosomeDataTrack method getHiCPixelCounts.

private int[][] getHiCPixelCounts() {
    // Check if we're able to use the last cached data.
    if (lastWidth == width && viewer.currentStart() == lastStart && viewer.currentEnd() == lastEnd && cachedHiCPixelCounts != null) {
        return cachedHiCPixelCounts;
    }
    lastWidth = width;
    lastStart = viewer.currentStart();
    lastEnd = viewer.currentEnd();
    cachedHiCPixelCounts = new int[width + 1][width + 1];
    if (lastChromosome != viewer.chromosome()) {
        HiCHitCollection col = ((HiCDataStore) data).getHiCReadsForChromosome(viewer.chromosome());
        lastSourceHiC = col.getSourcePositionsForChromosome(viewer.chromosome().name());
        lastHitHic = col.getHitPositionsForChromosome(viewer.chromosome().name());
        lastChromosome = viewer.chromosome();
    // Check if the HiC collection is sorted.
    // for (int i=1;i<lastSourceHiC.length;i++) {
    // if (SequenceRead.start(lastSourceHiC[i-1]) > SequenceRead.start(lastSourceHiC[i])) {
    // throw new IllegalStateException("HiC set isn't sorted");
    // }
    // }
    }
    // No need to do anything more if there's no data
    if (lastSourceHiC.length == 0)
        return cachedHiCPixelCounts;
    // Go back until we're sure we're not going to see any more reads
    for (int i = lastInteractionIndexStart; i >= 0; i--) {
        if (i >= lastSourceHiC.length)
            continue;
        if (SequenceRead.start(lastSourceHiC[i]) < viewer.currentStart() - data.getMaxReadLength()) {
            lastInteractionIndexStart = i;
            break;
        }
    }
    // Go forward until we hit our first read
    for (int i = lastInteractionIndexStart; i < reads.length; i++) {
        if (SequenceRead.start(lastSourceHiC[i]) >= viewer.currentStart() - data.getMaxReadLength()) {
            lastInteractionIndexStart = i;
            break;
        }
    }
    for (int i = lastInteractionIndexStart; i < lastSourceHiC.length; i++) {
        // Check if both of the ends are in the current window
        int sourceMidPoint = SequenceRead.midPoint(lastSourceHiC[i]);
        if (SequenceRead.start(lastSourceHiC[i]) > viewer.currentEnd()) {
            break;
        }
        if (sourceMidPoint < viewer.currentStart() || sourceMidPoint > viewer.currentEnd()) {
            continue;
        }
        int hitMidPoint = SequenceRead.midPoint(lastHitHic[i]);
        if (hitMidPoint < viewer.currentStart() || hitMidPoint > viewer.currentEnd()) {
            continue;
        }
        int sourcePixel = bpToPixel(Math.min(sourceMidPoint, hitMidPoint));
        int hitPixel = bpToPixel(Math.max(sourceMidPoint, hitMidPoint));
        // System.err.println("Found valid interaction between "+sourceMidPoint+" and "+hitMidPoint+" which is "+sourcePixel+" to "+hitPixel);
        // Now we can add the interaction to the matrix
        cachedHiCPixelCounts[sourcePixel][hitPixel]++;
    }
    return cachedHiCPixelCounts;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)

Aggregations

HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)21 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)6 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)6 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)5 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)3 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)3 Vector (java.util.Vector)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 Hashtable (java.util.Hashtable)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)1 HiCInteractionStrengthCalculator (uk.ac.babraham.SeqMonk.DataTypes.Interaction.HiCInteractionStrengthCalculator)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)1