Search in sources :

Example 11 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class FourCEnrichmentQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Start off by finding the right HiC data for each 4C dataset
    HiCDataStore[] parentHiC = new HiCDataStore[data.length];
    Probe[] parentProbes = new Probe[data.length];
    ProbeList[] significantLists = new ProbeList[data.length];
    for (int d = 0; d < data.length; d++) {
        String filename = data[d].fileName();
        filename = filename.replaceAll("HiC other end of ", "");
        filename = filename.replaceAll(" for region.*", "");
        System.err.println("Looking for HiC match to " + filename);
        for (int h = 0; h < hiCData.length; h++) {
            if (((DataStore) hiCData[h]).name().equals(filename)) {
                parentHiC[d] = hiCData[h];
            }
        }
        if (parentHiC[d] == null) {
            progressWarningReceived(new SeqMonkException("Failed to find HiC dataset '" + filename + "' for 4C dataset " + data[d].name()));
            continue;
        }
        significantLists[d] = new ProbeList(application.dataCollection().probeSet(), "4C p<0.05 " + data[d].name(), "4C pipeline significance < 0.05", "P-value");
        // Also make up a probe to represent the region from which
        // the dataset was made
        filename = data[d].fileName();
        filename = filename.replaceAll("^.*for region ", "");
        String[] locationSections = filename.split("[-:]");
        if (locationSections.length != 3) {
            progressWarningReceived(new SeqMonkException("Couldn't extract location from " + filename));
            continue;
        }
        try {
            parentProbes[d] = new Probe(application.dataCollection().genome().getChromosome(locationSections[0]).chromosome(), Integer.parseInt(locationSections[1]), Integer.parseInt(locationSections[2]));
        } catch (Exception e) {
            progressExceptionReceived(e);
            return;
        }
    }
    // Make strength calculators for each HiC set
    HiCInteractionStrengthCalculator[] strengthCalculators = new HiCInteractionStrengthCalculator[parentHiC.length];
    for (int i = 0; i < parentHiC.length; i++) {
        strengthCalculators[i] = new HiCInteractionStrengthCalculator(parentHiC[i], true);
    }
    // Get the cis/trans counts for the parent probes
    int[] parentProbeCisCounts = new int[parentHiC.length];
    int[] parentProbeTransCounts = new int[parentHiC.length];
    for (int p = 0; p < parentHiC.length; p++) {
        if (parentHiC[p] == null)
            continue;
        HiCHitCollection hits = parentHiC[p].getHiCReadsForProbe(parentProbes[p]);
        String[] chrNames = hits.getChromosomeNamesWithHits();
        for (int c = 0; c < chrNames.length; c++) {
            if (chrNames[c].equals(hits.getSourceChromosomeName())) {
                parentProbeCisCounts[p] = hits.getSourcePositionsForChromosome(chrNames[c]).length;
            } else {
                parentProbeTransCounts[p] += hits.getSourcePositionsForChromosome(chrNames[c]).length;
            }
        }
    }
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            if (parentHiC[d] == null)
                continue;
            int thisProbeCisCount = 0;
            int thisProbeTransCount = 0;
            HiCHitCollection hiCHits = parentHiC[d].getHiCReadsForProbe(probes[p]);
            String[] chrNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chrNames.length; c++) {
                if (chrNames[c].equals(hiCHits.getSourceChromosomeName())) {
                    thisProbeCisCount = hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
                } else {
                    thisProbeTransCount += hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
                }
            }
            strengthCalculators[d].calculateInteraction(data[d].getReadsForProbe(probes[p]).length, thisProbeCisCount, thisProbeTransCount, parentProbeCisCounts[d], parentProbeTransCounts[d], probes[p], parentProbes[d]);
            float obsExp = (float) strengthCalculators[d].obsExp();
            data[d].setValueForProbe(probes[p], obsExp);
            float pValue = (float) strengthCalculators[d].rawPValue() * probes.length;
            if (pValue < 0.05) {
                significantLists[d].addProbe(probes[p], pValue);
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) HiCInteractionStrengthCalculator(uk.ac.babraham.SeqMonk.DataTypes.Interaction.HiCInteractionStrengthCalculator) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 12 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HiCCisTransQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            int cisCount = 0;
            int transCount = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
                        ++transCount;
                    } else {
                        if (includeFarCis) {
                            int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
                            if (distance > farCisDistance) {
                                ++transCount;
                            } else {
                                // System.err.println("Distance was "+distance);
                                ++cisCount;
                            }
                        } else {
                            ++cisCount;
                        }
                    }
                }
            }
            float percentage = ((transCount * 100f) / (cisCount + transCount));
            if (cisCount + transCount == 0) {
                percentage = 0;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], percentage);
        }
    }
    if (correctPerChromosome) {
        Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
        for (int c = 0; c < chrs.length; c++) {
            Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
            float[] thisChrValues = new float[thisChrProbes.length];
            for (int d = 0; d < data.length; d++) {
                DataStore ds = (DataStore) data[d];
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
                    } catch (SeqMonkException e) {
                    }
                }
                float median = SimpleStats.median(thisChrValues);
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
                    } catch (SeqMonkException e) {
                    }
                }
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 13 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HiCPrevNextQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    Arrays.sort(probes);
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        if (probes[p].start() - distance < 0 || probes[p].end() + distance > probes[p].chromosome().length()) {
            for (int d = 0; d < data.length; d++) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
            }
            continue;
        }
        // Make up a pseudo probe for the previous and next regions
        Probe previousProbe = new Probe(probes[p].chromosome(), probes[p].start() - distance, probes[p].start());
        Probe nextProbe = new Probe(probes[p].chromosome(), probes[p].end(), probes[p].end() + distance);
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            // Get the counts for the previous and next probes
            int previousTotalCount = data[d].getHiCReadCountForProbe(previousProbe);
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            int nextTotalCount = data[d].getHiCReadCountForProbe(nextProbe);
            // any reads then give up on this one.
            if (previousTotalCount == 0 || nextTotalCount == 0) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
                continue;
            }
            int previousCount = 0;
            int nextCount = 0;
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            SequenceRead.sort(hitPositions);
            for (int r = 0; r < hitPositions.length; r++) {
                // Check if we can ignore this one
                if (removeDuplicates) {
                    if (r > 0 && hitPositions[r] == hitPositions[r - 1])
                        continue;
                }
                // Check if the other end maps to either the prev or next probe
                if (SequenceRead.overlaps(hitPositions[r], previousProbe.packedPosition())) {
                    ++previousCount;
                }
                if (SequenceRead.overlaps(hitPositions[r], nextProbe.packedPosition())) {
                    ++nextCount;
                }
            }
            // If both of the actual counts are zero then don't try to calculate a real value
            if (previousCount == 0 && nextCount == 0) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
                continue;
            }
            // Basically what happens here is that we calculate a normal X^2 value and then
            // turn it into a negative depending on the direction of the deviation from the
            // expected proportion.
            // We need to calculate expected values for the two directions based on the proportion
            // of reads falling into those two regions in total.
            // Correct the counts based on the proportions of the two totals
            double expectedProportion = (previousTotalCount / (double) (nextTotalCount + previousTotalCount));
            int totalObservedCount = previousCount + nextCount;
            double expectedPreviousCount = totalObservedCount * expectedProportion;
            double expectedNextCount = totalObservedCount - expectedPreviousCount;
            // Now we can calculate the X^2 values to compare the expected and observed values
            double chisquare = (Math.pow(previousCount - expectedPreviousCount, 2) / expectedPreviousCount) + (Math.pow(nextCount - expectedNextCount, 2) / expectedNextCount);
            // We now negate this count if the proportions bias towards the next count
            double observedProportion = (previousCount / (double) totalObservedCount);
            if (observedProportion > expectedProportion) {
                chisquare = 0 - chisquare;
            }
            // System.err.println("Raw counts "+previousCount+","+nextCount+" Totals "+previousTotalCount+","+nextTotalCount+" Expected "+expectedPreviousCount+","+expectedNextCount+" Chi "+chisquare);
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], (float) chisquare);
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 14 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HiCLengthHistogramPlot method getData.

/**
 * Gets the data.
 *
 * @param pl the pl
 * @return the data
 */
private float[] getData() {
    Vector<Integer> data = new Vector<Integer>();
    // If there is a probeset then we get pairs for the current probe list
    if (probes != null) {
        for (int p = 0; p < probes.length; p++) {
            HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
            long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            for (int r = 0; r < sourcePositions.length; r++) {
                // Don't enter every pair twice
                if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
                    continue;
                data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
            }
        }
    } else {
        Chromosome[] chrs = SeqMonkApplication.getInstance().dataCollection().genome().getAllChromosomes();
        for (int c1 = 0; c1 < chrs.length; c1++) {
            HiCHitCollection hiCHits = store.getHiCReadsForChromosome(chrs[c1]);
            long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            for (int r = 0; r < sourcePositions.length; r++) {
                // Don't enter every pair twice
                if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
                    continue;
                data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
            }
        }
    }
    // Convert to float [] for the return array
    float[] returnData = new float[data.size()];
    int index = 0;
    Enumeration<Integer> en = data.elements();
    while (en.hasMoreElements()) {
        returnData[index] = en.nextElement().floatValue();
        index++;
    }
    data.clear();
    return returnData;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Vector(java.util.Vector)

Example 15 with HiCHitCollection

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.

the class HiCOtherEndExtractor method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // We need to get the baits and other options from the preferences
    Probe[] baits = prefs.getBaits();
    boolean mergeIntoOne = prefs.mergeIntoOne();
    boolean excludeSelf = prefs.excludeSelf();
    DataSet[] newData;
    if (mergeIntoOne) {
        newData = new DataSet[visibleHiCDataSets.length];
    } else {
        newData = new DataSet[visibleHiCDataSets.length * baits.length];
    }
    try {
        for (int d = 0; d < visibleHiCDataSets.length; d++) {
            if (baits.length == 1) {
                System.err.println("Only one bait");
                newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[0].chromosome().name() + ":" + baits[0].start() + "-" + baits[0].end(), DataSet.DUPLICATES_REMOVE_NO);
            } else if (mergeIntoOne) {
                System.err.println("Multiple baits, but merging");
                newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for " + baits.length + " regions", DataSet.DUPLICATES_REMOVE_NO);
            }
            for (int b = 0; b < baits.length; b++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                progressUpdated("Getting other ends from " + visibleHiCDataSets[d].name() + " and " + baits[b].toString(), (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
                if (!(baits.length == 1 || mergeIntoOne)) {
                    newData[(d * baits.length) + b] = new DataSet(baits[b].toString() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[b].chromosome().name() + ":" + baits[b].start() + "-" + baits[b].end(), DataSet.DUPLICATES_REMOVE_NO);
                }
                HiCHitCollection hiCCollection = visibleHiCDataSets[d].getHiCReadsForProbe(baits[b]);
                String[] chromosomes = hiCCollection.getChromosomeNamesWithHits();
                for (int c = 0; c < chromosomes.length; c++) {
                    Chromosome chromosome = collection.genome().getChromosome(chromosomes[c]).chromosome();
                    long[] reads = hiCCollection.getHitPositionsForChromosome(chromosomes[c]);
                    for (int r = 0; r < reads.length; r++) {
                        if (cancel) {
                            progressCancelled();
                            return;
                        }
                        if (excludeSelf && baits[b].chromosome().name().equals(chromosomes[c]) && SequenceRead.overlaps(reads[r], baits[b].packedPosition())) {
                            continue;
                        }
                        if (mergeIntoOne) {
                            newData[d].addData(chromosome, reads[r]);
                        } else {
                            newData[(d * baits.length) + b].addData(chromosome, reads[r]);
                        }
                    }
                }
                if (!mergeIntoOne) {
                    // Cache the data in the new dataset
                    progressUpdated("Caching data", (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
                    newData[(d * baits.length) + b].finalise();
                }
            }
            if (mergeIntoOne) {
                // Cache the data in the new dataset
                progressUpdated("Caching data", d, visibleHiCDataSets.length);
                newData[d].finalise();
            }
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
    }
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)21 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)6 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)6 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)5 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)3 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)3 Vector (java.util.Vector)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 Hashtable (java.util.Hashtable)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)1 HiCInteractionStrengthCalculator (uk.ac.babraham.SeqMonk.DataTypes.Interaction.HiCInteractionStrengthCalculator)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)1