Search in sources :

Example 61 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class EnrichmentNormalisationQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
    }
    Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();
    Probe[] calculateProbes = ((ProbeList) calculateFromProbeList.getSelectedItem()).getAllProbes();
    float[] lowerPercentileValues = new float[data.length];
    float[] upperPercentileValues = new float[data.length];
    // Work out the value at the appropriate percentile
    for (int d = 0; d < data.length; d++) {
        progressUpdated("Calculating correction for " + data[d].name(), d, data.length);
        float[] theseValues = new float[calculateProbes.length];
        for (int p = 0; p < calculateProbes.length; p++) {
            try {
                theseValues[p] = data[d].getValueForProbe(calculateProbes[p]);
            } catch (SeqMonkException e) {
                progressExceptionReceived(e);
            }
        }
        Arrays.sort(theseValues);
        lowerPercentileValues[d] = getPercentileValue(theseValues, lowerPercentile);
        upperPercentileValues[d] = getPercentileValue(theseValues, upperPercentile);
        System.err.println("Lower percentile for " + data[d].name() + " is " + lowerPercentileValues[d] + " from " + theseValues.length + " values");
    }
    float lowerMedian = SimpleStats.median(Arrays.copyOf(lowerPercentileValues, lowerPercentileValues.length));
    float upperMedian = SimpleStats.median(Arrays.copyOf(upperPercentileValues, upperPercentileValues.length));
    for (int d = 0; d < data.length; d++) {
        System.err.println("Looking at " + data[d].name());
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(d, data.length);
        // Now we work out the correction factor we're actually going to use
        float lowerCorrectionFactor = lowerMedian - lowerPercentileValues[d];
        float upperCorrectionFactor = ((upperPercentileValues[d] + lowerCorrectionFactor) - lowerMedian) / (upperMedian - lowerMedian);
        System.err.println("Lower percentile = " + lowerPercentileValues[d]);
        System.err.println("Upper percentile = " + upperPercentileValues[d]);
        System.err.println("Lower median = " + lowerMedian);
        System.err.println("Upper median = " + upperMedian);
        System.err.println("Lower correction = " + lowerCorrectionFactor);
        System.err.println("Upper correction = " + upperCorrectionFactor);
        System.err.println("\n\n");
        // Apply the correction to all probes
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                float probeValue = data[d].getValueForProbe(allProbes[p]);
                probeValue += lowerCorrectionFactor;
                probeValue -= lowerMedian;
                probeValue /= upperCorrectionFactor;
                probeValue += lowerMedian;
                data[d].setValueForProbe(allProbes[p], probeValue);
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
        }
    }
    quantitatonComplete();
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 62 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ExactOverlapQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    float[] corrections = new float[data.length];
    if (correctTotal) {
        float largest = 0;
        if (correctPerMillion) {
            largest = 1000000;
        }
        for (int d = 0; d < data.length; d++) {
            if (correctOnlyInProbes) {
                corrections[d] = getTotalCountInProbes(data[d], probes);
            } else {
                corrections[d] = data[d].getTotalReadCount();
            }
            if (d == 0 && !correctPerMillion) {
                largest = corrections[d];
            } else {
                if (!correctPerMillion && corrections[d] > largest) {
                    largest = corrections[d];
                }
            }
        }
        // We correct everything by the largest count
        for (int d = 0; d < corrections.length; d++) {
            corrections[d] = largest / corrections[d];
        }
    }
    // To make this more efficient we'll do this chromosome by chromosome
    Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Quantiating probes on " + chrs[c].name(), c, chrs.length);
        Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
        Arrays.sort(thisChrProbes);
        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            // We'll fetch all reads for this chr and then do a count per position
            ReadsWithCounts reads = data[d].getReadsForChromosome(chrs[c]);
            quantitationType.resetLastRead();
            int startIndex = 0;
            for (int p = 0; p < thisChrProbes.length; p++) {
                int rawCount = 0;
                for (int r = startIndex; r < reads.reads.length; r++) {
                    if (SequenceRead.start(reads.reads[r]) < thisChrProbes[p].start()) {
                        startIndex = r;
                    }
                    if (SequenceRead.start(reads.reads[r]) > thisChrProbes[p].start())
                        break;
                    if (quantitationType.useRead(thisChrProbes[p], reads.reads[r])) {
                        if (SequenceRead.start(reads.reads[r]) == thisChrProbes[p].start() && SequenceRead.end(reads.reads[r]) == thisChrProbes[p].end()) {
                            rawCount += reads.counts[r];
                        }
                    }
                }
                // We have the counts now work out any correction.
                float count = rawCount;
                if (logTransform && count == 0) {
                    count = 0.9f;
                }
                if (correctTotal) {
                    count *= corrections[d];
                }
                if (logTransform) {
                    count = (float) Math.log(count) / log2;
                }
                data[d].setValueForProbe(thisChrProbes[p], count);
            }
        }
    }
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 63 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class FourCEnrichmentQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // Start off by finding the right HiC data for each 4C dataset
    HiCDataStore[] parentHiC = new HiCDataStore[data.length];
    Probe[] parentProbes = new Probe[data.length];
    ProbeList[] significantLists = new ProbeList[data.length];
    for (int d = 0; d < data.length; d++) {
        String filename = data[d].fileName();
        filename = filename.replaceAll("HiC other end of ", "");
        filename = filename.replaceAll(" for region.*", "");
        System.err.println("Looking for HiC match to " + filename);
        for (int h = 0; h < hiCData.length; h++) {
            if (((DataStore) hiCData[h]).name().equals(filename)) {
                parentHiC[d] = hiCData[h];
            }
        }
        if (parentHiC[d] == null) {
            progressWarningReceived(new SeqMonkException("Failed to find HiC dataset '" + filename + "' for 4C dataset " + data[d].name()));
            continue;
        }
        significantLists[d] = new ProbeList(application.dataCollection().probeSet(), "4C p<0.05 " + data[d].name(), "4C pipeline significance < 0.05", "P-value");
        // Also make up a probe to represent the region from which
        // the dataset was made
        filename = data[d].fileName();
        filename = filename.replaceAll("^.*for region ", "");
        String[] locationSections = filename.split("[-:]");
        if (locationSections.length != 3) {
            progressWarningReceived(new SeqMonkException("Couldn't extract location from " + filename));
            continue;
        }
        try {
            parentProbes[d] = new Probe(application.dataCollection().genome().getChromosome(locationSections[0]).chromosome(), Integer.parseInt(locationSections[1]), Integer.parseInt(locationSections[2]));
        } catch (Exception e) {
            progressExceptionReceived(e);
            return;
        }
    }
    // Make strength calculators for each HiC set
    HiCInteractionStrengthCalculator[] strengthCalculators = new HiCInteractionStrengthCalculator[parentHiC.length];
    for (int i = 0; i < parentHiC.length; i++) {
        strengthCalculators[i] = new HiCInteractionStrengthCalculator(parentHiC[i], true);
    }
    // Get the cis/trans counts for the parent probes
    int[] parentProbeCisCounts = new int[parentHiC.length];
    int[] parentProbeTransCounts = new int[parentHiC.length];
    for (int p = 0; p < parentHiC.length; p++) {
        if (parentHiC[p] == null)
            continue;
        HiCHitCollection hits = parentHiC[p].getHiCReadsForProbe(parentProbes[p]);
        String[] chrNames = hits.getChromosomeNamesWithHits();
        for (int c = 0; c < chrNames.length; c++) {
            if (chrNames[c].equals(hits.getSourceChromosomeName())) {
                parentProbeCisCounts[p] = hits.getSourcePositionsForChromosome(chrNames[c]).length;
            } else {
                parentProbeTransCounts[p] += hits.getSourcePositionsForChromosome(chrNames[c]).length;
            }
        }
    }
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            if (parentHiC[d] == null)
                continue;
            int thisProbeCisCount = 0;
            int thisProbeTransCount = 0;
            HiCHitCollection hiCHits = parentHiC[d].getHiCReadsForProbe(probes[p]);
            String[] chrNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chrNames.length; c++) {
                if (chrNames[c].equals(hiCHits.getSourceChromosomeName())) {
                    thisProbeCisCount = hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
                } else {
                    thisProbeTransCount += hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
                }
            }
            strengthCalculators[d].calculateInteraction(data[d].getReadsForProbe(probes[p]).length, thisProbeCisCount, thisProbeTransCount, parentProbeCisCounts[d], parentProbeTransCounts[d], probes[p], parentProbes[d]);
            float obsExp = (float) strengthCalculators[d].obsExp();
            data[d].setValueForProbe(probes[p], obsExp);
            float pValue = (float) strengthCalculators[d].rawPValue() * probes.length;
            if (pValue < 0.05) {
                significantLists[d].addProbe(probes[p], pValue);
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) HiCInteractionStrengthCalculator(uk.ac.babraham.SeqMonk.DataTypes.Interaction.HiCInteractionStrengthCalculator) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 64 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class SmoothingSubtractionQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 65 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class HiCCisTransQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            int cisCount = 0;
            int transCount = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
                        ++transCount;
                    } else {
                        if (includeFarCis) {
                            int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
                            if (distance > farCisDistance) {
                                ++transCount;
                            } else {
                                // System.err.println("Distance was "+distance);
                                ++cisCount;
                            }
                        } else {
                            ++cisCount;
                        }
                    }
                }
            }
            float percentage = ((transCount * 100f) / (cisCount + transCount));
            if (cisCount + transCount == 0) {
                percentage = 0;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], percentage);
        }
    }
    if (correctPerChromosome) {
        Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
        for (int c = 0; c < chrs.length; c++) {
            Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
            float[] thisChrValues = new float[thisChrProbes.length];
            for (int d = 0; d < data.length; d++) {
                DataStore ds = (DataStore) data[d];
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
                    } catch (SeqMonkException e) {
                    }
                }
                float median = SimpleStats.median(thisChrValues);
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
                    } catch (SeqMonkException e) {
                    }
                }
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Aggregations

Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)125 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)52 Vector (java.util.Vector)48 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)47 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)26 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)21 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)20 HashSet (java.util.HashSet)9 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)9 File (java.io.File)8 PrintWriter (java.io.PrintWriter)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 Hashtable (java.util.Hashtable)6 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)6 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)6