Search in sources :

Example 66 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class HiCPrevNextQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    Arrays.sort(probes);
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        if (probes[p].start() - distance < 0 || probes[p].end() + distance > probes[p].chromosome().length()) {
            for (int d = 0; d < data.length; d++) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
            }
            continue;
        }
        // Make up a pseudo probe for the previous and next regions
        Probe previousProbe = new Probe(probes[p].chromosome(), probes[p].start() - distance, probes[p].start());
        Probe nextProbe = new Probe(probes[p].chromosome(), probes[p].end(), probes[p].end() + distance);
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            // Get the counts for the previous and next probes
            int previousTotalCount = data[d].getHiCReadCountForProbe(previousProbe);
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            int nextTotalCount = data[d].getHiCReadCountForProbe(nextProbe);
            // any reads then give up on this one.
            if (previousTotalCount == 0 || nextTotalCount == 0) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
                continue;
            }
            int previousCount = 0;
            int nextCount = 0;
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            SequenceRead.sort(hitPositions);
            for (int r = 0; r < hitPositions.length; r++) {
                // Check if we can ignore this one
                if (removeDuplicates) {
                    if (r > 0 && hitPositions[r] == hitPositions[r - 1])
                        continue;
                }
                // Check if the other end maps to either the prev or next probe
                if (SequenceRead.overlaps(hitPositions[r], previousProbe.packedPosition())) {
                    ++previousCount;
                }
                if (SequenceRead.overlaps(hitPositions[r], nextProbe.packedPosition())) {
                    ++nextCount;
                }
            }
            // If both of the actual counts are zero then don't try to calculate a real value
            if (previousCount == 0 && nextCount == 0) {
                ((DataStore) data[d]).setValueForProbe(probes[p], 0);
                continue;
            }
            // Basically what happens here is that we calculate a normal X^2 value and then
            // turn it into a negative depending on the direction of the deviation from the
            // expected proportion.
            // We need to calculate expected values for the two directions based on the proportion
            // of reads falling into those two regions in total.
            // Correct the counts based on the proportions of the two totals
            double expectedProportion = (previousTotalCount / (double) (nextTotalCount + previousTotalCount));
            int totalObservedCount = previousCount + nextCount;
            double expectedPreviousCount = totalObservedCount * expectedProportion;
            double expectedNextCount = totalObservedCount - expectedPreviousCount;
            // Now we can calculate the X^2 values to compare the expected and observed values
            double chisquare = (Math.pow(previousCount - expectedPreviousCount, 2) / expectedPreviousCount) + (Math.pow(nextCount - expectedNextCount, 2) / expectedNextCount);
            // We now negate this count if the proportions bias towards the next count
            double observedProportion = (previousCount / (double) totalObservedCount);
            if (observedProportion > expectedProportion) {
                chisquare = 0 - chisquare;
            }
            // System.err.println("Raw counts "+previousCount+","+nextCount+" Totals "+previousTotalCount+","+nextTotalCount+" Expected "+expectedPreviousCount+","+expectedNextCount+" Chi "+chisquare);
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], (float) chisquare);
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 67 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class SmoothingQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 68 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class FeaturePercentileProbeGenerator method makeProbes.

/**
 * Make a set of probes within an individual feature.
 *
 * @param feature The individual feature to make probes around
 * @param chromosome
 * @param location The actual location to use - must be the whole region
 * @param newProbes The vector to add the new probes to
 * @param sets DataSets to check for reads if we're ignoring blanks
 */
private void makeProbes(Feature feature, Chromosome chromosome, Location location, Vector<Probe> newProbes) {
    int start;
    int end;
    int strand = location.strand();
    if (ignoreDirection)
        strand = Location.UNKNOWN;
    if (lengthType == FeaturePercentileSelectorPanel.PROPORTIONAL_LENGTH) {
        int probeLength = feature.location().length() / probesPerFeature;
        if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
            start = location.start();
            end = location.start();
        } else {
            start = location.end();
            end = location.end();
        }
        for (int i = 0; i < probesPerFeature; i++) {
            int localStart;
            int localEnd;
            if (strand == Location.REVERSE) {
                localStart = end - (probeLength * (i + 1));
                localEnd = localStart + probeLength;
            } else {
                localStart = start + (probeLength * i);
                localEnd = localStart + probeLength;
            }
            Probe p = makeProbe(feature.name() + "_" + (i + 1), chromosome, localStart, localEnd, strand);
            if (p != null) {
                // Add this probe
                newProbes.add(p);
            }
        }
    } else if (lengthType == FeaturePercentileSelectorPanel.FIXED_LENGTH) {
        int proportionalLength;
        if (includeStartEnd) {
            proportionalLength = feature.location().length() / (probesPerFeature - 1);
        } else {
            proportionalLength = feature.location().length() / probesPerFeature;
        }
        if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
            if (includeStartEnd) {
                start = location.start();
                end = location.end();
            } else {
                start = location.start() + (proportionalLength / 2);
                end = location.end() - (proportionalLength / 2);
            }
        } else {
            if (includeStartEnd) {
                start = location.end();
                end = location.end();
            } else {
                start = location.end() + (proportionalLength / 2);
                end = location.end() - (proportionalLength / 2);
            }
        }
        for (int i = 0; i < probesPerFeature; i++) {
            int localStart;
            int localEnd;
            int localCentre;
            if (strand == Location.REVERSE) {
                localCentre = (end - (proportionalLength * i));
                localStart = localCentre - (fixedLength / 2);
                localEnd = localStart + fixedLength;
            } else {
                localCentre = start + (proportionalLength * i);
                localStart = localCentre - (fixedLength / 2);
                localEnd = localStart + fixedLength;
            }
            Probe p = makeProbe(feature.name() + "_" + (i + 1), chromosome, localStart, localEnd, strand);
            if (p != null) {
                // Add this probe
                newProbes.add(p);
            }
        }
    } else {
        throw new IllegalStateException("Unknown length type " + optionPanel.lengthType());
    }
}
Also used : Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 69 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class FeaturePercentileProbeGenerator method makeProbe.

/**
 * Makes an individual probe
 *
 * @param name The name for the probe
 * @param c The Chromosome
 * @param start Start position
 * @param end End position
 * @return The newly generated probe
 */
private Probe makeProbe(String name, Chromosome c, int start, int end, int strand) {
    if (end > c.length())
        end = c.length();
    if (start < 1)
        start = 1;
    if (end < start)
        return null;
    Probe p = new Probe(c, start, end, strand);
    p.setName(name);
    return p;
}
Also used : Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 70 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class SeqMonkParser method parseProbes.

/**
 * Parses the list of probes.
 *
 * @param sections The tab split initial line from the probes section
 * @throws SeqMonkException
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseProbes(String[] sections) throws SeqMonkException, IOException {
    if (sections.length < 2) {
        throw new SeqMonkException("Probe line didn't contain at least 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probes line");
    }
    int n = Integer.parseInt(sections[1]);
    probes = new Probe[n];
    String description = "No generator description available";
    if (sections.length > 2) {
        description = sections[2];
    }
    ProbeSet probeSet = new ProbeSet(description, n);
    if (sections.length > 3) {
        if (sections[3].length() > 0) {
            probeSet.setCurrentQuantitation(sections[3]);
        }
    }
    if (sections.length > 4) {
        probeSet.setComments(sections[4].replaceAll("`", "\n"));
    }
    // We need to save the probeset to the dataset at this point so we can add the probe
    // lists as we get to them.
    application.dataCollection().setProbeSet(probeSet);
    int positionOffset;
    // We used to store chr start and end
    if (thisDataVersion < 8) {
        positionOffset = 4;
    } else // We now store chr and packed position (to give start end and strand)
    {
        positionOffset = 3;
    }
    int expectedSectionLength = 3 + dataSets.length + dataGroups.length;
    String line;
    for (int i = 0; i < n; i++) {
        line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        // Since the probes section can have blank trailing sections we need
        // to not trim these, hence the -1 limit.
        sections = line.split("\\t", -1);
        if (i == 0) {
            /*
				 * Older versions of this format put down data for just
				 * datasets.  Newer versions include data for datagroups
				 * as well.  We need to figure out which one we're looking
				 * at
				 */
            if (sections.length == positionOffset + dataSets.length) {
                expectedSectionLength = positionOffset + dataSets.length;
            } else if (sections.length == positionOffset + dataSets.length + dataGroups.length) {
                expectedSectionLength = positionOffset + dataSets.length + dataGroups.length;
            }
        }
        if (sections.length != expectedSectionLength) {
            throw new SeqMonkException("Expected " + expectedSectionLength + " sections in data file for " + sections[0] + " but got " + sections.length);
        }
        if (i % 10000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed data for " + i + " probes", i, n);
            }
        }
        Chromosome c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        // Sanity check
        if (c == null) {
            throw new SeqMonkException("Couldn't find a chromosome called " + sections[1]);
        }
        Probe p;
        if (thisDataVersion < 8) {
            int start = Integer.parseInt(sections[2]);
            int end = Integer.parseInt(sections[3]);
            p = new Probe(c, start, end);
        } else {
            long packedValue = Long.parseLong(sections[2]);
            p = new Probe(c, packedValue);
        }
        if (!sections[0].equals("null")) {
            p.setName(sections[0]);
        }
        probes[i] = p;
        probeSet.addProbe(probes[i], null);
        for (int j = positionOffset; j < sections.length; j++) {
            if (sections[j].length() == 0)
                continue;
            if ((j - positionOffset) >= dataSets.length) {
                dataGroups[j - (positionOffset + dataSets.length)].setValueForProbe(p, Float.parseFloat(sections[j]));
            } else {
                dataSets[j - positionOffset].setValueForProbe(p, Float.parseFloat(sections[j]));
            }
        }
    }
    application.dataCollection().activeProbeListChanged(probeSet);
    // This rename doesn't actually change the name.  We put this in because
    // the All Probes group is drawn in the data view before probes have been
    // added to it.  This means that it's name isn't updated when the probes have
    // been added and it appears labelled with 0 probes.  This doesn't happen if
    // there are any probe lists under all probes as they cause it to be refreshed,
    // but if you only have the probe set then you need this to make the display show
    // the correct information.
    probeSet.setName("All Probes");
}
Also used : ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)125 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)52 Vector (java.util.Vector)48 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)47 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)26 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)21 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)20 HashSet (java.util.HashSet)9 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)9 File (java.io.File)8 PrintWriter (java.io.PrintWriter)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 Hashtable (java.util.Hashtable)6 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)6 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)6