Search in sources :

Example 11 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class SeqMonkParser method parseLists.

/**
 * Parses the set of probe lists.
 *
 * @param sections The tab split initial line from the probe lists section
 * @throws SeqMonkException
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseLists(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Probe Lists line didn't contain 2 sections");
    }
    int n = Integer.parseInt(sections[1]);
    ProbeList[] lists = new ProbeList[n];
    // We also store the probe lists in their appropriate linkage position
    // to recreate the links between probe lists.  The worst case scenario
    // is that we have one big chain of linked lists so we make a linkage
    // list which is the same size as the number of probe lists.
    ProbeList[] linkage = new ProbeList[n + 1];
    // The 0 linkage list will always be the full ProbeSet
    linkage[0] = application.dataCollection().probeSet();
    for (int i = 0; i < n; i++) {
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        String[] listSections = line.split("\\t", -1);
        // we allow for that not being present.
        if (thisDataVersion < 5) {
            lists[i] = new ProbeList(application.dataCollection().probeSet(), listSections[0], "", listSections[1]);
            if (listSections.length > 2) {
                lists[i].setDescription(listSections[2]);
            } else {
                lists[i].setDescription("No description");
            }
        } else {
            lists[i] = new ProbeList(linkage[Integer.parseInt(listSections[0]) - 1], listSections[1], listSections[3], listSections[2]);
            if (listSections.length > 4) {
                lists[i].setComments(listSections[4].replaceAll("`", "\n"));
            }
            linkage[Integer.parseInt(listSections[0])] = lists[i];
        }
    }
    // Next we reach the probe list data.  These comes as a long list of values
    // the first of which is the probe name, then either a numerical value if
    // the probe is contained in that list, or a blank if it isn't.
    String line = br.readLine();
    if (line == null) {
        throw new SeqMonkException("Couldn't find probe line for list data");
    }
    sections = line.split("\\t");
    if (sections.length != 2) {
        throw new SeqMonkException("Probe line didn't contain 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probe lists probe line");
    }
    n = Integer.parseInt(sections[1]);
    for (int i = 0; i < n; i++) {
        sections = br.readLine().split("\\t", -1);
        if (sections.length != lists.length + 1) {
            throw new SeqMonkException("Expected " + (lists.length + 1) + " sections in probe list section of data file for " + sections[0] + " but got " + sections.length);
        }
        if (i % 1000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed list data for " + i + " probes", i, n);
            }
        }
        Probe p = probes[i];
        if (p == null) {
            continue;
        }
        for (int j = 0; j < lists.length; j++) {
            if (sections[j + 1].length() > 0) {
                if (sections[j + 1].equals("NaN")) {
                    lists[j].addProbe(p, null);
                } else {
                    lists[j].addProbe(p, new Float(Float.parseFloat(sections[j + 1])));
                }
            }
        }
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 12 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ProbeListAnnotationParser method parseAnnotation.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.AnnotationParsers.AnnotationParser#parseAnnotation(java.io.File, uk.ac.babraham.SeqMonk.DataTypes.Genome.Genome)
	 */
protected AnnotationSet[] parseAnnotation(File file, Genome genome) throws Exception {
    Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
    AnnotationSet currentAnnotation = new AnnotationSet(genome, probeList.name());
    annotationSets.add(currentAnnotation);
    Probe[] probes = probeList.getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        if (p % 1 + (probes.length / 100) == 0) {
            progressUpdated("Converted " + p + " probes", p, probes.length);
        }
        if (p > 1000000 && p % 1000000 == 0) {
            progressUpdated("Caching...", 0, 1);
            currentAnnotation.finalise();
            currentAnnotation = new AnnotationSet(genome, probeList.name() + "[" + annotationSets.size() + "]");
            annotationSets.add(currentAnnotation);
        }
        Feature feature = new Feature(featureType, probes[p].chromosome().name());
        if (probes[p].hasDefinedName()) {
            feature.addAttribute("name", probes[p].name());
        }
        feature.setLocation(new Location(probes[p].start(), probes[p].end(), probes[p].strand()));
        currentAnnotation.addFeature(feature);
    }
    return annotationSets.toArray(new AnnotationSet[0]);
}
Also used : AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 13 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class DistanceToFeatureQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    Feature[] features = null;
    Chromosome lastChromsome = null;
    int lastIndex = 0;
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        // See if we're on the same chromosome as last time
        if (lastChromsome == null || probes[p].chromosome() != lastChromsome) {
            lastChromsome = probes[p].chromosome();
            features = application.dataCollection().genome().annotationCollection().getFeaturesForType(lastChromsome, selectedFeature);
            System.err.println("Found " + features.length + " features of type " + selectedFeature + " on chr " + lastChromsome);
            lastIndex = 0;
        }
        int closestDistance = lastChromsome.length();
        int bestIndex = lastIndex;
        for (int i = lastIndex; i >= 0 && i < features.length; i--) {
            int thisDistance = getDistanceToFeature(probes[p], features[i]);
            if (thisDistance < closestDistance) {
                closestDistance = thisDistance;
                bestIndex = i;
            }
            if (features[i].location().end() < probes[p].start())
                break;
        }
        // Now we go forward until we hit the end or we're after the end of the last best feature
        for (int i = lastIndex + 1; i < features.length; i++) {
            int thisDistance = getDistanceToFeature(probes[p], features[i]);
            if (thisDistance < closestDistance) {
                closestDistance = thisDistance;
                bestIndex = i;
            }
            if (features[i].location().start() > Math.max(probes[p].end(), features[lastIndex].location().end()))
                break;
        }
        lastIndex = bestIndex;
        for (int d = 0; d < data.length; d++) {
            if (logTransform.isSelected()) {
                data[d].setValueForProbe(probes[p], (float) (Math.log(closestDistance + 1) / log2));
            } else {
                data[d].setValueForProbe(probes[p], closestDistance);
            }
        }
    }
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Example 14 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class HiCPCADomainQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // We're going to go through the probes one chromosome at a time so we
    // can reduce the complexity we have to deal with
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chromosomes.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        currentChromosome = chromosomes[c];
        Probe[] probes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        if (probes.length < 5) {
            progressWarningReceived(new SeqMonkException("Too few probes on chromosome " + currentChromosome.name() + " - assigning zero to everything"));
            // It's not worth trying to find domains
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < probes.length; p++) {
                    ((DataStore) data[d]).setValueForProbe(probes[p], 0f);
                }
            }
            continue;
        }
        ProbeList thisChrProbes = new ProbeList(application.dataCollection().probeSet(), chromosomes[c].name(), "", null);
        for (int p = 0; p < probes.length; p++) {
            thisChrProbes.addProbe(probes[p], 0f);
        }
        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            currentStore = data[d];
            current = (d * chromosomes.length) + c;
            total = chromosomes.length * data.length;
            progressUpdated("Processing chromosome " + chromosomes[c].name() + " for " + data[d].name(), current, total);
            HeatmapMatrix matrix = new HeatmapMatrix(data[d], new ProbeList[] { thisChrProbes }, application.dataCollection().genome(), optionsPanel.minDistance(), optionsPanel.maxDistance(), optionsPanel.minStrength(), optionsPanel.maxSignificance(), optionsPanel.minAbsolute(), optionsPanel.correctLinkage());
            matrix.addProgressListener(this);
            wait = true;
            matrix.startCalculating();
            while (wait) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                }
            }
            if (cancel) {
                progressCancelled();
                return;
            }
            if (matrix.filteredInteractions().length < 10) {
                progressWarningReceived(new SeqMonkException("Too few interactions on chromosome " + currentChromosome.name() + " for " + data[d].name() + " - assigning zero to everything"));
                // not going to get a sensible answer anyway.
                for (int p = 0; p < probes.length; p++) {
                    ((DataStore) data[d]).setValueForProbe(probes[p], 0f);
                }
                continue;
            }
            InteractionClusterMatrix clusterMatrix = new InteractionClusterMatrix(matrix.filteredInteractions(), probes.length);
            clusterMatrix.addListener(this);
            wait = true;
            clusterMatrix.startCorrelating();
            while (wait) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                }
            }
            float[][] correlationMatrix = clusterMatrix.correlationMatix();
            // Annoyingly the PCA needs a double [][]
            double[][] correlationMatrixDouble = new double[correlationMatrix.length][];
            for (int i = 0; i < correlationMatrix.length; i++) {
                double[] db = new double[correlationMatrix[i].length];
                for (int j = 0; j < db.length; j++) {
                    db[j] = correlationMatrix[i][j];
                }
                correlationMatrixDouble[i] = db;
            }
            // Now we can calculate the PCA values from the correlation matrix
            PCA pca = new PCA(correlationMatrixDouble);
            pca.addProgressListener(this);
            wait = true;
            pca.startCalculating();
            while (wait) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                }
            }
            double[] extractedEigenValues = pca.extractedEigenValues();
            // for these probes
            for (int p = 0; p < probes.length; p++) {
                ((DataStore) data[d]).setValueForProbe(probes[p], (float) extractedEigenValues[p]);
            }
        }
        thisChrProbes.delete();
    }
    quantitatonComplete();
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) InteractionClusterMatrix(uk.ac.babraham.SeqMonk.DataTypes.Interaction.InteractionClusterMatrix) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) HeatmapMatrix(uk.ac.babraham.SeqMonk.DataTypes.Interaction.HeatmapMatrix) PCA(uk.ac.babraham.SeqMonk.Analysis.Statistics.PCA) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 15 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class HiCReadCountQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    float[] corrections = new float[data.length];
    if (correctTotal) {
        float largest = 0;
        if (correctPerMillion) {
            largest = 1000000;
        }
        for (int d = 0; d < data.length; d++) {
            // if (correctOnlyInProbes) {
            // corrections[d] = getTotalCountInProbes(data[d],probes);
            // }
            // else {
            corrections[d] = ((DataStore) data[d]).getTotalReadCount();
            // }
            if (d == 0 && !correctPerMillion) {
                largest = corrections[d];
            } else {
                if (!correctPerMillion && corrections[d] > largest) {
                    largest = corrections[d];
                }
            }
        }
        // We correct everything by the largest count
        for (int d = 0; d < corrections.length; d++) {
            corrections[d] = largest / corrections[d];
        }
    }
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        double lengthCorrection = 1;
        if (correctLength) {
            // We assume a 'normal' probe length of 1kb
            lengthCorrection = (double) 1000 / probes[p].length();
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            double count = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (SequenceRead.overlaps(hitReads[r], probes[p].packedPosition()) && SequenceRead.compare(hitReads[r], sourceReads[r]) > 0) {
                        continue;
                    }
                    ++count;
                }
            }
            if (logTransform && count == 0) {
                count = 0.9;
            }
            if (correctTotal) {
                count *= corrections[d];
            }
            if (correctLength) {
                count *= lengthCorrection;
            }
            if (logTransform) {
                count = (float) Math.log(count) / log2;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], (float) count);
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)125 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)52 Vector (java.util.Vector)48 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)47 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)26 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)21 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)20 HashSet (java.util.HashSet)9 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)9 File (java.io.File)8 PrintWriter (java.io.PrintWriter)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 Hashtable (java.util.Hashtable)6 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)6 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)6