Search in sources :

Example 41 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class AnnotatedInteractionReport method run.

/**
 * Annotates every probe with its best matching feature of the selected
 * annotation type and then publishes the report.
 *
 * The annotation position chosen in the UI decides which relationships
 * (overlapping, upstream, downstream, name match) are considered. Probes
 * for which no feature is found are left out of probeAnnotations. When the
 * "Exclude" option is selected the interaction list is reduced to the
 * interactions whose two probes were both annotated. Finally a new
 * AnnotationTableModel is passed to reportComplete.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    String annotationTypeValue = (String) annotationType.getSelectedItem();
    // Largest upstream/downstream gap accepted; stays 0 if no limit is entered.
    int distanceLimit = 0;
    // Check what to do with unannotated probes
    boolean includeAll = true;
    if (((String) excludes.getSelectedItem()).equals("Exclude")) {
        includeAll = false;
    }
    String annotationPositionValue = (String) annotationPosition.getSelectedItem();
    // We're going to set up a set of booleans which tell us which kinds
    // of relationships we're allowed to look for later.
    boolean surrounding = true;
    boolean upstream = true;
    boolean downstream = true;
    boolean matchname = false;
    if (annotationPositionValue.equals("[Don't annotate]")) {
        upstream = false;
        downstream = false;
        surrounding = false;
    } else if (annotationPositionValue.equals("overlapping")) {
        upstream = false;
        downstream = false;
    } else if (annotationPositionValue.equals("surrounding or upstream")) {
        downstream = false;
    } else if (annotationPositionValue.equals("surrounding or downstream")) {
        upstream = false;
    } else if (annotationPositionValue.equals("upstream")) {
        surrounding = false;
        downstream = false;
    } else if (annotationPositionValue.equals("downstream")) {
        surrounding = false;
        upstream = false;
    } else if (annotationPositionValue.equals("closest")) {
    // Leave things as they are!
    } else if (annotationPositionValue.equals("name matched")) {
        matchname = true;
        upstream = false;
        surrounding = false;
        downstream = false;
    } else {
        // Unknown value: only logged, all three positional checks stay enabled.
        System.err.println("Didn't recognise position value '" + annotationPositionValue + "'");
    }
    // Read the distance limit from the text field unless the position value
    // is "surrounding". Integer.parseInt throws NumberFormatException if the
    // field holds non-numeric text; that is not handled here.
    // NOTE(review): "surrounding" is not one of the values handled above, so
    // this test looks like it is always true - confirm whether "overlapping"
    // was meant.
    if (!annotationPositionValue.equals("surrounding")) {
        if (annotationLimit.getText().length() > 0) {
            distanceLimit = Integer.parseInt(annotationLimit.getText());
        }
    }
    // Since we're going to be making the annotations on the
    // basis of position we should go through all probes one
    // chromosome at a time.
    Feature[] features = null;
    Chromosome lastChr = null;
    // We can now step through the probes looking for the best feature match
    for (int p = 0; p < probes.length; p++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        // Only report progress every 100 probes.
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        // Re-fetch the feature list only when the probe's chromosome differs
        // from the one used for the previous probe.
        if (!probes[p].chromosome().equals(lastChr)) {
            features = collection.genome().annotationCollection().getFeaturesForType(probes[p].chromosome(), annotationTypeValue);
            lastChr = probes[p].chromosome();
        }
        // Alternative probe names used for name matching: first with a trailing
        // _upstream/_downstream/_gene removed, then additionally with a trailing
        // dash followed by three digits removed.
        String nameWithoutExtensions = "";
        String nameWithoutTranscript = "";
        if (matchname) {
            nameWithoutExtensions = probes[p].name().replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
            nameWithoutTranscript = nameWithoutExtensions.replaceAll("-\\d\\d\\d$", "");
        }
        Feature bestFeature = null;
        // Distance to bestFeature; only meaningful once bestFeature is non-null.
        int closestDistance = 0;
        for (int f = 0; f < features.length; f++) {
            if (matchname) {
                // Simplest check is if the name matches exactly, either against
                // the full probe name or one of the two trimmed variants.
                if (features[f].name().equals(probes[p].name()) || features[f].name().equals(nameWithoutExtensions) || features[f].name().equals(nameWithoutTranscript)) {
                    bestFeature = features[f];
                    closestDistance = 0;
                    break;
                }
            }
            if (surrounding) {
                // Probe and feature ranges overlap.
                if (probes[p].start() <= features[f].location().end() && probes[p].end() >= features[f].location().start()) {
                    bestFeature = features[f];
                    closestDistance = 0;
                    // Once we've found an overlapping feature we quit.
                    break;
                }
            }
            if (downstream) {
                // Check if the feature is downstream
                // Get the distance to the start. For a forward strand feature
                // this is the gap from the probe end to the feature start,
                // otherwise the gap from the feature end to the probe start.
                int d = 0;
                if (features[f].location().strand() == Location.FORWARD) {
                    d = features[f].location().start() - probes[p].end();
                } else {
                    d = probes[p].start() - features[f].location().end();
                }
                // A negative d falls through to the upstream check below.
                if (d >= 0) {
                    // Too far away, or further than the best candidate so far.
                    if (d > distanceLimit || (bestFeature != null && d > closestDistance)) {
                        continue;
                    }
                    // See if this is the closest feature we have so far...
                    if (bestFeature == null || d < closestDistance) {
                        bestFeature = features[f];
                        closestDistance = d;
                    }
                    continue;
                }
            }
            if (upstream) {
                // Check if the feature is upstream
                // Get the distance to the start. This is the mirror image of
                // the downstream calculation above.
                int d = 0;
                if (features[f].location().strand() == Location.FORWARD) {
                    d = probes[p].start() - features[f].location().end();
                } else {
                    d = features[f].location().start() - probes[p].end();
                }
                if (d >= 0) {
                    // Too far away, or further than the best candidate so far.
                    if (d > distanceLimit || (bestFeature != null && d > closestDistance)) {
                        continue;
                    }
                    // See if this is the closest feature we have so far...
                    if (bestFeature == null || d < closestDistance) {
                        bestFeature = features[f];
                        closestDistance = d;
                    }
                    continue;
                }
            }
        }
        // Probes without a matching feature get no entry in probeAnnotations.
        if (bestFeature == null) {
            continue;
        }
        probeAnnotations.put(probes[p], bestFeature);
    }
    if (!includeAll) {
        // We need to filter the interaction list to include only those which
        // have annotations on both probes
        Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
        for (int i = 0; i < interactions.length; i++) {
            if (probeAnnotations.containsKey(interactions[i].probe1()) && probeAnnotations.containsKey(interactions[i].probe2())) {
                filteredInteractions.add(interactions[i]);
            }
        }
        interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
    }
    // Hand the finished table model over to whoever is waiting for the report.
    TableModel model = new AnnotationTableModel();
    reportComplete(model);
}
Also used : InteractionProbePair(uk.ac.babraham.SeqMonk.DataTypes.Interaction.InteractionProbePair) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) Vector(java.util.Vector) AbstractTableModel(javax.swing.table.AbstractTableModel) TableModel(javax.swing.table.TableModel)

Example 42 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class ExactOverlapQuantitation method run.

/**
 * Quantitates every probe in every data store by summing the counts of the
 * reads whose start and end both equal the probe's start and end, among the
 * reads accepted by quantitationType.
 *
 * The raw count can optionally be scaled by a per-store correction factor
 * (either to a fixed total of one million or to the largest store) and
 * log2 transformed. Work proceeds one chromosome at a time and calls
 * quantitatonComplete when finished, or progressCancelled if cancelled.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();

    // One scaling factor per data store; only filled in when correcting totals.
    float[] scaling = new float[data.length];
    if (correctTotal) {
        // With per-million correction the target is fixed, otherwise it
        // becomes the largest total seen across the stores.
        float target = correctPerMillion ? 1000000 : 0;
        for (int store = 0; store < data.length; store++) {
            if (correctOnlyInProbes) {
                scaling[store] = getTotalCountInProbes(data[store], allProbes);
            } else {
                scaling[store] = data[store].getTotalReadCount();
            }
            if (!correctPerMillion && (store == 0 || scaling[store] > target)) {
                target = scaling[store];
            }
        }
        // Turn each total into the factor which scales it up to the target.
        for (int store = 0; store < scaling.length; store++) {
            scaling[store] = target / scaling[store];
        }
    }

    // Working chromosome by chromosome keeps the read lookups efficient.
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    for (int chrIndex = 0; chrIndex < chromosomes.length; chrIndex++) {
        Chromosome chromosome = chromosomes[chrIndex];
        progressUpdated("Quantiating probes on " + chromosome.name(), chrIndex, chromosomes.length);

        Probe[] chrProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosome);
        Arrays.sort(chrProbes);

        for (int store = 0; store < data.length; store++) {
            if (cancel) {
                progressCancelled();
                return;
            }

            // Fetch all the reads for this chromosome once, then walk them per probe.
            ReadsWithCounts reads = data[store].getReadsForChromosome(chromosome);
            quantitationType.resetLastRead();

            // Index from which the scan for the next probe begins.
            int firstCandidate = 0;
            for (Probe probe : chrProbes) {
                int exactMatches = 0;
                for (int r = firstCandidate; r < reads.reads.length; r++) {
                    if (SequenceRead.start(reads.reads[r]) < probe.start()) {
                        // Remember how far we got so the next probe can start here.
                        firstCandidate = r;
                    } else if (SequenceRead.start(reads.reads[r]) > probe.start()) {
                        // Past the probe start; no later read can match exactly.
                        break;
                    }
                    if (!quantitationType.useRead(probe, reads.reads[r])) {
                        continue;
                    }
                    boolean sameStart = SequenceRead.start(reads.reads[r]) == probe.start();
                    if (sameStart && SequenceRead.end(reads.reads[r]) == probe.end()) {
                        exactMatches += reads.counts[r];
                    }
                }

                // Apply the optional corrections to the raw count.
                float value = exactMatches;
                if (logTransform && value == 0) {
                    // Avoid taking the log of zero.
                    value = 0.9f;
                }
                if (correctTotal) {
                    value *= scaling[store];
                }
                if (logTransform) {
                    value = (float) Math.log(value) / log2;
                }
                data[store].setValueForProbe(probe, value);
            }
        }
    }
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 43 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class SmoothingSubtractionQuantitation method run.

/**
 * Subtracts a locally smoothed value from the existing quantitation of
 * every probe in every quantitated data set and data group.
 *
 * For each probe a range of neighbouring probes on the same chromosome is
 * selected (a fixed number of probes for ADJACENT, a distance based window
 * for WINDOW, otherwise just the probe itself), the mean of their current
 * values is taken, and that mean is subtracted from the probe's value.
 *
 * If the options are not ready, or a value cannot be read, the problem is
 * reported through progressExceptionReceived and the method stops without
 * calling quantitatonComplete. Chromosomes which were fully processed
 * before a failure keep their updated values.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
        // Don't go on to quantitate with options we've just reported as invalid.
        return;
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    // Collect every data set and data group which already has a quantitation.
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        // Smoothed values are buffered for the whole chromosome so that every
        // mean is calculated from the original, unmodified values.
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    // A run of 'distance' probes starting distance/2 before
                    // this one, clipped to the ends of the chromosome.
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    // Walk outwards in both directions until a probe falls
                    // more than distance/2 away from this probe.
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
            // Stop here so that a failure isn't followed by further work
            // and a completion notification.
            return;
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 44 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class HiCCisTransQuantitation method run.

/**
 * Sets the value of every probe in every data store to the percentage of
 * its HiC hits which are counted as trans.
 *
 * A hit is trans when it lies on a different chromosome from the probe or,
 * if includeFarCis is set, when it is on the same chromosome but the
 * fragment length between source and hit exceeds farCisDistance. Probes
 * with no hits get a value of 0. When correctPerChromosome is set the
 * median value of each chromosome is then subtracted from the probes on
 * that chromosome, separately for each data store.
 *
 * A failure to read a value during the per-chromosome correction is
 * reported through progressExceptionReceived and stops the quantitation
 * without calling quantitatonComplete.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            int cisCount = 0;
            int transCount = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one: a pair identical to the
                    // previous pair is treated as a duplicate.
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
                        ++transCount;
                    } else {
                        if (includeFarCis) {
                            // Same chromosome, but distant enough pairs are
                            // counted along with the trans hits.
                            int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
                            if (distance > farCisDistance) {
                                ++transCount;
                            } else {
                                ++cisCount;
                            }
                        } else {
                            ++cisCount;
                        }
                    }
                }
            }
            // Probes without any hits are given 0 rather than dividing by zero.
            int totalCount = cisCount + transCount;
            float percentage = 0;
            if (totalCount != 0) {
                percentage = (transCount * 100f) / totalCount;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], percentage);
        }
    }
    if (correctPerChromosome) {
        Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
        for (int c = 0; c < chrs.length; c++) {
            Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
            // Scratch buffer reused for every data store on this chromosome.
            float[] thisChrValues = new float[thisChrProbes.length];
            for (int d = 0; d < data.length; d++) {
                DataStore ds = (DataStore) data[d];
                try {
                    for (int p = 0; p < thisChrProbes.length; p++) {
                        thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
                    }
                    float median = SimpleStats.median(thisChrValues);
                    for (int p = 0; p < thisChrProbes.length; p++) {
                        ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
                    }
                } catch (SeqMonkException e) {
                    // A silently ignored failure would leave the previous
                    // store's value in the scratch buffer and skew the median,
                    // so report the problem and stop instead.
                    progressExceptionReceived(e);
                    return;
                }
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 45 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class SmoothingQuantitation method run.

/**
 * Replaces the existing quantitation of every probe in every quantitated
 * data set and data group with a locally smoothed value.
 *
 * For each probe a range of neighbouring probes on the same chromosome is
 * selected (a fixed number of probes for ADJACENT, a distance based window
 * for WINDOW, otherwise just the probe itself) and the probe's new value is
 * the mean of the current values over that range.
 *
 * If the options are not ready, or a value cannot be read, the problem is
 * reported through progressExceptionReceived and the method stops without
 * calling quantitatonComplete. Chromosomes which were fully processed
 * before a failure keep their updated values.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
        // Don't go on to quantitate with options we've just reported as invalid.
        return;
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    // Collect every data set and data group which already has a quantitation.
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        // Smoothed values are buffered for the whole chromosome so that every
        // mean is calculated from the original, unsmoothed values.
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    // A run of 'distance' probes starting distance/2 before
                    // this one, clipped to the ends of the chromosome.
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    // Walk outwards in both directions until a probe falls
                    // more than distance/2 away from this probe.
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
            // Stop here so that a failure isn't followed by further work
            // and a completion notification.
            return;
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Aggregations

Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)78 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)47 Vector (java.util.Vector)36 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)23 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)23 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)22 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)12 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)11 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)8 ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)8 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)7 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)7 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)7 IOException (java.io.IOException)6 File (java.io.File)5 Hashtable (java.util.Hashtable)5 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)5 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)5 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)4