Search in sources :

Example 46 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SmoothingSubtractionQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 47 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class HiCCisTransQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(p, probes.length);
        for (int d = 0; d < data.length; d++) {
            int cisCount = 0;
            int transCount = 0;
            HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
            String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
            for (int c = 0; c < chromosomeNames.length; c++) {
                long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
                long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
                for (int r = 0; r < sourceReads.length; r++) {
                    // Check if we can ignore this one
                    if (removeDuplicates) {
                        if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
                            continue;
                    }
                    if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
                        ++transCount;
                    } else {
                        if (includeFarCis) {
                            int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
                            if (distance > farCisDistance) {
                                ++transCount;
                            } else {
                                // System.err.println("Distance was "+distance);
                                ++cisCount;
                            }
                        } else {
                            ++cisCount;
                        }
                    }
                }
            }
            float percentage = ((transCount * 100f) / (cisCount + transCount));
            if (cisCount + transCount == 0) {
                percentage = 0;
            }
            // TODO: This is icky since the inheritance between HiCDataStore and DataStore
            // isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probes[p], percentage);
        }
    }
    if (correctPerChromosome) {
        Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
        for (int c = 0; c < chrs.length; c++) {
            Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
            float[] thisChrValues = new float[thisChrProbes.length];
            for (int d = 0; d < data.length; d++) {
                DataStore ds = (DataStore) data[d];
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
                    } catch (SeqMonkException e) {
                    }
                }
                float median = SimpleStats.median(thisChrValues);
                for (int p = 0; p < thisChrProbes.length; p++) {
                    try {
                        ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
                    } catch (SeqMonkException e) {
                    }
                }
            }
        }
    }
    quantitatonComplete();
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 48 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SmoothingQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
    }
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();
    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }
    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }
    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated(c, chromosomes.length);
        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
        float[][] newValues = new float[data.length][allProbes.length];
        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;
                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);
                    if (minIndex < 0)
                        minIndex = 0;
                    if (maxIndex > allProbes.length - 1)
                        maxIndex = allProbes.length - 1;
                } else if (correctionAction == WINDOW) {
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }
                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }
            // Now assign the values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
        }
    }
    quantitatonComplete();
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 49 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class GffFileParser method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    File[] probeFiles = getFiles();
    DataSet[] newData = new DataSet[probeFiles.length];
    int extendBy = prefs.extendReads();
    try {
        for (int f = 0; f < probeFiles.length; f++) {
            BufferedReader br;
            if (probeFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(probeFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(probeFiles[f]));
            }
            String line;
            if (prefs.isHiC()) {
                newData[f] = new PairedDataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
            } else {
                newData[f] = new DataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates());
            }
            int lineCount = 0;
            // Now process the file
            while ((line = br.readLine()) != null) {
                if (cancel) {
                    br.close();
                    progressCancelled();
                    return;
                }
                // Ignore blank lines
                if (line.trim().length() == 0)
                    continue;
                ++lineCount;
                if (lineCount % 100000 == 0) {
                    progressUpdated("Read " + lineCount + " lines from " + probeFiles[f].getName(), f, probeFiles.length);
                }
                String[] sections = line.split("\t");
                // Check to see if we've got enough data to work with
                if (sections.length < 5) {
                    progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                    // Skip this line...
                    continue;
                }
                int strand;
                int start;
                int end;
                try {
                    start = Integer.parseInt(sections[3]);
                    end = Integer.parseInt(sections[4]);
                    // End must always be later than start
                    if (start > end) {
                        int temp = start;
                        start = end;
                        end = temp;
                    }
                    if (sections.length >= 7) {
                        if (sections[6].equals("+")) {
                            strand = Location.FORWARD;
                        } else if (sections[6].equals("-")) {
                            strand = Location.REVERSE;
                        } else if (sections[6].equals(".")) {
                            strand = Location.UNKNOWN;
                        } else {
                            progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[6] + "' marked as unknown strand"));
                            strand = Location.UNKNOWN;
                        }
                    } else {
                        strand = Location.UNKNOWN;
                    }
                    if (extendBy > 0) {
                        if (strand == Location.FORWARD) {
                            end += extendBy;
                        } else if (strand == Location.REVERSE) {
                            start -= extendBy;
                        }
                    }
                } catch (NumberFormatException e) {
                    progressWarningReceived(new SeqMonkException("Location " + sections[3] + "-" + sections[4] + " was not an integer"));
                    continue;
                }
                ChromosomeWithOffset c;
                try {
                    c = dataCollection().genome().getChromosome(sections[0]);
                } catch (IllegalArgumentException sme) {
                    progressWarningReceived(sme);
                    continue;
                }
                start = c.position(start);
                end = c.position(end);
                // We also don't allow readings which are beyond the end of the chromosome
                if (end > c.chromosome().length()) {
                    int overrun = end - c.chromosome().length();
                    progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                    continue;
                }
                // We can now make the new reading
                try {
                    long read = SequenceRead.packPosition(start, end, strand);
                    newData[f].addData(c.chromosome(), read);
                } catch (SeqMonkException e) {
                    progressWarningReceived(e);
                    continue;
                }
            }
            // We're finished with the file.
            br.close();
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + probeFiles[f].getName(), f, probeFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File)

Example 50 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method parseGroups.

/**
 * Parses the list of sample groups.
 *
 * @param sections The tab split values from the initial groups line
 * @throws SeqMonkException
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseGroups(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Data Groups line didn't contain 2 sections");
    }
    if (!sections[0].equals("Data Groups")) {
        throw new SeqMonkException("Couldn't find expected data groups line");
    }
    int n = Integer.parseInt(sections[1]);
    dataGroups = new DataGroup[n];
    for (int i = 0; i < n; i++) {
        String[] group = br.readLine().split("\\t");
        DataSet[] groupMembers = new DataSet[group.length - 1];
        if (thisDataVersion < 4) {
            DataSet[] allSets = application.dataCollection().getAllDataSets();
            // In the bad old days we used to refer to datasets by their name
            for (int j = 1; j < group.length; j++) {
                boolean seen = false;
                for (int d = 0; d < allSets.length; d++) {
                    if (allSets[d].name().equals(group[j])) {
                        if (seen) {
                            // System.out.println("Seen this before - abort abort");
                            throw new SeqMonkException("Name " + group[j] + " is ambiguous in group " + group[0]);
                        }
                        // System.out.println("Seen for the first time");
                        groupMembers[j - 1] = allSets[d];
                        seen = true;
                    }
                }
            }
        } else {
            for (int j = 1; j < group.length; j++) {
                // The more modern and safer way to make up a group is by its index
                groupMembers[j - 1] = application.dataCollection().getDataSet(Integer.parseInt(group[j]));
                if (groupMembers[j - 1] == null) {
                    throw new SeqMonkException("Couldn't find dataset at position " + group[j]);
                }
            }
        }
        DataGroup g = new DataGroup(group[0], groupMembers);
        application.dataCollection().addDataGroup(g);
        dataGroups[i] = g;
    }
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7