Search in sources :

Example 86 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class GenomeParser method parseChromosome.

/**
 * Reads forward from the reader's current position until it finds the AC line
 * describing the next sequence entry, and returns the chromosome for that entry.
 *
 * An AC line is split on ':' and must produce exactly 6 sections.  The third
 * section is used as the chromosome name, the fourth as the start position of
 * this sequence and the fifth as the chromosome length.  Entries whose AC line
 * has the wrong number of sections (a warning is raised for these), or whose AC
 * line mentions "supercontig", are passed over via skipToEntryEnd and the search
 * continues with the following entry.
 *
 * As a side effect, currentOffset is set to the entry's start position minus one
 * so that feature positions can later be shifted by it.
 *
 * @param br the reader from which the annotation lines are read
 * @param genome the genome which supplies (or creates) the chromosome named on the AC line
 * @return the chromosome for the next usable entry, or null if the input ran
 *         out before a usable AC line was found
 * @throws SeqMonkException if an entry terminator line (starting "//") is reached
 *         before any AC line, or if the start/length fields of the AC line are not integers
 * @throws IOException Signals that an I/O exception has occurred.
 */
private Chromosome parseChromosome(BufferedReader br, SingleGenome genome) throws SeqMonkException, IOException {
    String line;
    while ((line = br.readLine()) != null) {
        if (line.startsWith("AC")) {
            String[] sections = line.split(":");
            if (sections.length != 6) {
                // It's not a chromosome file.  We probably just want to
                // skip it and move onto the next entry
                progressWarningReceived(new SeqMonkException("AC line didn't have 6 sections '" + line + "'"));
                skipToEntryEnd(br);
                continue;
            }
            if (line.indexOf("supercontig") >= 0) {
                // It's not a chromosome file.  We probably just want to
                // skip it and move onto the next entry
                skipToEntryEnd(br);
                continue;
            }

            // Validate the numeric fields before touching the genome so that a
            // malformed line can't leave behind a chromosome with no length, and
            // so the failure is reported as a SeqMonkException rather than
            // escaping as an unchecked NumberFormatException.
            int chromosomeLength;
            int sequenceStart;
            try {
                chromosomeLength = Integer.parseInt(sections[4]);
                sequenceStart = Integer.parseInt(sections[3]);
            } catch (NumberFormatException nfe) {
                throw new SeqMonkException("AC line had a non-integer start or length '" + line + "'");
            }

            // This will return the existing chromosome of this
            // name if it exists already, but will create a new
            // one if it doesn't.
            Chromosome c = genome.addChromosome(sections[2]);
            c.setLength(chromosomeLength);

            // Since the positions of all features are given relative
            // to the current sequence we need to add the current
            // start position to all locations as an offset.
            currentOffset = sequenceStart - 1;
            return c;
        }
        if (line.startsWith("//")) {
            // We hit the end of the entry without ever seeing its AC line
            throw new SeqMonkException("Couldn't find AC line");
        }
    }
    // Ran out of input without finding another usable entry
    return null;
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 87 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class ActiveProbeListParser method processNormalDataStore.

/**
 * Builds a new DataSet in which every probe of the supplied list becomes a read.
 *
 * Each probe's start, end and strand are taken as the basis for the read.  The
 * preferences then control whether the strand is flipped, whether the read is
 * extended (or shortened, for a negative extension) and whether the strand is
 * finally overridden with a fixed value.  Reads which would start before
 * position 1 or finish beyond the end of their chromosome are reported as
 * warnings and left out.
 *
 * @param activeList the probe list whose probes are converted into reads
 * @return the populated data set, or null if the operation was cancelled
 * @throws IllegalArgumentException if the strand option selected in the
 *         preferences is not one of the recognised values
 */
private DataSet processNormalDataStore(ProbeList activeList) {
    final int extension = prefs.extendReads();
    final boolean flipStrand = prefs.reverseReads();

    // Work out whether the user wants every read forced onto one strand
    final Object strandChoice = prefs.strandOptionBox.getSelectedItem();
    final boolean overrideStrand = !strandChoice.equals("From probes");
    int overridingStrand = 0;
    if (overrideStrand) {
        if (strandChoice.equals("Forward")) {
            overridingStrand = Location.FORWARD;
        } else if (strandChoice.equals("Reverse")) {
            overridingStrand = Location.REVERSE;
        } else if (strandChoice.equals("Unknown")) {
            overridingStrand = Location.UNKNOWN;
        } else {
            throw new IllegalArgumentException("Unknown forced strand option " + strandChoice);
        }
    }

    DataSet reimported = new DataSet(activeList.name(), "Reimported from " + activeList.name(), prefs.removeDuplicates());

    // Walk the genome one chromosome at a time
    Chromosome[] chromosomes = dataCollection().genome().getAllChromosomes();
    for (int chrIndex = 0; chrIndex < chromosomes.length; chrIndex++) {
        Chromosome chromosome = chromosomes[chrIndex];
        progressUpdated("Processing " + activeList.name() + " chr " + chromosome.name(), chrIndex, chromosomes.length);

        for (Probe probe : activeList.getProbesForChromosome(chromosome)) {
            if (cancel) {
                progressCancelled();
                return null;
            }

            int start = probe.start();
            int end = probe.end();
            int strand = probe.strand();

            // Swap forward and reverse if requested; other strands are untouched
            if (flipStrand) {
                if (strand == Location.FORWARD) {
                    strand = Location.REVERSE;
                } else if (strand == Location.REVERSE) {
                    strand = Location.FORWARD;
                }
            }

            // Negative extensions are allowed and shorten the read, but
            // never past its other end.
            if (extension != 0) {
                if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
                    end = Math.max(start, end + extension);
                } else if (strand == Location.REVERSE) {
                    start = Math.min(end, start - extension);
                }
            }

            // Reads may not begin before the first base of the chromosome
            if (start < 1) {
                int overrun = 1 - start;
                progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chromosome.name() + " (" + chromosome.length() + ")"));
                continue;
            }

            // ...nor may they run off the far end
            if (end > chromosome.length()) {
                int overrun = end - chromosome.length();
                progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chromosome.name() + " (" + chromosome.length() + ")"));
                continue;
            }

            // Apply the fixed strand last so it wins over everything above
            if (overrideStrand) {
                strand = overridingStrand;
            }

            // Pack the position and store it; a failure only loses this read
            try {
                reimported.addData(chromosome, SequenceRead.packPosition(start, end, strand));
            } catch (SeqMonkException e) {
                progressWarningReceived(e);
            }
        }
    }
    return reimported;
}
Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 88 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class BismarkCovFileParser method run.

/**
 * Imports each selected coverage file into its own DataSet.
 *
 * Every non-blank line is split on tabs and must have at least 6 fields.  The
 * first field is used as the chromosome name, the second and third as start and
 * end, and the fifth and sixth as the methylated and unmethylated counts.  For
 * each line a forward-strand read is added with the methylated count and a
 * reverse-strand read with the unmethylated count.  Lines which can't be parsed,
 * or which fall beyond the end of their chromosome, are reported as warnings
 * and skipped.
 *
 * The reader for each file is always closed, including when parsing fails part
 * way through a file.  Any exception is passed to progressExceptionReceived;
 * on success the finished data sets are handed to processingFinished.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        File[] covFiles = getFiles();
        DataSet[] newData = new DataSet[covFiles.length];
        for (int f = 0; f < covFiles.length; f++) {
            BufferedReader br;
            if (covFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(covFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(covFiles[f]));
            }
            // From here on the reader must be released however we leave the
            // block, otherwise a failure mid-file leaks the open file handle.
            try {
                String line;
                newData[f] = new DataSet(covFiles[f].getName(), covFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // Closed explicitly so the reader is released before the
                        // cancellation is announced; the close in the finally
                        // block is then a no-op.
                        br.close();
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + covFiles[f].getName(), f, covFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 6) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int start;
                    int end;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        methCount = Integer.parseInt(sections[4]);
                        unmethCount = Integer.parseInt(sections[5]);
                        // End must always be later than start
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        // NOTE(review): this message prints the chromosome field and the
                        // start field, although any of the four numeric fields may be
                        // the one which failed to parse.
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start = c.position(start);
                        end = c.position(end);
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reads.  Methylated calls go on the
                        // forward strand and unmethylated calls on the reverse strand.
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        newData[f].addData(c.chromosome(), methRead, methCount);
                        newData[f].addData(c.chromosome(), unmethRead, unmethCount);
                    } catch (IllegalArgumentException iae) {
                        // A bad line only costs us that line, not the whole import
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                    }
                }
            } finally {
                // We're finished with the file, whether or not parsing succeeded.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + covFiles[f].getName(), f, covFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File)

Example 89 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class QuasRFileParser method run.

/**
 * Imports each selected QuasR file into its own DataSet.
 *
 * Blank lines and lines starting with '#' are ignored.  Every other line is
 * split on tabs and must have at least 5 fields.  The first field is used as
 * the chromosome name, the second and third as start and end, the fourth as the
 * total count and the fifth as the methylated count; the unmethylated count is
 * the difference between the two.  One forward-strand read is added per
 * methylated call and one reverse-strand read per unmethylated call.  Lines
 * which can't be parsed, or which fall beyond the end of their chromosome, are
 * reported as warnings and skipped.
 *
 * The reader for each file is always closed, including when parsing fails part
 * way through a file.  Any exception is passed to progressExceptionReceived;
 * on success the finished data sets are handed to processingFinished.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        File[] quasrFiles = getFiles();
        DataSet[] newData = new DataSet[quasrFiles.length];
        for (int f = 0; f < quasrFiles.length; f++) {
            BufferedReader br;
            if (quasrFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(quasrFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(quasrFiles[f]));
            }
            // From here on the reader must be released however we leave the
            // block, otherwise a failure mid-file leaks the open file handle.
            try {
                String line;
                newData[f] = new DataSet(quasrFiles[f].getName(), quasrFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // Closed explicitly so the reader is released before the
                        // cancellation is announced; the close in the finally
                        // block is then a no-op.
                        br.close();
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;
                    // In case it has comments
                    if (line.startsWith("#"))
                        continue;
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + quasrFiles[f].getName(), f, quasrFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 5) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int start;
                    int end;
                    int totalCount;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        totalCount = Integer.parseInt(sections[3]);
                        methCount = Integer.parseInt(sections[4]);
                        // The file only gives total and methylated, so derive the rest
                        unmethCount = totalCount - methCount;
                        // End must always be later than start
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        // NOTE(review): this message prints the chromosome field and the
                        // start field, although any of the four numeric fields may be
                        // the one which failed to parse.
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start = c.position(start);
                        end = c.position(end);
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reads.  Methylated calls go on the
                        // forward strand and unmethylated calls on the reverse strand.
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        for (int i = 0; i < methCount; i++) {
                            newData[f].addData(c.chromosome(), methRead);
                        }
                        // A negative unmethylated count simply adds nothing here
                        for (int i = 0; i < unmethCount; i++) {
                            newData[f].addData(c.chromosome(), unmethRead);
                        }
                    } catch (IllegalArgumentException iae) {
                        // A bad line only costs us that line, not the whole import
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                    }
                }
            } finally {
                // We're finished with the file, whether or not parsing succeeded.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + quasrFiles[f].getName(), f, quasrFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File)

Example 90 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class PairedDataSet method addData.

/**
 * Adds one read to the paired data set.  Every parser which builds a new set
 * should feed its reads through this method.  Reads are expected to arrive as
 * consecutive pairs: the first read of a pair is held back, and the pair is
 * formed when the second one is submitted.
 *
 * A completed pair is dropped, rather than stored, when its two reads are on
 * different chromosomes and trans pairs are being ignored, or when they are on
 * the same chromosome, minimum distance filtering is on and the fragment they
 * span is shorter than the minimum distance.
 *
 * Only the SeqMonk parser should ever pass true for skipSorting.  Doing so
 * lets the data set skip the sorting step when caching, which is otherwise
 * really slow, and means only the forward direction of the pair is stored
 * here.  If sorting is skipped incorrectly then the results later returned by
 * the data set will be wrong.
 *
 * @param c the chromosome on which this read lies
 * @param read the packed position of the read
 * @param skipSorting true to store only the forward pair and leave the
 *        needToSort flag untouched
 * @throws IllegalStateException if a pair is completed after the data set
 *         has been finalised, or if storing the hit fails
 */
public void addData(Chromosome c, long read, boolean skipSorting) {
    if (!skipSorting) {
        needToSort = true;
    }

    // First half of a pair: remember it and wait for its partner
    if (lastChromosome == null) {
        lastRead = read;
        lastChromosome = c;
        return;
    }

    boolean isTrans = lastChromosome != c;

    // Unwanted trans pairs, and cis pairs which sit too close together,
    // are thrown away without being counted.
    boolean discard = (isTrans && ignoreTrans)
            || (filterOnMinDistance && !isTrans && SequenceRead.fragmentLength(lastRead, read) < minDistance);

    if (!discard) {
        // Without the reverse pair each pair only counts once
        int increment = skipSorting ? 1 : 2;

        if (isTrans) {
            // Count the pair against the first chromosome, and against
            // the second one too if we're storing both directions.
            transCount += increment;
            if (!transChromosomeCounts.containsKey(lastChromosome)) {
                transChromosomeCounts.put(lastChromosome, new NonThreadSafeIntCounter());
            }
            transChromosomeCounts.get(lastChromosome).increment();

            if (!skipSorting) {
                if (!transChromosomeCounts.containsKey(c)) {
                    transChromosomeCounts.put(c, new NonThreadSafeIntCounter());
                }
                transChromosomeCounts.get(c).increment();
            }
        } else {
            // Both reads share a chromosome so one counter covers them
            cisCount += increment;
            if (!cisChromosomeCounts.containsKey(c)) {
                cisChromosomeCounts.put(c, new NonThreadSafeIntCounter());
            }
            cisChromosomeCounts.get(lastChromosome).increment();
        }

        try {
            if (isFinalised) {
                throw new SeqMonkException("This data set is finalised.  No more data can be added");
            }

            // Forward direction: held read -> this read
            if (!readData.containsKey(lastChromosome)) {
                readData.put(lastChromosome, new ChromosomeDataStore(lastChromosome));
            }
            readData.get(lastChromosome).hitCollection.addHit(c.name(), lastRead, read);

            if (!skipSorting) {
                // Reverse direction: this read -> held read
                if (!readData.containsKey(c)) {
                    readData.put(c, new ChromosomeDataStore(c));
                }
                readData.get(c).hitCollection.addHit(lastChromosome.name(), read, lastRead);
            }
        } catch (SeqMonkException sme) {
            throw new IllegalStateException(sme);
        }
    }

    // Whether stored or dropped, the pair is done, so start afresh
    lastRead = 0;
    lastChromosome = null;
}
Also used : NonThreadSafeIntCounter(uk.ac.babraham.SeqMonk.Utilities.NonThreadSafeIntCounter) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7