Search in sources :

Example 11 with ChromosomeWithOffset

use of uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset in project SeqMonk by s-andrews.

the class BAMFileParser method getSingleEndRead.

/**
 * Builds a single end read from one alignment record.
 *
 * The alignment start/end and strand are taken from the record. When
 * extendBy is positive the read is lengthened by that many bases at its
 * 3' side (the end for forward reads, the start for reverse reads). The
 * resulting coordinates are then passed through the position() method of
 * the ChromosomeWithOffset found for the record's reference name
 * (presumably applying that chromosome's offset - confirm against
 * ChromosomeWithOffset).
 *
 * @param samRecord The alignment record to convert
 * @return The read, paired with the chromosome it lies on
 * @throws SeqMonkException if the reference name can't be resolved to a
 *         chromosome, or if the final position starts before base 1 or
 *         runs past the end of the chromosome
 */
private SequenceReadWithChromosome getSingleEndRead(SAMRecord samRecord) throws SeqMonkException {
    int start = samRecord.getAlignmentStart();
    int end = samRecord.getAlignmentEnd();
    final int strand = samRecord.getReadNegativeStrandFlag() ? Location.REVERSE : Location.FORWARD;

    // Extension always grows the read away from its 5' end
    if (extendBy > 0) {
        if (strand == Location.FORWARD) {
            end += extendBy;
        } else {
            start -= extendBy;
        }
    }

    ChromosomeWithOffset mapped;
    try {
        mapped = collection.genome().getChromosome(samRecord.getReferenceName());
    } catch (Exception lookupFailure) {
        throw new SeqMonkException(lookupFailure.getLocalizedMessage());
    }

    start = mapped.position(start);
    end = mapped.position(end);

    // Reject anything which doesn't sit wholly inside the chromosome
    final int chromosomeLength = mapped.chromosome().length();
    if (end > chromosomeLength) {
        int overrun = end - chromosomeLength;
        throw new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + mapped.chromosome().name() + " (" + chromosomeLength + ")");
    }
    if (start < 1) {
        throw new SeqMonkException("Reading position " + start + " was before the start of chr" + mapped.chromosome().name() + " (" + chromosomeLength + ")");
    }

    return new SequenceReadWithChromosome(mapped.chromosome(), SequenceRead.packPosition(start, end, strand));
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SequenceReadWithChromosome(uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)

Example 12 with ChromosomeWithOffset

use of uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset in project SeqMonk by s-andrews.

the class BowtieFileParser method getPairedEndRead.

/**
 * Gets a paired end read.
 *
 * The two mates are merged into a single span running from the lowest
 * start to the highest end of the pair. The strand is FORWARD when the
 * first mate is on "+" and the second on "-", REVERSE for the opposite
 * arrangement, and UNKNOWN for any other combination. "First mate" is
 * swapped when the name in sections1 ends in "2", since the lines can
 * arrive with read two first.
 *
 * Fields used from each array: [0] read name, [1] strand ("+"/"-"),
 * [2] chromosome name, [3] start offset, [4] a string whose length is
 * used as the read length (presumably the read sequence - confirm
 * against the bowtie output format).
 *
 * @param sections1 The tab split bowtie output sections for the first read
 * @param sections2 The tab split bowtie output sections for the second read
 * @return The paired end read which was read
 * @throws SeqMonkException if a start offset is not an integer, the mates
 *         are on different chromosomes, the merged span is longer than
 *         pairedEndDistance, the chromosome can't be found, or the span
 *         falls outside the chromosome
 */
private SequenceReadWithChromosome getPairedEndRead(String[] sections1, String[] sections2) throws SeqMonkException {
    // We can get the lines with read two first, in which case we'll reverse things.
    // endsWith() is used instead of substring(length - 1) so that an empty
    // read name can't cause a StringIndexOutOfBoundsException.
    boolean readsAreReversed = sections1[0].endsWith("2");

    // Note: the names of the two reads are deliberately not compared. Newer
    // bowtie files don't follow the old naming convention so we can't rely on it.

    int start;
    int end;
    try {
        // Offsets in the file are incremented by one before use (presumably
        // converting 0-based offsets to 1-based positions - confirm).
        int read1start = Integer.parseInt(sections1[3]) + 1;
        int read1end = read1start + (sections1[4].length() - 1);
        int read2start = Integer.parseInt(sections2[3]) + 1;
        int read2end = read2start + (sections2[4].length() - 1);

        // The merged read covers the full extent of both mates
        start = Math.min(read1start, read2start);
        end = Math.max(read1end, read2end);
    } catch (NumberFormatException e) {
        throw new SeqMonkException("Location " + sections1[3] + " or " + sections2[3] + " was not an integer");
    }

    int strand;
    if (sections1[1].equals("+") && sections2[1].equals("-")) {
        strand = readsAreReversed ? Location.REVERSE : Location.FORWARD;
    } else if (sections1[1].equals("-") && sections2[1].equals("+")) {
        strand = readsAreReversed ? Location.FORWARD : Location.REVERSE;
    } else {
        strand = Location.UNKNOWN;
    }

    // This must be checked before the distance cutoff. A distance computed
    // from positions on two different chromosomes is meaningless, and would
    // otherwise be reported as a misleading "distance too large" error.
    if (!sections1[2].equals(sections2[2])) {
        throw new SeqMonkException("Paried end read was on a different chromosome");
    }

    if ((end - start) + 1 > pairedEndDistance) {
        throw new SeqMonkException("Distance between ends " + ((end - start) + 1) + " was larger than cutoff (" + pairedEndDistance + ")");
    }

    ChromosomeWithOffset c;
    try {
        c = dataCollection().genome().getChromosome(sections1[2]);
    } catch (Exception e) {
        throw new SeqMonkException(e.getLocalizedMessage());
    }

    start = c.position(start);
    end = c.position(end);

    // We also don't allow readings which are beyond the end of the chromosome
    if (end > c.chromosome().length()) {
        int overrun = end - c.chromosome().length();
        throw new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")");
    }
    if (start < 1) {
        throw new SeqMonkException("Reading position " + start + " was before the start of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")");
    }

    // We can now make the new reading
    return new SequenceReadWithChromosome(c.chromosome(), SequenceRead.packPosition(start, end, strand));
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SequenceReadWithChromosome(uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)

Example 13 with ChromosomeWithOffset

use of uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset in project SeqMonk by s-andrews.

the class BismarkCovFileParser method run.

/* (non-Javadoc)
 * @see java.lang.Runnable#run()
 *
 * Reads each file returned by getFiles() (gzip-decompressing those whose
 * name ends in ".gz") into its own DataSet and hands the finished array to
 * processingFinished(). Each non-blank line is split on tabs and must have
 * at least 6 fields, of which these are used:
 *   [0] chromosome name, [1] start, [2] end,
 *   [4] methylated count, [5] unmethylated count.
 * Methylated counts are stored as FORWARD strand reads and unmethylated
 * counts as REVERSE strand reads over the same span.
 *
 * Problems with an individual line are reported via
 * progressWarningReceived() and the line is skipped. Any other exception
 * aborts the import and is passed to progressExceptionReceived().
 */
public void run() {
    try {
        File[] covFiles = getFiles();
        DataSet[] newData = new DataSet[covFiles.length];
        for (int f = 0; f < covFiles.length; f++) {
            BufferedReader br;
            if (covFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(covFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(covFiles[f]));
            }
            // Everything which touches the reader lives inside this try so the
            // file handle is released on cancellation, on normal completion
            // and when an exception escapes the loop.
            try {
                String line;
                newData[f] = new DataSet(covFiles[f].getName(), covFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // The reader is closed by the finally block below
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + covFiles[f].getName(), f, covFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 6) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int start;
                    int end;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        methCount = Integer.parseInt(sections[4]);
                        unmethCount = Integer.parseInt(sections[5]);
                        // End must always be later than start. We warn about
                        // this but carry on with the two values swapped.
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start = c.position(start);
                        end = c.position(end);
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reads. The strand is used to
                        // tell methylated (forward) from unmethylated (reverse).
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        newData[f].addData(c.chromosome(), methRead, methCount);
                        newData[f].addData(c.chromosome(), unmethRead, unmethCount);
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + covFiles[f].getName(), f, covFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File)

Example 14 with ChromosomeWithOffset

use of uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset in project SeqMonk by s-andrews.

the class QuasRFileParser method run.

/* (non-Javadoc)
 * @see java.lang.Runnable#run()
 *
 * Reads each file returned by getFiles() (gzip-decompressing those whose
 * name ends in ".gz") into its own DataSet and hands the finished array to
 * processingFinished(). Blank lines and lines starting with "#" are
 * ignored. Every other line is split on tabs and must have at least 5
 * fields:
 *   [0] chromosome name, [1] start, [2] end,
 *   [3] total count, [4] methylated count.
 * The unmethylated count is derived as total - methylated. One FORWARD
 * strand read is added per methylated count and one REVERSE strand read
 * per unmethylated count, all over the same span.
 *
 * Problems with an individual line are reported via
 * progressWarningReceived() and the line is skipped. Any other exception
 * aborts the import and is passed to progressExceptionReceived().
 */
public void run() {
    try {
        File[] quasrFiles = getFiles();
        DataSet[] newData = new DataSet[quasrFiles.length];
        for (int f = 0; f < quasrFiles.length; f++) {
            BufferedReader br;
            if (quasrFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(quasrFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(quasrFiles[f]));
            }
            // Everything which touches the reader lives inside this try so the
            // file handle is released on cancellation, on normal completion
            // and when an exception escapes the loop.
            try {
                String line;
                newData[f] = new DataSet(quasrFiles[f].getName(), quasrFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // The reader is closed by the finally block below
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;
                    // In case it has comments
                    if (line.startsWith("#"))
                        continue;
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + quasrFiles[f].getName(), f, quasrFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 5) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    int start;
                    int end;
                    int totalCount;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        totalCount = Integer.parseInt(sections[3]);
                        methCount = Integer.parseInt(sections[4]);
                        unmethCount = totalCount - methCount;
                        // End must always be later than start. We warn about
                        // this but carry on with the two values swapped.
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start = c.position(start);
                        end = c.position(end);
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }
                        // We can now make the new reads. The strand is used to
                        // tell methylated (forward) from unmethylated (reverse).
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        for (int i = 0; i < methCount; i++) {
                            newData[f].addData(c.chromosome(), methRead);
                        }
                        for (int i = 0; i < unmethCount; i++) {
                            newData[f].addData(c.chromosome(), unmethRead);
                        }
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + quasrFiles[f].getName(), f, quasrFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)14 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 BufferedReader (java.io.BufferedReader)9 FileInputStream (java.io.FileInputStream)9 FileReader (java.io.FileReader)9 InputStreamReader (java.io.InputStreamReader)9 GZIPInputStream (java.util.zip.GZIPInputStream)9 File (java.io.File)7 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)5 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)4 Vector (java.util.Vector)3 AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet)2 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)2 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)2 Enumeration (java.util.Enumeration)1 HashSet (java.util.HashSet)1 Hashtable (java.util.Hashtable)1 JDialog (javax.swing.JDialog)1 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)1