use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class GenomeParser method parseChromosome.
/**
 * Reads forward until the next usable "AC" line and returns the chromosome
 * it describes.
 *
 * An AC line is split on ':' and must yield exactly 6 sections; section 2 is
 * used as the chromosome name, section 3 as the start position of the current
 * sequence and section 4 as the chromosome length. AC lines with the wrong
 * number of sections (reported as a warning) or which mention "supercontig"
 * (skipped silently) cause the rest of that entry to be skipped.
 *
 * As a side effect this sets {@code currentOffset} to the start position
 * minus one, so that feature positions given relative to the current
 * sequence can be shifted into chromosome coordinates.
 *
 * @param br the reader positioned somewhere before the AC line of an entry
 * @param genome the genome to which the chromosome is added (or from which an
 *        existing chromosome of the same name is retrieved)
 * @return the chromosome described by the AC line, or null if the end of the
 *         stream was reached without finding one
 * @throws SeqMonkException if the end-of-entry marker "//" is reached before
 *         an AC line, or if the start/length fields of the AC line are not
 *         integers
 * @throws IOException Signals that an I/O exception has occurred.
 */
private Chromosome parseChromosome(BufferedReader br, SingleGenome genome) throws SeqMonkException, IOException {
    String line;
    while ((line = br.readLine()) != null) {
        if (line.startsWith("AC")) {
            String[] sections = line.split(":");
            if (sections.length != 6) {
                // It's not a chromosome file. We probably just want to
                // skip it and move onto the next entry
                progressWarningReceived(new SeqMonkException("AC line didn't have 6 sections '" + line + "'"));
                skipToEntryEnd(br);
                continue;
            }
            if (line.indexOf("supercontig") >= 0) {
                // It's not a chromosome file. We probably just want to
                // skip it and move onto the next entry
                skipToEntryEnd(br);
                continue;
            }

            // Parse the numeric fields BEFORE touching the genome so that a
            // malformed line can't leave a chromosome registered without a
            // length, and so the failure is reported with the offending line
            // rather than as a bare NumberFormatException.
            int sequenceStart;
            int chromosomeLength;
            try {
                sequenceStart = Integer.parseInt(sections[3]);
                chromosomeLength = Integer.parseInt(sections[4]);
            } catch (NumberFormatException nfe) {
                throw new SeqMonkException("AC line had a non-integer start or length '" + line + "' (" + nfe.getMessage() + ")");
            }

            // This will return the existing chromosome of this
            // name if it exists already, but will create a new
            // one if it doesn't.
            Chromosome c = genome.addChromosome(sections[2]);
            c.setLength(chromosomeLength);

            // Since the positions of all features are given relative
            // to the current sequence we need to add the current
            // start position to all locations as an offset.
            currentOffset = sequenceStart - 1;
            return c;
        }
        if (line.startsWith("//")) {
            // We hit the end of the entry without ever seeing an AC line
            throw new SeqMonkException("Couldn't find AC line");
        }
    }
    // Ran out of input before finding another entry
    return null;
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class ActiveProbeListParser method processNormalDataStore.
/**
 * Builds a new DataSet from the probes of the supplied probe list, turning
 * each probe into a read after applying the user's preferences (strand
 * reversal, read extension and an optional forced strand).
 *
 * Probes whose adjusted position falls before position 1 or beyond the end of
 * their chromosome are reported as warnings and left out.
 *
 * @param activeList the probe list whose probes are converted into reads
 * @return the newly populated data set, or null if the import was cancelled
 * @throws IllegalArgumentException if the strand option is not one of the
 *         recognised values
 */
private DataSet processNormalDataStore(ProbeList activeList) {
    final int extension = prefs.extendReads();
    final boolean flipStrand = prefs.reverseReads();

    // Work out whether the user wants every read forced onto one strand
    final Object strandChoice = prefs.strandOptionBox.getSelectedItem();
    final boolean overrideStrand = !strandChoice.equals("From probes");
    int overridingStrand = 0;
    if (overrideStrand) {
        if (strandChoice.equals("Forward")) {
            overridingStrand = Location.FORWARD;
        } else if (strandChoice.equals("Reverse")) {
            overridingStrand = Location.REVERSE;
        } else if (strandChoice.equals("Unknown")) {
            overridingStrand = Location.UNKNOWN;
        } else {
            throw new IllegalArgumentException("Unknown forced strand option " + strandChoice);
        }
    }

    final DataSet reimported = new DataSet(activeList.name(), "Reimported from " + activeList.name(), prefs.removeDuplicates());

    // Walk every chromosome and convert its probes one at a time
    final Chromosome[] chromosomes = dataCollection().genome().getAllChromosomes();
    for (int chrIndex = 0; chrIndex < chromosomes.length; chrIndex++) {
        final Chromosome chromosome = chromosomes[chrIndex];
        progressUpdated("Processing " + activeList.name() + " chr " + chromosome.name(), chrIndex, chromosomes.length);

        for (Probe probe : activeList.getProbesForChromosome(chromosome)) {
            if (cancel) {
                progressCancelled();
                return null;
            }

            int start = probe.start();
            int end = probe.end();
            int strand = probe.strand();

            // Swap forward and reverse if requested; other strands are untouched
            if (flipStrand && strand == Location.FORWARD) {
                strand = Location.REVERSE;
            } else if (flipStrand && strand == Location.REVERSE) {
                strand = Location.FORWARD;
            }

            // Extensions may be negative (shortening the read), so clamp to
            // stop the two ends crossing over each other.
            if (extension != 0) {
                if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
                    end = Math.max(start, end + extension);
                } else if (strand == Location.REVERSE) {
                    start = Math.min(end, start - extension);
                }
            }

            // Reject anything starting before the first base of the chromosome
            if (start < 1) {
                int overrun = 1 - start;
                progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chromosome.name() + " (" + chromosome.length() + ")"));
                continue;
            }

            // ...and anything running off the far end
            if (end > chromosome.length()) {
                int overrun = end - chromosome.length();
                progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chromosome.name() + " (" + chromosome.length() + ")"));
                continue;
            }

            // The forced strand is applied last so it doesn't affect the
            // direction in which the read was extended.
            if (overrideStrand) {
                strand = overridingStrand;
            }

            try {
                long packedRead = SequenceRead.packPosition(start, end, strand);
                reimported.addData(chromosome, packedRead);
            } catch (SeqMonkException e) {
                // Report the problem and carry on with the next probe
                progressWarningReceived(e);
            }
        }
    }
    return reimported;
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class BismarkCovFileParser method run.
/**
 * Imports each selected coverage file into its own DataSet.
 *
 * Files whose name ends in ".gz" (case-insensitively) are read through a
 * GZIPInputStream. Every non-blank line is split on tabs and must have at
 * least 6 fields: field 0 is the chromosome name, fields 1 and 2 the start
 * and end positions, and fields 4 and 5 the methylated and unmethylated
 * counts. Methylated counts are stored as forward strand reads and
 * unmethylated counts as reverse strand reads.
 *
 * Problems with individual lines are reported as warnings and the line is
 * skipped; any other exception aborts the import and is passed to
 * progressExceptionReceived. The reader for each file is always closed, even
 * when processing fails part way through.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        File[] covFiles = getFiles();
        DataSet[] newData = new DataSet[covFiles.length];
        for (int f = 0; f < covFiles.length; f++) {
            BufferedReader br;
            if (covFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(covFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(covFiles[f]));
            }

            // Everything which uses the reader lives inside this try so the
            // finally block can release the file handle on every exit path.
            try {
                String line;
                newData[f] = new DataSet(covFiles[f].getName(), covFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;

                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // Close before notifying listeners; the second close
                        // performed by the finally block has no effect.
                        br.close();
                        progressCancelled();
                        return;
                    }

                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;

                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + covFiles[f].getName(), f, covFiles.length);
                    }

                    String[] sections = line.split("\t");

                    // Check to see if we've got enough data to work with
                    if (sections.length < 6) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }

                    int start;
                    int end;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        methCount = Integer.parseInt(sections[4]);
                        unmethCount = Integer.parseInt(sections[5]);

                        // End must always be later than start
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }

                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);

                        // Translate the positions through the chromosome's offset
                        start = c.position(start);
                        end = c.position(end);

                        // We also don't allow readings which are beyond the end of the chromosome
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }

                        // We can now make the new reads
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        newData[f].addData(c.chromosome(), methRead, methCount);
                        newData[f].addData(c.chromosome(), unmethRead, unmethCount);
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file, whether or not we got to the end of it.
                br.close();
            }

            // Cache the data in the new dataset
            progressUpdated("Caching data from " + covFiles[f].getName(), f, covFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class QuasRFileParser method run.
/**
 * Imports each selected file into its own DataSet.
 *
 * Files whose name ends in ".gz" (case-insensitively) are read through a
 * GZIPInputStream. Blank lines and lines starting with '#' are ignored. Every
 * other line is split on tabs and must have at least 5 fields: field 0 is the
 * chromosome name, fields 1 and 2 the start and end positions, field 3 the
 * total count and field 4 the methylated count. The unmethylated count is
 * the total minus the methylated count. One forward strand read is added per
 * methylated count and one reverse strand read per unmethylated count.
 *
 * Problems with individual lines are reported as warnings and the line is
 * skipped; any other exception aborts the import and is passed to
 * progressExceptionReceived. The reader for each file is always closed, even
 * when processing fails part way through.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        File[] quasrFiles = getFiles();
        DataSet[] newData = new DataSet[quasrFiles.length];
        for (int f = 0; f < quasrFiles.length; f++) {
            BufferedReader br;
            if (quasrFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(quasrFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(quasrFiles[f]));
            }

            // Everything which uses the reader lives inside this try so the
            // finally block can release the file handle on every exit path.
            try {
                String line;
                newData[f] = new DataSet(quasrFiles[f].getName(), quasrFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                int lineCount = 0;

                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        // Close before notifying listeners; the second close
                        // performed by the finally block has no effect.
                        br.close();
                        progressCancelled();
                        return;
                    }

                    // Ignore blank lines
                    if (line.trim().length() == 0)
                        continue;

                    // In case it has comments
                    if (line.startsWith("#"))
                        continue;

                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + quasrFiles[f].getName(), f, quasrFiles.length);
                    }

                    String[] sections = line.split("\t");

                    // Check to see if we've got enough data to work with
                    if (sections.length < 5) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }

                    int start;
                    int end;
                    int totalCount;
                    int methCount;
                    int unmethCount;
                    try {
                        start = Integer.parseInt(sections[1]);
                        end = Integer.parseInt(sections[2]);
                        totalCount = Integer.parseInt(sections[3]);
                        methCount = Integer.parseInt(sections[4]);
                        unmethCount = totalCount - methCount;

                        // End must always be later than start
                        if (start > end) {
                            progressWarningReceived(new SeqMonkException("End position " + end + " was lower than start position " + start));
                            int temp = start;
                            start = end;
                            end = temp;
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }

                    try {
                        ChromosomeWithOffset c = dataCollection().genome().getChromosome(sections[0]);

                        // Translate the positions through the chromosome's offset
                        start = c.position(start);
                        end = c.position(end);

                        // We also don't allow readings which are beyond the end of the chromosome
                        if (end > c.chromosome().length()) {
                            int overrun = end - c.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                            continue;
                        }

                        // We can now make the new reads
                        long methRead = SequenceRead.packPosition(start, end, Location.FORWARD);
                        long unmethRead = SequenceRead.packPosition(start, end, Location.REVERSE);
                        for (int i = 0; i < methCount; i++) {
                            newData[f].addData(c.chromosome(), methRead);
                        }
                        for (int i = 0; i < unmethCount; i++) {
                            newData[f].addData(c.chromosome(), unmethRead);
                        }
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file, whether or not we got to the end of it.
                br.close();
            }

            // Cache the data in the new dataset
            progressUpdated("Caching data from " + quasrFiles[f].getName(), f, quasrFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class PairedDataSet method addData.
/**
 * This method is used to add data to the paired data set and should be called
 * by all parsers which create a new set. Pairs of reads should be added
 * sequentially to the data set and will be paired by it internally: the first
 * read of a pair is simply remembered, and the pair is processed when the
 * second read arrives.
 *
 * A pair is discarded if its reads are on different chromosomes and trans
 * pairs are being ignored, or if both reads are on the same chromosome, the
 * minimum distance filter is on and the fragment length is below the minimum
 * distance.
 *
 * Only the SeqMonk parser should ever set the skipSorting parameter to true. Specifying
 * this for all data lets the dataset skip the sorting step when caching which is
 * otherwise really slow, but if this is incorrectly skipped then subsequent results
 * returned by the data set will be wrong. When skipSorting is true only the
 * forward pair is stored; otherwise the reverse pair is stored as well.
 *
 * @param c the chromosome on which the read lies
 * @param read the packed read position
 * @param skipSorting true if the data set may skip sorting when caching
 * @throws IllegalStateException if a completed pair is added after the data
 *         set has been finalised
 */
public void addData(Chromosome c, long read, boolean skipSorting) {
    if (!skipSorting) {
        needToSort = true;
    }

    if (lastChromosome == null) {
        // We'll just store this read for now until we can pair it with
        // the next read which is submitted
        lastRead = read;
        lastChromosome = c;
    } else if (lastChromosome != c && ignoreTrans) {
        // Skip this all together.
        lastRead = 0;
        lastChromosome = null;
    } else if (filterOnMinDistance && c == lastChromosome && SequenceRead.fragmentLength(lastRead, read) < minDistance) {
        // Skip cis reads which are too close together
        lastRead = 0;
        lastChromosome = null;
    } else {
        // We're actually going to add this pair
        try {
            // Check this before touching any counts so that a rejected
            // pair can't leave the cis/trans totals out of step with the
            // data which was actually stored.
            if (isFinalised) {
                throw new SeqMonkException("This data set is finalised. No more data can be added");
            }

            // With sorting skipped only the forward pair is stored, so the
            // pair contributes one to the total rather than two.
            int increment = skipSorting ? 1 : 2;

            if (lastChromosome != c) {
                // Increment the trans counts for each chromosome
                transCount += increment;
                if (!transChromosomeCounts.containsKey(lastChromosome)) {
                    transChromosomeCounts.put(lastChromosome, new NonThreadSafeIntCounter());
                }
                transChromosomeCounts.get(lastChromosome).increment();

                if (!skipSorting) {
                    if (!transChromosomeCounts.containsKey(c)) {
                        transChromosomeCounts.put(c, new NonThreadSafeIntCounter());
                    }
                    transChromosomeCounts.get(c).increment();
                }
            } else {
                // Increment the cis counts for each chromosome. Both reads
                // are on the same chromosome here so c == lastChromosome.
                cisCount += increment;
                if (!cisChromosomeCounts.containsKey(c)) {
                    cisChromosomeCounts.put(c, new NonThreadSafeIntCounter());
                }
                cisChromosomeCounts.get(c).increment();
            }

            // Add the forward pair.
            if (!readData.containsKey(lastChromosome)) {
                ChromosomeDataStore cds = new ChromosomeDataStore(lastChromosome);
                readData.put(lastChromosome, cds);
            }
            readData.get(lastChromosome).hitCollection.addHit(c.name(), lastRead, read);

            if (!skipSorting) {
                // Add the reverse pair.
                if (!readData.containsKey(c)) {
                    ChromosomeDataStore cds = new ChromosomeDataStore(c);
                    readData.put(c, cds);
                }
                readData.get(c).hitCollection.addHit(lastChromosome.name(), read, lastRead);
            }
        } catch (SeqMonkException sme) {
            throw new IllegalStateException(sme);
        }

        // The pair is complete so the next read starts a new one
        lastRead = 0;
        lastChromosome = null;
    }
}
Aggregations