use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseGenome.
/**
 * Parses the genome line, downloading any genome which isn't present in the
 * local genome base folder, and then starts parsing the genome files and
 * waits until the genome has been loaded.
 *
 * @param sections The tab split sections from the genome line
 * @throws SeqMonkException if the genome line is malformed or the genome base folder can't be found
 * @throws Exception if an exception was stored in exceptionReceived whilst a genome was being
 * downloaded or parsed, or if the thread was interrupted whilst waiting for a download
 */
private void parseGenome(String[] sections) throws Exception {
    if (sections.length != 3) {
        throw new SeqMonkException("Genome line didn't contain 3 sections");
    }
    if (!sections[0].equals("Genome")) {
        throw new SeqMonkException("First line of file was not the genome description");
    }
    // If we have a multi genome then sections 1 and 2 will be sub-divided with bar characters
    String[] speciesStrings = sections[1].split("\\|");
    String[] assemblyStrings = sections[2].split("\\|");
    if (speciesStrings.length != assemblyStrings.length) {
        throw new SeqMonkException("Got different number of species and assembly names from '" + sections[1] + "' and '" + sections[2] + "'");
    }
    File[] files = new File[speciesStrings.length];
    for (int g = 0; g < speciesStrings.length; g++) {
        try {
            files[g] = new File(SeqMonkPreferences.getInstance().getGenomeBase().getAbsoluteFile() + "/" + speciesStrings[g] + "/" + assemblyStrings[g]);
        } catch (FileNotFoundException e) {
            // Keep the underlying failure attached so it isn't lost from the stack trace
            SeqMonkException missingBase = new SeqMonkException("Couldn't find the folder which should contain the genome files. Please check your file preferences.");
            missingBase.initCause(e);
            throw missingBase;
        }
        if (!files[g].exists()) {
            // The user doesn't have this genome - yet...
            // If the user has lost network connection or is using a custom genome
            // this can generate errors which we don't want to put up a crash reporter
            // for. We therefore disable the crash reporter in our progress dialog and
            // we'll have custom code in our exceptionReceived method to handle these
            // cases nicely.
            // This variable is only used for error messages
            genomeToLoad = speciesStrings[g] + " " + assemblyStrings[g];
            GenomeDownloader d = new GenomeDownloader();
            d.addProgressListener(this);
            ProgressDialog pd = new ProgressDialog(application, "Downloading genome...");
            pd.setIgnoreExceptions(true);
            d.addProgressListener(pd);
            // Bit of a hack here, since we don't know the size of the genome
            // being downloaded we have to put an approximate value in so the
            // progress bar does something sensible.
            d.downloadGenome(speciesStrings[g], assemblyStrings[g], 25000000, true);
            pd.requestFocus();
            pauseWhilstLoadingGenome = true;
            while (pauseWhilstLoadingGenome) {
                if (exceptionReceived != null) {
                    throw exceptionReceived;
                }
                // This sleep *has* to be left in place. If it doesn't then this thread gets
                // deadlocked and the flag is never reset once the new genome is downloaded
                // and the processing of the downloaded genome never starts.
                Thread.sleep(200);
            }
            if (exceptionReceived != null) {
                // Do we need to say we cancelled?
                return;
            }
        }
    }
    GenomeParser parser = new GenomeParser();
    parser.addProgressListener(this);
    parser.parseGenome(files);
    // We keep waiting for the genome even if we're interrupted, but we remember
    // that it happened and restore the interrupt status once we stop waiting so
    // that the interruption isn't silently lost.
    boolean interrupted = false;
    try {
        while (!genomeLoaded) {
            if (exceptionReceived != null) {
                throw exceptionReceived;
            }
            try {
                Thread.sleep(1000);
            } catch (InterruptedException ie) {
                interrupted = true;
            }
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseDisplayPreferences.
/**
 * Parses the display preferences. The initial line says how many preference
 * lines follow; each of those lines is read from br, split on tabs and applied
 * to the DisplayPreferences instance according to the name in its first field.
 *
 * Numeric values which can't be parsed still surface as an unchecked
 * NumberFormatException.
 *
 * @param sections The tab split initial display preferences line
 * @throws SeqMonkException if the line count can't be read, the file ends before all of the
 * preference lines have been read, a line has no value, or a preference name isn't recognised
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseDisplayPreferences(String[] sections) throws SeqMonkException, IOException {
    int linesToParse;
    try {
        linesToParse = Integer.parseInt(sections[1]);
    } catch (Exception e) {
        throw new SeqMonkException("Couldn't see the number of display preference lines to parse");
    }
    for (int i = 0; i < linesToParse; i++) {
        String line = br.readLine();
        if (line == null) {
            // readLine returns null at the end of the stream, so the file was truncated
            throw new SeqMonkException("File ended after " + i + " of " + linesToParse + " display preference lines");
        }
        String[] prefs = line.split("\\t");
        // CurrentView uses a chromosome name plus a start and end position,
        // every other preference uses a single value.
        int fieldsNeeded = prefs[0].equals("CurrentView") ? 4 : 2;
        if (prefs.length < fieldsNeeded) {
            throw new SeqMonkException("Display preference line '" + line + "' was missing its value");
        }
        if (prefs[0].equals("DataZoom")) {
            DisplayPreferences.getInstance().setMaxDataValue(Double.parseDouble(prefs[1]));
        } else if (prefs[0].equals("ScaleMode")) {
            DisplayPreferences.getInstance().setScaleType(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("DisplayMode")) {
            DisplayPreferences.getInstance().setDisplayMode(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("ReplicateExpansion")) {
            DisplayPreferences.getInstance().setReplicateSetExpansion(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("Variation")) {
            DisplayPreferences.getInstance().setVariation(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("CurrentView")) {
            DisplayPreferences.getInstance().setLocation(application.dataCollection().genome().getChromosome(prefs[1]).chromosome(), SequenceRead.packPosition(Integer.parseInt(prefs[2]), Integer.parseInt(prefs[3]), Location.UNKNOWN));
        } else if (prefs[0].equals("ReadDensity")) {
            DisplayPreferences.getInstance().setReadDensity(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("SplitMode")) {
            DisplayPreferences.getInstance().setReadDisplay(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("QuantitationColour")) {
            DisplayPreferences.getInstance().setColourType(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("Gradient")) {
            DisplayPreferences.getInstance().setGradient(Integer.parseInt(prefs[1]));
        } else if (prefs[0].equals("InvertGradient")) {
            // Only the exact value "1" turns inversion on
            DisplayPreferences.getInstance().setInvertGradient(prefs[1].equals("1"));
        } else if (prefs[0].equals("GraphType")) {
            DisplayPreferences.getInstance().setGraphType(Integer.parseInt(prefs[1]));
        } else {
            throw new SeqMonkException("Didn't know how to process display preference '" + prefs[0] + "'");
        }
    }
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class VisibleStoresParser method processHiCDataStore.
/**
 * Builds a new PairedDataSet from the exportable reads of an existing HiC data
 * store, applying the downsampling, feature filtering, strand reversal, strand
 * removal and read extension options which are currently set.
 *
 * @param store The store to re-import. It is cast to HiCDataStore.
 * @return The newly built data set, or null if the import was cancelled
 * @throws SeqMonkException declared for callers; problems with individual reads are
 * reported through progressWarningReceived and the read is skipped
 */
private DataSet processHiCDataStore(DataStore store) throws SeqMonkException {
    int extendBy = prefs.extendReads();
    boolean reverse = prefs.reverseReads();
    boolean removeStrand = prefs.removeStrandInfo();
    PairedDataSet newData = new PairedDataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
    // Now process the data
    Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
        // We make the call to get exportable reads so we don't duplicate reads
        // when we export things
        HiCHitCollection hitCollection = ((HiCDataStore) store).getExportableReadsForChromosome(chrs[c]);
        String[] localChromosomes = hitCollection.getChromosomeNamesWithHits();
        // The features we filter against depend only on the source chromosome,
        // so we fetch and sort them once here rather than once for every read.
        Feature[] features = null;
        if (filterByFeature) {
            features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
            Arrays.sort(features);
        }
        for (int c2 = 0; c2 < localChromosomes.length; c2++) {
            Chromosome localChromosome = SeqMonkApplication.getInstance().dataCollection().genome().getChromosome(localChromosomes[c2]).chromosome();
            long[] sourceReads = hitCollection.getSourcePositionsForChromosome(localChromosomes[c2]);
            long[] hitReads = hitCollection.getHitPositionsForChromosome(localChromosomes[c2]);
            for (int r = 0; r < sourceReads.length; r++) {
                if (cancel) {
                    progressCancelled();
                    return null;
                }
                if (downsample && downsampleProbabilty < 1) {
                    if (Math.random() > downsampleProbabilty) {
                        continue;
                    }
                }
                if ((!(reverse || removeStrand)) && extendBy == 0 && (!filterByFeature)) {
                    // Just add them as they are
                    // NOTE(review): there is no continue here, so when prefs.isHiC() is false
                    // the same pair is added a second time at the bottom of the loop - confirm
                    // whether that is intended.
                    newData.addData(chrs[c], sourceReads[r]);
                    newData.addData(localChromosome, hitReads[r]);
                }
                if (filterByFeature) {
                    // With no features at all nothing can overlap
                    boolean overlapsFeature = false;
                    if (features.length > 0) {
                        // See if we're comparing against the right feature
                        int currentFeaturePosition = 0;
                        while (SequenceRead.start(sourceReads[r]) > features[currentFeaturePosition].location().end() && currentFeaturePosition < (features.length - 1)) {
                            currentFeaturePosition++;
                        }
                        // Test to see if we overlap
                        overlapsFeature = SequenceRead.overlaps(sourceReads[r], features[currentFeaturePosition].location().packedPosition());
                    }
                    // Overlapping reads are dropped when excluding features, and
                    // non-overlapping reads are dropped when we're not.
                    if (overlapsFeature == excludeFeature) {
                        continue;
                    }
                }
                int sourceStart = SequenceRead.start(sourceReads[r]);
                int sourceEnd = SequenceRead.end(sourceReads[r]);
                int sourceStrand = SequenceRead.strand(sourceReads[r]);
                // The hit coordinates must all come from the hit read
                int hitStart = SequenceRead.start(hitReads[r]);
                int hitEnd = SequenceRead.end(hitReads[r]);
                int hitStrand = SequenceRead.strand(hitReads[r]);
                if (reverse) {
                    if (sourceStrand == Location.FORWARD) {
                        sourceStrand = Location.REVERSE;
                    } else if (sourceStrand == Location.REVERSE) {
                        sourceStrand = Location.FORWARD;
                    }
                    if (hitStrand == Location.FORWARD) {
                        hitStrand = Location.REVERSE;
                    } else if (hitStrand == Location.REVERSE) {
                        hitStrand = Location.FORWARD;
                    }
                }
                if (removeStrand) {
                    sourceStrand = Location.UNKNOWN;
                    hitStrand = Location.UNKNOWN;
                }
                if (extendBy > 0) {
                    // Forward reads grow at their end, reverse reads at their start
                    if (sourceStrand == Location.FORWARD) {
                        sourceEnd += extendBy;
                    } else if (sourceStrand == Location.REVERSE) {
                        sourceStart -= extendBy;
                    }
                    if (hitStrand == Location.FORWARD) {
                        hitEnd += extendBy;
                    } else if (hitStrand == Location.REVERSE) {
                        hitStart -= extendBy;
                    }
                }
                // We also don't allow readings which are beyond the end of the chromosome
                if (sourceEnd > chrs[c].length()) {
                    int overrun = sourceEnd - chrs[c].length();
                    progressWarningReceived(new SeqMonkException("Reading position " + sourceEnd + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                if (hitEnd > localChromosome.length()) {
                    int overrun = hitEnd - localChromosome.length();
                    // Report the length of the chromosome the hit is actually on
                    progressWarningReceived(new SeqMonkException("Reading position " + hitEnd + " was " + overrun + "bp beyond the end of chr" + localChromosome.name() + " (" + localChromosome.length() + ")"));
                    continue;
                }
                // We can now make the new readings
                long sourceRead = SequenceRead.packPosition(sourceStart, sourceEnd, sourceStrand);
                long hitRead = SequenceRead.packPosition(hitStart, hitEnd, hitStrand);
                if (!prefs.isHiC()) {
                    // HiC additions are deferred until we know the other end is OK too.
                    newData.addData(chrs[c], sourceRead);
                    newData.addData(localChromosome, hitRead);
                }
            }
        }
    }
    return newData;
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class BAMFileParser method getPairedEndRead.
/**
 * Gets a paired end read. This method assumes that it will only be passed reads which map
 * to the reverse strand since these are the ones which contain enough information to
 * unambiguously locate both ends of the pair.
 *
 * The returned read runs from the alignment start of the mate to the alignment end of
 * this record. Its strand is reverse if this record is the first of the pair and forward
 * otherwise.
 *
 * @param samRecord The record for the reverse strand read of the pair
 * @return The read covering both ends of the pair, along with its chromosome
 * @throws SeqMonkException if the record isn't on the negative strand, the mate is also on
 * the negative strand, the mate starts after this record ends, the pair spans more than
 * pairedEndDistance, the chromosome can't be found, or the read falls outside the chromosome
 */
private SequenceReadWithChromosome getPairedEndRead(SAMRecord samRecord) throws SeqMonkException {
    if (!samRecord.getReadNegativeStrandFlag()) {
        throw new SeqMonkException("Read passed to parse pair was not on the negative strand");
    }
    if (samRecord.getMateNegativeStrandFlag()) {
        throw new SeqMonkException("Ignored discordantly stranded read pair");
    }
    int end = samRecord.getAlignmentEnd();
    int start = samRecord.getMateAlignmentStart();
    if (start > end) {
        throw new SeqMonkException("Ignored discordantly stranded read pair");
    }
    int strand;
    if (samRecord.getFirstOfPairFlag()) {
        strand = Location.REVERSE;
    } else {
        strand = Location.FORWARD;
    }
    if ((end - start) + 1 > pairedEndDistance) {
        throw new SeqMonkException("Distance between ends " + ((end - start) + 1) + " was larger than cutoff (" + pairedEndDistance + ")");
    }
    ChromosomeWithOffset c;
    try {
        c = dataCollection().genome().getChromosome(samRecord.getReferenceName());
    } catch (Exception e) {
        // Keep the original failure attached so its stack trace isn't lost
        SeqMonkException lookupFailure = new SeqMonkException(e.getLocalizedMessage());
        lookupFailure.initCause(e);
        throw lookupFailure;
    }
    // NOTE(review): position() presumably applies the chromosome's offset - confirm
    start = c.position(start);
    end = c.position(end);
    // We also don't allow readings which are beyond the end of the chromosome
    if (end > c.chromosome().length()) {
        int overrun = end - c.chromosome().length();
        throw new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")");
    }
    if (start < 1) {
        throw new SeqMonkException("Reading position " + start + " was before the start of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")");
    }
    // We can now make the new reading
    return new SequenceReadWithChromosome(c.chromosome(), SequenceRead.packPosition(start, end, strand));
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class BedPEFileParser method run.
/* (non-Javadoc)
 * @see java.lang.Runnable#run()
 *
 * Reads each of the selected files as tab delimited paired end BED data and
 * builds one data set per file. Lines with fewer than 6 fields, or with "." as
 * either chromosome, are skipped. In HiC mode both ends of each pair are added
 * as separate reads; otherwise the two ends are merged into a single read,
 * and pairs on different chromosomes, with inconsistent strands, or spanning
 * more than the pair distance cutoff are discarded with a warning.
 *
 * The extendReads preference is not applied by this parser.
 */
public void run() {
    try {
        File[] probeFiles = getFiles();
        DataSet[] newData = new DataSet[probeFiles.length];
        for (int f = 0; f < probeFiles.length; f++) {
            BufferedReader br;
            if (probeFiles[f].getName().toLowerCase().endsWith(".gz")) {
                br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(probeFiles[f]))));
            } else {
                br = new BufferedReader(new FileReader(probeFiles[f]));
            }
            // The reader is closed in the finally block so that it isn't leaked
            // if we're cancelled or something goes wrong part way through the file.
            try {
                if (prefs.isHiC()) {
                    newData[f] = new PairedDataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
                } else {
                    newData[f] = new DataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates());
                }
                int lineCount = 0;
                String line;
                // Now process the file
                while ((line = br.readLine()) != null) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    // Ignore blank lines
                    if (line.trim().length() == 0) {
                        continue;
                    }
                    ++lineCount;
                    if (lineCount % 100000 == 0) {
                        progressUpdated("Read " + lineCount + " lines from " + probeFiles[f].getName(), f, probeFiles.length);
                    }
                    String[] sections = line.split("\t");
                    // Check to see if we've got enough data to work with
                    if (sections.length < 6) {
                        progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                        // Skip this line...
                        continue;
                    }
                    // A "." chromosome for either end means we can't place the pair,
                    // so move on quickly.
                    if (sections[0].equals(".") || sections[3].equals(".")) {
                        continue;
                    }
                    int strand1;
                    int start1;
                    int end1;
                    int strand2;
                    int start2;
                    int end2;
                    try {
                        // The start is zero indexed so we need to add 1 to get genomic positions
                        start1 = Integer.parseInt(sections[1]) + 1;
                        start2 = Integer.parseInt(sections[4]) + 1;
                        // The end is zero indexed, but not included in the feature position so
                        // we need to add one to get genomic coordinates, but subtract one to not
                        // include the final base.
                        end1 = Integer.parseInt(sections[2]);
                        end2 = Integer.parseInt(sections[5]);
                        // End must always be later than start
                        if (start1 > end1) {
                            progressWarningReceived(new SeqMonkException("End position1 " + end1 + " was lower than start position " + start1));
                            int temp = start1;
                            start1 = end1;
                            end1 = temp;
                        }
                        if (start2 > end2) {
                            progressWarningReceived(new SeqMonkException("End position2 " + end2 + " was lower than start position " + start2));
                            int temp = start2;
                            start2 = end2;
                            end2 = temp;
                        }
                        // Strands are only read when the line has at least 10 fields
                        if (sections.length >= 10) {
                            if (sections[8].equals("+")) {
                                strand1 = Location.FORWARD;
                            } else if (sections[8].equals("-")) {
                                strand1 = Location.REVERSE;
                            } else if (sections[8].equals(".")) {
                                strand1 = Location.UNKNOWN;
                            } else {
                                progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[8] + "' marked as unknown strand"));
                                strand1 = Location.UNKNOWN;
                            }
                            if (sections[9].equals("+")) {
                                strand2 = Location.FORWARD;
                            } else if (sections[9].equals("-")) {
                                strand2 = Location.REVERSE;
                            } else if (sections[9].equals(".")) {
                                strand2 = Location.UNKNOWN;
                            } else {
                                progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[9] + "' marked as unknown strand"));
                                strand2 = Location.UNKNOWN;
                            }
                        } else {
                            strand1 = Location.UNKNOWN;
                            strand2 = Location.UNKNOWN;
                        }
                    } catch (NumberFormatException e) {
                        progressWarningReceived(new SeqMonkException("Location " + sections[0] + "-" + sections[1] + " was not an integer"));
                        continue;
                    }
                    try {
                        ChromosomeWithOffset c1 = dataCollection().genome().getChromosome(sections[0]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start1 = c1.position(start1);
                        end1 = c1.position(end1);
                        if (end1 > c1.chromosome().length()) {
                            int overrun = end1 - c1.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end1 + " was " + overrun + "bp beyond the end of chr" + c1.chromosome().name() + " (" + c1.chromosome().length() + ")"));
                            continue;
                        }
                        ChromosomeWithOffset c2 = dataCollection().genome().getChromosome(sections[3]);
                        // We also don't allow readings which are beyond the end of the chromosome
                        start2 = c2.position(start2);
                        end2 = c2.position(end2);
                        if (end2 > c2.chromosome().length()) {
                            int overrun = end2 - c2.chromosome().length();
                            progressWarningReceived(new SeqMonkException("Reading position " + end2 + " was " + overrun + "bp beyond the end of chr" + c2.chromosome().name() + " (" + c2.chromosome().length() + ")"));
                            continue;
                        }
                        if (prefs.isHiC()) {
                            // For HiC data we keep both ends as they are and just
                            // add them. There's nothing clever to do.
                            long read1 = SequenceRead.packPosition(start1, end1, strand1);
                            newData[f].addData(c1.chromosome(), read1);
                            long read2 = SequenceRead.packPosition(start2, end2, strand2);
                            newData[f].addData(c2.chromosome(), read2);
                        } else {
                            // If they're on different chromosomes then we kick them out
                            if (!c1.chromosome().name().equals(c2.chromosome().name())) {
                                progressWarningReceived(new SeqMonkException("Paried reads were on different chromosomes - discarding"));
                                continue;
                            }
                            if (strand1 == Location.FORWARD && strand2 != Location.REVERSE) {
                                progressWarningReceived(new SeqMonkException("Invalid strand orientation - discarding"));
                                continue;
                            }
                            if (strand1 == Location.REVERSE && strand2 != Location.FORWARD) {
                                progressWarningReceived(new SeqMonkException("Invalid strand orientation - discarding"));
                                continue;
                            }
                            // The defaults give end <= start so that a strand we don't
                            // recognise is rejected by the orientation check below.
                            int start = 1;
                            int end = 0;
                            // We take the strand from read1
                            int strand = strand1;
                            if (strand == Location.FORWARD) {
                                start = start1;
                                end = end2;
                            } else if (strand == Location.REVERSE) {
                                start = start2;
                                end = end1;
                            } else if (strand == Location.UNKNOWN) {
                                start = Math.min(start1, start2);
                                end = Math.max(end1, end2);
                            }
                            if (end <= start) {
                                progressWarningReceived(new SeqMonkException("Incorrectly oriented reads - discarding"));
                                continue;
                            }
                            // If they're too far apart we kick them out
                            if ((end - start) + 1 > prefs.pairDistanceCutoff()) {
                                progressWarningReceived(new SeqMonkException("Distance between reads too great (" + (((end - start) + 1) - prefs.pairDistanceCutoff()) + ")"));
                                continue;
                            }
                            long read = SequenceRead.packPosition(start, end, strand);
                            newData[f].addData(c1.chromosome(), read);
                        }
                    } catch (IllegalArgumentException iae) {
                        progressWarningReceived(iae);
                    } catch (SeqMonkException sme) {
                        progressWarningReceived(sme);
                        continue;
                    }
                }
            } finally {
                // We're finished with the file.
                br.close();
            }
            // Cache the data in the new dataset
            progressUpdated("Caching data from " + probeFiles[f].getName(), f, probeFiles.length);
            newData[f].finalise();
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
        return;
    }
}
Aggregations