use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseAnnotation.
/**
 * Parses an external set of annotations.
 *
 * <p>The header line carries the annotation set name (second field) and the
 * number of feature lines which follow it (third field). Each feature line
 * is tab delimited: the first field and the resolved chromosome name are
 * handed to the {@code Feature} constructor, the second field is the
 * chromosome name to look up, the third field is parsed as a
 * {@code SplitLocation}, and any remaining fields are consumed in pairs and
 * passed to {@code addAttribute}. A trailing unpaired field is ignored.
 *
 * <p>A feature whose chromosome lookup fails is reported to every listener
 * as a warning and skipped; it does not abort the parse.
 *
 * @param sections The tab split initial annotation line
 * @return The finalised annotation set holding every feature which could be mapped
 * @throws SeqMonkException If the header line doesn't have 3 sections, if a
 *         feature line has fewer than 3 sections, or if the file ends before
 *         the declared number of features has been read
 * @throws IOException Signals that an I/O exception has occurred.
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }

        // readLine returns null at end of stream. Report a truncated file
        // explicitly rather than failing with a NullPointerException.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of annotation data at line " + i + " (expected " + featureCount + " features)");
        }

        String[] featureSections = line.split("\\t");
        // We index fields 0, 1 and 2 below, so make sure they're all there
        // before we start rather than failing with an index error part way
        // through.
        if (featureSections.length < 3) {
            throw new SeqMonkException("Expected at least 3 sections in annotation feature line but got " + featureSections.length);
        }

        Chromosome c;
        try {
            c = application.dataCollection().genome().getChromosome(featureSections[1]).chromosome();
        } catch (Exception sme) {
            // Best effort: a feature on a chromosome we can't resolve is
            // reported as a warning and dropped, the rest of the set is kept.
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(new SeqMonkException("Annotation feature could not be mapped to chromosome '" + featureSections[1] + "'"));
            }
            continue;
        }

        Feature f = new Feature(featureSections[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(featureSections[2]));

        // Remaining fields are consumed two at a time. The a + 1 bound means
        // a dangling final field without a partner is skipped.
        for (int a = 3; a + 1 < featureSections.length; a += 2) {
            f.addAttribute(featureSections[a], featureSections[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method run.
/**
 * Reads the SeqMonk file line by line and dispatches each section header to
 * the matching parse method.
 *
 * <p>Every section other than "SeqMonk Data Version" and "Genome" is
 * rejected with a SeqMonkException unless {@code genomeLoaded} is already
 * true. Annotation sets are collected as they are parsed and added to the
 * genome's annotation collection when the "Display Preferences" section is
 * reached.
 *
 * <p>Outcomes are reported through the registered listeners:
 * {@code progressCancelled} if {@code exceptionReceived} is set after the
 * genome section has been parsed, {@code progressExceptionReceived} if any
 * exception is thrown whilst parsing, and {@code progressComplete} otherwise.
 * The reader is closed on all three paths.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    genomeLoaded = false;
    exceptionReceived = null;
    try {
        String line;
        String[] sections;
        Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
        while ((line = br.readLine()) != null) {
            sections = line.split("\\t");
            // Now we look where to send this...
            if (sections[0].equals("SeqMonk Data Version")) {
                parseDataVersion(sections);
            } else if (sections[0].equals("Features")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseFeatures(sections);
            } else if (sections[0].equals("Samples")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseSamples(sections);
            } else if (sections[0].equals("Annotation")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                annotationSets.add(parseAnnotation(sections));
            } else if (sections[0].equals("Data Groups")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                try {
                    parseGroups(sections);
                } catch (SeqMonkException ex) {
                    // An exception whose message mentions "ambiguous" is
                    // downgraded to a warning so the rest of the file can
                    // still be read. Anything else is fatal.
                    if (ex.getMessage().contains("ambiguous")) {
                        Enumeration<ProgressListener> e = listeners.elements();
                        while (e.hasMoreElements()) {
                            e.nextElement().progressWarningReceived(ex);
                        }
                    } else {
                        throw ex;
                    }
                }
            } else if (sections[0].equals("Replicate Sets")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseReplicates(sections);
            } else if (sections[0].equals("Probes")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseProbes(sections);
            } else if (sections[0].equals("Lists")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseLists(sections);
            } else if (sections[0].equals("Genome")) {
                if (forcedAssembly) {
                    // The genome line in the file is skipped entirely when an
                    // assembly has been forced.
                    genomeLoaded = true;
                    continue;
                }
                parseGenome(sections);
                if (exceptionReceived != null) {
                    // An exception has been recorded, so we give up on the
                    // file and tell the listeners the load was cancelled.
                    Enumeration<ProgressListener> e = listeners.elements();
                    while (e.hasMoreElements()) {
                        e.nextElement().progressCancelled();
                    }
                    // We're abandoning the file so release the reader here,
                    // the same as the other exit paths do. A failure to close
                    // is ignored since the listeners have already been told
                    // the load was cancelled.
                    try {
                        br.close();
                    } catch (IOException ignored) {
                        // Nothing useful we can do about this.
                    }
                    return;
                }
            } else if (sections[0].equals("Visible Stores")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                parseVisibleStores(sections);
            } else if (sections[0].equals("Display Preferences")) {
                if (!genomeLoaded) {
                    throw new SeqMonkException("No genome definition found before data");
                }
                // Add any annotation sets we've parsed at this point
                application.dataCollection().genome().annotationCollection().addAnnotationSets(annotationSets.toArray(new AnnotationSet[0]));
                parseDisplayPreferences(sections);
            } else {
                throw new SeqMonkException("Didn't recognise section '" + sections[0] + "' in seqmonk file");
            }
        }
        // We're finished with the file
        br.close();
        cleanUpFeatureTracks();
    } catch (Exception ex) {
        // Any failure, checked or not, is passed to the listeners rather than
        // being allowed to escape from run().
        Enumeration<ProgressListener> e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressExceptionReceived(ex);
        }
        try {
            br.close();
        } catch (IOException e1) {
            throw new IllegalStateException(e1);
        }
        return;
    }
    Enumeration<ProgressListener> e = listeners.elements();
    while (e.hasMoreElements()) {
        // In this case we put out a dummy empty dataset since
        // we've already entered the data into the collection by now
        e.nextElement().progressComplete("datasets_loaded", new DataSet[0]);
    }
    application.resetChangesWereMade();
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseReplicates.
/**
 * Parses the list of replicate sets.
 *
 * <p>The header line gives the number of replicate set lines which follow.
 * Each of those lines is tab delimited: the first field is the replicate set
 * name and every later field is a member reference made of a one letter
 * prefix and a number. A "g" prefix is resolved with {@code getDataGroup}
 * and an "s" prefix with {@code getDataSet}.
 *
 * @param sections The tab split values from the initial replicates line
 * @throws SeqMonkException If the header line is malformed, if the file ends
 *         before the declared number of replicate sets has been read, if a
 *         member reference doesn't start with g or s, or if a member can't
 *         be found in the data collection
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseReplicates(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Replicate Sets line didn't contain 2 sections");
    }
    if (!sections[0].equals("Replicate Sets")) {
        throw new SeqMonkException("Couldn't find expected replicates line");
    }
    int n = Integer.parseInt(sections[1]);
    for (int i = 0; i < n; i++) {
        // readLine returns null at end of stream. Report a truncated file
        // explicitly rather than failing with a NullPointerException.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of replicate set data at line " + i + " (expected " + n + " replicate sets)");
        }

        String[] replicateLine = line.split("\\t");
        // Field 0 is the set name, so there is one fewer member than fields.
        DataStore[] groupMembers = new DataStore[replicateLine.length - 1];
        for (int j = 1; j < replicateLine.length; j++) {
            String member = replicateLine[j];
            String memberIndex;
            if (member.startsWith("g")) {
                memberIndex = member.substring(1);
                groupMembers[j - 1] = application.dataCollection().getDataGroup(Integer.parseInt(memberIndex));
            } else if (member.startsWith("s")) {
                memberIndex = member.substring(1);
                groupMembers[j - 1] = application.dataCollection().getDataSet(Integer.parseInt(memberIndex));
            } else {
                throw new SeqMonkException("Replicate member id " + member + " didn't start with g or s");
            }
            if (groupMembers[j - 1] == null) {
                throw new SeqMonkException("Couldn't find replicate member from position " + memberIndex);
            }
        }
        ReplicateSet r = new ReplicateSet(replicateLine[0], groupMembers);
        application.dataCollection().addReplicateSet(r);
    }
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseVisibleStores.
/**
 * Reads the visible stores section and hands the listed data stores to the
 * application to be drawn.
 *
 * <p>The header line gives the number of store lines which follow. Each
 * store line has exactly two tab separated fields: a key and a store type.
 * For data versions before 4 the key is a store name and the type is
 * "set" or "group". From version 4 onwards the key is a numeric index and
 * the type is "set", "group" or "replicate".
 *
 * @param sections The tab split initial line from the visible stores section
 * @throws SeqMonkException If the header or a store line doesn't have 2
 *         sections, if the file ends early, or if a store type isn't recognised
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseVisibleStores(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Visible stores line didn't contain 2 sections");
    }
    final int storeCount = Integer.parseInt(sections[1]);

    // All of the stores are gathered first and handed over in a single call
    // at the end. Adding them one at a time meant a calculation was redone
    // for every store added.
    final DataStore[] visible = new DataStore[storeCount];

    // Old files identify stores by name, which is unreliable when names are
    // duplicated. The first store with a matching name is the one we use.
    final boolean lookupByName = thisDataVersion < 4;
    DataSet[] knownSets = null;
    DataGroup[] knownGroups = null;
    if (lookupByName) {
        knownSets = application.dataCollection().getAllDataSets();
        knownGroups = application.dataCollection().getAllDataGroups();
    }

    for (int index = 0; index < storeCount; index++) {
        final String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of visible store data at line " + index + " (expected " + storeCount + " stores)");
        }
        final String[] fields = line.split("\\t");
        if (fields.length != 2) {
            throw new SeqMonkException("Expected 2 sections in visible store line but got " + fields.length);
        }
        final String key = fields[0];
        final String type = fields[1];

        if (lookupByName) {
            // A name which matches nothing leaves this slot as null.
            if (type.equals("set")) {
                for (DataSet candidate : knownSets) {
                    if (candidate.name().equals(key)) {
                        visible[index] = candidate;
                        break;
                    }
                }
            } else if (type.equals("group")) {
                for (DataGroup candidate : knownGroups) {
                    if (candidate.name().equals(key)) {
                        visible[index] = candidate;
                        break;
                    }
                }
            } else {
                throw new SeqMonkException("Didn't recognise data type '" + type + "' when adding visible stores from line '" + line + "'");
            }
        } else {
            if (type.equals("set")) {
                visible[index] = application.dataCollection().getDataSet(Integer.parseInt(key));
            } else if (type.equals("group")) {
                visible[index] = application.dataCollection().getDataGroup(Integer.parseInt(key));
            } else if (type.equals("replicate")) {
                visible[index] = application.dataCollection().getReplicateSet(Integer.parseInt(key));
            } else {
                throw new SeqMonkException("Didn't recognise data type '" + type + "' when adding visible stores from line '" + line + "'");
            }
        }
    }

    application.addToDrawnDataStores(visible);
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseProbes.
/**
 * Parses the list of probes.
 *
 * <p>The header line gives the number of probe lines which follow, and
 * optionally a description, a current quantitation and comments for the
 * probe set (backticks in the comments stand in for newlines). Each probe
 * line is tab delimited and starts with the probe name and chromosome name.
 * For data versions before 8 these are followed by separate start and end
 * fields, from version 8 onwards by a single packed position. The remaining
 * fields are per-store values, data sets first and then data groups, where
 * an empty field means no value is set.
 *
 * @param sections The tab split initial line from the probes section
 * @throws SeqMonkException If the header line is malformed, if the file ends
 *         before the declared number of probes has been read, if a probe
 *         line has an unexpected number of sections, or if a chromosome
 *         can't be found
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseProbes(String[] sections) throws SeqMonkException, IOException {
    if (sections.length < 2) {
        throw new SeqMonkException("Probe line didn't contain at least 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probes line");
    }
    int n = Integer.parseInt(sections[1]);
    probes = new Probe[n];
    String description = "No generator description available";
    if (sections.length > 2) {
        description = sections[2];
    }
    ProbeSet probeSet = new ProbeSet(description, n);
    if (sections.length > 3) {
        if (sections[3].length() > 0) {
            probeSet.setCurrentQuantitation(sections[3]);
        }
    }
    if (sections.length > 4) {
        probeSet.setComments(sections[4].replaceAll("`", "\n"));
    }
    // We need to save the probeset to the dataset at this point so we can add the probe
    // lists as we get to them.
    application.dataCollection().setProbeSet(probeSet);

    // The index of the first per-store value on a probe line.
    int positionOffset;
    if (thisDataVersion < 8) {
        // We used to store chr start and end
        positionOffset = 4;
    } else {
        // We now store chr and packed position (to give start end and strand)
        positionOffset = 3;
    }

    // By default we expect values for both datasets and datagroups. This has
    // to be built from positionOffset rather than a fixed 3, otherwise an
    // old format line which is one field short would pass the length check
    // and have all of its values read from the wrong columns.
    int expectedSectionLength = positionOffset + dataSets.length + dataGroups.length;

    String line;
    for (int i = 0; i < n; i++) {
        line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        // Since the probes section can have blank trailing sections we need
        // to not trim these, hence the -1 limit.
        String[] probeSections = line.split("\\t", -1);
        if (i == 0) {
            /*
             * Older versions of this format put down data for just
             * datasets. Newer versions include data for datagroups
             * as well. We use the first line to figure out which one
             * we're looking at and hold every later line to the same
             * length.
             */
            if (probeSections.length == positionOffset + dataSets.length) {
                expectedSectionLength = positionOffset + dataSets.length;
            }
        }
        if (probeSections.length != expectedSectionLength) {
            throw new SeqMonkException("Expected " + expectedSectionLength + " sections in data file for " + probeSections[0] + " but got " + probeSections.length);
        }
        if (i % 10000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed data for " + i + " probes", i, n);
            }
        }
        Chromosome c = application.dataCollection().genome().getChromosome(probeSections[1]).chromosome();
        // Sanity check
        if (c == null) {
            throw new SeqMonkException("Couldn't find a chromosome called " + probeSections[1]);
        }
        Probe p;
        if (thisDataVersion < 8) {
            int start = Integer.parseInt(probeSections[2]);
            int end = Integer.parseInt(probeSections[3]);
            p = new Probe(c, start, end);
        } else {
            long packedValue = Long.parseLong(probeSections[2]);
            p = new Probe(c, packedValue);
        }
        // A name of "null" in the file means the probe is left unnamed.
        if (!probeSections[0].equals("null")) {
            p.setName(probeSections[0]);
        }
        probes[i] = p;
        probeSet.addProbe(probes[i], null);

        // Value columns are the datasets in order followed by the datagroups
        // in order. A blank column means there's no value to set.
        for (int j = positionOffset; j < probeSections.length; j++) {
            if (probeSections[j].length() == 0) {
                continue;
            }
            if ((j - positionOffset) >= dataSets.length) {
                dataGroups[j - (positionOffset + dataSets.length)].setValueForProbe(p, Float.parseFloat(probeSections[j]));
            } else {
                dataSets[j - positionOffset].setValueForProbe(p, Float.parseFloat(probeSections[j]));
            }
        }
    }
    application.dataCollection().activeProbeListChanged(probeSet);
    // This rename doesn't actually change the name. We put this in because
    // the All Probes group is drawn in the data view before probes have been
    // added to it. This means that its name isn't updated when the probes have
    // been added and it appears labelled with 0 probes. This doesn't happen if
    // there are any probe lists under all probes as they cause it to be refreshed,
    // but if you only have the probe set then you need this to make the display show
    // the correct information.
    probeSet.setName("All Probes");
}
Aggregations