Search in sources :

Example 51 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method parseAnnotation.

/**
 * Parses an external set of annotations.
 *
 * <p>The header line carries the annotation set name in its second column and
 * the number of feature lines which follow in its third column.  Each feature
 * line is tab delimited: the first column is passed to the {@code Feature}
 * constructor, the second names the chromosome, the third is handed to
 * {@code SplitLocation}, and any remaining columns are read as key/value
 * attribute pairs (a trailing unpaired column is ignored).
 *
 * <p>A feature whose chromosome can't be resolved is skipped, and every
 * registered listener is sent a warning rather than the load being aborted.
 *
 * @param sections The tab split initial annotation line
 * @return the finalised annotation set holding every feature which could be mapped
 * @throws SeqMonkException if the header line doesn't have 3 sections, the file
 *         ends before all features were read, or a feature line has fewer
 *         than 3 sections
 * @throws IOException Signals that an I/O exception has occurred.
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }
        // readLine() returns null at end of file.  Report a truncated file with a
        // meaningful message instead of failing with a NullPointerException.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of annotation data at line " + i + " (expected " + featureCount + " features)");
        }
        sections = line.split("\\t");
        // We index up to sections[2] below, so check the width up front rather
        // than failing later with an ArrayIndexOutOfBoundsException.
        if (sections.length < 3) {
            throw new SeqMonkException("Expected at least 3 sections in annotation feature line but got " + sections.length);
        }
        Chromosome c;
        try {
            c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        } catch (Exception sme) {
            // Deliberately best-effort: an unmappable feature is reported as a
            // warning and skipped so the rest of the annotation still loads.
            SeqMonkException warning = new SeqMonkException("Annotation feature could not be mapped to chromosome '" + sections[1] + "'");
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(warning);
            }
            continue;
        }
        Feature f = new Feature(sections[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(sections[2]));
        // Attributes are stored as consecutive key/value columns after the location
        for (int a = 3; a + 1 < sections.length; a += 2) {
            f.addAttribute(sections[a], sections[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
Also used : Enumeration(java.util.Enumeration) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) FileNotFoundException(java.io.FileNotFoundException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 52 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method run.

/**
 * Reads the SeqMonk file line by line and hands each section header line to
 * the parse method for that section.
 *
 * <p>Only the "SeqMonk Data Version" and "Genome" sections are accepted before
 * a genome has been loaded; every other known section fails with a
 * {@code SeqMonkException} if {@code genomeLoaded} is still false.  Annotation
 * sets are collected as they are parsed and added to the genome's annotation
 * collection when the "Display Preferences" section is reached.
 *
 * <p>Listeners are told about the outcome: {@code progressCancelled} if the
 * genome could not be loaded, {@code progressExceptionReceived} for any other
 * failure, and {@code progressComplete} on success.  The reader is closed on
 * every exit path.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    genomeLoaded = false;
    exceptionReceived = null;
    try {
        String line;
        String[] sections;
        Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
        while ((line = br.readLine()) != null) {
            sections = line.split("\\t");
            String sectionName = sections[0];
            // Now we look where to send this...
            if (sectionName.equals("SeqMonk Data Version")) {
                parseDataVersion(sections);
            } else if (sectionName.equals("Features")) {
                requireGenomeLoaded();
                parseFeatures(sections);
            } else if (sectionName.equals("Samples")) {
                requireGenomeLoaded();
                parseSamples(sections);
            } else if (sectionName.equals("Annotation")) {
                requireGenomeLoaded();
                annotationSets.add(parseAnnotation(sections));
            } else if (sectionName.equals("Data Groups")) {
                requireGenomeLoaded();
                try {
                    parseGroups(sections);
                } catch (SeqMonkException ex) {
                    // Problems flagged as "ambiguous" are downgraded to warnings
                    // so the load can carry on; anything else is fatal.
                    if (ex.getMessage().contains("ambiguous")) {
                        Enumeration<ProgressListener> e = listeners.elements();
                        while (e.hasMoreElements()) {
                            e.nextElement().progressWarningReceived(ex);
                        }
                    } else {
                        throw ex;
                    }
                }
            } else if (sectionName.equals("Replicate Sets")) {
                requireGenomeLoaded();
                parseReplicates(sections);
            } else if (sectionName.equals("Probes")) {
                requireGenomeLoaded();
                parseProbes(sections);
            } else if (sectionName.equals("Lists")) {
                requireGenomeLoaded();
                parseLists(sections);
            } else if (sectionName.equals("Genome")) {
                if (forcedAssembly) {
                    // An assembly was forced on us so the genome section in
                    // the file is skipped and the genome treated as loaded.
                    genomeLoaded = true;
                    continue;
                }
                parseGenome(sections);
                if (exceptionReceived != null) {
                    // The genome couldn't be loaded so we abandon the file.  Close
                    // the reader first so it isn't leaked by this early return.  A
                    // failure to close falls through to the catch block below and
                    // is reported as an exception instead of a cancellation.
                    br.close();
                    Enumeration<ProgressListener> e = listeners.elements();
                    while (e.hasMoreElements()) {
                        e.nextElement().progressCancelled();
                    }
                    return;
                }
            } else if (sectionName.equals("Visible Stores")) {
                requireGenomeLoaded();
                parseVisibleStores(sections);
            } else if (sectionName.equals("Display Preferences")) {
                requireGenomeLoaded();
                // Add any annotation sets we've parsed at this point
                application.dataCollection().genome().annotationCollection().addAnnotationSets(annotationSets.toArray(new AnnotationSet[0]));
                parseDisplayPreferences(sections);
            } else {
                throw new SeqMonkException("Didn't recognise section '" + sectionName + "' in seqmonk file");
            }
        }
        // We're finished with the file
        br.close();
        cleanUpFeatureTracks();
    } catch (Exception ex) {
        Enumeration<ProgressListener> e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressExceptionReceived(ex);
        }
        try {
            br.close();
        } catch (IOException e1) {
            throw new IllegalStateException(e1);
        }
        return;
    }
    Enumeration<ProgressListener> e = listeners.elements();
    while (e.hasMoreElements()) {
        // In this case we put out a dummy empty dataset since
        // we've already entered the data into the collection by now
        e.nextElement().progressComplete("datasets_loaded", new DataSet[0]);
    }
    application.resetChangesWereMade();
}

/**
 * Guards sections which can only be parsed once a genome is in place.
 *
 * @throws SeqMonkException if no genome has been loaded yet
 */
private void requireGenomeLoaded() throws SeqMonkException {
    if (!genomeLoaded) {
        throw new SeqMonkException("No genome definition found before data");
    }
}
Also used : Enumeration(java.util.Enumeration) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) IOException(java.io.IOException) Vector(java.util.Vector) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) FileNotFoundException(java.io.FileNotFoundException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 53 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method parseReplicates.

/**
 * Parses the list of replicate sets.
 *
 * <p>The header line gives the number of replicate set lines which follow.
 * Each of those lines is tab delimited: the first column is the replicate set
 * name and every later column is a member id made of a one letter prefix
 * ({@code g} for a data group, {@code s} for a data set) followed by the
 * integer passed to the data collection to look the member up.
 *
 * @param sections The tab split values from the initial replicates line
 * @throws SeqMonkException if the header line is malformed, the file ends
 *         before all replicate sets were read, a member id has an unknown
 *         prefix, or a member can't be found in the data collection
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseReplicates(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Replicate Sets line didn't contain 2 sections");
    }
    if (!sections[0].equals("Replicate Sets")) {
        throw new SeqMonkException("Couldn't find expected replicates line");
    }
    int n = Integer.parseInt(sections[1]);
    for (int i = 0; i < n; i++) {
        // readLine() returns null at end of file.  Report a truncated file with a
        // meaningful message instead of failing with a NullPointerException.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of replicate set data at line " + i + " (expected " + n + " replicate sets)");
        }
        String[] replicateLine = line.split("\\t");
        // Column 0 is the set name, so there is one member per remaining column
        DataStore[] groupMembers = new DataStore[replicateLine.length - 1];
        for (int j = 1; j < replicateLine.length; j++) {
            String memberId = replicateLine[j];
            String memberIndex;
            DataStore member;
            if (memberId.startsWith("g")) {
                memberIndex = memberId.substring(1);
                member = application.dataCollection().getDataGroup(Integer.parseInt(memberIndex));
            } else if (memberId.startsWith("s")) {
                memberIndex = memberId.substring(1);
                member = application.dataCollection().getDataSet(Integer.parseInt(memberIndex));
            } else {
                throw new SeqMonkException("Replicate member id " + memberId + " didn't start with g or s");
            }
            if (member == null) {
                throw new SeqMonkException("Couldn't find replicate member from position " + memberIndex);
            }
            groupMembers[j - 1] = member;
        }
        ReplicateSet r = new ReplicateSet(replicateLine[0], groupMembers);
        application.dataCollection().addReplicateSet(r);
    }
}
Also used : DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) ReplicateSet(uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 54 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method parseVisibleStores.

/**
 * Reads the section listing the dataStores which should be drawn initially
 * and passes them to the application in a single call.
 *
 * <p>Each store line has two tab separated columns: a reference to the store
 * and its type.  For data versions before 4 the reference is the store's name
 * and only the "set" and "group" types are understood.  From version 4 onwards
 * the reference is an integer handed to the data collection, and "replicate"
 * is understood as well.
 *
 * @param sections The tab split initial line from the visible stores section
 * @throws SeqMonkException if the header or a store line doesn't have 2
 *         sections, the file ends early, or a store type isn't recognised
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseVisibleStores(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Visible stores line didn't contain 2 sections");
    }
    int storeCount = Integer.parseInt(sections[1]);

    // The stores are gathered into one array and handed over together, since
    // adding them one at a time meant redoing a calculation for each addition.
    DataStore[] drawnStores = new DataStore[storeCount];

    // Old files referred to stores by name, which is ambiguous when names are
    // duplicated.  For those we take the first store whose name matches.
    boolean referencedByName = thisDataVersion < 4;
    DataSet[] knownSets = null;
    DataGroup[] knownGroups = null;
    if (referencedByName) {
        knownSets = application.dataCollection().getAllDataSets();
        knownGroups = application.dataCollection().getAllDataGroups();
    }

    for (int index = 0; index < storeCount; index++) {
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of visible store data at line " + index + " (expected " + storeCount + " stores)");
        }
        String[] storeSections = line.split("\\t");
        if (storeSections.length != 2) {
            throw new SeqMonkException("Expected 2 sections in visible store line but got " + storeSections.length);
        }
        String reference = storeSections[0];
        String storeType = storeSections[1];

        if (storeType.equals("set")) {
            if (referencedByName) {
                for (DataSet candidate : knownSets) {
                    if (candidate.name().equals(reference)) {
                        drawnStores[index] = candidate;
                        break;
                    }
                }
            } else {
                drawnStores[index] = application.dataCollection().getDataSet(Integer.parseInt(reference));
            }
        } else if (storeType.equals("group")) {
            if (referencedByName) {
                for (DataGroup candidate : knownGroups) {
                    if (candidate.name().equals(reference)) {
                        drawnStores[index] = candidate;
                        break;
                    }
                }
            } else {
                drawnStores[index] = application.dataCollection().getDataGroup(Integer.parseInt(reference));
            }
        } else if (!referencedByName && storeType.equals("replicate")) {
            drawnStores[index] = application.dataCollection().getReplicateSet(Integer.parseInt(reference));
        } else {
            throw new SeqMonkException("Didn't recognise data type '" + storeType + "' when adding visible stores from line '" + line + "'");
        }
    }
    application.addToDrawnDataStores(drawnStores);
}
Also used : DataGroup(uk.ac.babraham.SeqMonk.DataTypes.DataGroup) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 55 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SeqMonkParser method parseProbes.

/**
 * Parses the list of probes.
 *
 * <p>The header line holds the probe count in its second column, optionally
 * followed by the generator description, the current quantitation, and the
 * probe set comments (stored with backticks in place of newlines).
 *
 * <p>Each probe line starts with the probe name (the literal "null" means no
 * name is set) and the chromosome name.  For data versions before 8 these are
 * followed by start and end columns; later versions use a single packed
 * position column instead.  The remaining columns hold one value per data
 * set, optionally followed by one value per data group; blank values are
 * skipped.
 *
 * @param sections The tab split initial line from the probes section
 * @throws SeqMonkException if the header line is malformed, the file ends
 *         before all probes were read, a probe line has an unexpected number
 *         of sections, or the chromosome can't be found
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseProbes(String[] sections) throws SeqMonkException, IOException {
    if (sections.length < 2) {
        throw new SeqMonkException("Probe line didn't contain at least 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probes line");
    }
    int n = Integer.parseInt(sections[1]);
    probes = new Probe[n];
    String description = "No generator description available";
    if (sections.length > 2) {
        description = sections[2];
    }
    ProbeSet probeSet = new ProbeSet(description, n);
    if (sections.length > 3 && sections[3].length() > 0) {
        probeSet.setCurrentQuantitation(sections[3]);
    }
    if (sections.length > 4) {
        probeSet.setComments(sections[4].replaceAll("`", "\n"));
    }
    // We need to save the probeset to the dataset at this point so we can add the probe
    // lists as we get to them.
    application.dataCollection().setProbeSet(probeSet);
    // Number of leading columns which describe the probe itself.  We used to
    // store name, chr, start and end.  We now store name, chr and a packed
    // position (to give start end and strand).
    int positionOffset;
    if (thisDataVersion < 8) {
        positionOffset = 4;
    } else {
        positionOffset = 3;
    }
    // Default to the newer layout (datasets and datagroups).  This has to be
    // based on positionOffset rather than a fixed 3, otherwise lines from files
    // older than version 8 could pass validation with misaligned columns.
    int expectedSectionLength = positionOffset + dataSets.length + dataGroups.length;
    String line;
    for (int i = 0; i < n; i++) {
        line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        // Since the probes section can have blank trailing sections we need
        // to not trim these, hence the -1 limit.
        sections = line.split("\\t", -1);
        if (i == 0 && sections.length == positionOffset + dataSets.length) {
            /*
             * Older versions of this format put down data for just
             * datasets.  Newer versions include data for datagroups
             * as well.  The first line tells us which one we're looking
             * at, and every later line must then match it.
             */
            expectedSectionLength = positionOffset + dataSets.length;
        }
        if (sections.length != expectedSectionLength) {
            throw new SeqMonkException("Expected " + expectedSectionLength + " sections in data file for " + sections[0] + " but got " + sections.length);
        }
        if (i % 10000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed data for " + i + " probes", i, n);
            }
        }
        Chromosome c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        // Sanity check
        if (c == null) {
            throw new SeqMonkException("Couldn't find a chromosome called " + sections[1]);
        }
        Probe p;
        if (thisDataVersion < 8) {
            int start = Integer.parseInt(sections[2]);
            int end = Integer.parseInt(sections[3]);
            p = new Probe(c, start, end);
        } else {
            long packedValue = Long.parseLong(sections[2]);
            p = new Probe(c, packedValue);
        }
        if (!sections[0].equals("null")) {
            p.setName(sections[0]);
        }
        probes[i] = p;
        probeSet.addProbe(p, null);
        // Value columns come in dataset order first, then datagroup order
        for (int j = positionOffset; j < sections.length; j++) {
            if (sections[j].length() == 0) {
                continue;
            }
            int valueIndex = j - positionOffset;
            if (valueIndex >= dataSets.length) {
                dataGroups[valueIndex - dataSets.length].setValueForProbe(p, Float.parseFloat(sections[j]));
            } else {
                dataSets[valueIndex].setValueForProbe(p, Float.parseFloat(sections[j]));
            }
        }
    }
    application.dataCollection().activeProbeListChanged(probeSet);
    // This rename doesn't actually change the name.  We put this in because
    // the All Probes group is drawn in the data view before probes have been
    // added to it.  This means that its name isn't updated when the probes have
    // been added and it appears labelled with 0 probes.  This doesn't happen if
    // there are any probe lists under all probes as they cause it to be refreshed,
    // but if you only have the probe set then you need this to make the display show
    // the correct information.
    probeSet.setName("All Probes");
}
Also used : ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7