Search in sources:

Example 1 with ProgressListener

use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.

the class GenomeParser method reloadCacheFiles.

/**
 * Rebuilds the core annotation for a genome from previously written cache
 * files rather than re-parsing the original annotation files.
 *
 * <p>Chromosomes are read from {@code <baseLocation>/cache/chr_list}, where
 * each line holds a chromosome name and its length separated by a tab.
 * Every {@code *.cache} file in the cache directory is then registered with
 * a new {@link CoreAnnotationSet}.  Cache file names are expected to look
 * like {@code <chromosome>%<feature type>.cache}.</p>
 *
 * @param genome the genome which receives the chromosomes and annotation
 * @param baseLocation the genome folder which contains the cache directory
 * @throws IllegalStateException if the chromosome list can't be read or
 *         parsed, if the cache directory can't be listed, or if a cache
 *         file name doesn't split into a chromosome and a feature type
 */
private void reloadCacheFiles(SingleGenome genome, File baseLocation) {
    Enumeration<ProgressListener> el = listeners.elements();
    while (el.hasMoreElements()) {
        el.nextElement().progressUpdated("Reloading cache files", 0, 1);
    }
    CoreAnnotationSet coreAnnotation = new CoreAnnotationSet(genome);
    File cacheDir = new File(baseLocation.getAbsoluteFile() + "/cache/");
    // First we need to get the list of chromosomes and set those
    // up before we go on to add the actual feature sets.
    File chrListFile = new File(baseLocation.getAbsoluteFile() + "/cache/chr_list");
    try {
        BufferedReader br = new BufferedReader(new FileReader(chrListFile));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] chrLen = line.split("\\t");
                Chromosome c = genome.addChromosome(chrLen[0]);
                c.setLength(Integer.parseInt(chrLen[1]));
            }
        } finally {
            // Close the reader even when a line fails to parse so that
            // the file handle isn't leaked.
            br.close();
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
    File[] cacheFiles = cacheDir.listFiles(new FileFilter() {

        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".cache");
        }
    });
    // listFiles returns null (not an empty array) when the directory
    // can't be read, so fail with a clear message instead of an NPE.
    if (cacheFiles == null) {
        throw new IllegalStateException("Couldn't list the cache files in '" + cacheDir + "'");
    }
    for (int i = 0; i < cacheFiles.length; i++) {
        // Recover the chromosome and feature type from the file name
        String name = cacheFiles[i].getName();
        name = name.replaceAll("\\.cache$", "");
        String[] chrType = name.split("%", 2);
        if (chrType.length != 2) {
            throw new IllegalStateException("Cache name '" + name + "' didn't split into chr and type");
        }
        // If the feature name had a forward slash in it we've replaced it with 3 underscores
        chrType[1] = chrType[1].replaceAll("___", "/");
        coreAnnotation.addPreCachedFile(chrType[1], chrType[0], cacheFiles[i]);
    }
    genome.annotationCollection().addAnnotationSets(new AnnotationSet[] { coreAnnotation });
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) IOException(java.io.IOException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) CoreAnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.CoreAnnotationSet) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) FileFilter(java.io.FileFilter) File(java.io.File)

Example 2 with ProgressListener

use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.

the class GenomeParser method run.

/**
 * Loads every genome named in {@code baseLocations} and hands the result to
 * the registered listeners.
 *
 * <p>For each genome folder the cached annotation is reused when
 * {@code cache/cache.complete} exists, records the current
 * {@code SeqMonkApplication.VERSION} and is not older than any of the
 * annotation files (.dat, .gff, .gff3, .gtf and their .gz variants) in the
 * folder.  Otherwise the annotation files are parsed again.  An optional
 * {@code aliases.txt} file is read afterwards.</p>
 *
 * <p>A single genome is passed to {@code progressComplete} as it is, more
 * than one is wrapped in a {@link MultiGenome}.</p>
 *
 * @throws IllegalStateException if the aliases file exists but can't be read
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        singleGenomes = new SingleGenome[baseLocations.length];
        for (int i = 0; i < baseLocations.length; i++) {
            singleGenomes[i] = new SingleGenome(baseLocations[i]);
        }
    } catch (SeqMonkException ex) {
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        // The return must sit outside the loop: every listener needs to
        // hear about the failure, and we must stop even if there are no
        // listeners because singleGenomes is incomplete at this point.
        return;
    }
    for (int g = 0; g < singleGenomes.length; g++) {
        File cacheCompleteFile = new File(baseLocations[g].getAbsoluteFile() + "/cache/cache.complete");
        if (cacheCompleteFile.exists()) {
            boolean cacheFailed = false;
            try {
                // Check the version inside the cache.complete file
                BufferedReader br = new BufferedReader(new FileReader(cacheCompleteFile));
                String line;
                try {
                    line = br.readLine();
                } finally {
                    // Don't leak the file handle if the read fails
                    br.close();
                }
                if (line == null || line.length() == 0) {
                    // If there's no version in there then re-parse
                    cacheFailed = true;
                }
                // We re-parse if the cache was made by a different version
                if (!SeqMonkApplication.VERSION.equals(line)) {
                    System.err.println("Version mismatch between cache ('" + line + "') and current version ('" + SeqMonkApplication.VERSION + "') - reparsing");
                    cacheFailed = true;
                }
            } catch (IOException ioe) {
                cacheFailed = true;
            }
            // Check to see if the .dat files have changed since the cache
            // file was saved
            File[] files = baseLocations[g].listFiles(new FileFilter() {

                public boolean accept(File f) {
                    String name = f.getName().toLowerCase();
                    return name.endsWith(".dat") || name.endsWith(".gff") || name.endsWith(".gff3") || name.endsWith(".gtf") || name.endsWith(".gff.gz") || name.endsWith(".gff3.gz") || name.endsWith(".gtf.gz");
                }
            });
            boolean datFilesUpdated = false;
            if (files == null) {
                // listFiles returns null when the folder can't be listed.
                // We can't compare timestamps in that case so we leave the
                // existing cache in place rather than failing with an NPE.
                System.err.println("Couldn't list the files in " + baseLocations[g] + " to check for updates");
            } else {
                for (int f = 0; f < files.length; f++) {
                    if (files[f].lastModified() > cacheCompleteFile.lastModified()) {
                        System.err.println("Modification on " + files[f] + " is newer than on " + cacheCompleteFile + " " + files[f].lastModified() + " vs " + cacheCompleteFile.lastModified());
                        datFilesUpdated = true;
                        break;
                    }
                }
            }
            if (cacheFailed || datFilesUpdated) {
                if (!cacheCompleteFile.delete()) {
                    System.err.println("Failed to delete the existing cache.complete file");
                }
                parseGenomeFiles(singleGenomes[g], baseLocations[g]);
            } else {
                reloadCacheFiles(singleGenomes[g], baseLocations[g]);
            }
        } else {
            System.err.println("File '" + cacheCompleteFile + "' doesn't exist - reparsing");
            parseGenomeFiles(singleGenomes[g], baseLocations[g]);
        }
        File aliasesFile = new File(baseLocations[g].getAbsoluteFile() + "/aliases.txt");
        if (aliasesFile.exists()) {
            try {
                readAliases(aliasesFile, singleGenomes[g]);
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
        }
    }
    Genome genomeToReturn;
    if (singleGenomes.length == 1) {
        genomeToReturn = singleGenomes[0];
    } else {
        genomeToReturn = new MultiGenome(singleGenomes);
    }
    Enumeration<ProgressListener> en = listeners.elements();
    while (en.hasMoreElements()) {
        en.nextElement().progressComplete("load_genome", genomeToReturn);
    }
}
Also used : SingleGenome(uk.ac.babraham.SeqMonk.DataTypes.Genome.SingleGenome) Enumeration(java.util.Enumeration) MultiGenome(uk.ac.babraham.SeqMonk.DataTypes.Genome.MultiGenome) IOException(java.io.IOException) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) FileFilter(java.io.FileFilter) SingleGenome(uk.ac.babraham.SeqMonk.DataTypes.Genome.SingleGenome) Genome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Genome) MultiGenome(uk.ac.babraham.SeqMonk.DataTypes.Genome.MultiGenome) File(java.io.File)

Example 3 with ProgressListener

use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.

the class GenomeParser method parseGenomeFiles.

/**
 * Parses the annotation files which sit directly inside a genome folder and
 * adds everything which was found to the genome as one core annotation set.
 *
 * <p>The chromosome list is parsed first, then each {@code .dat} file is
 * passed to {@code processEMBLFile} and each gff/gtf file (optionally
 * gzipped) to a {@link GFF3AnnotationParser}.  Any failure is reported to
 * the listeners through {@code progressExceptionReceived} and parsing is
 * abandoned without adding the annotation set.</p>
 *
 * @param genome the genome which receives the parsed annotation
 * @param baseLocation the folder which contains the annotation files
 */
private void parseGenomeFiles(SingleGenome genome, File baseLocation) {
    // Start with the chromosome list file
    // which defines the size and extent of the chromosomes
    try {
        parseChrListFile(genome, baseLocation);
    } catch (Exception ex) {
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        return;
    }
    // We need a list of all of the .dat files inside the baseLocation
    File[] files = baseLocation.listFiles(new FileFilter() {

        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".dat");
        }
    });
    // listFiles returns null if the folder can't be listed.  Report that
    // the same way as any other parsing failure instead of hitting an NPE.
    if (files == null) {
        Exception listFailure = new IllegalStateException("Couldn't list the files in genome folder '" + baseLocation + "'");
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(listFailure);
        }
        return;
    }
    AnnotationSet coreAnnotation = new CoreAnnotationSet(genome);
    for (int i = 0; i < files.length; i++) {
        // Update the listeners
        Enumeration<ProgressListener> e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressUpdated("Loading Genome File " + files[i].getName(), i, files.length);
        }
        try {
            processEMBLFile(files[i], coreAnnotation, genome);
        } catch (Exception ex) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressExceptionReceived(ex);
            }
            return;
        }
    }
    // Update the listeners
    Enumeration<ProgressListener> e = listeners.elements();
    while (e.hasMoreElements()) {
        e.nextElement().progressUpdated("Caching annotation data", 1, 1);
    }
    // Now do the same thing for gff files.
    // We need a list of all of the .gff/gtf files inside the baseLocation
    files = baseLocation.listFiles(new FileFilter() {

        public boolean accept(File f) {
            String name = f.getName().toLowerCase();
            return name.endsWith(".gff") || name.endsWith(".gtf") || name.endsWith(".gff3") || name.endsWith(".gff.gz") || name.endsWith(".gtf.gz") || name.endsWith(".gff3.gz");
        }
    });
    if (files == null) {
        Exception listFailure = new IllegalStateException("Couldn't list the files in genome folder '" + baseLocation + "'");
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(listFailure);
        }
        return;
    }
    GFF3AnnotationParser gffParser = new GFF3AnnotationParser(genome);
    for (int i = 0; i < files.length; i++) {
        // Update the listeners
        e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressUpdated("Loading Genome File " + files[i].getName(), i, files.length);
        }
        try {
            // Fold every feature the parser produced into the single
            // core annotation set
            AnnotationSet[] newSets = gffParser.parseAnnotation(files[i], genome, "");
            for (int s = 0; s < newSets.length; s++) {
                Feature[] features = newSets[s].getAllFeatures();
                for (int f = 0; f < features.length; f++) {
                    coreAnnotation.addFeature(features[f]);
                }
            }
        } catch (Exception ex) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressExceptionReceived(ex);
            }
            return;
        }
    }
    // Update the listeners
    e = listeners.elements();
    while (e.hasMoreElements()) {
        e.nextElement().progressUpdated("Caching annotation data", 1, 1);
    }
    genome.annotationCollection().addAnnotationSets(new AnnotationSet[] { coreAnnotation });
// Debugging - put out some stats
// System.err.println("Made genome with "+genome.getAllChromosomes().length+" chromosomes");
// System.err.println("There are "+genome.annotationCollection().listAvailableFeatureTypes().length+" different feature types");
}
Also used : Enumeration(java.util.Enumeration) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) CoreAnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.CoreAnnotationSet) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) IOException(java.io.IOException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) CoreAnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.CoreAnnotationSet) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) FileFilter(java.io.FileFilter) File(java.io.File)

Example 4 with ProgressListener

use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.

the class SeqMonkParser method parseLists.

/**
 * Parses the set of probe lists.
 *
 * <p>The section starts with one line per probe list, followed by a
 * "Probes" header line giving the number of probes and then one line per
 * probe.  Each probe line holds the probe name followed by one column per
 * list, which is either a value, "NaN", or blank if the probe isn't in
 * that list.</p>
 *
 * @param sections The tab split initial line from the probe lists section
 * @throws SeqMonkException if the section is truncated or a line doesn't
 *         have the expected structure
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseLists(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Probe Lists line didn't contain 2 sections");
    }
    int n = Integer.parseInt(sections[1]);
    ProbeList[] lists = new ProbeList[n];
    // We also store the probe lists in their appropriate linkage position
    // to recreate the links between probe lists.  The worst case scenario
    // is that we have one big chain of linked lists so we make a linkage
    // list which is the same size as the number of probe lists.
    ProbeList[] linkage = new ProbeList[n + 1];
    // The 0 linkage list will always be the full ProbeSet
    linkage[0] = application.dataCollection().probeSet();
    for (int i = 0; i < n; i++) {
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        String[] listSections = line.split("\\t", -1);
        // Data versions before 5 have no linkage column and every list
        // hangs directly off the probe set.  The third field in those
        // files is optional so we allow for that not being present.
        if (thisDataVersion < 5) {
            if (listSections.length < 2) {
                throw new SeqMonkException("Probe list line " + i + " had " + listSections.length + " sections but at least 2 were expected");
            }
            lists[i] = new ProbeList(application.dataCollection().probeSet(), listSections[0], "", listSections[1]);
            if (listSections.length > 2) {
                lists[i].setDescription(listSections[2]);
            } else {
                lists[i].setDescription("No description");
            }
        } else {
            if (listSections.length < 4) {
                throw new SeqMonkException("Probe list line " + i + " had " + listSections.length + " sections but at least 4 were expected");
            }
            // The first field is this list's linkage position.  Its parent
            // is whichever list currently occupies the position before it.
            int linkageIndex = Integer.parseInt(listSections[0]);
            if (linkageIndex < 1 || linkageIndex > n) {
                throw new SeqMonkException("Probe list line " + i + " had linkage position " + linkageIndex + " which is outside the range 1-" + n);
            }
            lists[i] = new ProbeList(linkage[linkageIndex - 1], listSections[1], listSections[3], listSections[2]);
            if (listSections.length > 4) {
                // Backticks in the stored comments are turned back into newlines
                lists[i].setComments(listSections[4].replaceAll("`", "\n"));
            }
            linkage[linkageIndex] = lists[i];
        }
    }
    // Next we reach the probe list data.  These comes as a long list of values
    // the first of which is the probe name, then either a numerical value if
    // the probe is contained in that list, or a blank if it isn't.
    String line = br.readLine();
    if (line == null) {
        throw new SeqMonkException("Couldn't find probe line for list data");
    }
    sections = line.split("\\t");
    if (sections.length != 2) {
        throw new SeqMonkException("Probe line didn't contain 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probe lists probe line");
    }
    n = Integer.parseInt(sections[1]);
    for (int i = 0; i < n; i++) {
        String dataLine = br.readLine();
        // A truncated file should give a parsing error like the one raised
        // for the list definitions above, not a NullPointerException.
        if (dataLine == null) {
            throw new SeqMonkException("Ran out of probe list data at line " + i + " (expected " + n + " probes)");
        }
        sections = dataLine.split("\\t", -1);
        if (sections.length != lists.length + 1) {
            throw new SeqMonkException("Expected " + (lists.length + 1) + " sections in probe list section of data file for " + sections[0] + " but got " + sections.length);
        }
        if (i % 1000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed list data for " + i + " probes", i, n);
            }
        }
        Probe p = probes[i];
        if (p == null) {
            // There's no probe stored at this position so its values are skipped
            continue;
        }
        for (int j = 0; j < lists.length; j++) {
            String value = sections[j + 1];
            if (value.length() > 0) {
                if (value.equals("NaN")) {
                    lists[j].addProbe(p, null);
                } else {
                    lists[j].addProbe(p, Float.valueOf(value));
                }
            }
        }
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 5 with ProgressListener

use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.

the class PCA method main.

/**
 * Small manual test which runs a PCA over a fixed two row matrix, echoes
 * the progress messages and prints the extracted eigenvalues.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    double[][] matrix = new double[][] { { 2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1 }, { 2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9 } };
    // Alternative input - a 100x100 symmetric matrix of random values
    // with 1 on the diagonal
    // double [][] matrix = new double [100][100];
    // 
    // for (int i=0;i<matrix.length;i++) {
    // for (int j=i;j<matrix.length;j++) {
    // if (j==i) {
    // matrix[i][j] = 1;
    // }
    // else {
    // double corr = Math.random();
    // matrix[i][j] = corr;
    // matrix[j][i] = corr;
    // }
    // }
    // }
    PCA pca = new PCA(matrix);
    pca.addProgressListener(new ProgressListener() {

        public void progressWarningReceived(Exception e) {
            e.printStackTrace();
        }

        public void progressUpdated(String message, int current, int max) {
            System.out.println(message);
        }

        public void progressExceptionReceived(Exception e) {
            e.printStackTrace();
        }

        public void progressComplete(String command, Object result) {
            System.out.println("Complete");
        }

        public void progressCancelled() {
            System.err.println("Cancelled");
        }
    });
    pca.startCalculating();
    // Poll until the eigenvalues have been populated.
    // NOTE(review): if the calculation fails or is cancelled the field
    // presumably stays null and this loop never ends - confirm.
    while (pca.extractedEigenValues == null) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop waiting rather than
            // swallowing the interrupt and continuing to spin.
            Thread.currentThread().interrupt();
            return;
        }
    }
    for (int i = 0; i < pca.extractedEigenValues.length; i++) {
        System.out.println("Eigenvalue for " + i + " is " + pca.extractedEigenValues[i]);
    }
}
Also used : ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)

Aggregations

ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)18 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)12 IOException (java.io.IOException)10 Enumeration (java.util.Enumeration)7 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)7 File (java.io.File)6 BufferedReader (java.io.BufferedReader)4 FileNotFoundException (java.io.FileNotFoundException)4 FileReader (java.io.FileReader)4 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)4 FileFilter (java.io.FileFilter)3 AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet)3 CoreAnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.CoreAnnotationSet)3 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)3 BufferedOutputStream (java.io.BufferedOutputStream)2 FileOutputStream (java.io.FileOutputStream)2 PrintWriter (java.io.PrintWriter)2 UnknownHostException (java.net.UnknownHostException)2 Vector (java.util.Vector)2 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)2