use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.
the class GenomeParser method reloadCacheFiles.
/**
 * Restores the core annotation of a genome from the files in its cache
 * directory instead of re-parsing the original annotation files.
 *
 * The method first reads {@code cache/chr_list}, where each line holds a
 * chromosome name and its length separated by a tab, and registers those
 * chromosomes with the genome. It then registers every {@code *.cache}
 * file in the cache directory with a new CoreAnnotationSet. Cache file
 * names have the form {@code chr%type.cache}.
 *
 * @param genome The genome to which chromosomes and annotation are added
 * @param baseLocation The genome folder which contains the cache directory
 * @throws IllegalStateException if the chr_list file can't be read or
 * parsed, if the cache directory can't be listed, or if a cache file name
 * doesn't split into a chromosome and a feature type
 */
private void reloadCacheFiles(SingleGenome genome, File baseLocation) {
    Enumeration<ProgressListener> el = listeners.elements();
    while (el.hasMoreElements()) {
        el.nextElement().progressUpdated("Reloading cache files", 0, 1);
    }
    CoreAnnotationSet coreAnnotation = new CoreAnnotationSet(genome);
    File cacheDir = new File(baseLocation.getAbsoluteFile() + "/cache/");
    // First we need to get the list of chromosomes and set those
    // up before we go on to add the actual feature sets.
    File chrListFile = new File(baseLocation.getAbsoluteFile() + "/cache/chr_list");
    try {
        BufferedReader br = new BufferedReader(new FileReader(chrListFile));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] chrLen = line.split("\\t");
                Chromosome c = genome.addChromosome(chrLen[0]);
                c.setLength(Integer.parseInt(chrLen[1]));
            }
        } finally {
            // Make sure the reader is released even if a line fails to parse
            br.close();
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
    final String cacheSuffix = ".cache";
    File[] cacheFiles = cacheDir.listFiles(new FileFilter() {
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(cacheSuffix);
        }
    });
    // listFiles returns null (rather than an empty array) if the directory
    // can't be read, so fail with a useful message instead of an NPE.
    if (cacheFiles == null) {
        throw new IllegalStateException("Couldn't list the cache files in '" + cacheDir + "'");
    }
    for (int i = 0; i < cacheFiles.length; i++) {
        // The filter matched the suffix case insensitively, so we strip it by
        // length rather than with a case sensitive pattern.
        String fileName = cacheFiles[i].getName();
        String name = fileName.substring(0, fileName.length() - cacheSuffix.length());
        String[] chrType = name.split("%", 2);
        if (chrType.length != 2) {
            throw new IllegalStateException("Cache name '" + name + "' didn't split into chr and type");
        }
        // If the feature name had a forward slash in it we've replaced it with 3 underscores
        chrType[1] = chrType[1].replace("___", "/");
        coreAnnotation.addPreCachedFile(chrType[1], chrType[0], cacheFiles[i]);
    }
    genome.annotationCollection().addAnnotationSets(new AnnotationSet[] { coreAnnotation });
}
use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.
the class GenomeParser method run.
/**
 * Loads every genome listed in baseLocations and passes the result to the
 * registered listeners.
 *
 * For each genome the cached annotation is reused when a
 * {@code cache/cache.complete} file exists, was written by the current
 * SeqMonk version, and is not older than any annotation file
 * (.dat/.gff/.gff3/.gtf, optionally .gz for the gff/gtf types) in the
 * genome folder. Otherwise the original files are parsed again. An
 * {@code aliases.txt} file is read afterwards if one is present.
 *
 * If a SingleGenome can't be created every listener is told about the
 * exception and loading stops.
 *
 * NOTE(review): parseGenomeFiles reports its own failures to the listeners
 * and returns normally, so progressComplete is still sent after such a
 * failure - confirm whether listeners rely on this.
 *
 * @throws IllegalStateException if the aliases file can't be read
 * @see java.lang.Runnable#run()
 */
public void run() {
    try {
        singleGenomes = new SingleGenome[baseLocations.length];
        for (int i = 0; i < baseLocations.length; i++) {
            singleGenomes[i] = new SingleGenome(baseLocations[i]);
        }
    } catch (SeqMonkException ex) {
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        // Stop only once ALL listeners have been told, and stop even if
        // there were no listeners, since some genomes were never created.
        return;
    }
    for (int g = 0; g < singleGenomes.length; g++) {
        File cacheCompleteFile = new File(baseLocations[g].getAbsoluteFile() + "/cache/cache.complete");
        if (cacheCompleteFile.exists()) {
            boolean cacheFailed = false;
            try {
                // Check the version inside the cache.complete file
                String line;
                BufferedReader br = new BufferedReader(new FileReader(cacheCompleteFile));
                try {
                    line = br.readLine();
                } finally {
                    // Release the file even if the read fails
                    br.close();
                }
                if (line == null || line.length() == 0) {
                    // If there's no version in there then re-parse
                    cacheFailed = true;
                }
                // We re-parse if the cache was made by a different version
                if (!SeqMonkApplication.VERSION.equals(line)) {
                    System.err.println("Version mismatch between cache ('" + line + "') and current version ('" + SeqMonkApplication.VERSION + "') - reparsing");
                    cacheFailed = true;
                }
            } catch (IOException ioe) {
                cacheFailed = true;
            }
            // Check to see if the .dat files have changed since the cache
            // file was saved
            File[] files = baseLocations[g].listFiles(new FileFilter() {
                public boolean accept(File f) {
                    String name = f.getName().toLowerCase();
                    return name.endsWith(".dat") || name.endsWith(".gff") || name.endsWith(".gff3") || name.endsWith(".gtf") || name.endsWith(".gff.gz") || name.endsWith(".gff3.gz") || name.endsWith(".gtf.gz");
                }
            });
            boolean datFilesUpdated = false;
            if (files == null) {
                // listFiles returns null when the folder can't be read. We
                // can't compare timestamps in that case, so we keep using
                // the cache rather than failing with an NPE.
                System.err.println("Couldn't list the annotation files in '" + baseLocations[g] + "' - unable to check whether the cache is out of date");
            } else {
                for (int f = 0; f < files.length; f++) {
                    if (files[f].lastModified() > cacheCompleteFile.lastModified()) {
                        System.err.println("Modification on " + files[f] + " is newer than on " + cacheCompleteFile + " " + files[f].lastModified() + " vs " + cacheCompleteFile.lastModified());
                        datFilesUpdated = true;
                        break;
                    }
                }
            }
            if (cacheFailed || datFilesUpdated) {
                if (!cacheCompleteFile.delete()) {
                    System.err.println("Failed to delete the existing cache.complete file");
                }
                parseGenomeFiles(singleGenomes[g], baseLocations[g]);
            } else {
                reloadCacheFiles(singleGenomes[g], baseLocations[g]);
            }
        } else {
            System.err.println("File '" + cacheCompleteFile + "' doesn't exist - reparsing");
            parseGenomeFiles(singleGenomes[g], baseLocations[g]);
        }
        File aliasesFile = new File(baseLocations[g].getAbsoluteFile() + "/aliases.txt");
        if (aliasesFile.exists()) {
            try {
                readAliases(aliasesFile, singleGenomes[g]);
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
        }
    }
    // A single genome is returned directly, several are wrapped together
    Genome genomeToReturn;
    if (singleGenomes.length == 1) {
        genomeToReturn = singleGenomes[0];
    } else {
        genomeToReturn = new MultiGenome(singleGenomes);
    }
    Enumeration<ProgressListener> en = listeners.elements();
    while (en.hasMoreElements()) {
        en.nextElement().progressComplete("load_genome", genomeToReturn);
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.
the class GenomeParser method parseGenomeFiles.
/**
 * Builds the core annotation for a genome by parsing the original
 * annotation files in its folder.
 *
 * The chromosome list is parsed first, then every .dat file is passed to
 * processEMBLFile, then every .gff/.gtf/.gff3 file (optionally .gz) is
 * parsed with a GFF3AnnotationParser and its features are added to the
 * same core annotation set. Progress is reported to the listeners
 * throughout. On any failure the exception is passed to all listeners and
 * the method returns without adding the annotation to the genome.
 *
 * @param genome The genome to populate
 * @param baseLocation The folder containing the genome's annotation files
 */
private void parseGenomeFiles(SingleGenome genome, File baseLocation) {
    // First we need to parse the chromosome list
    // which defines the size and extent of the chromosomes
    try {
        parseChrListFile(genome, baseLocation);
    } catch (Exception ex) {
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        return;
    }
    // We need a list of all of the .dat files inside the baseLocation
    File[] files = baseLocation.listFiles(new FileFilter() {
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".dat");
        }
    });
    // listFiles returns null when the folder can't be read. Report that the
    // same way as every other failure here rather than dying with an NPE.
    if (files == null) {
        Exception ex = new IllegalStateException("Couldn't list the files in '" + baseLocation + "'");
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        return;
    }
    AnnotationSet coreAnnotation = new CoreAnnotationSet(genome);
    for (int i = 0; i < files.length; i++) {
        // Update the listeners
        Enumeration<ProgressListener> e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressUpdated("Loading Genome File " + files[i].getName(), i, files.length);
        }
        try {
            processEMBLFile(files[i], coreAnnotation, genome);
        } catch (Exception ex) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressExceptionReceived(ex);
            }
            return;
        }
    }
    // Update the listeners
    Enumeration<ProgressListener> e = listeners.elements();
    while (e.hasMoreElements()) {
        e.nextElement().progressUpdated("Caching annotation data", 1, 1);
    }
    // Now do the same thing for gff files.
    // We need a list of all of the .gff/gtf files inside the baseLocation
    files = baseLocation.listFiles(new FileFilter() {
        public boolean accept(File f) {
            String name = f.getName().toLowerCase();
            return name.endsWith(".gff") || name.endsWith(".gtf") || name.endsWith(".gff3") || name.endsWith(".gff.gz") || name.endsWith(".gtf.gz") || name.endsWith(".gff3.gz");
        }
    });
    if (files == null) {
        Exception ex = new IllegalStateException("Couldn't list the files in '" + baseLocation + "'");
        Enumeration<ProgressListener> en = listeners.elements();
        while (en.hasMoreElements()) {
            en.nextElement().progressExceptionReceived(ex);
        }
        return;
    }
    GFF3AnnotationParser gffParser = new GFF3AnnotationParser(genome);
    for (int i = 0; i < files.length; i++) {
        // Update the listeners
        e = listeners.elements();
        while (e.hasMoreElements()) {
            e.nextElement().progressUpdated("Loading Genome File " + files[i].getName(), i, files.length);
        }
        try {
            // Everything the parser produces is merged into the single
            // core annotation set.
            AnnotationSet[] newSets = gffParser.parseAnnotation(files[i], genome, "");
            for (int s = 0; s < newSets.length; s++) {
                Feature[] features = newSets[s].getAllFeatures();
                for (int f = 0; f < features.length; f++) {
                    coreAnnotation.addFeature(features[f]);
                }
            }
        } catch (Exception ex) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressExceptionReceived(ex);
            }
            return;
        }
    }
    // Update the listeners
    e = listeners.elements();
    while (e.hasMoreElements()) {
        e.nextElement().progressUpdated("Caching annotation data", 1, 1);
    }
    genome.annotationCollection().addAnnotationSets(new AnnotationSet[] { coreAnnotation });
}
use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.
the class SeqMonkParser method parseLists.
/**
 * Parses the set of probe lists.
 *
 * The section starts with one line per probe list describing the list,
 * followed by a "Probes" header line giving the number of probes, and
 * then one line per probe. Each probe line holds the probe name followed
 * by one column per list: a number if the probe is in that list, "NaN" if
 * it is in the list without a value, or an empty string if it isn't in
 * the list.
 *
 * @param sections The tab split initial line from the probe lists section
 * @throws SeqMonkException if the section is truncated or a line doesn't
 * have the expected structure
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void parseLists(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 2) {
        throw new SeqMonkException("Probe Lists line didn't contain 2 sections");
    }
    int n = Integer.parseInt(sections[1]);
    ProbeList[] lists = new ProbeList[n];
    // We also store the probe lists in their appropriate linkage position
    // to recreate the links between probe lists. The worst case scenario
    // is that we have one big chain of linked lists so we make a linkage
    // list which is the same size as the number of probe lists.
    ProbeList[] linkage = new ProbeList[n + 1];
    // The 0 linkage list will always be the full ProbeSet
    linkage[0] = application.dataCollection().probeSet();
    for (int i = 0; i < n; i++) {
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
        }
        String[] listSections = line.split("\\t", -1);
        if (thisDataVersion < 5) {
            if (listSections.length < 2) {
                throw new SeqMonkException("Probe list line " + i + " contained " + listSections.length + " sections but at least 2 were expected");
            }
            lists[i] = new ProbeList(application.dataCollection().probeSet(), listSections[0], "", listSections[1]);
            // The description is an optional third field in these
            // versions so we allow for that not being present.
            if (listSections.length > 2) {
                lists[i].setDescription(listSections[2]);
            } else {
                lists[i].setDescription("No description");
            }
        } else {
            if (listSections.length < 4) {
                throw new SeqMonkException("Probe list line " + i + " contained " + listSections.length + " sections but at least 4 were expected");
            }
            // The first field is this list's position in the linkage
            // chain; its parent is whatever sits one position above it.
            int linkagePosition = Integer.parseInt(listSections[0]);
            if (linkagePosition < 1 || linkagePosition > n) {
                throw new SeqMonkException("Probe list line " + i + " had linkage position " + linkagePosition + " which is outside the range 1-" + n);
            }
            lists[i] = new ProbeList(linkage[linkagePosition - 1], listSections[1], listSections[3], listSections[2]);
            if (listSections.length > 4) {
                // Newlines in comments are stored as backticks
                lists[i].setComments(listSections[4].replaceAll("`", "\n"));
            }
            linkage[linkagePosition] = lists[i];
        }
    }
    // Next we reach the probe list data. These comes as a long list of values
    // the first of which is the probe name, then either a numerical value if
    // the probe is contained in that list, or a blank if it isn't.
    String line = br.readLine();
    if (line == null) {
        throw new SeqMonkException("Couldn't find probe line for list data");
    }
    sections = line.split("\\t");
    if (sections.length != 2) {
        throw new SeqMonkException("Probe line didn't contain 2 sections");
    }
    if (!sections[0].equals("Probes")) {
        throw new SeqMonkException("Couldn't find expected probe lists probe line");
    }
    n = Integer.parseInt(sections[1]);
    for (int i = 0; i < n; i++) {
        String probeLine = br.readLine();
        if (probeLine == null) {
            // A truncated file should be reported, not cause an NPE
            throw new SeqMonkException("Ran out of probe list data at line " + i + " (expected " + n + " probes)");
        }
        sections = probeLine.split("\\t", -1);
        if (sections.length != lists.length + 1) {
            throw new SeqMonkException("Expected " + (lists.length + 1) + " sections in probe list section of data file for " + sections[0] + " but got " + sections.length);
        }
        if (i % 1000 == 0) {
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressUpdated("Processed list data for " + i + " probes", i, n);
            }
        }
        Probe p = probes[i];
        if (p == null) {
            continue;
        }
        for (int j = 0; j < lists.length; j++) {
            String value = sections[j + 1];
            if (value.length() > 0) {
                if (value.equals("NaN")) {
                    lists[j].addProbe(p, null);
                } else {
                    lists[j].addProbe(p, Float.valueOf(value));
                }
            }
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.ProgressListener in project SeqMonk by s-andrews.
the class PCA method main.
/**
 * Small manual test which runs a PCA on a fixed 2 x 10 matrix and prints
 * the resulting eigenvalues.
 *
 * The calculation is started with startCalculating and this method polls
 * until the eigenvalues have been set. It gives up if the listener is
 * told about an exception or a cancellation, or if this thread is
 * interrupted while waiting.
 *
 * @param args Not used
 */
public static void main(String[] args) {
    double[][] matrix = new double[][] { { 2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1 }, { 2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9 } };
    PCA pca = new PCA(matrix);
    // Set by the listener when the calculation ends without a result so
    // that the polling loop below doesn't wait forever.
    final java.util.concurrent.atomic.AtomicBoolean aborted = new java.util.concurrent.atomic.AtomicBoolean(false);
    pca.addProgressListener(new ProgressListener() {
        public void progressWarningReceived(Exception e) {
            e.printStackTrace();
        }
        public void progressUpdated(String message, int current, int max) {
            System.out.println(message);
        }
        public void progressExceptionReceived(Exception e) {
            e.printStackTrace();
            aborted.set(true);
        }
        public void progressComplete(String command, Object result) {
            System.out.println("Complete");
        }
        public void progressCancelled() {
            System.err.println("Cancelled");
            aborted.set(true);
        }
    });
    pca.startCalculating();
    while (pca.extractedEigenValues == null) {
        if (aborted.get()) {
            return;
        }
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop waiting; carrying on
            // would just make every following sleep fail immediately.
            Thread.currentThread().interrupt();
            return;
        }
    }
    for (int i = 0; i < pca.extractedEigenValues.length; i++) {
        System.out.println("Eigenvalue for " + i + " is " + pca.extractedEigenValues[i]);
    }
}
Aggregations