use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class DataSet method finalise.
/**
 * This call optimises the data structure from a flexible structure
 * which can accept more data, to a fixed structure optimised for
 * size and speed of access. If required it can also cache the data
 * to disk.
 *
 * Every chromosome held in readData is asked to finalise itself. A new
 * chromosome is only submitted while fewer than MAX_CONCURRENT_FINALISE
 * are outstanding according to chromosomesStillToFinalise, and the call
 * does not return until that counter is back at zero. Calling this on a
 * data set which is already finalised does nothing.
 *
 * The wait is never abandoned part way through. If the calling thread is
 * interrupted while waiting, the interrupt is remembered and the thread's
 * interrupt status is restored just before this method returns, so the
 * caller can still observe it.
 *
 * This call should only be made by DataParsers who know that no
 * more data will be added.
 */
public synchronized void finalise() {
    if (isFinalised)
        return;

    // Set if we were interrupted while polling; re-asserted in the finally block.
    boolean interrupted = false;

    try {
        // To make querying the data more efficient we're going to convert
        // all of the vectors in our data structure into SequenceRead arrays
        // which are sorted by start position. This means that subsequent
        // access will be a lot more efficient.
        Enumeration<Chromosome> e = readData.keys();
        chromosomesStillToFinalise = new ThreadSafeIntCounter();

        while (e.hasMoreElements()) {

            // Hold off submitting another chromosome until we're below the limit.
            while (chromosomesStillToFinalise.value() >= MAX_CONCURRENT_FINALISE) {
                try {
                    Thread.sleep(20);
                } catch (InterruptedException ex) {
                    // sleep() has cleared the interrupt status, so the loop keeps
                    // polling normally. Record it so it isn't lost.
                    interrupted = true;
                }
            }

            Chromosome c = e.nextElement();
            chromosomesStillToFinalise.increment();
            // NOTE(review): the counter is presumably decremented by the
            // per-chromosome finalise once it has completed - that code is
            // not visible from here.
            readData.get(c).finalise();
        }

        // Everything has been submitted, now wait for the stragglers.
        while (chromosomesStillToFinalise.value() > 0) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException ex) {
                interrupted = true;
            }
        }

        isFinalised = true;
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class DataSet method run.
/**
 * Deletes any cache files this data set is still holding.
 *
 * Each chromosome key in readData is looked up and its
 * readsWithCountsTempFile is examined. Entries without a file are
 * skipped. Where a file is present it is deleted, and a message is
 * written to stderr if the deletion reports failure.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    for (Enumeration<Chromosome> chromosomes = readData.keys(); chromosomes.hasMoreElements();) {
        File cacheFile = readData.get(chromosomes.nextElement()).readsWithCountsTempFile;

        // Nothing was cached for this chromosome.
        if (cacheFile == null) {
            continue;
        }

        // Successfully removed - move on to the next one.
        if (cacheFile.delete()) {
            continue;
        }

        System.err.println("Failed to delete cache file " + cacheFile.getAbsolutePath());
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class PairedDataSet method finalise.
/**
 * This call optimises the data structure from a flexible structure
 * which can accept more data, to a fixed structure optimised for
 * size and speed of access. If required it can also cache the data
 * to disk.
 *
 * Every chromosome held in readData is asked to finalise itself. A new
 * chromosome is only submitted while fewer than MAX_CONCURRENT_FINALISE
 * are outstanding according to chromosomesStillToFinalise, and once that
 * counter is back at zero the superclass finalise is run as well. Timings
 * for the stages are written to stderr. Calling this on a data set which
 * is already finalised does nothing.
 *
 * The wait is never abandoned part way through. If the calling thread is
 * interrupted while waiting, the interrupt is remembered and the thread's
 * interrupt status is restored just before this method returns, so the
 * caller can still observe it.
 *
 * This call should only be made by DataParsers who know that no
 * more data will be added.
 */
public synchronized void finalise() {
    if (isFinalised)
        return;

    // Set if we were interrupted while polling; re-asserted in the finally block.
    boolean interrupted = false;

    try {
        // To make querying the data more efficient we're going to convert
        // all of the vectors in our data structure into SequenceRead arrays
        // which are sorted by start position. This means that subsequent
        // access will be a lot more efficient.
        long finaliseStartTime = System.currentTimeMillis();

        Enumeration<Chromosome> e = readData.keys();
        chromosomesStillToFinalise = new ThreadSafeIntCounter();

        while (e.hasMoreElements()) {

            // Hold off submitting another chromosome until we're below the limit.
            while (chromosomesStillToFinalise.value() >= MAX_CONCURRENT_FINALISE) {
                try {
                    Thread.sleep(20);
                } catch (InterruptedException ex) {
                    // sleep() has cleared the interrupt status, so the loop keeps
                    // polling normally. Record it so it isn't lost.
                    interrupted = true;
                }
            }

            Chromosome c = e.nextElement();
            chromosomesStillToFinalise.increment();
            // NOTE(review): the counter is presumably decremented by the
            // per-chromosome finalise once it has completed - that code is
            // not visible from here.
            readData.get(c).finalise();
        }

        // Everything has been submitted, now wait for the stragglers.
        while (chromosomesStillToFinalise.value() > 0) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException ex) {
                interrupted = true;
            }
        }

        isFinalised = true;

        long finaliseEndTime = System.currentTimeMillis();

        // Finally, now that we've sent all of the correct data up to the superclass
        // we can let that finalise itself.

        // Nothing in this method runs between finaliseEndTime and superSubEndTime,
        // so the "super submit" figure reported below only covers the gap between
        // two consecutive clock reads.
        long superSubEndTime = System.currentTimeMillis();

        super.finalise();

        long superFinaliseEndTime = System.currentTimeMillis();

        System.err.println("HiC finalise=" + ((finaliseEndTime - finaliseStartTime) / 1000d) + " super submit=" + ((superSubEndTime - finaliseEndTime) / 1000d) + " super finalise=" + ((superFinaliseEndTime - superSubEndTime) / 1000d));
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class DistanceToFeatureQuantitation method run.
/**
 * Assigns to every probe the distance to its closest feature of the
 * selected type on the same chromosome, and stores that value (optionally
 * log2 transformed) against the probe in every entry of data.
 *
 * The feature list is refetched whenever the probe's chromosome differs
 * from that of the previous probe. For each probe the search starts at the
 * index of the feature chosen for the previous probe and looks both
 * backwards and forwards from there. The starting distance is the
 * chromosome length, which is therefore the value stored when no feature
 * gives a smaller distance.
 *
 * The cancel flag is checked before each probe; if it is set
 * progressCancelled() is called and the method returns without calling
 * quantitatonComplete().
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    final Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();

    Feature[] chrFeatures = null;
    Chromosome currentChr = null;
    int searchStart = 0;

    for (Probe probe : allProbes) {

        // Bail out if the user has asked us to stop
        if (cancel) {
            progressCancelled();
            return;
        }

        // Moving to a different chromosome means a new feature list and a
        // search which starts again from the first feature.
        Chromosome probeChr = probe.chromosome();
        if (currentChr == null || probeChr != currentChr) {
            currentChr = probeChr;
            chrFeatures = application.dataCollection().genome().annotationCollection().getFeaturesForType(currentChr, selectedFeature);
            System.err.println("Found " + chrFeatures.length + " features of type " + selectedFeature + " on chr " + currentChr);
            searchStart = 0;
        }

        int nearestDistance = currentChr.length();
        int nearestIndex = searchStart;

        // Walk backwards from the starting feature, stopping after the first
        // feature which ends before this probe starts. The upper bound check
        // also keeps us out of an empty feature array.
        int back = searchStart;
        while (back >= 0 && back < chrFeatures.length) {
            Feature candidate = chrFeatures[back];
            int distance = getDistanceToFeature(probe, candidate);
            if (distance < nearestDistance) {
                nearestDistance = distance;
                nearestIndex = back;
            }
            if (candidate.location().end() < probe.start()) {
                break;
            }
            back--;
        }

        // Then walk forwards, stopping after the first feature which starts
        // beyond both the end of the probe and the end of the starting feature.
        for (int fwd = searchStart + 1; fwd < chrFeatures.length; fwd++) {
            Feature candidate = chrFeatures[fwd];
            int distance = getDistanceToFeature(probe, candidate);
            if (distance < nearestDistance) {
                nearestDistance = distance;
                nearestIndex = fwd;
            }
            if (candidate.location().start() > Math.max(probe.end(), chrFeatures[searchStart].location().end())) {
                break;
            }
        }

        // The next probe begins its search from the feature we settled on.
        searchStart = nearestIndex;

        for (int store = 0; store < data.length; store++) {
            if (!logTransform.isSelected()) {
                data[store].setValueForProbe(probe, nearestDistance);
                continue;
            }
            // Add 1 before taking the log so that a distance of zero is valid.
            data[store].setValueForProbe(probe, (float) (Math.log(nearestDistance + 1) / log2));
        }
    }

    quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HiCPCADomainQuantitation method run.
/*
* Quantitates every probe, one chromosome and one data store at a time,
* from a PCA of that chromosome's interaction correlation matrix.
*
* For each chromosome / data store pair the steps are: build a
* HeatmapMatrix restricted to the chromosome's probes, build an
* InteractionClusterMatrix from its filtered interactions, run a PCA on
* the resulting correlation matrix, and store the extracted eigen value
* for each probe as that probe's value. Chromosomes with fewer than 5
* probes, and chromosome / store pairs with fewer than 10 filtered
* interactions, have 0 assigned to all of their probes and a warning
* reported instead.
*
* Each of the three calculations is started and then waited for by
* polling the wait field every 100ms.
* NOTE(review): wait is presumably set back to false by a listener
* callback on this object when the calculation finishes - that code is
* not visible here, confirm before changing the ordering of any of the
* "wait = true" / start calls.
*
* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// We're going to go through the probes one chromosome at a time so we
// can reduce the complexity we have to deal with
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chromosomes.length; c++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
currentChromosome = chromosomes[c];
Probe[] probes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
if (probes.length < 5) {
progressWarningReceived(new SeqMonkException("Too few probes on chromosome " + currentChromosome.name() + " - assigning zero to everything"));
// It's not worth trying to find domains, so every store gets zero
// for every probe on this chromosome and we move to the next one.
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0f);
}
}
continue;
}
// Build a temporary probe list holding just this chromosome's probes.
// It is handed to each HeatmapMatrix below and deleted once all of the
// stores have been processed for this chromosome.
ProbeList thisChrProbes = new ProbeList(application.dataCollection().probeSet(), chromosomes[c].name(), "", null);
for (int p = 0; p < probes.length; p++) {
thisChrProbes.addProbe(probes[p], 0f);
}
for (int d = 0; d < data.length; d++) {
// NOTE(review): returning here (and at the later cancel check) skips
// the thisChrProbes.delete() call at the end of the chromosome loop -
// confirm whether the temporary list should be cleaned up on cancel.
if (cancel) {
progressCancelled();
return;
}
// Record where we are and report it.
currentStore = data[d];
current = (d * chromosomes.length) + c;
total = chromosomes.length * data.length;
progressUpdated("Processing chromosome " + chromosomes[c].name() + " for " + data[d].name(), current, total);
// Step 1: the interaction matrix for this store on this chromosome,
// using the filters taken from the options panel.
HeatmapMatrix matrix = new HeatmapMatrix(data[d], new ProbeList[] { thisChrProbes }, application.dataCollection().genome(), optionsPanel.minDistance(), optionsPanel.maxDistance(), optionsPanel.minStrength(), optionsPanel.maxSignificance(), optionsPanel.minAbsolute(), optionsPanel.correctLinkage());
matrix.addProgressListener(this);
// The flag must be raised before the calculation is started so that
// we can't miss it being lowered again.
wait = true;
matrix.startCalculating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
// Interrupts are ignored; we just carry on polling.
}
}
if (cancel) {
progressCancelled();
return;
}
if (matrix.filteredInteractions().length < 10) {
progressWarningReceived(new SeqMonkException("Too few interactions on chromosome " + currentChromosome.name() + " for " + data[d].name() + " - assigning zero to everything"));
// We're not going to get a sensible answer anyway, so this store
// gets zero for every probe and we move on to the next store.
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0f);
}
continue;
}
// Step 2: correlate the filtered interactions.
InteractionClusterMatrix clusterMatrix = new InteractionClusterMatrix(matrix.filteredInteractions(), probes.length);
clusterMatrix.addListener(this);
wait = true;
clusterMatrix.startCorrelating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
// Interrupts are ignored; we just carry on polling.
}
}
// NOTE(review): unlike after the HeatmapMatrix step there is no cancel
// check after this wait or after the PCA wait below.
float[][] correlationMatrix = clusterMatrix.correlationMatix();
// Annoyingly the PCA needs a double [][] so we copy the float
// matrix over row by row.
double[][] correlationMatrixDouble = new double[correlationMatrix.length][];
for (int i = 0; i < correlationMatrix.length; i++) {
double[] db = new double[correlationMatrix[i].length];
for (int j = 0; j < db.length; j++) {
db[j] = correlationMatrix[i][j];
}
correlationMatrixDouble[i] = db;
}
// Step 3: now we can calculate the PCA values from the correlation matrix
PCA pca = new PCA(correlationMatrixDouble);
pca.addProgressListener(this);
wait = true;
pca.startCalculating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
// Interrupts are ignored; we just carry on polling.
}
}
double[] extractedEigenValues = pca.extractedEigenValues();
// Store the extracted values against these probes, matching them up
// by index.
// NOTE(review): assumes extractedEigenValues has at least
// probes.length entries in probe order - verify against PCA.
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], (float) extractedEigenValues[p]);
}
}
// All stores are done for this chromosome, so the temporary list can go.
thisChrProbes.delete();
}
quantitatonComplete();
}
Aggregations