use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class SeqMonkParser method parseLists.
/**
* Parses the set of probe lists.
*
* @param sections The tab split initial line from the probe lists section
* @throws SeqMonkException
* @throws IOException Signals that an I/O exception has occurred.
*/
private void parseLists(String[] sections) throws SeqMonkException, IOException {
if (sections.length != 2) {
throw new SeqMonkException("Probe Lists line didn't contain 2 sections");
}
int n = Integer.parseInt(sections[1]);
ProbeList[] lists = new ProbeList[n];
// We also store the probe lists in their appropriate linkage position
// to recreate the links between probe lists. The worst case scenario
// is that we have one big chain of linked lists so we make a linkage
// list which is the same size as the number of probe lists.
ProbeList[] linkage = new ProbeList[n + 1];
// The 0 linkage list will always be the full ProbeSet
linkage[0] = application.dataCollection().probeSet();
for (int i = 0; i < n; i++) {
String line = br.readLine();
if (line == null) {
throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
}
String[] listSections = line.split("\\t", -1);
// we allow for that not being present.
if (thisDataVersion < 5) {
lists[i] = new ProbeList(application.dataCollection().probeSet(), listSections[0], "", listSections[1]);
if (listSections.length > 2) {
lists[i].setDescription(listSections[2]);
} else {
lists[i].setDescription("No description");
}
} else {
lists[i] = new ProbeList(linkage[Integer.parseInt(listSections[0]) - 1], listSections[1], listSections[3], listSections[2]);
if (listSections.length > 4) {
lists[i].setComments(listSections[4].replaceAll("`", "\n"));
}
linkage[Integer.parseInt(listSections[0])] = lists[i];
}
}
// Next we reach the probe list data. These comes as a long list of values
// the first of which is the probe name, then either a numerical value if
// the probe is contained in that list, or a blank if it isn't.
String line = br.readLine();
if (line == null) {
throw new SeqMonkException("Couldn't find probe line for list data");
}
sections = line.split("\\t");
if (sections.length != 2) {
throw new SeqMonkException("Probe line didn't contain 2 sections");
}
if (!sections[0].equals("Probes")) {
throw new SeqMonkException("Couldn't find expected probe lists probe line");
}
n = Integer.parseInt(sections[1]);
for (int i = 0; i < n; i++) {
sections = br.readLine().split("\\t", -1);
if (sections.length != lists.length + 1) {
throw new SeqMonkException("Expected " + (lists.length + 1) + " sections in probe list section of data file for " + sections[0] + " but got " + sections.length);
}
if (i % 1000 == 0) {
Enumeration<ProgressListener> e = listeners.elements();
while (e.hasMoreElements()) {
e.nextElement().progressUpdated("Processed list data for " + i + " probes", i, n);
}
}
Probe p = probes[i];
if (p == null) {
continue;
}
for (int j = 0; j < lists.length; j++) {
if (sections[j + 1].length() > 0) {
if (sections[j + 1].equals("NaN")) {
lists[j].addProbe(p, null);
} else {
lists[j].addProbe(p, new Float(Float.parseFloat(sections[j + 1])));
}
}
}
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ProbeListAnnotationParser method parseAnnotation.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.AnnotationParsers.AnnotationParser#parseAnnotation(java.io.File, uk.ac.babraham.SeqMonk.DataTypes.Genome.Genome)
*/
protected AnnotationSet[] parseAnnotation(File file, Genome genome) throws Exception {
Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
AnnotationSet currentAnnotation = new AnnotationSet(genome, probeList.name());
annotationSets.add(currentAnnotation);
Probe[] probes = probeList.getAllProbes();
for (int p = 0; p < probes.length; p++) {
if (p % 1 + (probes.length / 100) == 0) {
progressUpdated("Converted " + p + " probes", p, probes.length);
}
if (p > 1000000 && p % 1000000 == 0) {
progressUpdated("Caching...", 0, 1);
currentAnnotation.finalise();
currentAnnotation = new AnnotationSet(genome, probeList.name() + "[" + annotationSets.size() + "]");
annotationSets.add(currentAnnotation);
}
Feature feature = new Feature(featureType, probes[p].chromosome().name());
if (probes[p].hasDefinedName()) {
feature.addAttribute("name", probes[p].name());
}
feature.setLocation(new Location(probes[p].start(), probes[p].end(), probes[p].strand()));
currentAnnotation.addFeature(feature);
}
return annotationSets.toArray(new AnnotationSet[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class DistanceToFeatureQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
Feature[] features = null;
Chromosome lastChromsome = null;
int lastIndex = 0;
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
// See if we're on the same chromosome as last time
if (lastChromsome == null || probes[p].chromosome() != lastChromsome) {
lastChromsome = probes[p].chromosome();
features = application.dataCollection().genome().annotationCollection().getFeaturesForType(lastChromsome, selectedFeature);
System.err.println("Found " + features.length + " features of type " + selectedFeature + " on chr " + lastChromsome);
lastIndex = 0;
}
int closestDistance = lastChromsome.length();
int bestIndex = lastIndex;
for (int i = lastIndex; i >= 0 && i < features.length; i--) {
int thisDistance = getDistanceToFeature(probes[p], features[i]);
if (thisDistance < closestDistance) {
closestDistance = thisDistance;
bestIndex = i;
}
if (features[i].location().end() < probes[p].start())
break;
}
// Now we go forward until we hit the end or we're after the end of the last best feature
for (int i = lastIndex + 1; i < features.length; i++) {
int thisDistance = getDistanceToFeature(probes[p], features[i]);
if (thisDistance < closestDistance) {
closestDistance = thisDistance;
bestIndex = i;
}
if (features[i].location().start() > Math.max(probes[p].end(), features[lastIndex].location().end()))
break;
}
lastIndex = bestIndex;
for (int d = 0; d < data.length; d++) {
if (logTransform.isSelected()) {
data[d].setValueForProbe(probes[p], (float) (Math.log(closestDistance + 1) / log2));
} else {
data[d].setValueForProbe(probes[p], closestDistance);
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class HiCPCADomainQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// We're going to go through the probes one chromosome at a time so we
// can reduce the complexity we have to deal with
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chromosomes.length; c++) {
if (cancel) {
progressCancelled();
return;
}
currentChromosome = chromosomes[c];
Probe[] probes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
if (probes.length < 5) {
progressWarningReceived(new SeqMonkException("Too few probes on chromosome " + currentChromosome.name() + " - assigning zero to everything"));
// It's not worth trying to find domains
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0f);
}
}
continue;
}
ProbeList thisChrProbes = new ProbeList(application.dataCollection().probeSet(), chromosomes[c].name(), "", null);
for (int p = 0; p < probes.length; p++) {
thisChrProbes.addProbe(probes[p], 0f);
}
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
currentStore = data[d];
current = (d * chromosomes.length) + c;
total = chromosomes.length * data.length;
progressUpdated("Processing chromosome " + chromosomes[c].name() + " for " + data[d].name(), current, total);
HeatmapMatrix matrix = new HeatmapMatrix(data[d], new ProbeList[] { thisChrProbes }, application.dataCollection().genome(), optionsPanel.minDistance(), optionsPanel.maxDistance(), optionsPanel.minStrength(), optionsPanel.maxSignificance(), optionsPanel.minAbsolute(), optionsPanel.correctLinkage());
matrix.addProgressListener(this);
wait = true;
matrix.startCalculating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
}
}
if (cancel) {
progressCancelled();
return;
}
if (matrix.filteredInteractions().length < 10) {
progressWarningReceived(new SeqMonkException("Too few interactions on chromosome " + currentChromosome.name() + " for " + data[d].name() + " - assigning zero to everything"));
// not going to get a sensible answer anyway.
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0f);
}
continue;
}
InteractionClusterMatrix clusterMatrix = new InteractionClusterMatrix(matrix.filteredInteractions(), probes.length);
clusterMatrix.addListener(this);
wait = true;
clusterMatrix.startCorrelating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
}
}
float[][] correlationMatrix = clusterMatrix.correlationMatix();
// Annoyingly the PCA needs a double [][]
double[][] correlationMatrixDouble = new double[correlationMatrix.length][];
for (int i = 0; i < correlationMatrix.length; i++) {
double[] db = new double[correlationMatrix[i].length];
for (int j = 0; j < db.length; j++) {
db[j] = correlationMatrix[i][j];
}
correlationMatrixDouble[i] = db;
}
// Now we can calculate the PCA values from the correlation matrix
PCA pca = new PCA(correlationMatrixDouble);
pca.addProgressListener(this);
wait = true;
pca.startCalculating();
while (wait) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
}
}
double[] extractedEigenValues = pca.extractedEigenValues();
// for these probes
for (int p = 0; p < probes.length; p++) {
((DataStore) data[d]).setValueForProbe(probes[p], (float) extractedEigenValues[p]);
}
}
thisChrProbes.delete();
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class HiCReadCountQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
float[] corrections = new float[data.length];
if (correctTotal) {
float largest = 0;
if (correctPerMillion) {
largest = 1000000;
}
for (int d = 0; d < data.length; d++) {
// if (correctOnlyInProbes) {
// corrections[d] = getTotalCountInProbes(data[d],probes);
// }
// else {
corrections[d] = ((DataStore) data[d]).getTotalReadCount();
// }
if (d == 0 && !correctPerMillion) {
largest = corrections[d];
} else {
if (!correctPerMillion && corrections[d] > largest) {
largest = corrections[d];
}
}
}
// We correct everything by the largest count
for (int d = 0; d < corrections.length; d++) {
corrections[d] = largest / corrections[d];
}
}
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
double lengthCorrection = 1;
if (correctLength) {
// We assume a 'normal' probe length of 1kb
lengthCorrection = (double) 1000 / probes[p].length();
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
double count = 0;
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomeNames.length; c++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
for (int r = 0; r < sourceReads.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
continue;
}
if (SequenceRead.overlaps(hitReads[r], probes[p].packedPosition()) && SequenceRead.compare(hitReads[r], sourceReads[r]) > 0) {
continue;
}
++count;
}
}
if (logTransform && count == 0) {
count = 0.9;
}
if (correctTotal) {
count *= corrections[d];
}
if (correctLength) {
count *= lengthCorrection;
}
if (logTransform) {
count = (float) Math.log(count) / log2;
}
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], (float) count);
}
}
quantitatonComplete();
}
Aggregations