use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class AnnotatedInteractionReport method run.
/**
 * Picks, for every probe, the best feature of the selected annotation type
 * according to the selected position rule, records it in
 * {@code probeAnnotations}, optionally drops interactions where either probe
 * was left unannotated, and finally hands a new {@code AnnotationTableModel}
 * to {@code reportComplete}.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    String featureType = (String) annotationType.getSelectedItem();

    // Unannotated probes are kept unless the user explicitly chose "Exclude".
    boolean keepUnannotated = !((String) excludes.getSelectedItem()).equals("Exclude");

    String positionChoice = (String) annotationPosition.getSelectedItem();

    // These flags describe which probe/feature relationships may be used.
    // An unrecognised choice leaves all three positional flags enabled,
    // i.e. it behaves like "closest".
    boolean allowOverlap = true;
    boolean allowUpstream = true;
    boolean allowDownstream = true;
    boolean matchByName = false;

    if (positionChoice.equals("[Don't annotate]")) {
        allowOverlap = false;
        allowUpstream = false;
        allowDownstream = false;
    } else if (positionChoice.equals("overlapping")) {
        allowUpstream = false;
        allowDownstream = false;
    } else if (positionChoice.equals("surrounding or upstream")) {
        allowDownstream = false;
    } else if (positionChoice.equals("surrounding or downstream")) {
        allowUpstream = false;
    } else if (positionChoice.equals("upstream")) {
        allowOverlap = false;
        allowDownstream = false;
    } else if (positionChoice.equals("downstream")) {
        allowOverlap = false;
        allowUpstream = false;
    } else if (positionChoice.equals("closest")) {
        // Every positional relationship stays enabled.
    } else if (positionChoice.equals("name matched")) {
        matchByName = true;
        allowOverlap = false;
        allowUpstream = false;
        allowDownstream = false;
    } else {
        System.err.println("Didn't recognise position value '" + positionChoice + "'");
    }

    // Read the distance cutoff, if one was entered.
    // NOTE(review): none of the choices handled above is literally
    // "surrounding", so this guard looks like it always passes - confirm
    // whether "overlapping" was intended.
    // NOTE(review): parseInt will throw NumberFormatException on
    // non-numeric text; presumably the field restricts its input - verify.
    int maxDistance = 0;
    if (!positionChoice.equals("surrounding") && annotationLimit.getText().length() > 0) {
        maxDistance = Integer.parseInt(annotationLimit.getText());
    }

    // Features are fetched per chromosome and reused for as long as
    // consecutive probes stay on the same chromosome.
    Feature[] chrFeatures = null;
    Chromosome currentChr = null;

    for (int p = 0; p < probes.length; p++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }

        Chromosome probeChr = probes[p].chromosome();
        if (!probeChr.equals(currentChr)) {
            chrFeatures = collection.genome().annotationCollection().getFeaturesForType(probeChr, featureType);
            currentChr = probeChr;
        }

        // Alternative spellings of the probe name used for name matching:
        // first without a trailing _upstream/_downstream/_gene suffix, then
        // additionally without a trailing "-ddd" suffix.
        String strippedName = "";
        String nameSansTranscript = "";
        if (matchByName) {
            strippedName = probes[p].name().replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
            nameSansTranscript = strippedName.replaceAll("-\\d\\d\\d$", "");
        }

        Feature best = null;
        int bestDistance = 0;

        for (Feature candidate : chrFeatures) {

            // A name match wins immediately.
            if (matchByName) {
                if (candidate.name().equals(probes[p].name())
                        || candidate.name().equals(strippedName)
                        || candidate.name().equals(nameSansTranscript)) {
                    best = candidate;
                    bestDistance = 0;
                    break;
                }
            }

            // So does the first feature found to overlap the probe.
            if (allowOverlap
                    && probes[p].start() <= candidate.location().end()
                    && probes[p].end() >= candidate.location().start()) {
                best = candidate;
                bestDistance = 0;
                break;
            }

            if (allowDownstream) {
                // Gap between the probe and the feature when the feature is
                // treated as downstream; which ends are compared depends on
                // the feature's strand.
                int gap = candidate.location().strand() == Location.FORWARD
                        ? candidate.location().start() - probes[p].end()
                        : probes[p].start() - candidate.location().end();

                if (gap >= 0) {
                    // Keep it only if it is within the limit and strictly
                    // closer than anything seen so far.
                    if (gap <= maxDistance && (best == null || gap < bestDistance)) {
                        best = candidate;
                        bestDistance = gap;
                    }
                    // A non-negative downstream gap means the upstream test
                    // is not applied to this feature.
                    continue;
                }
            }

            if (allowUpstream) {
                // Mirror image of the downstream calculation.
                int gap = candidate.location().strand() == Location.FORWARD
                        ? probes[p].start() - candidate.location().end()
                        : candidate.location().start() - probes[p].end();

                if (gap >= 0 && gap <= maxDistance && (best == null || gap < bestDistance)) {
                    best = candidate;
                    bestDistance = gap;
                }
            }
        }

        if (best != null) {
            probeAnnotations.put(probes[p], best);
        }
    }

    if (!keepUnannotated) {
        // Retain only the interactions where both probes received an annotation.
        Vector<InteractionProbePair> annotatedPairs = new Vector<InteractionProbePair>();
        for (InteractionProbePair pair : interactions) {
            if (probeAnnotations.containsKey(pair.probe1()) && probeAnnotations.containsKey(pair.probe2())) {
                annotatedPairs.add(pair);
            }
        }
        interactions = annotatedPairs.toArray(new InteractionProbePair[0]);
    }

    TableModel model = new AnnotationTableModel();
    reportComplete(model);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class ExactOverlapQuantitation method run.
/**
 * Gives every probe, in every data store, a value based on the number of
 * reads whose start and end both coincide exactly with the probe's start and
 * end. The count can optionally be scaled by a per-store correction factor
 * and log transformed before it is stored.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();

    // One multiplicative correction factor per data store (only filled in
    // when total-count correction is enabled).
    float[] scaling = new float[data.length];

    if (correctTotal) {
        // The total every store is scaled to: one million when correcting
        // per million, otherwise the largest total seen across the stores.
        float target = correctPerMillion ? 1000000 : 0;

        for (int d = 0; d < data.length; d++) {
            if (correctOnlyInProbes) {
                scaling[d] = getTotalCountInProbes(data[d], allProbes);
            } else {
                scaling[d] = data[d].getTotalReadCount();
            }

            if (!correctPerMillion && (d == 0 || scaling[d] > target)) {
                target = scaling[d];
            }
        }

        // Turn each total into the factor which scales it to the target.
        // NOTE(review): a store whose total is zero ends up with a
        // non-finite factor here - confirm whether that can occur.
        for (int d = 0; d < scaling.length; d++) {
            scaling[d] = target / scaling[d];
        }
    }

    // Work through the genome one chromosome at a time.
    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();

    for (int c = 0; c < chromosomes.length; c++) {
        Chromosome chr = chromosomes[c];

        progressUpdated("Quantiating probes on " + chr.name(), c, chromosomes.length);

        Probe[] chrProbes = application.dataCollection().probeSet().getProbesForChromosome(chr);
        Arrays.sort(chrProbes);

        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }

            // Fetch every read on this chromosome once, then count per probe.
            ReadsWithCounts chrReads = data[d].getReadsForChromosome(chr);
            quantitationType.resetLastRead();

            // Index from which the read scan resumes for the next probe.
            int firstCandidate = 0;

            for (Probe probe : chrProbes) {
                int exactMatches = 0;

                for (int r = firstCandidate; r < chrReads.reads.length; r++) {
                    if (SequenceRead.start(chrReads.reads[r]) < probe.start()) {
                        // Reads starting before this probe need not be the
                        // starting point of later scans.
                        firstCandidate = r;
                    } else if (SequenceRead.start(chrReads.reads[r]) > probe.start()) {
                        // Past the probe's start: no later read can match.
                        break;
                    }

                    // useRead is consulted first, before the position
                    // comparison, for every read that is scanned.
                    if (quantitationType.useRead(probe, chrReads.reads[r])
                            && SequenceRead.start(chrReads.reads[r]) == probe.start()
                            && SequenceRead.end(chrReads.reads[r]) == probe.end()) {
                        exactMatches += chrReads.counts[r];
                    }
                }

                // Apply the optional transformations to the raw count.
                float value = exactMatches;

                if (logTransform && value == 0) {
                    // Avoid taking the log of zero.
                    value = 0.9f;
                }

                if (correctTotal) {
                    value *= scaling[d];
                }

                if (logTransform) {
                    value = (float) Math.log(value) / log2;
                }

                data[d].setValueForProbe(probe, value);
            }
        }
    }

    quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class SmoothingSubtractionQuantitation method run.
/**
 * Subtracts from every probe's existing value the mean of the values in a
 * window of neighbouring probes, for every quantitated data set and data
 * group, working one chromosome at a time.
 *
 * The window is chosen by {@code correctionAction}:
 * <ul>
 * <li>{@code ADJACENT}: {@code distance} consecutive probes beginning
 * {@code distance / 2} probes before the current one, clamped to the bounds
 * of the chromosome's probe array.</li>
 * <li>{@code WINDOW}: the run of probes around the current one which lie
 * within {@code distance / 2} positions of its start / end.</li>
 * <li>anything else: the probe on its own.</li>
 * </ul>
 *
 * If the options are not ready, or a {@code SeqMonkException} is raised
 * while reading or writing values, the problem is reported through
 * {@code progressExceptionReceived} and the method returns without
 * signalling completion.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
        // Don't go on to quantitate with options which are known to be invalid.
        return;
    }

    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();

    // Collect every data set and data group which already has a quantitation.
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();

    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }

    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }

    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);

    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated(c, chromosomes.length);

        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);

        // The window means are buffered here so that every mean is computed
        // from the values as they were before any of them is overwritten.
        float[][] newValues = new float[data.length][allProbes.length];

        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }

                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;

                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);

                    if (minIndex < 0) {
                        minIndex = 0;
                    }
                    if (maxIndex > allProbes.length - 1) {
                        maxIndex = allProbes.length - 1;
                    }
                } else if (correctionAction == WINDOW) {
                    // Walk outwards in each direction, stopping at the first
                    // probe which falls outside the window.
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }

                // Now go through all of the datasets working out the mean for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }

            // Now subtract the window mean from each probe on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
            // Stop here so that a reported failure isn't followed by further
            // changes to the data or by a completion notification.
            return;
        }
    }

    quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HiCCisTransQuantitation method run.
/**
 * Sets each probe's value, in each data store, to the percentage of its HiC
 * read pairs which are counted as trans. A pair is trans when its hit is on
 * a differently named chromosome from the probe or, when
 * {@code includeFarCis} is set, when it is on the same chromosome but its
 * fragment length exceeds {@code farCisDistance}. Probes with no counted
 * pairs get a value of 0.
 *
 * When {@code correctPerChromosome} is set, the median value of each
 * chromosome's probes is subsequently subtracted from those probes, store by
 * store.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();

    for (int p = 0; p < allProbes.length; p++) {
        // Honour a pending cancellation request.
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated(p, allProbes.length);

        Probe probe = allProbes[p];

        for (int d = 0; d < data.length; d++) {
            int cis = 0;
            int trans = 0;

            HiCHitCollection hits = data[d].getHiCReadsForProbe(probe);
            String[] hitChromosomes = hits.getChromosomeNamesWithHits();

            for (String hitChromosome : hitChromosomes) {
                long[] sources = hits.getSourcePositionsForChromosome(hitChromosome);
                long[] targets = hits.getHitPositionsForChromosome(hitChromosome);

                for (int r = 0; r < sources.length; r++) {
                    // When removing duplicates, skip a pair identical to the
                    // one immediately before it.
                    if (removeDuplicates && r > 0 && sources[r] == sources[r - 1] && targets[r] == targets[r - 1]) {
                        continue;
                    }

                    // Trans if on another chromosome, or (optionally) if on
                    // the same chromosome but too far away.
                    boolean countAsTrans = !hitChromosome.equals(probe.chromosome().name())
                            || (includeFarCis && SequenceRead.fragmentLength(sources[r], targets[r]) > farCisDistance);

                    if (countAsTrans) {
                        ++trans;
                    } else {
                        ++cis;
                    }
                }
            }

            int total = cis + trans;
            float percentage = (total == 0) ? 0 : (trans * 100f) / total;

            // TODO: This cast is icky since the inheritance between
            // HiCDataStore and DataStore isn't properly sorted out.
            ((DataStore) data[d]).setValueForProbe(probe, percentage);
        }
    }

    if (correctPerChromosome) {
        Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();

        for (Chromosome chromosome : chromosomes) {
            Probe[] chrProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosome);

            // This buffer is shared by all stores for this chromosome.
            float[] chrValues = new float[chrProbes.length];

            for (int d = 0; d < data.length; d++) {
                DataStore store = (DataStore) data[d];

                for (int p = 0; p < chrProbes.length; p++) {
                    try {
                        chrValues[p] = store.getValueForProbe(chrProbes[p]);
                    } catch (SeqMonkException ignored) {
                        // Failures are ignored; the slot then keeps whatever
                        // it held previously.
                    }
                }

                float median = SimpleStats.median(chrValues);

                for (Probe chrProbe : chrProbes) {
                    try {
                        store.setValueForProbe(chrProbe, store.getValueForProbe(chrProbe) - median);
                    } catch (SeqMonkException ignored) {
                        // Failures are ignored; the probe keeps its
                        // uncorrected value.
                    }
                }
            }
        }
    }

    quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class SmoothingQuantitation method run.
/**
 * Replaces every probe's existing value with the mean of the values in a
 * window of neighbouring probes, for every quantitated data set and data
 * group, working one chromosome at a time.
 *
 * The window is chosen by {@code correctionAction}:
 * <ul>
 * <li>{@code ADJACENT}: {@code distance} consecutive probes beginning
 * {@code distance / 2} probes before the current one, clamped to the bounds
 * of the chromosome's probe array.</li>
 * <li>{@code WINDOW}: the run of probes around the current one which lie
 * within {@code distance / 2} positions of its start / end.</li>
 * <li>anything else: the probe on its own.</li>
 * </ul>
 *
 * If the options are not ready, or a {@code SeqMonkException} is raised
 * while reading or writing values, the problem is reported through
 * {@code progressExceptionReceived} and the method returns without
 * signalling completion.
 *
 * @see java.lang.Runnable#run()
 */
public void run() {
    if (!isReady()) {
        progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
        // Don't go on to quantitate with options which are known to be invalid.
        return;
    }

    Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();

    // Collect every data set and data group which already has a quantitation.
    Vector<DataStore> quantitatedStores = new Vector<DataStore>();

    DataSet[] sets = application.dataCollection().getAllDataSets();
    for (int s = 0; s < sets.length; s++) {
        if (sets[s].isQuantitated()) {
            quantitatedStores.add(sets[s]);
        }
    }

    DataGroup[] groups = application.dataCollection().getAllDataGroups();
    for (int g = 0; g < groups.length; g++) {
        if (groups[g].isQuantitated()) {
            quantitatedStores.add(groups[g]);
        }
    }

    DataStore[] data = quantitatedStores.toArray(new DataStore[0]);

    for (int c = 0; c < chromosomes.length; c++) {
        // See if we need to quit
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated(c, chromosomes.length);

        Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);

        // The smoothed values are buffered here so that every mean is
        // computed from the values as they were before any is overwritten.
        float[][] newValues = new float[data.length][allProbes.length];

        try {
            for (int p = 0; p < allProbes.length; p++) {
                // See if we need to quit
                if (cancel) {
                    progressCancelled();
                    return;
                }

                // Find the min and max indices we're going to use.
                int minIndex = p;
                int maxIndex = p;

                if (correctionAction == ADJACENT) {
                    minIndex = p - (distance / 2);
                    maxIndex = minIndex + (distance - 1);

                    if (minIndex < 0) {
                        minIndex = 0;
                    }
                    if (maxIndex > allProbes.length - 1) {
                        maxIndex = allProbes.length - 1;
                    }
                } else if (correctionAction == WINDOW) {
                    // Walk outwards in each direction, stopping at the first
                    // probe which falls outside the window.
                    for (int i = p; i >= 0; i--) {
                        if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
                            break;
                        }
                        minIndex = i;
                    }
                    for (int i = p; i < allProbes.length; i++) {
                        if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
                            break;
                        }
                        maxIndex = i;
                    }
                }

                // Now go through all of the datasets working out the new value for this range
                float[] tempValues = new float[(maxIndex - minIndex) + 1];
                for (int d = 0; d < data.length; d++) {
                    for (int i = minIndex; i <= maxIndex; i++) {
                        tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
                    }
                    newValues[d][p] = SimpleStats.mean(tempValues);
                }
            }

            // Now assign the smoothed values for the probes on this chromosome
            for (int d = 0; d < data.length; d++) {
                for (int p = 0; p < allProbes.length; p++) {
                    data[d].setValueForProbe(allProbes[p], newValues[d][p]);
                }
            }
        } catch (SeqMonkException e) {
            progressExceptionReceived(e);
            // Stop here so that a reported failure isn't followed by further
            // changes to the data or by a completion notification.
            return;
        }
    }

    quantitatonComplete();
}
Aggregations