use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class HiCPrevNextQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
Arrays.sort(probes);
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
if (probes[p].start() - distance < 0 || probes[p].end() + distance > probes[p].chromosome().length()) {
for (int d = 0; d < data.length; d++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
}
continue;
}
// Make up a pseudo probe for the previous and next regions
Probe previousProbe = new Probe(probes[p].chromosome(), probes[p].start() - distance, probes[p].start());
Probe nextProbe = new Probe(probes[p].chromosome(), probes[p].end(), probes[p].end() + distance);
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
// Get the counts for the previous and next probes
int previousTotalCount = data[d].getHiCReadCountForProbe(previousProbe);
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
int nextTotalCount = data[d].getHiCReadCountForProbe(nextProbe);
// any reads then give up on this one.
if (previousTotalCount == 0 || nextTotalCount == 0) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
continue;
}
int previousCount = 0;
int nextCount = 0;
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
SequenceRead.sort(hitPositions);
for (int r = 0; r < hitPositions.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && hitPositions[r] == hitPositions[r - 1])
continue;
}
// Check if the other end maps to either the prev or next probe
if (SequenceRead.overlaps(hitPositions[r], previousProbe.packedPosition())) {
++previousCount;
}
if (SequenceRead.overlaps(hitPositions[r], nextProbe.packedPosition())) {
++nextCount;
}
}
// If both of the actual counts are zero then don't try to calculate a real value
if (previousCount == 0 && nextCount == 0) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
continue;
}
// Basically what happens here is that we calculate a normal X^2 value and then
// turn it into a negative depending on the direction of the deviation from the
// expected proportion.
// We need to calculate expected values for the two directions based on the proportion
// of reads falling into those two regions in total.
// Correct the counts based on the proportions of the two totals
double expectedProportion = (previousTotalCount / (double) (nextTotalCount + previousTotalCount));
int totalObservedCount = previousCount + nextCount;
double expectedPreviousCount = totalObservedCount * expectedProportion;
double expectedNextCount = totalObservedCount - expectedPreviousCount;
// Now we can calculate the X^2 values to compare the expected and observed values
double chisquare = (Math.pow(previousCount - expectedPreviousCount, 2) / expectedPreviousCount) + (Math.pow(nextCount - expectedNextCount, 2) / expectedNextCount);
// We now negate this count if the proportions bias towards the next count
double observedProportion = (previousCount / (double) totalObservedCount);
if (observedProportion > expectedProportion) {
chisquare = 0 - chisquare;
}
// System.err.println("Raw counts "+previousCount+","+nextCount+" Totals "+previousTotalCount+","+nextTotalCount+" Expected "+expectedPreviousCount+","+expectedNextCount+" Chi "+chisquare);
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], (float) chisquare);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class SmoothingQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
if (!isReady()) {
progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
}
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
Vector<DataStore> quantitatedStores = new Vector<DataStore>();
DataSet[] sets = application.dataCollection().getAllDataSets();
for (int s = 0; s < sets.length; s++) {
if (sets[s].isQuantitated()) {
quantitatedStores.add(sets[s]);
}
}
DataGroup[] groups = application.dataCollection().getAllDataGroups();
for (int g = 0; g < groups.length; g++) {
if (groups[g].isQuantitated()) {
quantitatedStores.add(groups[g]);
}
}
DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
for (int c = 0; c < chromosomes.length; c++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(c, chromosomes.length);
Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
float[][] newValues = new float[data.length][allProbes.length];
try {
for (int p = 0; p < allProbes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
// Find the min and max indices we're going to use.
int minIndex = p;
int maxIndex = p;
if (correctionAction == ADJACENT) {
minIndex = p - (distance / 2);
maxIndex = minIndex + (distance - 1);
if (minIndex < 0)
minIndex = 0;
if (maxIndex > allProbes.length - 1)
maxIndex = allProbes.length - 1;
} else if (correctionAction == WINDOW) {
for (int i = p; i >= 0; i--) {
if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
break;
}
minIndex = i;
}
for (int i = p; i < allProbes.length; i++) {
if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
break;
}
maxIndex = i;
}
}
// Now go through all of the datasets working out the new value for this range
float[] tempValues = new float[(maxIndex - minIndex) + 1];
for (int d = 0; d < data.length; d++) {
for (int i = minIndex; i <= maxIndex; i++) {
tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
}
newValues[d][p] = SimpleStats.mean(tempValues);
}
}
// Now assign the values for the probes on this chromosome
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < allProbes.length; p++) {
data[d].setValueForProbe(allProbes[p], newValues[d][p]);
}
}
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class FeaturePercentileProbeGenerator method makeProbes.
/**
* Make a set of probes within an individual feature.
*
* @param feature The individual feature to make probes around
* @param chromosome
* @param location The actual location to use - must be the whole region
* @param newProbes The vector to add the new probes to
* @param sets DataSets to check for reads if we're ignoring blanks
*/
private void makeProbes(Feature feature, Chromosome chromosome, Location location, Vector<Probe> newProbes) {
int start;
int end;
int strand = location.strand();
if (ignoreDirection)
strand = Location.UNKNOWN;
if (lengthType == FeaturePercentileSelectorPanel.PROPORTIONAL_LENGTH) {
int probeLength = feature.location().length() / probesPerFeature;
if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
start = location.start();
end = location.start();
} else {
start = location.end();
end = location.end();
}
for (int i = 0; i < probesPerFeature; i++) {
int localStart;
int localEnd;
if (strand == Location.REVERSE) {
localStart = end - (probeLength * (i + 1));
localEnd = localStart + probeLength;
} else {
localStart = start + (probeLength * i);
localEnd = localStart + probeLength;
}
Probe p = makeProbe(feature.name() + "_" + (i + 1), chromosome, localStart, localEnd, strand);
if (p != null) {
// Add this probe
newProbes.add(p);
}
}
} else if (lengthType == FeaturePercentileSelectorPanel.FIXED_LENGTH) {
int proportionalLength;
if (includeStartEnd) {
proportionalLength = feature.location().length() / (probesPerFeature - 1);
} else {
proportionalLength = feature.location().length() / probesPerFeature;
}
if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
if (includeStartEnd) {
start = location.start();
end = location.end();
} else {
start = location.start() + (proportionalLength / 2);
end = location.end() - (proportionalLength / 2);
}
} else {
if (includeStartEnd) {
start = location.end();
end = location.end();
} else {
start = location.end() + (proportionalLength / 2);
end = location.end() - (proportionalLength / 2);
}
}
for (int i = 0; i < probesPerFeature; i++) {
int localStart;
int localEnd;
int localCentre;
if (strand == Location.REVERSE) {
localCentre = (end - (proportionalLength * i));
localStart = localCentre - (fixedLength / 2);
localEnd = localStart + fixedLength;
} else {
localCentre = start + (proportionalLength * i);
localStart = localCentre - (fixedLength / 2);
localEnd = localStart + fixedLength;
}
Probe p = makeProbe(feature.name() + "_" + (i + 1), chromosome, localStart, localEnd, strand);
if (p != null) {
// Add this probe
newProbes.add(p);
}
}
} else {
throw new IllegalStateException("Unknown length type " + optionPanel.lengthType());
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class FeaturePercentileProbeGenerator method makeProbe.
/**
* Makes an individual probe
*
* @param name The name for the probe
* @param c The Chromosome
* @param start Start position
* @param end End position
* @return The newly generated probe
*/
private Probe makeProbe(String name, Chromosome c, int start, int end, int strand) {
if (end > c.length())
end = c.length();
if (start < 1)
start = 1;
if (end < start)
return null;
Probe p = new Probe(c, start, end, strand);
p.setName(name);
return p;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class SeqMonkParser method parseProbes.
/**
* Parses the list of probes.
*
* @param sections The tab split initial line from the probes section
* @throws SeqMonkException
* @throws IOException Signals that an I/O exception has occurred.
*/
private void parseProbes(String[] sections) throws SeqMonkException, IOException {
if (sections.length < 2) {
throw new SeqMonkException("Probe line didn't contain at least 2 sections");
}
if (!sections[0].equals("Probes")) {
throw new SeqMonkException("Couldn't find expected probes line");
}
int n = Integer.parseInt(sections[1]);
probes = new Probe[n];
String description = "No generator description available";
if (sections.length > 2) {
description = sections[2];
}
ProbeSet probeSet = new ProbeSet(description, n);
if (sections.length > 3) {
if (sections[3].length() > 0) {
probeSet.setCurrentQuantitation(sections[3]);
}
}
if (sections.length > 4) {
probeSet.setComments(sections[4].replaceAll("`", "\n"));
}
// We need to save the probeset to the dataset at this point so we can add the probe
// lists as we get to them.
application.dataCollection().setProbeSet(probeSet);
int positionOffset;
// We used to store chr start and end
if (thisDataVersion < 8) {
positionOffset = 4;
} else // We now store chr and packed position (to give start end and strand)
{
positionOffset = 3;
}
int expectedSectionLength = 3 + dataSets.length + dataGroups.length;
String line;
for (int i = 0; i < n; i++) {
line = br.readLine();
if (line == null) {
throw new SeqMonkException("Ran out of probe data at line " + i + " (expected " + n + " probes)");
}
// Since the probes section can have blank trailing sections we need
// to not trim these, hence the -1 limit.
sections = line.split("\\t", -1);
if (i == 0) {
/*
* Older versions of this format put down data for just
* datasets. Newer versions include data for datagroups
* as well. We need to figure out which one we're looking
* at
*/
if (sections.length == positionOffset + dataSets.length) {
expectedSectionLength = positionOffset + dataSets.length;
} else if (sections.length == positionOffset + dataSets.length + dataGroups.length) {
expectedSectionLength = positionOffset + dataSets.length + dataGroups.length;
}
}
if (sections.length != expectedSectionLength) {
throw new SeqMonkException("Expected " + expectedSectionLength + " sections in data file for " + sections[0] + " but got " + sections.length);
}
if (i % 10000 == 0) {
Enumeration<ProgressListener> e = listeners.elements();
while (e.hasMoreElements()) {
e.nextElement().progressUpdated("Processed data for " + i + " probes", i, n);
}
}
Chromosome c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
// Sanity check
if (c == null) {
throw new SeqMonkException("Couldn't find a chromosome called " + sections[1]);
}
Probe p;
if (thisDataVersion < 8) {
int start = Integer.parseInt(sections[2]);
int end = Integer.parseInt(sections[3]);
p = new Probe(c, start, end);
} else {
long packedValue = Long.parseLong(sections[2]);
p = new Probe(c, packedValue);
}
if (!sections[0].equals("null")) {
p.setName(sections[0]);
}
probes[i] = p;
probeSet.addProbe(probes[i], null);
for (int j = positionOffset; j < sections.length; j++) {
if (sections[j].length() == 0)
continue;
if ((j - positionOffset) >= dataSets.length) {
dataGroups[j - (positionOffset + dataSets.length)].setValueForProbe(p, Float.parseFloat(sections[j]));
} else {
dataSets[j - positionOffset].setValueForProbe(p, Float.parseFloat(sections[j]));
}
}
}
application.dataCollection().activeProbeListChanged(probeSet);
// This rename doesn't actually change the name. We put this in because
// the All Probes group is drawn in the data view before probes have been
// added to it. This means that it's name isn't updated when the probes have
// been added and it appears labelled with 0 probes. This doesn't happen if
// there are any probe lists under all probes as they cause it to be refreshed,
// but if you only have the probe set then you need this to make the display show
// the correct information.
probeSet.setName("All Probes");
}
Aggregations