Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews: class VarianceValuesFilter, method generateProbeList.
/* (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
locallyCorrect = optionsPanel.locallyCorrectBox.isSelected();
// When correcting by the local trend we pre-compute a smoothed variance
// trace for every store so each probe's raw value can be adjusted below.
SmoothedVarianceDataset[] smoothedVariances = new SmoothedVarianceDataset[stores.length];
if (locallyCorrect) {
for (int storeIndex = 0; storeIndex < stores.length; storeIndex++) {
smoothedVariances[storeIndex] = new SmoothedVarianceDataset(stores[storeIndex], probes, varianceType, probes.length / 100);
}
}
for (int probeIndex = 0; probeIndex < probes.length; probeIndex++) {
progressUpdated(probeIndex, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
// Count how many stores pass the variance limits for this probe.
int passingStores = 0;
for (int storeIndex = 0; storeIndex < stores.length; storeIndex++) {
if (!stores[storeIndex].hasValueForProbe(probes[probeIndex])) {
continue;
}
float variance;
try {
variance = getVarianceMeasure(stores[storeIndex], probes[probeIndex]);
if (locallyCorrect) {
// Test the residual after subtracting the locally smoothed value.
variance -= smoothedVariances[storeIndex].getSmoothedValueForIndex(probeIndex);
}
} catch (SeqMonkException e) {
e.printStackTrace();
continue;
}
// NaN values always fail the filter.
if (Float.isNaN(variance)) {
continue;
}
// A null limit means that bound is not applied.
if (upperLimit != null && variance > upperLimit) {
continue;
}
if (lowerLimit != null && variance < lowerLimit) {
continue;
}
++passingStores;
}
// Keep the probe if the number of passing stores satisfies the
// selected count rule.
boolean keepProbe;
switch (limitType) {
case EXACTLY:
keepProbe = (passingStores == chosenNumber);
break;
case AT_LEAST:
keepProbe = (passingStores >= chosenNumber);
break;
case NO_MORE_THAN:
keepProbe = (passingStores <= chosenNumber);
break;
default:
keepProbe = false;
break;
}
if (keepProbe) {
newList.addProbe(probes[probeIndex], null);
}
}
newList.setName(optionsPanel.varianceTypesBox.getSelectedItem().toString() + " between " + lowerLimit + "-" + upperLimit);
filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews: class ProportionOfLibraryStatisticsFilter, method generateProbeList.
/* (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
protected void generateProbeList() {
fromStores = optionsPanel.fromStores();
toStores = optionsPanel.toStores();
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
testForIncreasesOnly = optionsPanel.increasesOnlyBox.isSelected();
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
// For each probe we keep the lowest p-value seen over all from/to store
// comparisons. Start everything at 1 (no significance).
float[] lowestPValues = new float[probes.length];
for (int p = 0; p < lowestPValues.length; p++) {
lowestPValues[p] = 1;
}
// Put something in the progress whilst we're ordering the probe values to make
// the comparison.
progressUpdated("Generating background model", 0, 1);
try {
for (int f = 0; f < fromStores.length; f++) {
// The from-store total depends only on f, so compute it once per
// from store rather than once per (from, to) pair.
int fromTotalCount = 0;
for (int p = 0; p < probes.length; p++) {
fromTotalCount += (int) fromStores[f].getValueForProbe(probes[p]);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
}
for (int t = 0; t < toStores.length; t++) {
progressUpdated("Comparing " + fromStores[f] + " to " + toStores[t], 0, 1);
int toTotalCount = 0;
for (int p = 0; p < probes.length; p++) {
toTotalCount += (int) toStores[t].getValueForProbe(probes[p]);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
}
for (int p = 0; p < probes.length; p++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
// Build the 2x2 contingency table: counts in this probe vs
// the rest of the library, for the from and to stores.
int n11 = (int) fromStores[f].getValueForProbe(probes[p]);
int n12 = fromTotalCount - n11;
// BUG FIX: this previously read toStores[f], which compared
// against the wrong store whenever f != t.
int n21 = (int) toStores[t].getValueForProbe(probes[p]);
int n22 = toTotalCount - n21;
double[] pValues = FishersExactTest.fishersExactTest(n11, n12, n21, n22);
// The values in the array are 0=2-sided p-value, 1=left-sided p-value, 2=right-sided p-value
if (testForIncreasesOnly) {
if (pValues[1] < lowestPValues[p]) {
lowestPValues[p] = (float) pValues[1];
}
} else {
if (pValues[0] < lowestPValues[p]) {
lowestPValues[p] = (float) pValues[0];
}
}
}
}
}
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
}
if (applyMultipleTestingCorrection) {
// Convert p-values to q-values with Benjamini-Hochberg FDR and filter
// on the corrected values.
ProbeTTestValue[] statsValues = new ProbeTTestValue[probes.length];
for (int i = 0; i < probes.length; i++) {
statsValues[i] = new ProbeTTestValue(probes[i], lowestPValues[i]);
}
BenjHochFDR.calculateQValues(statsValues);
for (int i = 0; i < statsValues.length; i++) {
if (statsValues[i].q < pValueLimit) {
newList.addProbe(statsValues[i].probe, (float) statsValues[i].q);
}
}
} else {
// No correction requested: filter directly on the raw lowest p-values.
for (int i = 0; i < lowestPValues.length; i++) {
if (lowestPValues[i] < pValueLimit) {
newList.addProbe(probes[i], lowestPValues[i]);
}
}
}
filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews: class ReplicateSetStatsFilter, method generateProbeList.
/* (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
// Expand each replicate set into the list of DataStores it contains.
DataStore[][] stores = new DataStore[replicateSets.length][];
for (int i = 0; i < replicateSets.length; i++) {
stores[i] = replicateSets[i].dataStores();
}
Vector<ProbeTTestValue> newListProbesVector = new Vector<ProbeTTestValue>();
for (int c = 0; c < chromosomes.length; c++) {
progressUpdated("Processing probes on Chr" + chromosomes[c].name(), c, chromosomes.length);
Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
for (int p = 0; p < probes.length; p++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
// Collect the per-store quantitated values for this probe, grouped
// by replicate set.
double[][] values = new double[replicateSets.length][];
for (int i = 0; i < replicateSets.length; i++) {
values[i] = new double[stores[i].length];
for (int j = 0; j < stores[i].length; j++) {
try {
values[i][j] = stores[i][j].getValueForProbe(probes[p]);
} catch (SeqMonkException e) {
// Deliberate best-effort: a store which can't supply a value
// for this probe contributes the array default (0).
}
}
}
// Pick the test by the number of groups: 1 group = one-sample t-test
// against 0, 2 groups = two-sample t-test, 3+ groups = one-way ANOVA.
double pValue = 0;
try {
if (replicateSets.length == 1) {
pValue = TTest.calculatePValue(values[0], 0);
} else if (replicateSets.length == 2) {
pValue = TTest.calculatePValue(values[0], values[1]);
} else {
pValue = AnovaTest.calculatePValue(values);
}
} catch (SeqMonkException e) {
throw new IllegalStateException(e);
}
newListProbesVector.add(new ProbeTTestValue(probes[p], pValue));
}
}
ProbeTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeTTestValue[0]);
// Merged the two previously separate multiTest checks into one branch;
// Float.valueOf replaces the deprecated new Float(...) boxing constructors.
ProbeList newList;
if (multiTest) {
// Apply Benjamini-Hochberg multiple testing correction and filter on
// the resulting q-values.
BenjHochFDR.calculateQValues(newListProbes);
newList = new ProbeList(startingList, "", "", "Q-value");
for (int i = 0; i < newListProbes.length; i++) {
if (newListProbes[i].q <= cutoff) {
newList.addProbe(newListProbes[i].probe, Float.valueOf((float) newListProbes[i].q));
}
}
} else {
newList = new ProbeList(startingList, "", "", "P-value");
for (int i = 0; i < newListProbes.length; i++) {
if (newListProbes[i].p <= cutoff) {
newList.addProbe(newListProbes[i].probe, Float.valueOf((float) newListProbes[i].p));
}
}
}
filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews: class AlignedSummaryPanel, method paint.
/* (non-Javadoc)
 * @see javax.swing.JComponent#paint(java.awt.Graphics)
 */
// Paints the aligned-probes heatmap: a progress message while data is still
// being computed, otherwise one horizontal colour-striped row per plotted
// y position, chosen from the probe traces mapped to that row.
public void paint(Graphics g) {
super.paint(g);
// Blank the panel before drawing anything.
g.setColor(Color.WHITE);
g.fillRect(0, 0, getWidth(), getHeight());
FontMetrics metrics = getFontMetrics(g.getFont());
// No smoothed data yet: show a centred progress message and bail out.
// rawCounts being null distinguishes the processing vs clustering phase.
if (smoothedCounts == null) {
g.setColor(Color.GRAY);
String message;
if (rawCounts == null) {
message = "Processed " + percentCalculated + " percent of probes";
} else {
message = "Clustered " + percentCalculated + " percent of probes";
}
g.drawString(message, (getWidth() / 2) - (metrics.stringWidth(message) / 2), (getHeight() / 2 - 2));
return;
}
// If we're here then we can actually draw the plot
// Titles: store name on line 1, probe list name on line 2, both centred.
g.setColor(Color.BLACK);
g.drawString(store.name(), (getWidth() / 2) - (g.getFontMetrics().stringWidth(store.name()) / 2), g.getFontMetrics().getHeight());
g.drawString(list.name(), (getWidth() / 2) - (g.getFontMetrics().stringWidth(list.name()) / 2), g.getFontMetrics().getHeight() * 2);
// X axis
// g.drawLine(10, getHeight()-20, getWidth()-10, getHeight()-20);
// Y axis
// g.drawLine(9, 10, 9, getHeight()-20);
// X labels
// X-axis label: absolute base positions when all probes share one length,
// otherwise a relative-position label.
String xLabel;
if (sameLength) {
xLabel = "Bases 1 - " + fixedLength;
} else {
xLabel = "Relative distance across probe";
}
g.drawString(xLabel, (getWidth() / 2) - (metrics.stringWidth(xLabel) / 2), getHeight() - 5);
// Top of the plottable area; each drawn band spans lastY..thisY.
int lastY = 30;
// If there are multiple lines with the same y value then we average across
// them. This variable keeps the total of the counts and the running count
// keeps the number of samples merged so we can average out from that.
// NOTE(review): despite the comment above, the else-branch below OVERWRITES
// runningFloats rather than accumulating into it, so the correlation pick
// compares candidates against the last trace added, not an average — confirm
// whether this is intended.
float[] runningFloats = new float[smoothedCounts[0].length];
Vector<float[]> possibleLineData = new Vector<float[]>();
// Now go through the various probes
for (int p = 0; p < smoothedCounts.length; p++) {
int thisY = getY(p);
// When the y position advances, flush the traces cached for the previous
// row: pick the most representative one and paint it as a colour band.
if (thisY != lastY && possibleLineData.size() > 0) {
// System.out.println("Drawing cache from "+lastY+" to "+thisY);
// We need to work out which data we're going to plot. To keep the scaling looking right
// the way we do this is to correlate each of the possible datasets to the average trace we
// produced and take the most representative one.
float[] dataToPlot;
// Take the easy case first
if (possibleLineData.size() == 1) {
dataToPlot = possibleLineData.elementAt(0);
} else {
// We have to work out the best one to take.
dataToPlot = possibleLineData.elementAt(0);
// Start with a value we'll have to be better than!
// Pearson r is in [-1, 1] so -2 guarantees the first valid
// correlation wins.
float bestR = -2;
for (int i = 0; i < possibleLineData.size(); i++) {
try {
float thisR = PearsonCorrelation.calculateCorrelation(runningFloats, possibleLineData.elementAt(i));
if (thisR > bestR) {
bestR = thisR;
dataToPlot = possibleLineData.elementAt(i);
}
} catch (SeqMonkException sme) {
// Best-effort: a trace whose correlation can't be computed is
// simply never selected.
}
}
}
// Paint the chosen trace left-to-right, one rectangle per x step.
int lastX = 10;
for (int c = 0; c < dataToPlot.length; c++) {
// Map the intensity to a colour index, clamping at the top of the
// palette and optionally using a log scale.
Color color;
if ((dataToPlot[c]) > maxIntensity) {
color = colors[255];
} else if (prefs.useLogScale) {
int index = (int) ((255 * Math.log((dataToPlot[c]) + 1)) / Math.log(maxIntensity + 1));
// System.out.println("Log index of "+(dataToPlot[c]/runningCount)+" vs "+maxCount+" is "+index);
color = colors[index];
} else {
int index = (int) (255 * (dataToPlot[c]) / maxIntensity);
// System.out.println("Index of "+(dataToPlot[c]/runningCount)+" vs "+maxCount+" is "+index);
color = colors[index];
}
g.setColor(color);
// 10px margins on each side; spread the trace across the remaining width.
int thisX = 10;
double xProportion = (double) c / (smoothedCounts[p].length - 1);
thisX += (int) (xProportion * (getWidth() - 20));
if (thisX == lastX) {
// System.out.println("Skipping position with no space");
continue;
}
g.fillRect(lastX, lastY, thisX - lastX, thisY - lastY);
lastX = thisX;
}
// Reset the cache for the next row.
lastY = thisY;
for (int i = 0; i < runningFloats.length; i++) {
runningFloats[i] = 0;
}
possibleLineData.clear();
} else {
// Same row as before: cache this trace as a candidate for the row.
for (int i = 0; i < runningFloats.length; i++) {
runningFloats[i] = smoothedCounts[p][i];
}
possibleLineData.add(smoothedCounts[p]);
}
}
// TODO: Print the left over probe?
}
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews: class HeatmapMatrix, method run.
// Scans all HiC read pairs against the probe list, counts probe-pair
// interactions, scores them (obs/exp strength and p-value), applies a
// multiple-testing correction, and publishes the surviving interactions to
// the registered progress listeners.
public void run() {
// First calculate chromosome offsets so we can access the
// relevant probes more quickly when we come to adding other
// end data.
// Maps chromosome name -> index of the first probe on that chromosome;
// probes are assumed to be ordered by chromosome.
Hashtable<String, Integer> offsets = new Hashtable<String, Integer>();
offsets.put(probes[0].probe.chromosome().name(), 0);
for (int i = 1; i < probes.length; i++) {
if (probes[i].probe.chromosome() != probes[i - 1].probe.chromosome()) {
offsets.put(probes[i].probe.chromosome().name(), i);
}
}
// We also need an initial list of total counts for all of our probes
// so we can do the O/E calculations. We can incorporate into this
// the ability to correct for linkage.
int[] totalCisCounts = new int[probes.length];
int[] totalTransCounts = new int[probes.length];
getTotalCounts(totalCisCounts, totalTransCounts);
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
// We'll make up an ever expanding list of interactions which we
// care about
Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
// We'll need to correct for the number of tests we perform, but we're also able
// to skip a load of tests based on the initial filters we supplied (non-statistical ones)
// so we're going to keep a record of how many valid tests we actually need to correct for.
//
// The worse case scenario is that we do every possible comparison (but only one way round).
// After some testing this proved to be a bad idea. We skipped so many tests where the
// interaction wasn't observed that we ended up hugely under-correcting our final p-values
// and making a load of false positive predictions. We can do this more correctly later,
// but for now we're either going to correct for every possible test, or for just every
// possible cis test if we're excluding trans hits.
long numberOfTestsPerformed = 0;
if (initialMaxDistance > 0) {
// We're just counting cis interactions
// Sum n*(n-1)/2 over each chromosome's run of probes.
int currentChrCount = 1;
Chromosome currentChr = probes[0].probe.chromosome();
for (int p = 1; p < probes.length; p++) {
if (probes[p].probe.chromosome() == currentChr) {
++currentChrCount;
} else {
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
currentChrCount = 1;
currentChr = probes[p].probe.chromosome();
}
}
// Don't forget the final chromosome's run.
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
} else {
// All unordered probe pairs.
numberOfTestsPerformed = (probes.length * ((long) probes.length - 1)) / 2;
}
for (int p = 0; p < probes.length; p++) {
// Report progress (and poll for cancellation) every 100 probes.
if (p % 100 == 0) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
}
// System.err.println("Getting interactions for "+probes[p].probe);
// We temporarily store the interactions with this probe which means we
// can make a decision about which ones we're keeping as we go along which
// drastically reduces the amount we need to store.
// This is going to be the data structure which holds the information
// on the pairs of probes which have any interactions. The key is the
// index position of the pair (x+(y*no of probes), and the value is the
// number of times this interaction was seen
Hashtable<Long, Integer> interactionCounts = new Hashtable<Long, Integer>();
HiCHitCollection hiCHits = dataSet.getHiCReadsForProbe(probes[p].probe);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
// System.err.println("Found hits on "+chromosomeNames.length+" chromosomes");
CHROM: for (int c = 0; c < chromosomeNames.length; c++) {
// Skip all trans reads if there is a max distance set.
if (initialMaxDistance > 0) {
if (!probes[p].probe.chromosome().name().equals(chromosomeNames[c])) {
continue;
}
}
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
SequenceRead.sort(hitReads);
// We're going to start when we hit the probes for this chromosome
if (!offsets.containsKey(chromosomeNames[c])) {
// System.err.println("No probes on chr "+chromosomeNames[c]+" skipping");
continue;
}
// lastIndex advances as hits are matched, so later (sorted) reads
// never rescan probes we've already passed.
int lastIndex = offsets.get(chromosomeNames[c]);
READ: for (int o = 0; o < hitReads.length; o++) {
if (cancel) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressCancelled();
}
return;
}
// Check against the relevant reads
int startIndex = lastIndex;
for (int x = startIndex; x < probes.length; x++) {
// Check that we're on the right chromosome
if (!probes[x].probe.chromosome().name().equals(chromosomeNames[c])) {
// System.err.println("Stopping searching at position "+x+" since we changed to chr "+probes[x].probe.chromosome());
continue CHROM;
}
// Probes from here on start beyond this read's end; move on
if (probes[x].probe.start() > SequenceRead.end(hitReads[o])) {
// to the next read.
continue READ;
}
if (SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
// We overlap with this probe
// System.err.println("Found hit to probe "+probes[x].probe);
// For probe list probes we could have the same probe several times
// so we can add the read to all of the probes which are the same
// from this point.
// We can skip over interactions where the matched index is less than our index
// since we'll duplicate this later anyway.
// Pair key encodes (p, x) as p + x*probes.length in a long.
long interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, 0);
}
// else {
// System.err.println("Skipping earlier probe hit");
// }
}
// Add one to our counts for this interaction
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
}
// Since we found our first hit here we can start looking from here next time.
lastIndex = x;
// this one.
// Walk forward over duplicate/overlapping probes, crediting
// each of them with this read too. Note this reuses x, so the
// outer probe loop resumes from wherever this walk stops.
for (x = x + 1; x < probes.length; x++) {
if (probes[x].probe.chromosome() != probes[x - 1].probe.chromosome()) {
// We've reached the end for this chromosome
break;
}
if (probes[x].probe == probes[x - 1].probe || SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
if (probes[p].index >= probes[x].index)
continue;
interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
// Cap the per-probe table size to bound memory use.
if (interactionCounts.size() >= MAX_INTERACTIONS) {
continue READ;
}
interactionCounts.put(interactionKey, 0);
}
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
} else {
break;
}
}
continue READ;
}
// End if overlaps
}
// End check each probe
}
// End each hit read
}
// End each hit chromosome
// We can now go through the interactions we saw and decide if we
// want to keep any of them.
HiCInteractionStrengthCalculator strengthCalc = new HiCInteractionStrengthCalculator(dataSet, correctLinkage);
Enumeration<Long> en = interactionCounts.keys();
while (en.hasMoreElements()) {
long index = en.nextElement();
int absoluteValue = interactionCounts.get(index);
// Apply the non-statistical pre-filters first: minimum raw count...
if (absoluteValue < initialMinAbsolute)
continue;
// Decode the pair key back into its two probes.
ProbeWithIndex probe1 = probes[(int) (index % probes.length)];
ProbeWithIndex probe2 = probes[(int) (index / probes.length)];
// We calculate the obs/exp based on the total pair count
// and the relative counts at each end of the interaction
// Do the interaction strength calculation
strengthCalc.calculateInteraction(absoluteValue, totalCisCounts[probe1.index], totalTransCounts[probe1.index], totalCisCounts[probe2.index], totalTransCounts[probe2.index], probe1.probe, probe2.probe);
// We're not counting this here any more. We work out theoretical numbers at the top instead
// ++numberOfTestsPerformed;
float obsExp = (float) strengthCalc.obsExp();
float pValue = (float) strengthCalc.rawPValue();
// interaction objects we have to create.
// ...then minimum obs/exp strength...
if (obsExp < initialMinStrength)
continue;
// This isn't the final p-value check, but if the raw pvalue fails then the corrected value is never going to pass.
if (initialMaxSignificance < 1 && pValue > initialMaxSignificance)
continue;
InteractionProbePair interaction = new InteractionProbePair(probe1.probe, probe1.index, probe2.probe, probe2.index, obsExp, absoluteValue);
interaction.setSignificance(pValue);
// We check against the current list of filters
if (passesCurrentFilters(interaction)) {
// See if the strength of any of the interactions is bigger than our current max
if (obsExp > maxValue)
maxValue = obsExp;
// Warn (rather than grow unboundedly) once the global cap is hit.
if (filteredInteractions.size() >= MAX_INTERACTIONS) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressWarningReceived(new SeqMonkException("More than " + MAX_INTERACTIONS + " interactions passed the filters. Showing as many as I can"));
}
} else {
filteredInteractions.add(interaction);
}
}
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" initially filtered interactions");
// Apply multiple testing correction
// Sort ascending by raw p-value ("signficance" is the API's spelling).
Arrays.sort(interactions, new Comparator<InteractionProbePair>() {
public int compare(InteractionProbePair o1, InteractionProbePair o2) {
return Float.compare(o1.signficance(), o2.signficance());
}
});
for (int i = 0; i < interactions.length; i++) {
// Benjamini-Hochberg style scaling: p * (total tests / rank).
interactions[i].setSignificance(interactions[i].signficance() * ((float) (numberOfTestsPerformed) / (i + 1)));
// We can't allow corrected p-values to end up in a different order to the uncorrected ones
if (i > 0 && interactions[i].signficance() < interactions[i - 1].signficance()) {
interactions[i].setSignificance(interactions[i - 1].signficance());
}
}
// Now re-filter the interactions to get those which finally pass the p-value filter
filteredInteractions.clear();
for (int i = 0; i < interactions.length; i++) {
if (initialMaxSignificance >= 1 || interactions[i].signficance() < initialMaxSignificance) {
filteredInteractions.add(interactions[i]);
} else {
// Sorted order means everything after the first failure also fails.
break;
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" interactions after multiple testing correction");
// We've processed all of the reads and the matrix is complete. We can therefore
// draw the plot
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressComplete("heatmap", null);
}
}
Aggregations