use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ProbeNameFilter method generateProbeList.
/**
 * Builds the list of probes whose (optionally normalised) name matches one
 * of the query lines typed into the options panel, then hands the result to
 * {@code filterFinished}.
 *
 * Queries and probe names are normalised by the same helper so that the
 * case-folding and suffix/transcript stripping options are always applied
 * identically to both sides of the comparison.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    queries = optionsPanel.queriesArea.getText().split("\n");
    stripSuffixes = optionsPanel.stripSuffixesBox.isSelected();
    stripTranscript = optionsPanel.stripTranscriptBox.isSelected();
    caseInsensitive = optionsPanel.caseInsensitiveBox.isSelected();

    ProbeList passedProbes = new ProbeList(startingList, "", "", startingList.getValueName());

    // Build the set of normalised query strings we're going to check against.
    HashSet<String> queryStrings = new HashSet<String>();
    for (int q = 0; q < queries.length; q++) {
        String query = queries[q].trim();
        // A blank line is not a query; adding "" would match any probe
        // whose name normalises to the empty string.
        if (query.length() == 0) {
            continue;
        }
        queryStrings.add(normaliseName(query));
    }

    Probe[] probes = startingList.getAllProbes();

    // Step through the probes looking for a match to the stored query names.
    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Filtering probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        if (queryStrings.contains(normaliseName(probes[p].name()))) {
            passedProbes.addProbe(probes[p], startingList.getValueForProbe(probes[p]));
        }
    }

    filterFinished(passedProbes);
}

/**
 * Applies the currently selected matching options to a name.
 *
 * @param name the raw query or probe name
 * @return the name lower-cased (if case-insensitive matching is on), with
 *         the _upstream/_downstream/_gene suffixes removed (if suffix
 *         stripping is on) and a trailing "-ddd" removed (if transcript
 *         stripping is on), applied in that order
 */
private String normaliseName(String name) {
    if (caseInsensitive) {
        // Use a fixed locale so that case folding does not depend on the
        // user's default locale (e.g. Turkish maps "I" to a dotless "ı").
        name = name.toLowerCase(java.util.Locale.ROOT);
    }
    if (stripSuffixes) {
        name = name.replaceFirst("_upstream$", "").replaceFirst("_downstream$", "").replaceFirst("_gene$", "");
    }
    if (stripTranscript) {
        name = name.replaceFirst("-\\d\\d\\d$", "");
    }
    return name;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ProportionOfLibraryStatisticsFilter method generateProbeList.
/**
 * Compares every "from" store against every "to" store with Fisher's exact
 * test, keeps the lowest p-value seen for each probe, optionally applies
 * Benjamini-Hochberg correction, and passes the probes below
 * {@code pValueLimit} to {@code filterFinished}.
 *
 * For each pair of stores the 2x2 table for a probe is built from the
 * probe's value in each store and the remainder of that store's total over
 * all probes in the starting list.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    fromStores = optionsPanel.fromStores();
    toStores = optionsPanel.toStores();

    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    testForIncreasesOnly = optionsPanel.increasesOnlyBox.isSelected();

    Probe[] probes = startingList.getAllProbes();

    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");

    // We'll build up a set of p-values as we go along, starting from the
    // least significant possible value.
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }

    // Put something in the progress whilst we're preparing the comparison.
    progressUpdated("Generating background model", 0, 1);

    try {
        for (int f = 0; f < fromStores.length; f++) {
            for (int t = 0; t < toStores.length; t++) {
                progressUpdated("Comparing " + fromStores[f] + " to " + toStores[t], 0, 1);

                // Work out the total counts in the probes we're using.
                int fromTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    fromTotalCount += (int) fromStores[f].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }

                int toTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    toTotalCount += (int) toStores[t].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }

                for (int p = 0; p < probes.length; p++) {
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }

                    int n11 = (int) fromStores[f].getValueForProbe(probes[p]);
                    int n12 = fromTotalCount - n11;
                    // Must use the same "to" store (index t) that
                    // toTotalCount was computed from; indexing by f mixed
                    // counts from one store with the total of another.
                    int n21 = (int) toStores[t].getValueForProbe(probes[p]);
                    int n22 = toTotalCount - n21;

                    double[] pValues = FishersExactTest.fishersExactTest(n11, n12, n21, n22);

                    // The values in the array are 0=2-sided p-value, 1=left-sided p-value, 2=right-sided p-value
                    double candidate = testForIncreasesOnly ? pValues[1] : pValues[0];
                    if (candidate < lowestPValues[p]) {
                        lowestPValues[p] = (float) candidate;
                    }
                }
            }
        }
    } catch (SeqMonkException sme) {
        progressExceptionReceived(sme);
        // The p-values are incomplete, so don't go on to report a result.
        return;
    }

    if (applyMultipleTestingCorrection) {
        ProbeTTestValue[] statsValues = new ProbeTTestValue[probes.length];
        for (int i = 0; i < probes.length; i++) {
            statsValues[i] = new ProbeTTestValue(probes[i], lowestPValues[i]);
        }

        BenjHochFDR.calculateQValues(statsValues);

        for (int i = 0; i < statsValues.length; i++) {
            if (statsValues[i].q < pValueLimit) {
                newList.addProbe(statsValues[i].probe, (float) statsValues[i].q);
            }
        }
    } else {
        for (int i = 0; i < lowestPValues.length; i++) {
            if (lowestPValues[i] < pValueLimit) {
                newList.addProbe(probes[i], lowestPValues[i]);
            }
        }
    }

    filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ReplicateSetStatsFilter method generateProbeList.
/**
 * Calculates a p-value for every probe in the starting list from the
 * values of the data stores in each replicate set, optionally converts the
 * p-values to q-values with {@code BenjHochFDR}, and passes the probes
 * whose p/q value is at or below {@code cutoff} to {@code filterFinished}.
 *
 * With one replicate set the values are passed to
 * {@code TTest.calculatePValue} together with 0, with two sets both value
 * arrays are passed to {@code TTest.calculatePValue}, and with more than
 * two sets {@code AnovaTest.calculatePValue} is used.
 *
 * @throws IllegalStateException if the statistical test itself fails
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();

    // Make up the list of DataStores in each replicate set
    DataStore[][] stores = new DataStore[replicateSets.length][];
    for (int i = 0; i < replicateSets.length; i++) {
        stores[i] = replicateSets[i].dataStores();
    }

    Vector<ProbeTTestValue> newListProbesVector = new Vector<ProbeTTestValue>();

    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing probes on Chr" + chromosomes[c].name(), c, chromosomes.length);

        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);

        for (int p = 0; p < probes.length; p++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }

            double[][] values = new double[replicateSets.length][];
            for (int i = 0; i < replicateSets.length; i++) {
                values[i] = new double[stores[i].length];
                for (int j = 0; j < stores[i].length; j++) {
                    try {
                        values[i][j] = stores[i][j].getValueForProbe(probes[p]);
                    } catch (SeqMonkException e) {
                        // Previously this was swallowed, which left a
                        // spurious 0.0 in the data handed to the test.
                        // Report the failure instead of producing a
                        // misleading p-value.
                        progressExceptionReceived(e);
                        return;
                    }
                }
            }

            double pValue = 0;
            try {
                if (replicateSets.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (replicateSets.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }

            newListProbesVector.add(new ProbeTTestValue(probes[p], pValue));
        }
    }

    ProbeTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeTTestValue[0]);

    ProbeList newList;
    if (multiTest) {
        // Do the multi-testing correction, then filter on the q-value.
        BenjHochFDR.calculateQValues(newListProbes);

        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                newList.addProbe(newListProbes[i].probe, (float) newListProbes[i].q);
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                newList.addProbe(newListProbes[i].probe, (float) newListProbes[i].p);
            }
        }
    }

    filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ChiSquareFilterFrontBack method generateProbeList.
/*
 * Splits every probe at its middle into a front and a back half (swapped
 * for reverse-strand probes), counts the reads accepted by the selected
 * strand filter in each half for every store, and runs a chi-square test
 * on the resulting front/back table. Probes which pass the observation,
 * percentage-shift and significance limits are passed to filterFinished.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
protected void generateProbeList() {
    // Empty option fields fall back to their defaults.
    String stringencyText = options.stringencyField.getText();
    stringency = stringencyText.length() == 0 ? 0.05 : Double.parseDouble(stringencyText);

    String minObservationsText = options.minObservationsField.getText();
    minObservations = minObservationsText.length() == 0 ? 10 : Integer.parseInt(minObservationsText);

    String minDifferenceText = options.minDifferenceField.getText();
    minPercentShift = minDifferenceText.length() == 0 ? 10 : Integer.parseInt(minDifferenceText);

    QuantitationStrandType readFilter = (QuantitationStrandType) options.strandLimitBox.getSelectedItem();
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();

    // The stored value is a q-value when correcting, otherwise a p-value.
    String valueName = applyMultipleTestingCorrection ? "Q-value" : "P-value";
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", valueName);

    Probe[] probes = startingList.getAllProbes();
    int[][] frontBackCounts = new int[stores.length][2];

    // Candidate hits, kept with their raw p-values until correction is done.
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();

    nextProbe: for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        Probe whole = probes[p];

        // Build the two halves in genomic order, then decide which is the
        // front depending on the strand of the probe.
        Probe lowerHalf = new Probe(whole.chromosome(), whole.start(), whole.middle(), whole.strand());
        Probe upperHalf = new Probe(whole.chromosome(), whole.middle(), whole.end(), whole.strand());
        boolean onReverse = whole.strand() == Location.REVERSE;
        Probe frontProbe = onReverse ? upperHalf : lowerHalf;
        Probe backProbe = onReverse ? lowerHalf : upperHalf;

        // Count the usable reads in each half for every dataset.
        for (int d = 0; d < stores.length; d++) {
            long[] frontReads = stores[d].getReadsForProbe(frontProbe);
            long[] backReads = stores[d].getReadsForProbe(backProbe);

            int frontCount = 0;
            for (long read : frontReads) {
                if (readFilter.useRead(frontProbe, read)) {
                    frontCount++;
                }
            }

            int backCount = 0;
            for (long read : backReads) {
                if (readFilter.useRead(backProbe, read)) {
                    backCount++;
                }
            }

            frontBackCounts[d][0] = frontCount;
            frontBackCounts[d][1] = backCount;
        }

        // Check there are enough counts, and track the spread of the
        // front percentage across datasets.
        double minPercent = 0;
        double maxPercent = 0;
        for (int d = 0; d < stores.length; d++) {
            int front = frontBackCounts[d][0];
            int back = frontBackCounts[d][1];

            if (front < minObservations) {
                continue nextProbe;
            }

            double percent = (((double) front) / (front + back)) * 100;
            if (d == 0 || percent < minPercent) {
                minPercent = percent;
            }
            if (d == 0 || percent > maxPercent) {
                maxPercent = percent;
            }
        }

        if (maxPercent - minPercent < minPercentShift) {
            continue nextProbe;
        }

        // Enough data and enough difference: run the chi-square test and
        // remember the raw p-value.
        double pValue = ChiSquareTest.chiSquarePvalue(frontBackCounts);
        hits.add(new ProbeTTestValue(whole, pValue));
    }

    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        BenjHochFDR.calculateQValues(rawHits);
    }

    for (ProbeTTestValue hit : rawHits) {
        double significance = applyMultipleTestingCorrection ? hit.q : hit.p;
        if (significance < stringency) {
            newList.addProbe(hit.probe, (float) significance);
        }
    }

    filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class HeatmapMatrix method createProbeListsFromClusters.
/**
 * Creates a parent probe list containing every probe which falls in a
 * retained cluster, with one child list per retained cluster.
 *
 * The connected clusters at the current R-value are walked in order while
 * keeping a running total of their sizes. A cluster is skipped if its
 * starting position in that running order is below {@code startIndex}, and
 * the walk stops at the first cluster whose end position exceeds
 * {@code endIndex}. Clusters with fewer than {@code minClusterSize} indices,
 * or fewer than {@code minClusterSize} distinct probes, are skipped.
 *
 * @param minClusterSize the minimum number of distinct probes a cluster must
 *        contain to be turned into a list
 * @param startIndex the lowest cumulative index at which a cluster may start
 * @param endIndex the highest cumulative index at which a cluster may end
 * @return the parent list holding all cluster probes, or {@code null} if no
 *         cluster is available
 */
public ProbeList createProbeListsFromClusters(int minClusterSize, int startIndex, int endIndex) {
    if (cluster == null) {
        return null;
    }

    ClusterPair[] connectedClusters = cluster.getConnectedClusters(currentClusterRValue);

    ProbeList commonList = findCommonProbeListParent();

    // The description reports the R threshold used to cut the clusters
    // (previously the parent list object itself was concatenated here).
    ProbeList allClusterList = new ProbeList(commonList, "HiC Clusters", "HiC Clusters with R > " + currentClusterRValue, null);
    HashSet<Probe> allClusterProbes = new HashSet<Probe>();

    // Make up the same initial list of probes that the cluster indices
    // refer to: all probes of all lists, in list order.
    Vector<Probe> originallyOrderedProbes = new Vector<Probe>();
    for (int l = 0; l < probeLists.length; l++) {
        Probe[] theseProbes = probeLists[l].getAllProbes();
        for (int p = 0; p < theseProbes.length; p++) {
            originallyOrderedProbes.add(theseProbes[p]);
        }
    }

    int currentPosition = 0;

    for (int subListIndex = 0; subListIndex < connectedClusters.length; subListIndex++) {
        Integer[] indices = connectedClusters[subListIndex].getAllIndices();
        currentPosition += indices.length;

        // Only clusters lying wholly inside [startIndex, endIndex] are used.
        if (currentPosition - indices.length < startIndex) {
            continue;
        }
        if (currentPosition > endIndex) {
            break;
        }

        // We may still get rid of the list later if there are duplicates in it.
        if (indices.length < minClusterSize) {
            continue;
        }

        Probe[] theseProbes = new Probe[indices.length];
        for (int i = 0; i < theseProbes.length; i++) {
            theseProbes[i] = originallyOrderedProbes.elementAt(indices[i]);
        }

        // Sorting puts repeated occurrences of the same probe object next
        // to each other so they can be skipped by comparing neighbours.
        Arrays.sort(theseProbes);

        // Now find the non-redundant count
        int nonRedCount = 0;
        for (int i = 0; i < theseProbes.length; i++) {
            if (i == 0 || theseProbes[i] != theseProbes[i - 1]) {
                nonRedCount++;
            }
        }

        // There aren't enough different probes to keep this set.
        if (nonRedCount < minClusterSize) {
            continue;
        }

        ProbeList thisList = new ProbeList(allClusterList, "Cluster " + (subListIndex + 1), "HiC cluster list number " + (subListIndex + 1), "R-value");

        float rValue = connectedClusters[subListIndex].rValue();

        for (int i = 0; i < theseProbes.length; i++) {
            // Skip repeats of the previous probe within this cluster.
            if (i > 0 && theseProbes[i] == theseProbes[i - 1]) {
                continue;
            }

            thisList.addProbe(theseProbes[i], rValue);

            // A probe may appear in several clusters but is only added to
            // the parent list once.
            if (allClusterProbes.add(theseProbes[i])) {
                allClusterList.addProbe(theseProbes[i], null);
            }
        }
    }

    return allClusterList;
}
Aggregations