use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class ValuesFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
for (int p = 0; p < probes.length; p++) {
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
int count = 0;
for (int s = 0; s < stores.length; s++) {
float d = 0;
if (!stores[s].hasValueForProbe(probes[p]))
continue;
try {
d = stores[s].getValueForProbe(probes[p]);
} catch (SeqMonkException e) {
e.printStackTrace();
continue;
}
// NaN values always fail the filter.
if (Float.isNaN(d))
continue;
// Now we have the value we need to know if it passes the test
if (upperLimit != null)
if (d > upperLimit)
continue;
if (lowerLimit != null)
if (d < lowerLimit)
continue;
// This one passes, we can add it to the count
++count;
}
// probe to the probe set.
switch(limitType) {
case EXACTLY:
if (count == chosenNumber)
newList.addProbe(probes[p], null);
break;
case AT_LEAST:
if (count >= chosenNumber)
newList.addProbe(probes[p], null);
break;
case NO_MORE_THAN:
if (count <= chosenNumber)
newList.addProbe(probes[p], null);
break;
}
}
newList.setName("Value between " + lowerLimit + "-" + upperLimit);
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class WindowedReplicateStatsFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<ProbeGroupTTestValue> newListProbesVector = new Vector<ProbeGroupTTestValue>();
for (int c = 0; c < chromosomes.length; c++) {
progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
ProbeGroupGenerator gen = null;
if (windowType == DISTANCE_WINDOW) {
gen = new ProbeWindowGenerator(probes, windowSize);
} else if (windowType == CONSECUTIVE_WINDOW) {
gen = new ConsecutiveProbeGenerator(probes, windowSize);
} else if (windowType == FEATURE_WINDOW) {
gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
}
while (true) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
Probe[] theseProbes = gen.nextSet();
if (theseProbes == null) {
// System.err.println("List of probes was null");
break;
}
// We need at least 3 probes in a set to calculate a p-value
if (theseProbes.length < 3) {
// System.err.println("Only "+theseProbes.length+" probes in the set");
continue;
}
double[][] values = new double[stores.length][];
for (int j = 0; j < stores.length; j++) {
if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
values[j] = new double[theseProbes.length * ((ReplicateSet) stores[j]).dataStores().length];
} else {
values[j] = new double[theseProbes.length];
}
}
for (int j = 0; j < stores.length; j++) {
int index = 0;
for (int i = 0; i < theseProbes.length; i++) {
try {
if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
DataStore[] localStores = ((ReplicateSet) stores[j]).dataStores();
for (int l = 0; l < localStores.length; l++) {
values[j][index] = localStores[l].getValueForProbe(theseProbes[i]);
index++;
}
} else {
values[j][index] = stores[j].getValueForProbe(theseProbes[i]);
index++;
}
} catch (SeqMonkException e) {
}
}
if (index != values[j].length) {
throw new IllegalStateException("Didn't fill all values total=" + values[j].length + " index=" + index);
}
}
double pValue = 0;
try {
if (stores.length == 1) {
pValue = TTest.calculatePValue(values[0], 0);
} else if (stores.length == 2) {
pValue = TTest.calculatePValue(values[0], values[1]);
} else {
pValue = AnovaTest.calculatePValue(values);
}
} catch (SeqMonkException e) {
throw new IllegalStateException(e);
}
newListProbesVector.add(new ProbeGroupTTestValue(theseProbes, pValue));
}
}
ProbeGroupTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeGroupTTestValue[0]);
// Do the multi-testing correction if necessary
if (multiTest) {
BenjHochFDR.calculateQValues(newListProbes);
}
ProbeList newList;
// We need to handle duplicate hits internally since probe lists can't do
// this themselves any more.
Hashtable<Probe, Float> newListTemp = new Hashtable<Probe, Float>();
if (multiTest) {
newList = new ProbeList(startingList, "", "", "Q-value");
for (int i = 0; i < newListProbes.length; i++) {
if (newListProbes[i].q <= cutoff) {
Probe[] passedProbes = newListProbes[i].probes;
for (int p = 0; p < passedProbes.length; p++) {
if (newListTemp.containsKey(passedProbes[p])) {
// We always give a probe the lowest possible q-value
if (newListTemp.get(passedProbes[p]) <= newListProbes[i].q) {
continue;
}
}
newListTemp.put(passedProbes[p], (float) newListProbes[i].q);
}
}
}
} else {
newList = new ProbeList(startingList, "", "", "P-value");
for (int i = 0; i < newListProbes.length; i++) {
if (newListProbes[i].p <= cutoff) {
Probe[] passedProbes = newListProbes[i].probes;
for (int p = 0; p < passedProbes.length; p++) {
if (newListTemp.containsKey(passedProbes[p])) {
// We always give a probe the lowest possible p-value
if (newListTemp.get(passedProbes[p]) <= newListProbes[i].p) {
continue;
}
}
newListTemp.put(passedProbes[p], (float) newListProbes[i].p);
}
}
}
}
// Add the cached hits to the new list
Enumeration<Probe> en = newListTemp.keys();
while (en.hasMoreElements()) {
Probe p = en.nextElement();
newList.addProbe(p, newListTemp.get(p));
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class WindowedValuesFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
for (int c = 0; c < chromosomes.length; c++) {
HashSet<Probe> passedProbes = new HashSet<Probe>();
progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
ProbeGroupGenerator gen = null;
if (windowType == DISTANCE_WINDOW) {
gen = new ProbeWindowGenerator(probes, windowSize);
} else if (windowType == CONSECUTIVE_WINDOW) {
gen = new ConsecutiveProbeGenerator(probes, windowSize);
} else if (windowType == FEATURE_WINDOW) {
gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
} else {
progressExceptionReceived(new SeqMonkException("No window type known with number " + windowType));
return;
}
while (true) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
Probe[] theseProbes = gen.nextSet();
if (theseProbes == null) {
break;
}
int count = 0;
for (int s = 0; s < stores.length; s++) {
double totalValue = 0;
for (int i = 0; i < theseProbes.length; i++) {
// Get the values for the probes in this set
try {
totalValue += stores[s].getValueForProbe(theseProbes[i]);
} catch (SeqMonkException e) {
}
}
totalValue /= theseProbes.length;
// Now we have the value we need to know if it passes the test
if (upperLimit != null)
if (totalValue > upperLimit.doubleValue())
continue;
if (lowerLimit != null)
if (totalValue < lowerLimit.doubleValue())
continue;
// This one passes, we can add it to the count
++count;
}
// probe to the probe set.
switch(limitType) {
case EXACTLY:
if (count == storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
case AT_LEAST:
if (count >= storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
case NO_MORE_THAN:
if (count <= storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
}
}
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class AntisenseTranscriptionPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
double pValue = optionsPanel.pValue();
QuantitationStrandType readFilter = optionsPanel.readFilter();
long[] senseCounts = new long[data.length];
long[] antisenseCounts = new long[data.length];
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Getting total antisense rate for chr" + chrs[c].name(), c, chrs.length * 2);
Feature[] features = getValidFeatures(chrs[c]);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
probes.add(p);
for (int d = 0; d < data.length; d++) {
long[] reads = data[d].getReadsForProbe(p);
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(p, reads[r])) {
senseCounts[d] += SequenceRead.length(reads[r]);
} else {
antisenseCounts[d] += SequenceRead.length(reads[r]);
}
}
}
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we can work out the overall antisense rate
double[] antisenseProbability = new double[data.length];
for (int d = 0; d < data.length; d++) {
System.err.println("Antisense counts are " + antisenseCounts[d] + " sense counts are " + senseCounts[d]);
antisenseProbability[d] = antisenseCounts[d] / (double) (antisenseCounts[d] + senseCounts[d]);
System.err.println("Antisense probability for " + data[d].name() + " is " + antisenseProbability[d]);
}
// Now we can quantitate each individual feature and test for whether it is significantly
// showing antisense expression
ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
for (int d = 0; d < data.length; d++) {
significantProbes.add(new Vector<ProbeTTestValue>());
}
int[] readLengths = new int[data.length];
for (int d = 0; d < readLengths.length; d++) {
readLengths[d] = data[d].getMaxReadLength();
System.err.println("For " + data[d].name() + " max read len is " + readLengths[d]);
}
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
Probe[] thisChrProbes = collection().probeSet().getProbesForChromosome(chrs[c]);
for (int p = 0; p < thisChrProbes.length; p++) {
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
long senseCount = 0;
long antisenseCount = 0;
long[] reads = data[d].getReadsForProbe(thisChrProbes[p]);
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(thisChrProbes[p], reads[r])) {
// TODO: Just count overlap?
senseCount += SequenceRead.length(reads[r]);
} else {
antisenseCount += SequenceRead.length(reads[r]);
}
}
// if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
// System.err.println("Raw base counts are sense="+senseCount+" anti="+antisenseCount+" from "+reads.length+" reads");
// }
int senseReads = (int) (senseCount / readLengths[d]);
int antisenseReads = (int) (antisenseCount / readLengths[d]);
// if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
// System.err.println("Raw read counts are sense="+senseReads+" anti="+antisenseReads+" from "+reads.length+" reads");
// }
BinomialDistribution bd = new BinomialDistribution(senseReads + antisenseReads, antisenseProbability[d]);
// Since the binomial distribution gives the probability of getting a value higher than
// this we need to subtract one so we get the probability of this or higher.
double thisPValue = 1 - bd.cumulativeProbability(antisenseReads - 1);
if (antisenseReads == 0)
thisPValue = 1;
// We have to add all results at this stage so we don't mess up the multiple
// testing correction later on.
significantProbes.get(d).add(new ProbeTTestValue(thisChrProbes[p], thisPValue));
double expected = ((senseReads + antisenseReads) * antisenseProbability[d]);
if (expected < 1)
expected = 1;
float obsExp = antisenseReads / (float) expected;
data[d].setValueForProbe(thisChrProbes[p], obsExp);
}
}
}
// filtering those which pass our p-value cutoff
for (int d = 0; d < data.length; d++) {
ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
BenjHochFDR.calculateQValues(ttestResults);
ProbeList newList = new ProbeList(collection().probeSet(), "Antisense < " + pValue + " in " + data[d].name(), "Probes showing significant antisense transcription from a basal level of " + antisenseProbability[d] + " with a cutoff of " + pValue, "FDR");
for (int i = 0; i < ttestResults.length; i++) {
if (ttestResults[i].probe.name().equals("RP4-798A10.2")) {
System.err.println("Raw p=" + ttestResults[i].p + " q=" + ttestResults[i].q);
}
if (ttestResults[i].q < pValue) {
newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Antisense transcription pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
if (optionsPanel.ignoreOverlaps()) {
quantitationDescription.append(". Ignoring existing overlaps");
}
quantitationDescription.append(". P-value cutoff was ");
quantitationDescription.append(optionsPanel.pValue());
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class ProbeLengthFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
for (int p = 0; p < probes.length; p++) {
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (upperLimit != null)
if (probes[p].length() > upperLimit)
continue;
if (lowerLimit != null)
if (probes[p].length() < lowerLimit)
continue;
newList.addProbe(probes[p], null);
}
newList.setName("Length between " + lowerLimit + "-" + upperLimit);
filterFinished(newList);
}
Aggregations