use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class AntisenseTranscriptionPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
double pValue = optionsPanel.pValue();
QuantitationStrandType readFilter = optionsPanel.readFilter();
long[] senseCounts = new long[data.length];
long[] antisenseCounts = new long[data.length];
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Getting total antisense rate for chr" + chrs[c].name(), c, chrs.length * 2);
Feature[] features = getValidFeatures(chrs[c]);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
probes.add(p);
for (int d = 0; d < data.length; d++) {
long[] reads = data[d].getReadsForProbe(p);
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(p, reads[r])) {
senseCounts[d] += SequenceRead.length(reads[r]);
} else {
antisenseCounts[d] += SequenceRead.length(reads[r]);
}
}
}
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we can work out the overall antisense rate
double[] antisenseProbability = new double[data.length];
for (int d = 0; d < data.length; d++) {
System.err.println("Antisense counts are " + antisenseCounts[d] + " sense counts are " + senseCounts[d]);
antisenseProbability[d] = antisenseCounts[d] / (double) (antisenseCounts[d] + senseCounts[d]);
System.err.println("Antisense probability for " + data[d].name() + " is " + antisenseProbability[d]);
}
// Now we can quantitate each individual feature and test for whether it is significantly
// showing antisense expression
ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
for (int d = 0; d < data.length; d++) {
significantProbes.add(new Vector<ProbeTTestValue>());
}
int[] readLengths = new int[data.length];
for (int d = 0; d < readLengths.length; d++) {
readLengths[d] = data[d].getMaxReadLength();
System.err.println("For " + data[d].name() + " max read len is " + readLengths[d]);
}
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
Probe[] thisChrProbes = collection().probeSet().getProbesForChromosome(chrs[c]);
for (int p = 0; p < thisChrProbes.length; p++) {
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
long senseCount = 0;
long antisenseCount = 0;
long[] reads = data[d].getReadsForProbe(thisChrProbes[p]);
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(thisChrProbes[p], reads[r])) {
// TODO: Just count overlap?
senseCount += SequenceRead.length(reads[r]);
} else {
antisenseCount += SequenceRead.length(reads[r]);
}
}
// if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
// System.err.println("Raw base counts are sense="+senseCount+" anti="+antisenseCount+" from "+reads.length+" reads");
// }
int senseReads = (int) (senseCount / readLengths[d]);
int antisenseReads = (int) (antisenseCount / readLengths[d]);
// if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
// System.err.println("Raw read counts are sense="+senseReads+" anti="+antisenseReads+" from "+reads.length+" reads");
// }
BinomialDistribution bd = new BinomialDistribution(senseReads + antisenseReads, antisenseProbability[d]);
// Since the binomial distribution gives the probability of getting a value higher than
// this we need to subtract one so we get the probability of this or higher.
double thisPValue = 1 - bd.cumulativeProbability(antisenseReads - 1);
if (antisenseReads == 0)
thisPValue = 1;
// We have to add all results at this stage so we don't mess up the multiple
// testing correction later on.
significantProbes.get(d).add(new ProbeTTestValue(thisChrProbes[p], thisPValue));
double expected = ((senseReads + antisenseReads) * antisenseProbability[d]);
if (expected < 1)
expected = 1;
float obsExp = antisenseReads / (float) expected;
data[d].setValueForProbe(thisChrProbes[p], obsExp);
}
}
}
// filtering those which pass our p-value cutoff
for (int d = 0; d < data.length; d++) {
ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
BenjHochFDR.calculateQValues(ttestResults);
ProbeList newList = new ProbeList(collection().probeSet(), "Antisense < " + pValue + " in " + data[d].name(), "Probes showing significant antisense transcription from a basal level of " + antisenseProbability[d] + " with a cutoff of " + pValue, "FDR");
for (int i = 0; i < ttestResults.length; i++) {
if (ttestResults[i].probe.name().equals("RP4-798A10.2")) {
System.err.println("Raw p=" + ttestResults[i].p + " q=" + ttestResults[i].q);
}
if (ttestResults[i].q < pValue) {
newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Antisense transcription pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
if (optionsPanel.ignoreOverlaps()) {
quantitationDescription.append(". Ignoring existing overlaps");
}
quantitationDescription.append(". P-value cutoff was ");
quantitationDescription.append(optionsPanel.pValue());
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class IntronRegressionPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
int minDensity = optionsPanel.minDensity();
int minLength = optionsPanel.minLength();
double maxPValue = optionsPanel.maxPValue();
int binSize = optionsPanel.measurementBinSize();
QuantitationStrandType readFilter = optionsPanel.readFilter();
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
Vector<Probe> probesForThisChromosome = new Vector<Probe>();
progressUpdated("Making probes", c, chrs.length);
Feature[] features = getValidFeatures(chrs[c]);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
// There are no introns here
if (!(features[f].location() instanceof SplitLocation))
continue;
Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
// TODO: Reverse the subLocations if its a reverse feature
for (int intron = 1; intron < subLocations.length; intron++) {
int start = subLocations[intron - 1].end();
int end = subLocations[intron].start();
if ((end - start) + 1 < minLength) {
// This intron is too short.
continue;
}
// TODO: We could throw away any probes which didn't have enough reads in any feature
Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
probesForThisChromosome.add(p);
}
}
// Now we can deduplicate the probes for this chromosome and add them to the main collection
Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
Arrays.sort(dupProbes);
for (int p = 0; p < dupProbes.length; p++) {
if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
continue;
probes.add(dupProbes[p]);
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we go back through the probes and quantitate them
for (int p = 0; p < allProbes.length; p++) {
if (cancel) {
progressCancelled();
return;
}
if (p % 1000 == 0) {
progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
}
for (int d = 0; d < data.length; d++) {
long[] reads = data[d].getReadsForProbe(allProbes[p]);
int[] countsPerSite = new int[allProbes[p].length()];
int usableCounts = 0;
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(allProbes[p], reads[r])) {
++usableCounts;
for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
++countsPerSite[pos];
}
}
}
if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
// We're going to do a linear regression rather than a correlation
// We're analysing in bins so we'll work out the bin counts and
// add them dynamically to the regression.
SimpleRegression regression = new SimpleRegression();
int binCount = 0;
for (int i = 0; i < countsPerSite.length; i++) {
if (i > 0 && i % binSize == 0) {
regression.addData(i, binCount);
binCount = 0;
}
binCount += countsPerSite[i];
}
float slope = (float) (regression.getSlope() * 1000000);
double pValue = regression.getSignificance();
if (allProbes[p].strand() == Location.REVERSE) {
slope = 0 - slope;
}
if (pValue <= maxPValue) {
data[d].setValueForProbe(allProbes[p], slope);
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Intron regression pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
quantitationDescription.append(". Min intron length was ");
quantitationDescription.append(minLength);
quantitationDescription.append(". Min read density was ");
quantitationDescription.append(minDensity);
quantitationDescription.append(". Max slope p-value was ");
quantitationDescription.append(maxPValue);
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class ProbeWindowGenerator method nextSet.
/**
* Provides the next set of probe from this generator
*
* @return A list of probes in the next set.
*/
public Probe[] nextSet() {
if (startPos >= probes.length) {
return null;
}
int windowEnd = probes[startPos].start() + windowSize;
v.removeAllElements();
v.add(probes[startPos]);
Chromosome c = probes[startPos].chromosome();
for (int i = startPos + 1; i < probes.length; i++) {
if (probes[i].chromosome() != c)
break;
if (probes[i].start() > windowEnd)
break;
if (probes[i].end() <= windowEnd) {
v.add(probes[i]);
}
}
++startPos;
return v.toArray(new Probe[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HiCLengthHistogramPlot method getData.
/**
* Gets the data.
*
* @param pl the pl
* @return the data
*/
private float[] getData() {
Vector<Integer> data = new Vector<Integer>();
// If there is a probeset then we get pairs for the current probe list
if (probes != null) {
for (int p = 0; p < probes.length; p++) {
HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
for (int r = 0; r < sourcePositions.length; r++) {
// Don't enter every pair twice
if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
continue;
data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
}
}
} else {
Chromosome[] chrs = SeqMonkApplication.getInstance().dataCollection().genome().getAllChromosomes();
for (int c1 = 0; c1 < chrs.length; c1++) {
HiCHitCollection hiCHits = store.getHiCReadsForChromosome(chrs[c1]);
long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
for (int r = 0; r < sourcePositions.length; r++) {
// Don't enter every pair twice
if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
continue;
data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
}
}
}
// Convert to float [] for the return array
float[] returnData = new float[data.size()];
int index = 0;
Enumeration<Integer> en = data.elements();
while (en.hasMoreElements()) {
returnData[index] = en.nextElement().floatValue();
index++;
}
data.clear();
return returnData;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HeatmapGenomeWindow method actionPerformed.
/* (non-Javadoc)
* @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
*/
public void actionPerformed(ActionEvent ae) {
if (ae.getActionCommand().equals("close")) {
setVisible(false);
dispose();
} else if (ae.getActionCommand().equals("save_image")) {
ImageSaver.saveImage(heatmapPanel);
} else if (ae.getActionCommand().equals("cluster")) {
if (clusterButton.getText().equals("Cluster Interactions")) {
// First we need to make up an interaction cluster matrix which we
// can then feed to the generic hierarchical clustering program
InteractionClusterMatrix clusterMatrix = new InteractionClusterMatrix(matrix.filteredInteractions(), matrix.probeCount());
clusterMatrix.addListener(new ProgressDialog(this, "HiC Interaction Clustering", clusterMatrix));
clusterMatrix.addListener(this);
clusterMatrix.startCorrelating();
clusterButton.setEnabled(false);
} else {
matrix.setCluster(null);
saveClustersButton.setEnabled(false);
clusterButton.setText("Cluster Interactions");
}
} else if (ae.getActionCommand().equals("save_probes")) {
ProbeList newList = matrix.createProbeListFromCurrentInteractions();
// Ask for a name for the list
String groupName = null;
while (true) {
groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + newList.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, newList.name());
if (groupName == null) {
// Since the list will automatically have been added to
// the ProbeList tree we actively need to delete it if
// they choose to cancel at this point.
newList.delete();
// They cancelled
return;
}
if (groupName.length() == 0)
// Try again
continue;
break;
}
newList.setName(groupName);
} else if (ae.getActionCommand().equals("save_clusters")) {
if (matrix.cluster() == null)
return;
// Get a limit for how many probes per cluster
String howManyProbes = JOptionPane.showInputDialog(this, "Minimum number of probes per cluster", "10");
if (howManyProbes == null)
return;
int minProbes;
try {
minProbes = Integer.parseInt(howManyProbes);
} catch (NumberFormatException nfe) {
JOptionPane.showMessageDialog(this, howManyProbes + " was not an integer", "Error", JOptionPane.ERROR_MESSAGE);
return;
}
ProbeList newList = matrix.createProbeListsFromClusters(minProbes, heatmapPanel.genomePanel().currentYStartIndex(), heatmapPanel.genomePanel().currentYEndIndex());
// This was called before clustering had completed.
if (newList == null)
return;
// Ask for a name for the list
String groupName = null;
while (true) {
groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + newList.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, newList.name());
if (groupName == null) {
// Since the list will automatically have been added to
// the ProbeList tree we actively need to delete it if
// they choose to cancel at this point.
newList.delete();
// They cancelled
return;
}
if (groupName.length() == 0)
// Try again
continue;
break;
}
newList.setName(groupName);
} else if (ae.getActionCommand().equals("send_x")) {
DisplayPreferences.getInstance().setLocation(xChr, SequenceRead.packPosition(xStart, xEnd, Location.UNKNOWN));
} else if (ae.getActionCommand().equals("send_y")) {
DisplayPreferences.getInstance().setLocation(yChr, SequenceRead.packPosition(yStart, yEnd, Location.UNKNOWN));
} else if (ae.getActionCommand().equals("match")) {
Chromosome chr = DisplayPreferences.getInstance().getCurrentChromosome();
int start = SequenceRead.start(DisplayPreferences.getInstance().getCurrentLocation());
int end = SequenceRead.end(DisplayPreferences.getInstance().getCurrentLocation());
heatmapPanel.genomePanel().setCurrentPosition(chr, start, end);
} else if (ae.getActionCommand().equals("make_report")) {
new InteractionReportOptions(SeqMonkApplication.getInstance(), new AnnotatedInteractionReport(SeqMonkApplication.getInstance().dataCollection(), matrix));
}
}
Aggregations