use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class DataGroup method getHiCReadsForChromosome.
public HiCHitCollection getHiCReadsForChromosome(Chromosome c) {
HiCHitCollection collection = new HiCHitCollection(c.name());
for (int i = 0; i < dataSets.length; i++) {
if (dataSets[i] instanceof HiCDataStore) {
HiCHitCollection thisCollection = ((HiCDataStore) dataSets[i]).getHiCReadsForChromosome(c);
collection.addCollection(thisCollection);
}
}
// TODO: Do an incremental add to this collection so that we don't have
// to re-sort it each time. This is nastily inefficient
collection.sortCollection();
return collection;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class HeatmapMatrix method run.
public void run() {
// First calculate chromosome offsets so we can access the
// relevant probes more quickly when we come to adding other
// end data.
Hashtable<String, Integer> offsets = new Hashtable<String, Integer>();
offsets.put(probes[0].probe.chromosome().name(), 0);
for (int i = 1; i < probes.length; i++) {
if (probes[i].probe.chromosome() != probes[i - 1].probe.chromosome()) {
offsets.put(probes[i].probe.chromosome().name(), i);
}
}
// We also need an initial list of total counts for all of our probes
// so we can do the O/E calculations. We can incorporate into this
// the ability to correct for linkage.
int[] totalCisCounts = new int[probes.length];
int[] totalTransCounts = new int[probes.length];
getTotalCounts(totalCisCounts, totalTransCounts);
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
// We'll make up an ever expanding list of interactions which we
// care about
Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
// We'll need to correct for the number of tests we perform, but we're also able
// to skip a load of tests based on the initial filters we supplied (non-statistical ones)
// so we're going to keep a record of how many valid tests we actually need to correct for.
//
// The worse case scenario is that we do every possible comparison (but only one way round).
// After some testing this proved to be a bad idea. We skipped so many tests where the
// interaction wasn't observed that we ended up hugely under-correcting our final p-values
// and making a load of false positive predictions. We can do this more correctly later,
// but for now we're either going to correct for every possible test, or for just every
// possible cis test if we're excluding trans hits.
long numberOfTestsPerformed = 0;
if (initialMaxDistance > 0) {
// We're just counting cis interactions
int currentChrCount = 1;
Chromosome currentChr = probes[0].probe.chromosome();
for (int p = 1; p < probes.length; p++) {
if (probes[p].probe.chromosome() == currentChr) {
++currentChrCount;
} else {
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
currentChrCount = 1;
currentChr = probes[p].probe.chromosome();
}
}
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
} else {
numberOfTestsPerformed = (probes.length * ((long) probes.length - 1)) / 2;
}
for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
}
// System.err.println("Getting interactions for "+probes[p].probe);
// We temporarily store the interactions with this probe which means we
// can make a decision about which ones we're keeping as we go along which
// drastically reduces the amount we need to store.
// This is going to be the data structure which holds the information
// on the pairs of probes which have any interactions. The key is the
// index position of the pair (x+(y*no of probes), and the value is the
// number of times this interaction was seen
Hashtable<Long, Integer> interactionCounts = new Hashtable<Long, Integer>();
HiCHitCollection hiCHits = dataSet.getHiCReadsForProbe(probes[p].probe);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
// System.err.println("Found hits on "+chromosomeNames.length+" chromosomes");
CHROM: for (int c = 0; c < chromosomeNames.length; c++) {
// Skip all trans reads if there is a max distance set.
if (initialMaxDistance > 0) {
if (!probes[p].probe.chromosome().name().equals(chromosomeNames[c])) {
continue;
}
}
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
SequenceRead.sort(hitReads);
// We're going to start when we hit the probes for this chromosome
if (!offsets.containsKey(chromosomeNames[c])) {
// System.err.println("No probes on chr "+chromosomeNames[c]+" skipping");
continue;
}
int lastIndex = offsets.get(chromosomeNames[c]);
READ: for (int o = 0; o < hitReads.length; o++) {
if (cancel) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressCancelled();
}
return;
}
// Check against the relevant reads
int startIndex = lastIndex;
for (int x = startIndex; x < probes.length; x++) {
// Check that we're on the right chromosome
if (!probes[x].probe.chromosome().name().equals(chromosomeNames[c])) {
// System.err.println("Stopping searching at position "+x+" since we changed to chr "+probes[x].probe.chromosome());
continue CHROM;
}
if (probes[x].probe.start() > SequenceRead.end(hitReads[o])) {
// to the next read.
continue READ;
}
if (SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
// We overlap with this probe
// System.err.println("Found hit to probe "+probes[x].probe);
// For probe list probes we could have the same probe several times
// so we can add the read to all of the probes which are the same
// from this point.
// We can skip over interactions where the matched index is less than our index
// since we'll duplicate this later anyway.
long interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, 0);
}
// else {
// System.err.println("Skipping earlier probe hit");
// }
}
// Add one to our counts for this interaction
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
}
// Since we found our first hit here we can start looking from here next time.
lastIndex = x;
// this one.
for (x = x + 1; x < probes.length; x++) {
if (probes[x].probe.chromosome() != probes[x - 1].probe.chromosome()) {
// We've reached the end for this chromosome
break;
}
if (probes[x].probe == probes[x - 1].probe || SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
if (probes[p].index >= probes[x].index)
continue;
interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
if (interactionCounts.size() >= MAX_INTERACTIONS) {
continue READ;
}
interactionCounts.put(interactionKey, 0);
}
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
} else {
break;
}
}
continue READ;
}
// End if overlaps
}
// End check each probe
}
// End each hit read
}
// End each hit chromosome
// We can now go through the interactions we saw and decide if we
// want to keep any of them.
HiCInteractionStrengthCalculator strengthCalc = new HiCInteractionStrengthCalculator(dataSet, correctLinkage);
Enumeration<Long> en = interactionCounts.keys();
while (en.hasMoreElements()) {
long index = en.nextElement();
int absoluteValue = interactionCounts.get(index);
if (absoluteValue < initialMinAbsolute)
continue;
ProbeWithIndex probe1 = probes[(int) (index % probes.length)];
ProbeWithIndex probe2 = probes[(int) (index / probes.length)];
// We calculate the obs/exp based on the total pair count
// and the relative counts at each end of the interaction
// Do the interaction strength calculation
strengthCalc.calculateInteraction(absoluteValue, totalCisCounts[probe1.index], totalTransCounts[probe1.index], totalCisCounts[probe2.index], totalTransCounts[probe2.index], probe1.probe, probe2.probe);
// We're not counting this here any more. We work out theoretical numbers at the top instead
// ++numberOfTestsPerformed;
float obsExp = (float) strengthCalc.obsExp();
float pValue = (float) strengthCalc.rawPValue();
// interaction objects we have to create.
if (obsExp < initialMinStrength)
continue;
// This isn't the final p-value check, but if the raw pvalue fails then the corrected value is never going to pass.
if (initialMaxSignificance < 1 && pValue > initialMaxSignificance)
continue;
InteractionProbePair interaction = new InteractionProbePair(probe1.probe, probe1.index, probe2.probe, probe2.index, obsExp, absoluteValue);
interaction.setSignificance(pValue);
// We check against the current list of filters
if (passesCurrentFilters(interaction)) {
// See if the strength of any of the interactions is bigger than our current max
if (obsExp > maxValue)
maxValue = obsExp;
if (filteredInteractions.size() >= MAX_INTERACTIONS) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressWarningReceived(new SeqMonkException("More than " + MAX_INTERACTIONS + " interactions passed the filters. Showing as many as I can"));
}
} else {
filteredInteractions.add(interaction);
}
}
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" initially filtered interactions");
// Apply multiple testing correction
Arrays.sort(interactions, new Comparator<InteractionProbePair>() {
public int compare(InteractionProbePair o1, InteractionProbePair o2) {
return Float.compare(o1.signficance(), o2.signficance());
}
});
for (int i = 0; i < interactions.length; i++) {
interactions[i].setSignificance(interactions[i].signficance() * ((float) (numberOfTestsPerformed) / (i + 1)));
// We can't allow corrected p-values to end up in a different order to the uncorrected ones
if (i > 0 && interactions[i].signficance() < interactions[i - 1].signficance()) {
interactions[i].setSignificance(interactions[i - 1].signficance());
}
}
// Now re-filter the interactions to get those which finally pass the p-value filter
filteredInteractions.clear();
for (int i = 0; i < interactions.length; i++) {
if (initialMaxSignificance >= 1 || interactions[i].signficance() < initialMaxSignificance) {
filteredInteractions.add(interactions[i]);
} else {
break;
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" interactions after multiple testing correction");
// We've processed all of the reads and the matrix is complete. We can therefore
// draw the plot
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressComplete("heatmap", null);
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class CisTransScatterPlotPanel method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// We need to find the cis trans counts, and get a non-overlapping
// set of probes
Probe[] probes = probeList.getAllProbes();
// If there aren't any probes there's no point going any further
if (probes.length == 0)
return;
minValueX = 0;
minValueY = 0;
maxValueX = 0;
maxValueY = 0;
Arrays.sort(probes);
xData = new int[probes.length];
yData = new int[probes.length];
for (int p = 0; p < probes.length; p++) {
HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
String[] chromosomes = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomes.length; c++) {
if (hiCHits.getSourceChromosomeName().equals(chromosomes[c])) {
xData[p] += hiCHits.getSourcePositionsForChromosome(chromosomes[c]).length;
} else {
yData[p] += hiCHits.getSourcePositionsForChromosome(chromosomes[c]).length;
}
}
if (xData[p] < minValueX)
minValueX = xData[p];
if (yData[p] < minValueY)
minValueY = yData[p];
if (xData[p] > maxValueX)
maxValueX = xData[p];
if (yData[p] > maxValueY)
maxValueY = yData[p];
}
if (commonScale) {
minValueX = Math.min(minValueX, minValueY);
minValueY = minValueX;
maxValueX = Math.max(maxValueX, maxValueY);
maxValueY = maxValueX;
}
readyToDraw = true;
repaint();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class ReplicateSet method getHiCReadsForProbe.
public HiCHitCollection getHiCReadsForProbe(Probe p) {
HiCHitCollection collection = new HiCHitCollection(p.chromosome().name());
for (int i = 0; i < dataStores.length; i++) {
if (dataStores[i] instanceof HiCDataStore) {
HiCHitCollection thisCollection = ((HiCDataStore) dataStores[i]).getHiCReadsForProbe(p);
collection.addCollection(thisCollection);
}
}
collection.sortCollection();
return collection;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class SeqMonkDataWriter method printPairedDataSet.
private boolean printPairedDataSet(PairedDataSet set, PrintStream p, int index, int indexTotal) throws IOException {
p.println(set.getTotalReadCount() * 2 + "\t" + set.name());
// Go through one chromosome at a time.
Chromosome[] chrs = data.genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
HiCHitCollection hiCHits = set.getHiCReadsForChromosome(chrs[c]);
// Work out how many of these reads we're actually going to output
int validReadCount = 0;
for (int c2 = 0; c2 < chrs.length; c2++) {
validReadCount += hiCHits.getSourcePositionsForChromosome(chrs[c2].name()).length;
}
p.println(chrs[c].name() + "\t" + validReadCount);
for (int c2 = 0; c2 < chrs.length; c2++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chrs[c2].name());
long[] hitReads = hiCHits.getHitPositionsForChromosome(chrs[c2].name());
for (int j = 0; j < sourceReads.length; j++) {
if (cancel) {
cancelled(p);
return false;
}
// TODO: Fix the progress bar
if ((j % (1 + (validReadCount / 10))) == 0) {
Enumeration<ProgressListener> e2 = listeners.elements();
while (e2.hasMoreElements()) {
e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
}
}
p.println(sourceReads[j] + "\t" + chrs[c2].name() + "\t" + hitReads[j]);
}
}
}
// Print a blank line after the last chromosome
p.println("");
return true;
}
Aggregations