use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class CisTransScatterPlotDialog method actionPerformed.
/* (non-Javadoc)
* @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
*/
public void actionPerformed(ActionEvent ae) {
if (ae.getActionCommand().equals("close")) {
setVisible(false);
dispose();
} else if (ae.getActionCommand().equals("plot")) {
HiCDataStore store = (HiCDataStore) hiCStores.getSelectedItem();
getContentPane().remove(scatterPlotPanel);
scatterPlotPanel = new CisTransScatterPlotPanel(store, collection.probeSet().getActiveList(), commonScaleBox.isSelected(), dotSizeSlider.getValue());
getContentPane().add(scatterPlotPanel, BorderLayout.CENTER);
validate();
} else if (ae.getActionCommand().equals("filter_chromosome")) {
Chromosome c;
if (chromosomes.getSelectedItem() instanceof Chromosome) {
c = (Chromosome) chromosomes.getSelectedItem();
} else {
c = null;
}
if (scatterPlotPanel instanceof CisTransScatterPlotPanel) {
((CisTransScatterPlotPanel) scatterPlotPanel).setChromosome(c);
}
} else if (ae.getActionCommand().equals("save_probe_list")) {
if (scatterPlotPanel instanceof CisTransScatterPlotPanel) {
ProbeList list = ((CisTransScatterPlotPanel) scatterPlotPanel).getFilteredProbes(collection.probeSet());
if (list.getAllProbes().length == 0) {
JOptionPane.showMessageDialog(this, "No probes were selected", "No probes", JOptionPane.INFORMATION_MESSAGE);
return;
}
// Ask for a name for the list
String groupName = null;
while (true) {
groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + list.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, list.name());
if (groupName == null) {
// Remove the list which will have been created by this stage
list.delete();
// They cancelled
return;
}
if (groupName.length() == 0)
// Try again
continue;
break;
}
list.setName(groupName);
}
} else if (ae.getActionCommand().equals("save_image")) {
ImageSaver.saveImage(scatterPlotPanel);
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class HeatmapMatrix method run.
public void run() {
// First calculate chromosome offsets so we can access the
// relevant probes more quickly when we come to adding other
// end data.
Hashtable<String, Integer> offsets = new Hashtable<String, Integer>();
offsets.put(probes[0].probe.chromosome().name(), 0);
for (int i = 1; i < probes.length; i++) {
if (probes[i].probe.chromosome() != probes[i - 1].probe.chromosome()) {
offsets.put(probes[i].probe.chromosome().name(), i);
}
}
// We also need an initial list of total counts for all of our probes
// so we can do the O/E calculations. We can incorporate into this
// the ability to correct for linkage.
int[] totalCisCounts = new int[probes.length];
int[] totalTransCounts = new int[probes.length];
getTotalCounts(totalCisCounts, totalTransCounts);
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
// We'll make up an ever expanding list of interactions which we
// care about
Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
// We'll need to correct for the number of tests we perform, but we're also able
// to skip a load of tests based on the initial filters we supplied (non-statistical ones)
// so we're going to keep a record of how many valid tests we actually need to correct for.
//
// The worse case scenario is that we do every possible comparison (but only one way round).
// After some testing this proved to be a bad idea. We skipped so many tests where the
// interaction wasn't observed that we ended up hugely under-correcting our final p-values
// and making a load of false positive predictions. We can do this more correctly later,
// but for now we're either going to correct for every possible test, or for just every
// possible cis test if we're excluding trans hits.
long numberOfTestsPerformed = 0;
if (initialMaxDistance > 0) {
// We're just counting cis interactions
int currentChrCount = 1;
Chromosome currentChr = probes[0].probe.chromosome();
for (int p = 1; p < probes.length; p++) {
if (probes[p].probe.chromosome() == currentChr) {
++currentChrCount;
} else {
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
currentChrCount = 1;
currentChr = probes[p].probe.chromosome();
}
}
numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
} else {
numberOfTestsPerformed = (probes.length * ((long) probes.length - 1)) / 2;
}
for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressCancelled();
}
return;
}
}
// System.err.println("Getting interactions for "+probes[p].probe);
// We temporarily store the interactions with this probe which means we
// can make a decision about which ones we're keeping as we go along which
// drastically reduces the amount we need to store.
// This is going to be the data structure which holds the information
// on the pairs of probes which have any interactions. The key is the
// index position of the pair (x+(y*no of probes), and the value is the
// number of times this interaction was seen
Hashtable<Long, Integer> interactionCounts = new Hashtable<Long, Integer>();
HiCHitCollection hiCHits = dataSet.getHiCReadsForProbe(probes[p].probe);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
// System.err.println("Found hits on "+chromosomeNames.length+" chromosomes");
CHROM: for (int c = 0; c < chromosomeNames.length; c++) {
// Skip all trans reads if there is a max distance set.
if (initialMaxDistance > 0) {
if (!probes[p].probe.chromosome().name().equals(chromosomeNames[c])) {
continue;
}
}
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
SequenceRead.sort(hitReads);
// We're going to start when we hit the probes for this chromosome
if (!offsets.containsKey(chromosomeNames[c])) {
// System.err.println("No probes on chr "+chromosomeNames[c]+" skipping");
continue;
}
int lastIndex = offsets.get(chromosomeNames[c]);
READ: for (int o = 0; o < hitReads.length; o++) {
if (cancel) {
Enumeration<ProgressListener> en = listeners.elements();
while (en.hasMoreElements()) {
en.nextElement().progressCancelled();
}
return;
}
// Check against the relevant reads
int startIndex = lastIndex;
for (int x = startIndex; x < probes.length; x++) {
// Check that we're on the right chromosome
if (!probes[x].probe.chromosome().name().equals(chromosomeNames[c])) {
// System.err.println("Stopping searching at position "+x+" since we changed to chr "+probes[x].probe.chromosome());
continue CHROM;
}
if (probes[x].probe.start() > SequenceRead.end(hitReads[o])) {
// to the next read.
continue READ;
}
if (SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
// We overlap with this probe
// System.err.println("Found hit to probe "+probes[x].probe);
// For probe list probes we could have the same probe several times
// so we can add the read to all of the probes which are the same
// from this point.
// We can skip over interactions where the matched index is less than our index
// since we'll duplicate this later anyway.
long interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, 0);
}
// else {
// System.err.println("Skipping earlier probe hit");
// }
}
// Add one to our counts for this interaction
if (probes[p].index < probes[x].index) {
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
}
// Since we found our first hit here we can start looking from here next time.
lastIndex = x;
// this one.
for (x = x + 1; x < probes.length; x++) {
if (probes[x].probe.chromosome() != probes[x - 1].probe.chromosome()) {
// We've reached the end for this chromosome
break;
}
if (probes[x].probe == probes[x - 1].probe || SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
if (probes[p].index >= probes[x].index)
continue;
interactionKey = p + (x * (long) probes.length);
if (!interactionCounts.containsKey(interactionKey)) {
if (interactionCounts.size() >= MAX_INTERACTIONS) {
continue READ;
}
interactionCounts.put(interactionKey, 0);
}
interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
} else {
break;
}
}
continue READ;
}
// End if overlaps
}
// End check each probe
}
// End each hit read
}
// End each hit chromosome
// We can now go through the interactions we saw and decide if we
// want to keep any of them.
HiCInteractionStrengthCalculator strengthCalc = new HiCInteractionStrengthCalculator(dataSet, correctLinkage);
Enumeration<Long> en = interactionCounts.keys();
while (en.hasMoreElements()) {
long index = en.nextElement();
int absoluteValue = interactionCounts.get(index);
if (absoluteValue < initialMinAbsolute)
continue;
ProbeWithIndex probe1 = probes[(int) (index % probes.length)];
ProbeWithIndex probe2 = probes[(int) (index / probes.length)];
// We calculate the obs/exp based on the total pair count
// and the relative counts at each end of the interaction
// Do the interaction strength calculation
strengthCalc.calculateInteraction(absoluteValue, totalCisCounts[probe1.index], totalTransCounts[probe1.index], totalCisCounts[probe2.index], totalTransCounts[probe2.index], probe1.probe, probe2.probe);
// We're not counting this here any more. We work out theoretical numbers at the top instead
// ++numberOfTestsPerformed;
float obsExp = (float) strengthCalc.obsExp();
float pValue = (float) strengthCalc.rawPValue();
// interaction objects we have to create.
if (obsExp < initialMinStrength)
continue;
// This isn't the final p-value check, but if the raw pvalue fails then the corrected value is never going to pass.
if (initialMaxSignificance < 1 && pValue > initialMaxSignificance)
continue;
InteractionProbePair interaction = new InteractionProbePair(probe1.probe, probe1.index, probe2.probe, probe2.index, obsExp, absoluteValue);
interaction.setSignificance(pValue);
// We check against the current list of filters
if (passesCurrentFilters(interaction)) {
// See if the strength of any of the interactions is bigger than our current max
if (obsExp > maxValue)
maxValue = obsExp;
if (filteredInteractions.size() >= MAX_INTERACTIONS) {
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressWarningReceived(new SeqMonkException("More than " + MAX_INTERACTIONS + " interactions passed the filters. Showing as many as I can"));
}
} else {
filteredInteractions.add(interaction);
}
}
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" initially filtered interactions");
// Apply multiple testing correction
Arrays.sort(interactions, new Comparator<InteractionProbePair>() {
public int compare(InteractionProbePair o1, InteractionProbePair o2) {
return Float.compare(o1.signficance(), o2.signficance());
}
});
for (int i = 0; i < interactions.length; i++) {
interactions[i].setSignificance(interactions[i].signficance() * ((float) (numberOfTestsPerformed) / (i + 1)));
// We can't allow corrected p-values to end up in a different order to the uncorrected ones
if (i > 0 && interactions[i].signficance() < interactions[i - 1].signficance()) {
interactions[i].setSignificance(interactions[i - 1].signficance());
}
}
// Now re-filter the interactions to get those which finally pass the p-value filter
filteredInteractions.clear();
for (int i = 0; i < interactions.length; i++) {
if (initialMaxSignificance >= 1 || interactions[i].signficance() < initialMaxSignificance) {
filteredInteractions.add(interactions[i]);
} else {
break;
}
}
// Put the interactions which worked into an Array
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
// System.err.println("Found a set of "+interactions.length+" interactions after multiple testing correction");
// We've processed all of the reads and the matrix is complete. We can therefore
// draw the plot
Enumeration<ProgressListener> en2 = listeners.elements();
while (en2.hasMoreElements()) {
en2.nextElement().progressComplete("heatmap", null);
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class DataStorePropertiesDialog method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Chromosome[] chrs = dataStore.collection().genome().getAllChromosomes();
double averageLength = 0;
int totalCount = 0;
int shortestLength = 0;
int longestLength = 0;
for (int c = 0; c < chrs.length; c++) {
ReadsWithCounts reads = dataStore.getReadsForChromosome(chrs[c]);
for (int i = 0; i < reads.reads.length; i++) {
totalCount += reads.counts[i];
if (i == 0) {
shortestLength = SequenceRead.length(reads.reads[i]);
longestLength = SequenceRead.length(reads.reads[i]);
}
if (SequenceRead.length(reads.reads[i]) < shortestLength)
shortestLength = SequenceRead.length(reads.reads[i]);
if (SequenceRead.length(reads.reads[i]) > longestLength)
longestLength = SequenceRead.length(reads.reads[i]);
averageLength += SequenceRead.length(reads.reads[i]) * reads.counts[i];
}
}
averageLength /= totalCount;
this.averageLength.setText("" + (int) averageLength + "bp (" + shortestLength + "-" + longestLength + ")");
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class SeqMonkDataWriter method printPairedDataSet.
private boolean printPairedDataSet(PairedDataSet set, PrintStream p, int index, int indexTotal) throws IOException {
p.println(set.getTotalReadCount() * 2 + "\t" + set.name());
// Go through one chromosome at a time.
Chromosome[] chrs = data.genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
HiCHitCollection hiCHits = set.getHiCReadsForChromosome(chrs[c]);
// Work out how many of these reads we're actually going to output
int validReadCount = 0;
for (int c2 = 0; c2 < chrs.length; c2++) {
validReadCount += hiCHits.getSourcePositionsForChromosome(chrs[c2].name()).length;
}
p.println(chrs[c].name() + "\t" + validReadCount);
for (int c2 = 0; c2 < chrs.length; c2++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chrs[c2].name());
long[] hitReads = hiCHits.getHitPositionsForChromosome(chrs[c2].name());
for (int j = 0; j < sourceReads.length; j++) {
if (cancel) {
cancelled(p);
return false;
}
// TODO: Fix the progress bar
if ((j % (1 + (validReadCount / 10))) == 0) {
Enumeration<ProgressListener> e2 = listeners.elements();
while (e2.hasMoreElements()) {
e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
}
}
p.println(sourceReads[j] + "\t" + chrs[c2].name() + "\t" + hitReads[j]);
}
}
}
// Print a blank line after the last chromosome
p.println("");
return true;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.
the class ChromosomeNameTranslator method getChromosomeForName.
/**
* Tries to find a match for a name, but provides an option to not remember failures.
* This can be used when the genome is still in a state of flux (with new chromosomes
* being added) so we know that something which fails now may not necessarily fail
* later. This shouldn't be used once the genome is completely loaded since performance
* will be adversely affected by not caching failures and rechecking them every time.
*
* @param name The name to match
* @param rememberFailures Whether to cache names which couldn't be matched to a chromosome
* @return The chromosome we matched.
* @throws SeqMonkException If we couldn't match the name to any chromosome
*/
public ChromosomeWithOffset getChromosomeForName(String name, boolean rememberFailures) {
// See if we've already worked this out for this name
if (nameMap.containsKey(name)) {
ChromosomeWithOffset chr = nameMap.get(name);
if (chr != null) {
return chr;
}
throw new IllegalArgumentException("Couldn't extract a valid name from '" + name + "'");
}
// Ensembl chromsome files look like:
// Homo_sapiens.GRCh37.55.dna.chromosome.1.fa
String[] dotSections = name.split("\\.");
if (dotSections.length == 7) {
// Try it
Chromosome chr = genome.getExactChromsomeNameMatch(dotSections[5]);
if (chr != null) {
nameMap.put(name, new ChromosomeWithOffset(chr, 0));
return nameMap.get(name);
}
}
// Try filenames next
String filename = name.trim();
if (filename.toLowerCase().endsWith(".txt")) {
filename = filename.substring(0, filename.length() - 4);
}
if (filename.toLowerCase().endsWith(".fa")) {
filename = filename.substring(0, filename.length() - 3);
}
if (filename.toLowerCase().startsWith("chromosome")) {
filename = filename.substring(10);
}
if (filename.toLowerCase().startsWith("chr")) {
filename = filename.substring(3);
}
filename = filename.trim();
Chromosome chr = genome.getExactChromsomeNameMatch(filename);
if (chr != null) {
nameMap.put(name, new ChromosomeWithOffset(chr, 0));
return nameMap.get(name);
}
chr = genome.getExactChromsomeNameMatch(filename.toUpperCase());
if (chr != null) {
nameMap.put(name, new ChromosomeWithOffset(chr, 0));
return nameMap.get(name);
}
chr = genome.getExactChromsomeNameMatch(filename.toLowerCase());
if (chr != null) {
nameMap.put(name, new ChromosomeWithOffset(chr, 0));
return nameMap.get(name);
}
// We could also try the first text before a space in the string
String[] spaceSections = name.trim().split("\\s+");
chr = genome.getExactChromsomeNameMatch(spaceSections[0]);
if (chr != null) {
nameMap.put(name, new ChromosomeWithOffset(chr, 0));
return nameMap.get(name);
}
// If we get this far then all is lost and we have to give up.
if (rememberFailures) {
nameMap.put(name, null);
}
throw new IllegalArgumentException("Couldn't extract a valid name from '" + name + "'");
}
Aggregations