use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class FourCEnrichmentQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// Start off by finding the right HiC data for each 4C dataset
HiCDataStore[] parentHiC = new HiCDataStore[data.length];
Probe[] parentProbes = new Probe[data.length];
ProbeList[] significantLists = new ProbeList[data.length];
for (int d = 0; d < data.length; d++) {
String filename = data[d].fileName();
filename = filename.replaceAll("HiC other end of ", "");
filename = filename.replaceAll(" for region.*", "");
System.err.println("Looking for HiC match to " + filename);
for (int h = 0; h < hiCData.length; h++) {
if (((DataStore) hiCData[h]).name().equals(filename)) {
parentHiC[d] = hiCData[h];
}
}
if (parentHiC[d] == null) {
progressWarningReceived(new SeqMonkException("Failed to find HiC dataset '" + filename + "' for 4C dataset " + data[d].name()));
continue;
}
significantLists[d] = new ProbeList(application.dataCollection().probeSet(), "4C p<0.05 " + data[d].name(), "4C pipeline significance < 0.05", "P-value");
// Also make up a probe to represent the region from which
// the dataset was made
filename = data[d].fileName();
filename = filename.replaceAll("^.*for region ", "");
String[] locationSections = filename.split("[-:]");
if (locationSections.length != 3) {
progressWarningReceived(new SeqMonkException("Couldn't extract location from " + filename));
continue;
}
try {
parentProbes[d] = new Probe(application.dataCollection().genome().getChromosome(locationSections[0]).chromosome(), Integer.parseInt(locationSections[1]), Integer.parseInt(locationSections[2]));
} catch (Exception e) {
progressExceptionReceived(e);
return;
}
}
// Make strength calculators for each HiC set
HiCInteractionStrengthCalculator[] strengthCalculators = new HiCInteractionStrengthCalculator[parentHiC.length];
for (int i = 0; i < parentHiC.length; i++) {
strengthCalculators[i] = new HiCInteractionStrengthCalculator(parentHiC[i], true);
}
// Get the cis/trans counts for the parent probes
int[] parentProbeCisCounts = new int[parentHiC.length];
int[] parentProbeTransCounts = new int[parentHiC.length];
for (int p = 0; p < parentHiC.length; p++) {
if (parentHiC[p] == null)
continue;
HiCHitCollection hits = parentHiC[p].getHiCReadsForProbe(parentProbes[p]);
String[] chrNames = hits.getChromosomeNamesWithHits();
for (int c = 0; c < chrNames.length; c++) {
if (chrNames[c].equals(hits.getSourceChromosomeName())) {
parentProbeCisCounts[p] = hits.getSourcePositionsForChromosome(chrNames[c]).length;
} else {
parentProbeTransCounts[p] += hits.getSourcePositionsForChromosome(chrNames[c]).length;
}
}
}
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
if (parentHiC[d] == null)
continue;
int thisProbeCisCount = 0;
int thisProbeTransCount = 0;
HiCHitCollection hiCHits = parentHiC[d].getHiCReadsForProbe(probes[p]);
String[] chrNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chrNames.length; c++) {
if (chrNames[c].equals(hiCHits.getSourceChromosomeName())) {
thisProbeCisCount = hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
} else {
thisProbeTransCount += hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
}
}
strengthCalculators[d].calculateInteraction(data[d].getReadsForProbe(probes[p]).length, thisProbeCisCount, thisProbeTransCount, parentProbeCisCounts[d], parentProbeTransCounts[d], probes[p], parentProbes[d]);
float obsExp = (float) strengthCalculators[d].obsExp();
data[d].setValueForProbe(probes[p], obsExp);
float pValue = (float) strengthCalculators[d].rawPValue() * probes.length;
if (pValue < 0.05) {
significantLists[d].addProbe(probes[p], pValue);
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class HiCCisTransQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
int cisCount = 0;
int transCount = 0;
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomeNames.length; c++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
for (int r = 0; r < sourceReads.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
continue;
}
if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
++transCount;
} else {
if (includeFarCis) {
int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
if (distance > farCisDistance) {
++transCount;
} else {
// System.err.println("Distance was "+distance);
++cisCount;
}
} else {
++cisCount;
}
}
}
}
float percentage = ((transCount * 100f) / (cisCount + transCount));
if (cisCount + transCount == 0) {
percentage = 0;
}
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], percentage);
}
}
if (correctPerChromosome) {
Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
float[] thisChrValues = new float[thisChrProbes.length];
for (int d = 0; d < data.length; d++) {
DataStore ds = (DataStore) data[d];
for (int p = 0; p < thisChrProbes.length; p++) {
try {
thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
} catch (SeqMonkException e) {
}
}
float median = SimpleStats.median(thisChrValues);
for (int p = 0; p < thisChrProbes.length; p++) {
try {
ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
} catch (SeqMonkException e) {
}
}
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class HiCPrevNextQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
Arrays.sort(probes);
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
if (probes[p].start() - distance < 0 || probes[p].end() + distance > probes[p].chromosome().length()) {
for (int d = 0; d < data.length; d++) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
}
continue;
}
// Make up a pseudo probe for the previous and next regions
Probe previousProbe = new Probe(probes[p].chromosome(), probes[p].start() - distance, probes[p].start());
Probe nextProbe = new Probe(probes[p].chromosome(), probes[p].end(), probes[p].end() + distance);
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
// Get the counts for the previous and next probes
int previousTotalCount = data[d].getHiCReadCountForProbe(previousProbe);
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
int nextTotalCount = data[d].getHiCReadCountForProbe(nextProbe);
// any reads then give up on this one.
if (previousTotalCount == 0 || nextTotalCount == 0) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
continue;
}
int previousCount = 0;
int nextCount = 0;
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
SequenceRead.sort(hitPositions);
for (int r = 0; r < hitPositions.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && hitPositions[r] == hitPositions[r - 1])
continue;
}
// Check if the other end maps to either the prev or next probe
if (SequenceRead.overlaps(hitPositions[r], previousProbe.packedPosition())) {
++previousCount;
}
if (SequenceRead.overlaps(hitPositions[r], nextProbe.packedPosition())) {
++nextCount;
}
}
// If both of the actual counts are zero then don't try to calculate a real value
if (previousCount == 0 && nextCount == 0) {
((DataStore) data[d]).setValueForProbe(probes[p], 0);
continue;
}
// Basically what happens here is that we calculate a normal X^2 value and then
// turn it into a negative depending on the direction of the deviation from the
// expected proportion.
// We need to calculate expected values for the two directions based on the proportion
// of reads falling into those two regions in total.
// Correct the counts based on the proportions of the two totals
double expectedProportion = (previousTotalCount / (double) (nextTotalCount + previousTotalCount));
int totalObservedCount = previousCount + nextCount;
double expectedPreviousCount = totalObservedCount * expectedProportion;
double expectedNextCount = totalObservedCount - expectedPreviousCount;
// Now we can calculate the X^2 values to compare the expected and observed values
double chisquare = (Math.pow(previousCount - expectedPreviousCount, 2) / expectedPreviousCount) + (Math.pow(nextCount - expectedNextCount, 2) / expectedNextCount);
// We now negate this count if the proportions bias towards the next count
double observedProportion = (previousCount / (double) totalObservedCount);
if (observedProportion > expectedProportion) {
chisquare = 0 - chisquare;
}
// System.err.println("Raw counts "+previousCount+","+nextCount+" Totals "+previousTotalCount+","+nextTotalCount+" Expected "+expectedPreviousCount+","+expectedNextCount+" Chi "+chisquare);
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], (float) chisquare);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class HiCLengthHistogramPlot method getData.
/**
* Gets the data.
*
* @param pl the pl
* @return the data
*/
private float[] getData() {
Vector<Integer> data = new Vector<Integer>();
// If there is a probeset then we get pairs for the current probe list
if (probes != null) {
for (int p = 0; p < probes.length; p++) {
HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
for (int r = 0; r < sourcePositions.length; r++) {
// Don't enter every pair twice
if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
continue;
data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
}
}
} else {
Chromosome[] chrs = SeqMonkApplication.getInstance().dataCollection().genome().getAllChromosomes();
for (int c1 = 0; c1 < chrs.length; c1++) {
HiCHitCollection hiCHits = store.getHiCReadsForChromosome(chrs[c1]);
long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
for (int r = 0; r < sourcePositions.length; r++) {
// Don't enter every pair twice
if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
continue;
data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
}
}
}
// Convert to float [] for the return array
float[] returnData = new float[data.size()];
int index = 0;
Enumeration<Integer> en = data.elements();
while (en.hasMoreElements()) {
returnData[index] = en.nextElement().floatValue();
index++;
}
data.clear();
return returnData;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection in project SeqMonk by s-andrews.
the class HiCOtherEndExtractor method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// We need to get the baits and other options from the preferences
Probe[] baits = prefs.getBaits();
boolean mergeIntoOne = prefs.mergeIntoOne();
boolean excludeSelf = prefs.excludeSelf();
DataSet[] newData;
if (mergeIntoOne) {
newData = new DataSet[visibleHiCDataSets.length];
} else {
newData = new DataSet[visibleHiCDataSets.length * baits.length];
}
try {
for (int d = 0; d < visibleHiCDataSets.length; d++) {
if (baits.length == 1) {
System.err.println("Only one bait");
newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[0].chromosome().name() + ":" + baits[0].start() + "-" + baits[0].end(), DataSet.DUPLICATES_REMOVE_NO);
} else if (mergeIntoOne) {
System.err.println("Multiple baits, but merging");
newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for " + baits.length + " regions", DataSet.DUPLICATES_REMOVE_NO);
}
for (int b = 0; b < baits.length; b++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Getting other ends from " + visibleHiCDataSets[d].name() + " and " + baits[b].toString(), (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
if (!(baits.length == 1 || mergeIntoOne)) {
newData[(d * baits.length) + b] = new DataSet(baits[b].toString() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[b].chromosome().name() + ":" + baits[b].start() + "-" + baits[b].end(), DataSet.DUPLICATES_REMOVE_NO);
}
HiCHitCollection hiCCollection = visibleHiCDataSets[d].getHiCReadsForProbe(baits[b]);
String[] chromosomes = hiCCollection.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomes.length; c++) {
Chromosome chromosome = collection.genome().getChromosome(chromosomes[c]).chromosome();
long[] reads = hiCCollection.getHitPositionsForChromosome(chromosomes[c]);
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
if (excludeSelf && baits[b].chromosome().name().equals(chromosomes[c]) && SequenceRead.overlaps(reads[r], baits[b].packedPosition())) {
continue;
}
if (mergeIntoOne) {
newData[d].addData(chromosome, reads[r]);
} else {
newData[(d * baits.length) + b].addData(chromosome, reads[r]);
}
}
}
if (!mergeIntoOne) {
// Cache the data in the new dataset
progressUpdated("Caching data", (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
newData[(d * baits.length) + b].finalise();
}
}
if (mergeIntoOne) {
// Cache the data in the new dataset
progressUpdated("Caching data", d, visibleHiCDataSets.length);
newData[d].finalise();
}
}
processingFinished(newData);
} catch (Exception ex) {
progressExceptionReceived(ex);
}
}
Aggregations