use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class EnrichmentNormalisationQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
if (!isReady()) {
progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
}
Probe[] allProbes = application.dataCollection().probeSet().getAllProbes();
Probe[] calculateProbes = ((ProbeList) calculateFromProbeList.getSelectedItem()).getAllProbes();
float[] lowerPercentileValues = new float[data.length];
float[] upperPercentileValues = new float[data.length];
// Work out the value at the appropriate percentile
for (int d = 0; d < data.length; d++) {
progressUpdated("Calculating correction for " + data[d].name(), d, data.length);
float[] theseValues = new float[calculateProbes.length];
for (int p = 0; p < calculateProbes.length; p++) {
try {
theseValues[p] = data[d].getValueForProbe(calculateProbes[p]);
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
Arrays.sort(theseValues);
lowerPercentileValues[d] = getPercentileValue(theseValues, lowerPercentile);
upperPercentileValues[d] = getPercentileValue(theseValues, upperPercentile);
System.err.println("Lower percentile for " + data[d].name() + " is " + lowerPercentileValues[d] + " from " + theseValues.length + " values");
}
float lowerMedian = SimpleStats.median(Arrays.copyOf(lowerPercentileValues, lowerPercentileValues.length));
float upperMedian = SimpleStats.median(Arrays.copyOf(upperPercentileValues, upperPercentileValues.length));
for (int d = 0; d < data.length; d++) {
System.err.println("Looking at " + data[d].name());
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(d, data.length);
// Now we work out the correction factor we're actually going to use
float lowerCorrectionFactor = lowerMedian - lowerPercentileValues[d];
float upperCorrectionFactor = ((upperPercentileValues[d] + lowerCorrectionFactor) - lowerMedian) / (upperMedian - lowerMedian);
System.err.println("Lower percentile = " + lowerPercentileValues[d]);
System.err.println("Upper percentile = " + upperPercentileValues[d]);
System.err.println("Lower median = " + lowerMedian);
System.err.println("Upper median = " + upperMedian);
System.err.println("Lower correction = " + lowerCorrectionFactor);
System.err.println("Upper correction = " + upperCorrectionFactor);
System.err.println("\n\n");
// Apply the correction to all probes
try {
for (int p = 0; p < allProbes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
float probeValue = data[d].getValueForProbe(allProbes[p]);
probeValue += lowerCorrectionFactor;
probeValue -= lowerMedian;
probeValue /= upperCorrectionFactor;
probeValue += lowerMedian;
data[d].setValueForProbe(allProbes[p], probeValue);
}
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ExactOverlapQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
float[] corrections = new float[data.length];
if (correctTotal) {
float largest = 0;
if (correctPerMillion) {
largest = 1000000;
}
for (int d = 0; d < data.length; d++) {
if (correctOnlyInProbes) {
corrections[d] = getTotalCountInProbes(data[d], probes);
} else {
corrections[d] = data[d].getTotalReadCount();
}
if (d == 0 && !correctPerMillion) {
largest = corrections[d];
} else {
if (!correctPerMillion && corrections[d] > largest) {
largest = corrections[d];
}
}
}
// We correct everything by the largest count
for (int d = 0; d < corrections.length; d++) {
corrections[d] = largest / corrections[d];
}
}
// To make this more efficient we'll do this chromosome by chromosome
Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
progressUpdated("Quantiating probes on " + chrs[c].name(), c, chrs.length);
Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
Arrays.sort(thisChrProbes);
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
// We'll fetch all reads for this chr and then do a count per position
ReadsWithCounts reads = data[d].getReadsForChromosome(chrs[c]);
quantitationType.resetLastRead();
int startIndex = 0;
for (int p = 0; p < thisChrProbes.length; p++) {
int rawCount = 0;
for (int r = startIndex; r < reads.reads.length; r++) {
if (SequenceRead.start(reads.reads[r]) < thisChrProbes[p].start()) {
startIndex = r;
}
if (SequenceRead.start(reads.reads[r]) > thisChrProbes[p].start())
break;
if (quantitationType.useRead(thisChrProbes[p], reads.reads[r])) {
if (SequenceRead.start(reads.reads[r]) == thisChrProbes[p].start() && SequenceRead.end(reads.reads[r]) == thisChrProbes[p].end()) {
rawCount += reads.counts[r];
}
}
}
// We have the counts now work out any correction.
float count = rawCount;
if (logTransform && count == 0) {
count = 0.9f;
}
if (correctTotal) {
count *= corrections[d];
}
if (logTransform) {
count = (float) Math.log(count) / log2;
}
data[d].setValueForProbe(thisChrProbes[p], count);
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class FourCEnrichmentQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
// Start off by finding the right HiC data for each 4C dataset
HiCDataStore[] parentHiC = new HiCDataStore[data.length];
Probe[] parentProbes = new Probe[data.length];
ProbeList[] significantLists = new ProbeList[data.length];
for (int d = 0; d < data.length; d++) {
String filename = data[d].fileName();
filename = filename.replaceAll("HiC other end of ", "");
filename = filename.replaceAll(" for region.*", "");
System.err.println("Looking for HiC match to " + filename);
for (int h = 0; h < hiCData.length; h++) {
if (((DataStore) hiCData[h]).name().equals(filename)) {
parentHiC[d] = hiCData[h];
}
}
if (parentHiC[d] == null) {
progressWarningReceived(new SeqMonkException("Failed to find HiC dataset '" + filename + "' for 4C dataset " + data[d].name()));
continue;
}
significantLists[d] = new ProbeList(application.dataCollection().probeSet(), "4C p<0.05 " + data[d].name(), "4C pipeline significance < 0.05", "P-value");
// Also make up a probe to represent the region from which
// the dataset was made
filename = data[d].fileName();
filename = filename.replaceAll("^.*for region ", "");
String[] locationSections = filename.split("[-:]");
if (locationSections.length != 3) {
progressWarningReceived(new SeqMonkException("Couldn't extract location from " + filename));
continue;
}
try {
parentProbes[d] = new Probe(application.dataCollection().genome().getChromosome(locationSections[0]).chromosome(), Integer.parseInt(locationSections[1]), Integer.parseInt(locationSections[2]));
} catch (Exception e) {
progressExceptionReceived(e);
return;
}
}
// Make strength calculators for each HiC set
HiCInteractionStrengthCalculator[] strengthCalculators = new HiCInteractionStrengthCalculator[parentHiC.length];
for (int i = 0; i < parentHiC.length; i++) {
strengthCalculators[i] = new HiCInteractionStrengthCalculator(parentHiC[i], true);
}
// Get the cis/trans counts for the parent probes
int[] parentProbeCisCounts = new int[parentHiC.length];
int[] parentProbeTransCounts = new int[parentHiC.length];
for (int p = 0; p < parentHiC.length; p++) {
if (parentHiC[p] == null)
continue;
HiCHitCollection hits = parentHiC[p].getHiCReadsForProbe(parentProbes[p]);
String[] chrNames = hits.getChromosomeNamesWithHits();
for (int c = 0; c < chrNames.length; c++) {
if (chrNames[c].equals(hits.getSourceChromosomeName())) {
parentProbeCisCounts[p] = hits.getSourcePositionsForChromosome(chrNames[c]).length;
} else {
parentProbeTransCounts[p] += hits.getSourcePositionsForChromosome(chrNames[c]).length;
}
}
}
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
if (parentHiC[d] == null)
continue;
int thisProbeCisCount = 0;
int thisProbeTransCount = 0;
HiCHitCollection hiCHits = parentHiC[d].getHiCReadsForProbe(probes[p]);
String[] chrNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chrNames.length; c++) {
if (chrNames[c].equals(hiCHits.getSourceChromosomeName())) {
thisProbeCisCount = hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
} else {
thisProbeTransCount += hiCHits.getSourcePositionsForChromosome(chrNames[c]).length;
}
}
strengthCalculators[d].calculateInteraction(data[d].getReadsForProbe(probes[p]).length, thisProbeCisCount, thisProbeTransCount, parentProbeCisCounts[d], parentProbeTransCounts[d], probes[p], parentProbes[d]);
float obsExp = (float) strengthCalculators[d].obsExp();
data[d].setValueForProbe(probes[p], obsExp);
float pValue = (float) strengthCalculators[d].rawPValue() * probes.length;
if (pValue < 0.05) {
significantLists[d].addProbe(probes[p], pValue);
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class SmoothingSubtractionQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
if (!isReady()) {
progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
}
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
Vector<DataStore> quantitatedStores = new Vector<DataStore>();
DataSet[] sets = application.dataCollection().getAllDataSets();
for (int s = 0; s < sets.length; s++) {
if (sets[s].isQuantitated()) {
quantitatedStores.add(sets[s]);
}
}
DataGroup[] groups = application.dataCollection().getAllDataGroups();
for (int g = 0; g < groups.length; g++) {
if (groups[g].isQuantitated()) {
quantitatedStores.add(groups[g]);
}
}
DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
for (int c = 0; c < chromosomes.length; c++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(c, chromosomes.length);
Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
float[][] newValues = new float[data.length][allProbes.length];
try {
for (int p = 0; p < allProbes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
// Find the min and max indices we're going to use.
int minIndex = p;
int maxIndex = p;
if (correctionAction == ADJACENT) {
minIndex = p - (distance / 2);
maxIndex = minIndex + (distance - 1);
if (minIndex < 0)
minIndex = 0;
if (maxIndex > allProbes.length - 1)
maxIndex = allProbes.length - 1;
} else if (correctionAction == WINDOW) {
for (int i = p; i >= 0; i--) {
if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
break;
}
minIndex = i;
}
for (int i = p; i < allProbes.length; i++) {
if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
break;
}
maxIndex = i;
}
}
// Now go through all of the datasets working out the new value for this range
float[] tempValues = new float[(maxIndex - minIndex) + 1];
for (int d = 0; d < data.length; d++) {
for (int i = minIndex; i <= maxIndex; i++) {
tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
}
newValues[d][p] = SimpleStats.mean(tempValues);
}
}
// Now assign the values for the probes on this chromosome
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < allProbes.length; p++) {
data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
}
}
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class HiCCisTransQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
int cisCount = 0;
int transCount = 0;
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomeNames.length; c++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
for (int r = 0; r < sourceReads.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
continue;
}
if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
++transCount;
} else {
if (includeFarCis) {
int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
if (distance > farCisDistance) {
++transCount;
} else {
// System.err.println("Distance was "+distance);
++cisCount;
}
} else {
++cisCount;
}
}
}
}
float percentage = ((transCount * 100f) / (cisCount + transCount));
if (cisCount + transCount == 0) {
percentage = 0;
}
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], percentage);
}
}
if (correctPerChromosome) {
Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
float[] thisChrValues = new float[thisChrProbes.length];
for (int d = 0; d < data.length; d++) {
DataStore ds = (DataStore) data[d];
for (int p = 0; p < thisChrProbes.length; p++) {
try {
thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
} catch (SeqMonkException e) {
}
}
float median = SimpleStats.median(thisChrValues);
for (int p = 0; p < thisChrProbes.length; p++) {
try {
ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
} catch (SeqMonkException e) {
}
}
}
}
}
quantitatonComplete();
}
Aggregations