use of org.apache.commons.math3.stat.regression.SimpleRegression in project SeqMonk by s-andrews.
the class IntronRegressionPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
int minDensity = optionsPanel.minDensity();
int minLength = optionsPanel.minLength();
double maxPValue = optionsPanel.maxPValue();
int binSize = optionsPanel.measurementBinSize();
QuantitationStrandType readFilter = optionsPanel.readFilter();
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
Vector<Probe> probesForThisChromosome = new Vector<Probe>();
progressUpdated("Making probes", c, chrs.length);
Feature[] features = getValidFeatures(chrs[c]);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
// There are no introns here
if (!(features[f].location() instanceof SplitLocation))
continue;
Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
// TODO: Reverse the subLocations if its a reverse feature
for (int intron = 1; intron < subLocations.length; intron++) {
int start = subLocations[intron - 1].end();
int end = subLocations[intron].start();
if ((end - start) + 1 < minLength) {
// This intron is too short.
continue;
}
// TODO: We could throw away any probes which didn't have enough reads in any feature
Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
probesForThisChromosome.add(p);
}
}
// Now we can deduplicate the probes for this chromosome and add them to the main collection
Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
Arrays.sort(dupProbes);
for (int p = 0; p < dupProbes.length; p++) {
if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
continue;
probes.add(dupProbes[p]);
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we go back through the probes and quantitate them
for (int p = 0; p < allProbes.length; p++) {
if (cancel) {
progressCancelled();
return;
}
if (p % 1000 == 0) {
progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
}
for (int d = 0; d < data.length; d++) {
long[] reads = data[d].getReadsForProbe(allProbes[p]);
int[] countsPerSite = new int[allProbes[p].length()];
int usableCounts = 0;
for (int r = 0; r < reads.length; r++) {
if (readFilter.useRead(allProbes[p], reads[r])) {
++usableCounts;
for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
++countsPerSite[pos];
}
}
}
if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
// We're going to do a linear regression rather than a correlation
// We're analysing in bins so we'll work out the bin counts and
// add them dynamically to the regression.
SimpleRegression regression = new SimpleRegression();
int binCount = 0;
for (int i = 0; i < countsPerSite.length; i++) {
if (i > 0 && i % binSize == 0) {
regression.addData(i, binCount);
binCount = 0;
}
binCount += countsPerSite[i];
}
float slope = (float) (regression.getSlope() * 1000000);
double pValue = regression.getSignificance();
if (allProbes[p].strand() == Location.REVERSE) {
slope = 0 - slope;
}
if (pValue <= maxPValue) {
data[d].setValueForProbe(allProbes[p], slope);
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
} else {
data[d].setValueForProbe(allProbes[p], Float.NaN);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Intron regression pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
quantitationDescription.append(". Min intron length was ");
quantitationDescription.append(minLength);
quantitationDescription.append(". Min read density was ");
quantitationDescription.append(minDensity);
quantitationDescription.append(". Max slope p-value was ");
quantitationDescription.append(maxPValue);
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project neo4j-apoc-procedures by neo4j-contrib.
the class RegressionTest method testCalculateRegr.
@Test
public void testCalculateRegr() throws Throwable {
db.execute("CREATE " + "(:REGR_TEST {x_property: 1 , y_property: 2 })," + "(:REGR_TEST {x_property: 2 , y_property: 3 })," + "(:REGR_TEST {y_property: 10000 })," + "(:REGR_TEST {x_property: 3 , y_property: 6 })").close();
SimpleRegression expectedRegr = new SimpleRegression(false);
expectedRegr.addData(new double[][] { { 1, 1 }, { 2, 3 }, // {3, 10000},
{ 3, 6 } });
TestUtil.testCall(db, "CALL apoc.math.regr('REGR_TEST', 'y_property', 'x_property')", result -> {
assertEquals(expectedRegr.getRSquare(), (Double) result.get("r2"), 0.1);
assertEquals(2.0, (Double) result.get("avgX"), 0.1);
assertEquals(3.67, (Double) result.get("avgY"), 0.1);
assertEquals(expectedRegr.getSlope(), (Double) result.get("slope"), 0.1);
});
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.
the class TestDoubleRegrInterceptAggregation method testNonTrivialAggregation.
private void testNonTrivialAggregation(Double[] y, Double[] x) {
SimpleRegression regression = new SimpleRegression();
for (int i = 0; i < x.length; i++) {
regression.addData(x[i], y[i]);
}
double expected = regression.getIntercept();
checkArgument(Double.isFinite(expected) && expected != 0., "Expected result is trivial");
testAggregation(expected, createDoublesBlock(y), createDoublesBlock(x));
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.
the class TestDoubleRegrSlopeAggregation method testNonTrivialAggregation.
private void testNonTrivialAggregation(Double[] y, Double[] x) {
SimpleRegression regression = new SimpleRegression();
for (int i = 0; i < x.length; i++) {
regression.addData(x[i], y[i]);
}
double expected = regression.getSlope();
checkArgument(Double.isFinite(expected) && expected != 0.0, "Expected result is trivial");
testAggregation(expected, createDoublesBlock(y), createDoublesBlock(x));
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.
the class TestRealRegrInterceptAggregation method testNonTrivialAggregation.
private void testNonTrivialAggregation(Float[] y, Float[] x) {
SimpleRegression regression = new SimpleRegression();
for (int i = 0; i < x.length; i++) {
regression.addData(x[i], y[i]);
}
float expected = (float) regression.getIntercept();
checkArgument(Float.isFinite(expected) && expected != 0.f, "Expected result is trivial");
testAggregation(expected, createBlockOfReals(y), createBlockOfReals(x));
}
Aggregations