Search in sources :

Example 21 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project SeqMonk by s-andrews.

the class IntronRegressionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    int minDensity = optionsPanel.minDensity();
    int minLength = optionsPanel.minLength();
    double maxPValue = optionsPanel.maxPValue();
    int binSize = optionsPanel.measurementBinSize();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        Vector<Probe> probesForThisChromosome = new Vector<Probe>();
        progressUpdated("Making probes", c, chrs.length);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            // There are no introns here
            if (!(features[f].location() instanceof SplitLocation))
                continue;
            Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
            // TODO: Reverse the subLocations if its a reverse feature
            for (int intron = 1; intron < subLocations.length; intron++) {
                int start = subLocations[intron - 1].end();
                int end = subLocations[intron].start();
                if ((end - start) + 1 < minLength) {
                    // This intron is too short.
                    continue;
                }
                // TODO: We could throw away any probes which didn't have enough reads in any feature
                Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
                probesForThisChromosome.add(p);
            }
        }
        // Now we can deduplicate the probes for this chromosome and add them to the main collection
        Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
        Arrays.sort(dupProbes);
        for (int p = 0; p < dupProbes.length; p++) {
            if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
                continue;
            probes.add(dupProbes[p]);
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we go back through the probes and quantitate them
    for (int p = 0; p < allProbes.length; p++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        if (p % 1000 == 0) {
            progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
        }
        for (int d = 0; d < data.length; d++) {
            long[] reads = data[d].getReadsForProbe(allProbes[p]);
            int[] countsPerSite = new int[allProbes[p].length()];
            int usableCounts = 0;
            for (int r = 0; r < reads.length; r++) {
                if (readFilter.useRead(allProbes[p], reads[r])) {
                    ++usableCounts;
                    for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
                        ++countsPerSite[pos];
                    }
                }
            }
            if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
                // We're going to do a linear regression rather than a correlation
                // We're analysing in bins so we'll work out the bin counts and
                // add them dynamically to the regression.
                SimpleRegression regression = new SimpleRegression();
                int binCount = 0;
                for (int i = 0; i < countsPerSite.length; i++) {
                    if (i > 0 && i % binSize == 0) {
                        regression.addData(i, binCount);
                        binCount = 0;
                    }
                    binCount += countsPerSite[i];
                }
                float slope = (float) (regression.getSlope() * 1000000);
                double pValue = regression.getSignificance();
                if (allProbes[p].strand() == Location.REVERSE) {
                    slope = 0 - slope;
                }
                if (pValue <= maxPValue) {
                    data[d].setValueForProbe(allProbes[p], slope);
                } else {
                    data[d].setValueForProbe(allProbes[p], Float.NaN);
                }
            } else {
                data[d].setValueForProbe(allProbes[p], Float.NaN);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Intron regression pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    quantitationDescription.append(". Min intron length was ");
    quantitationDescription.append(minLength);
    quantitationDescription.append(". Min read density was ");
    quantitationDescription.append(minDensity);
    quantitationDescription.append(". Max slope p-value was ");
    quantitationDescription.append(maxPValue);
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 22 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project neo4j-apoc-procedures by neo4j-contrib.

the class RegressionTest method testCalculateRegr.

@Test
public void testCalculateRegr() throws Throwable {
    db.execute("CREATE " + "(:REGR_TEST {x_property: 1 , y_property: 2 })," + "(:REGR_TEST {x_property: 2 , y_property: 3 })," + "(:REGR_TEST {y_property: 10000 })," + "(:REGR_TEST {x_property: 3 , y_property: 6 })").close();
    SimpleRegression expectedRegr = new SimpleRegression(false);
    expectedRegr.addData(new double[][] { { 1, 1 }, { 2, 3 }, // {3, 10000},
    { 3, 6 } });
    TestUtil.testCall(db, "CALL apoc.math.regr('REGR_TEST', 'y_property', 'x_property')", result -> {
        assertEquals(expectedRegr.getRSquare(), (Double) result.get("r2"), 0.1);
        assertEquals(2.0, (Double) result.get("avgX"), 0.1);
        assertEquals(3.67, (Double) result.get("avgY"), 0.1);
        assertEquals(expectedRegr.getSlope(), (Double) result.get("slope"), 0.1);
    });
}
Also used : SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) Test(org.junit.Test)

Example 23 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.

the class TestDoubleRegrInterceptAggregation method testNonTrivialAggregation.

private void testNonTrivialAggregation(Double[] y, Double[] x) {
    SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < x.length; i++) {
        regression.addData(x[i], y[i]);
    }
    double expected = regression.getIntercept();
    checkArgument(Double.isFinite(expected) && expected != 0., "Expected result is trivial");
    testAggregation(expected, createDoublesBlock(y), createDoublesBlock(x));
}
Also used : SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression)

Example 24 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.

the class TestDoubleRegrSlopeAggregation method testNonTrivialAggregation.

private void testNonTrivialAggregation(Double[] y, Double[] x) {
    SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < x.length; i++) {
        regression.addData(x[i], y[i]);
    }
    double expected = regression.getSlope();
    checkArgument(Double.isFinite(expected) && expected != 0.0, "Expected result is trivial");
    testAggregation(expected, createDoublesBlock(y), createDoublesBlock(x));
}
Also used : SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression)

Example 25 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project presto by prestodb.

the class TestRealRegrInterceptAggregation method testNonTrivialAggregation.

private void testNonTrivialAggregation(Float[] y, Float[] x) {
    SimpleRegression regression = new SimpleRegression();
    for (int i = 0; i < x.length; i++) {
        regression.addData(x[i], y[i]);
    }
    float expected = (float) regression.getIntercept();
    checkArgument(Float.isFinite(expected) && expected != 0.f, "Expected result is trivial");
    testAggregation(expected, createBlockOfReals(y), createBlockOfReals(x));
}
Also used : SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression)

Aggregations

SimpleRegression (org.apache.commons.math3.stat.regression.SimpleRegression)33 ArrayList (java.util.ArrayList)13 Test (org.junit.Test)4 FractionClassificationResult (gdsc.core.match.FractionClassificationResult)3 List (java.util.List)3 Sample (function.genotype.base.Sample)2 BasePoint (gdsc.core.match.BasePoint)2 FractionalAssignment (gdsc.core.match.FractionalAssignment)2 FastCorrelator (gdsc.core.utils.FastCorrelator)2 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)2 PeakResultPoint (gdsc.smlm.ij.plugins.ResultsMatchCalculator.PeakResultPoint)2 BasePreprocessedPeakResult (gdsc.smlm.results.filter.BasePreprocessedPeakResult)2 DirectFilter (gdsc.smlm.results.filter.DirectFilter)2 PeakFractionalAssignment (gdsc.smlm.results.filter.PeakFractionalAssignment)2 PreprocessedPeakResult (gdsc.smlm.results.filter.PreprocessedPeakResult)2 TIntHashSet (gnu.trove.set.hash.TIntHashSet)2 Plot (ij.gui.Plot)2 PlotWindow (ij.gui.PlotWindow)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2