Search in sources :

Example 81 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project talismane by joliciel-informatique.

the class CorpusStatisticsWriter method onNextParseConfiguration.

/**
 * Folds one parsed sentence into the running corpus statistics held in {@code stats}.
 *
 * <p>Three passes are made over the parse configuration:
 * <ol>
 * <li>token pass: word / lower-case word sets, unknown-word counts against
 * {@code referenceStats} (when one is set), and per-POS-tag counts;</li>
 * <li>dependency pass: per-label counts, syntax depth and head-dependent distance;</li>
 * <li>crossing pass: counts pairs of dependency arcs whose spans partially overlap.</li>
 * </ol>
 *
 * @param parseConfiguration the parsed sentence to account for
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) {
    stats.sentenceCount++;
    // The length is the size of the whole pos-tag sequence; the ROOT filter below
    // is applied only to the per-token counters.
    stats.sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());
    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
            continue;
        }
        Token token = posTaggedToken.getToken();
        String word = token.getOriginalText();
        stats.words.add(word);
        if (referenceStats != null) {
            if (!referenceStats.words.contains(word)) {
                stats.unknownTokenCount++;
            }
        }
        if (alphanumeric.matcher(word).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.get(sessionId).getLocale());
            stats.lowerCaseWords.add(lowercase);
            stats.alphanumericCount++;
            if (referenceStats != null) {
                if (!referenceStats.lowerCaseWords.contains(lowercase)) {
                    stats.unknownAlphanumericCount++;
                }
            }
        }
        stats.tokenCount++;
        Integer countObj = stats.posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    int maxDepth = 0;
    // Depths of this sentence only; its mean feeds the corpus-level avgSyntaxDepthStats.
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        Integer countObj = stats.depLabelCounts.get(arc.getLabel());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.depLabelCounts.put(arc.getLabel(), count);
        stats.totalDepCount++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if ("ponct".equals(arc.getLabel())) {
            // do nothing for punctuation
            // (constant-first equals: the label may be null here when the head is not ROOT)
        } else {
            // Depth = number of governing arcs climbed from this arc's head until
            // a ROOT-tagged head is reached or no governing arc exists.
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth) {
                maxDepth = depth;
            }
            stats.syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);
            int distance = Math.abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            stats.syntaxDistanceStats.addValue(distance);
        }
    }
    stats.maxSyntaxDepthStats.addValue(maxDepth);
    if (avgSyntaxDepthForSentenceStats.getN() > 0) {
        stats.avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    }
    if (maxDepth > stats.maxDepthCorpus) {
        stats.maxDepthCorpus = maxDepth;
    }

    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent
    // non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    // i and j are 1-based iteration positions; skipping j <= i makes the inner loop
    // look only at arcs that come after the outer arc.
    // NOTE(review): this relies on getNonProjectiveDependencies() iterating in the
    // same order on every call - confirm.
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            continue;
        }
        if (nonProjectiveArcs.contains(arc)) {
            continue;
        }
        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = headIndex < depIndex ? headIndex : depIndex;
        int endIndex = headIndex >= depIndex ? headIndex : depIndex;
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getNonProjectiveDependencies()) {
            j++;
            if (j <= i) {
                continue;
            }
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0)) {
                continue;
            }
            if (nonProjectiveArcs.contains(otherArc)) {
                continue;
            }
            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2;
            int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2;
            // The two spans cross when exactly one end of the other arc lies strictly
            // inside this arc's span and the other end lies strictly outside it.
            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                stats.nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) DependencyArc(com.joliciel.talismane.parser.DependencyArc) PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) Token(com.joliciel.talismane.tokeniser.Token) HashSet(java.util.HashSet)

Example 82 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project talismane by joliciel-informatique.

the class FScoreCalculatorOneVsRest method calculate.

/**
 * Builds the per-outcome precision / recall / f-score statistics, once.
 *
 * <p>Does nothing when {@code calculated} is already set. Otherwise every outcome in
 * {@code fScoreCalculators} that has a positive count contributes one value to the
 * unweighted statistics and {@code count} values to the weighted ones. Outcomes
 * missing from {@code outcomeCounts} are registered there with a count of 0.
 */
private void calculate() {
    if (this.calculated) {
        return;
    }

    precisionStats = new DescriptiveStatistics();
    recallStats = new DescriptiveStatistics();
    fScoreStats = new DescriptiveStatistics();
    precisionWeightedStats = new DescriptiveStatistics();
    recallWeightedStats = new DescriptiveStatistics();
    fScoreWeightedStats = new DescriptiveStatistics();

    for (E outcome : fScoreCalculators.keySet()) {
        if (!outcomeCounts.containsKey(outcome)) {
            outcomeCounts.put(outcome, 0);
        }
        int occurrences = outcomeCounts.get(outcome);
        FScoreCalculator<Boolean> oneVsRest = fScoreCalculators.get(outcome);

        // Unweighted: each outcome that was seen at least once counts exactly once.
        if (occurrences > 0) {
            precisionStats.addValue(oneVsRest.getPrecision(true));
            recallStats.addValue(oneVsRest.getRecall(true));
            fScoreStats.addValue(oneVsRest.getFScore(true));
        }

        // Weighted: each outcome counts once per occurrence.
        int remaining = occurrences;
        while (remaining > 0) {
            precisionWeightedStats.addValue(oneVsRest.getPrecision(true));
            recallWeightedStats.addValue(oneVsRest.getRecall(true));
            fScoreWeightedStats.addValue(oneVsRest.getFScore(true));
            remaining--;
        }
    }

    this.calculated = true;
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)

Example 83 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project trino by trinodb.

the class AbstractTestApproximateSetGeneric method testMultiplePositions.

/**
 * Runs 500 trials, each estimating the cardinality of a random sample with a random
 * number of distinct values, and checks that the mean and the standard deviation of
 * the relative error stay below their thresholds.
 */
@Test
public void testMultiplePositions() {
    DescriptiveStatistics relativeErrors = new DescriptiveStatistics();
    for (int trial = 0; trial < 500; trial++) {
        // Between 1 and getUniqueValuesCount() distinct values, inclusive.
        int distinctValues = 1 + ThreadLocalRandom.current().nextInt(getUniqueValuesCount());
        int sampleSize = (int) (distinctValues * 1.5);
        List<Object> sample = createRandomSample(distinctValues, sampleSize);
        long estimated = esitmateSetGrouped(sample).cardinality();
        relativeErrors.addValue((estimated - distinctValues) * 1.0 / distinctValues);
    }
    double meanError = relativeErrors.getMean();
    assertLessThan(meanError, 1.0e-2);
    double errorDeviation = relativeErrors.getStandardDeviation();
    assertLessThan(errorDeviation, 1.0e-2 + STD_ERROR);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Test(org.testng.annotations.Test)

Example 84 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project trino by trinodb.

the class AbstractTestQueries method testTableSampleBernoulli.

/**
 * Samples the orders table 100 times with {@code TABLESAMPLE BERNOULLI (50)} and checks
 * that no run returns duplicate rows and that the geometric mean of the observed
 * sampling rates lies strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    int totalRows = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().size();
    DescriptiveStatistics samplingRates = new DescriptiveStatistics();
    int run = 0;
    while (run < 100) {
        List<MaterializedRow> sampled = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)").getMaterializedRows();
        int sampledCount = sampled.size();
        assertEquals(sampledCount, ImmutableSet.copyOf(sampled).size(), "TABLESAMPLE produced duplicate rows");
        samplingRates.addValue(sampledCount * 1.0 / totalRows);
        run++;
    }
    double mean = samplingRates.getGeometricMean();
    boolean withinTolerance = mean > 0.45 && mean < 0.55;
    assertTrue(withinTolerance, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Test(org.testng.annotations.Test)

Example 85 with DescriptiveStatistics

use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project rpcx-benchmark by rpcx-ecosystem.

the class DemoAction method start.

/**
 * Runs the benchmark: submits {@code count} calls to {@code testSay} on a fixed pool of
 * {@code threads} workers, waits for all of them to finish, then prints request counts,
 * throughput and latency statistics (milliseconds) to standard output.
 *
 * @throws Exception if preparing the request fails or the wait for completion is
 *         interrupted
 */
public void start() throws Exception {
    final DescriptiveStatistics stats = new SynchronizedDescriptiveStatistics();
    DubboBenchmark.BenchmarkMessage msg = prepareArgs();
    final byte[] msgBytes = msg.toByteArray();
    final CountDownLatch latch = new CountDownLatch(this.count);
    final AtomicInteger trans = new AtomicInteger(0);
    final AtomicInteger transOK = new AtomicInteger(0);
    ExecutorService es = Executors.newFixedThreadPool(threads);
    long elapsedMillis;
    try {
        long start = System.currentTimeMillis();
        for (int i = 0; i < this.count; i++) {
            es.submit(new Runnable() {

                @Override
                public void run() {
                    try {
                        long t = System.currentTimeMillis();
                        DubboBenchmark.BenchmarkMessage m = testSay(msgBytes);
                        t = System.currentTimeMillis() - t;
                        stats.addValue(t);
                        trans.incrementAndGet();
                        if (m != null && m.getField1().equals("OK")) {
                            transOK.incrementAndGet();
                        }
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                        // Restore the flag so the interruption is not silently lost.
                        Thread.currentThread().interrupt();
                    } finally {
                        // Always count down, otherwise a failed call would block await() forever.
                        latch.countDown();
                    }
                }
            });
        }
        latch.await();
        elapsedMillis = System.currentTimeMillis() - start;
    } finally {
        // Release the worker threads; without this the non-daemon pool threads
        // outlive the benchmark.
        es.shutdown();
    }
    // Long arithmetic avoids int overflow for large counts; clamping to 1 ms avoids
    // a division by zero when the whole run finishes within the clock resolution.
    long throughput = this.count * 1000L / Math.max(elapsedMillis, 1L);
    System.out.printf("sent     requests    : %d\n", this.count);
    System.out.printf("received requests    : %d\n", trans.get());
    System.out.printf("received requests_OK : %d\n", transOK.get());
    System.out.printf("throughput  (TPS)    : %d\n", throughput);
    System.out.printf("mean: %f\n", stats.getMean());
    System.out.printf("median: %f\n", stats.getPercentile(50));
    System.out.printf("max: %f\n", stats.getMax());
    System.out.printf("min: %f\n", stats.getMin());
    // The line is labelled 99P, so report the 99th percentile (it previously printed the 90th).
    System.out.printf("99P: %f\n", stats.getPercentile(99));
}
Also used : SynchronizedDescriptiveStatistics(org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) DubboBenchmark(org.apache.dubbo.bench.DubboBenchmark) CountDownLatch(java.util.concurrent.CountDownLatch) SynchronizedDescriptiveStatistics(org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutorService(java.util.concurrent.ExecutorService)

Aggregations

DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) 179, ArrayList (java.util.ArrayList) 22, List (java.util.List) 17, Test (org.testng.annotations.Test) 15, Test (org.junit.Test) 13, Test (org.junit.jupiter.api.Test) 12, File (java.io.File) 11, Plot (ij.gui.Plot) 10, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 10, Rectangle (java.awt.Rectangle) 8, TException (org.apache.thrift.TException) 7, Result (de.dagere.kopeme.generated.Result) 6, LinkedList (java.util.LinkedList) 6, AbstractMagmaTest (org.obiba.magma.test.AbstractMagmaTest) 6, ExtendedGenericDialog (uk.ac.sussex.gdsc.core.ij.gui.ExtendedGenericDialog) 6, PrecisionResultProcedure (uk.ac.sussex.gdsc.smlm.results.procedures.PrecisionResultProcedure) 6, ImagePlus (ij.ImagePlus) 5, PlotWindow (ij.gui.PlotWindow) 5, ImageProcessor (ij.process.ImageProcessor) 5, Map (java.util.Map) 5