use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project talismane by joliciel-informatique.
the class CorpusStatisticsWriter method onNextParseConfiguration.
/**
 * Updates the running corpus statistics with one parsed sentence.
 * <p>
 * Three passes are made over the parse configuration:
 * <ol>
 * <li>token pass: word sets, alphanumeric counts, unknown-word counts
 * (only when {@code referenceStats} is set) and pos-tag counts;</li>
 * <li>dependency pass: label counts, syntax depth and head/dependent
 * distance;</li>
 * <li>pairwise pass: counts crossing (non-projective) arcs.</li>
 * </ol>
 * The sentence length is the full size of the pos-tag sequence; tokens
 * carrying {@code PosTag.ROOT_POS_TAG} are only skipped for the per-token
 * counts.
 *
 * @param parseConfiguration the parsed sentence to add to the statistics
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) {
    stats.sentenceCount++;
    stats.sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());

    // ---- pass 1: per-token statistics ----
    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;
        Token token = posTaggedToken.getToken();
        String word = token.getOriginalText();
        stats.words.add(word);
        if (referenceStats != null) {
            if (!referenceStats.words.contains(word))
                stats.unknownTokenCount++;
        }
        if (alphanumeric.matcher(token.getOriginalText()).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.get(sessionId).getLocale());
            stats.lowerCaseWords.add(lowercase);
            stats.alphanumericCount++;
            if (referenceStats != null) {
                if (!referenceStats.lowerCaseWords.contains(lowercase))
                    stats.unknownAlphanumericCount++;
            }
        }
        stats.tokenCount++;
        Integer countObj = stats.posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    // ---- pass 2: per-dependency statistics ----
    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        // NOTE(review): the label is used as a map key as-is; the null check
        // below suggests it can be null - confirm depLabelCounts accepts null keys.
        String label = arc.getLabel();
        Integer countObj = stats.depLabelCounts.get(label);
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        stats.depLabelCounts.put(label, count);
        stats.totalDepCount++;

        boolean headIsRoot = arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG);
        boolean unlabelled = label == null || label.length() == 0;
        if (headIsRoot && unlabelled) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if ("ponct".equals(label)) {
            // do nothing for punctuation
            // (constant-first equals: an unlabelled arc whose head is not the
            // root used to throw a NullPointerException here)
        } else {
            // depth = number of governing-dependency hops needed to reach an
            // arc whose head is the root (or to run out of governors)
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth)
                maxDepth = depth;
            stats.syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);
            int distance = Math.abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            stats.syntaxDistanceStats.addValue(distance);
        }
    }
    stats.maxSyntaxDepthStats.addValue(maxDepth);
    // only sentences with at least one measured arc contribute an average
    if (avgSyntaxDepthForSentenceStats.getN() > 0)
        stats.avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    if (maxDepth > stats.maxDepthCorpus)
        stats.maxDepthCorpus = maxDepth;

    // ---- pass 3: non-projective (crossing) arcs ----
    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent
    // non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getNonProjectiveDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (arc.getLabel() == null || arc.getLabel().length() == 0))
            continue;
        if (nonProjectiveArcs.contains(arc))
            continue;
        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = headIndex < depIndex ? headIndex : depIndex;
        int endIndex = headIndex >= depIndex ? headIndex : depIndex;
        // compare only against arcs that come later in iteration order
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getNonProjectiveDependencies()) {
            j++;
            if (j <= i)
                continue;
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG) && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0))
                continue;
            if (nonProjectiveArcs.contains(otherArc))
                continue;
            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2;
            int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2;
            // two arcs cross when exactly one end of the other arc lies
            // strictly inside this arc's span
            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                stats.nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project talismane by joliciel-informatique.
the class FScoreCalculatorOneVsRest method calculate.
/**
 * Builds the per-outcome precision / recall / f-score distributions, once.
 * <p>
 * Subsequent calls are no-ops as long as {@code calculated} stays true.
 * For every outcome that has a calculator:
 * <ul>
 * <li>an outcome missing from {@code outcomeCounts} is registered with a
 * count of 0;</li>
 * <li>outcomes with a positive count contribute one value to the
 * unweighted statistics;</li>
 * <li>and {@code count} values to the weighted statistics.</li>
 * </ul>
 */
private void calculate() {
    if (this.calculated) {
        return;
    }

    precisionStats = new DescriptiveStatistics();
    recallStats = new DescriptiveStatistics();
    fScoreStats = new DescriptiveStatistics();
    precisionWeightedStats = new DescriptiveStatistics();
    recallWeightedStats = new DescriptiveStatistics();
    fScoreWeightedStats = new DescriptiveStatistics();

    for (E outcome : fScoreCalculators.keySet()) {
        // make sure every outcome has an entry, even if it never occurred
        if (!outcomeCounts.containsKey(outcome)) {
            outcomeCounts.put(outcome, 0);
        }
        int occurrences = outcomeCounts.get(outcome);
        FScoreCalculator<Boolean> oneVsRest = fScoreCalculators.get(outcome);

        // unweighted: one observation per outcome that actually occurred
        if (occurrences > 0) {
            precisionStats.addValue(oneVsRest.getPrecision(true));
            recallStats.addValue(oneVsRest.getRecall(true));
            fScoreStats.addValue(oneVsRest.getFScore(true));
        }

        // weighted: one observation per occurrence of the outcome
        for (int remaining = occurrences; remaining > 0; remaining--) {
            precisionWeightedStats.addValue(oneVsRest.getPrecision(true));
            recallWeightedStats.addValue(oneVsRest.getRecall(true));
            fScoreWeightedStats.addValue(oneVsRest.getFScore(true));
        }
    }

    this.calculated = true;
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project trino by trinodb.
the class AbstractTestApproximateSetGeneric method testMultiplePositions.
/**
 * Runs 500 rounds of grouped set estimation on random samples and checks
 * the relative error of the estimated cardinality: its mean must be below
 * 1e-2 and its standard deviation below 1e-2 + {@code STD_ERROR}.
 */
@Test
public void testMultiplePositions() {
    DescriptiveStatistics relativeErrors = new DescriptiveStatistics();

    int round = 0;
    while (round < 500) {
        // between 1 and getUniqueValuesCount() distinct values, inclusive
        int distinctCount = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (distinctCount * 1.5);
        List<Object> sample = createRandomSample(distinctCount, sampleSize);

        long estimate = esitmateSetGrouped(sample).cardinality();
        double relativeError = (double) (estimate - distinctCount) / distinctCount;
        relativeErrors.addValue(relativeError);

        ++round;
    }

    assertLessThan(relativeErrors.getMean(), 1.0e-2);
    assertLessThan(relativeErrors.getStandardDeviation(), 1.0e-2 + STD_ERROR);
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project trino by trinodb.
the class AbstractTestQueries method testTableSampleBernoulli.
/**
 * Samples the orders table 100 times with TABLESAMPLE BERNOULLI (50) and
 * checks that no run returns duplicate rows and that the geometric mean of
 * the observed sampling ratios lies strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    DescriptiveStatistics samplingRatios = new DescriptiveStatistics();

    // size of the unsampled table, used as the denominator of every ratio
    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
            .getMaterializedRows()
            .size();

    for (int run = 0; run < 100; run++) {
        List<MaterializedRow> sampledRows = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)")
                .getMaterializedRows();
        int distinctRows = ImmutableSet.copyOf(sampledRows).size();
        assertEquals(sampledRows.size(), distinctRows, "TABLESAMPLE produced duplicate rows");

        double ratio = (double) sampledRows.size() / total;
        samplingRatios.addValue(ratio);
    }

    double mean = samplingRatios.getGeometricMean();
    boolean closeToHalf = mean > 0.45 && mean < 0.55;
    assertTrue(closeToHalf, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project rpcx-benchmark by rpcx-ecosystem.
the class DemoAction method start.
/**
 * Runs the benchmark: submits {@code count} calls to {@code testSay} on a
 * fixed pool of {@code threads} workers, waits for all of them to finish,
 * then prints request counts, throughput and latency statistics
 * (milliseconds) to standard output.
 *
 * @throws Exception if preparing the message fails or the wait for the
 *                   workers is interrupted
 */
public void start() throws Exception {
    final DescriptiveStatistics stats = new SynchronizedDescriptiveStatistics();
    DubboBenchmark.BenchmarkMessage msg = prepareArgs();
    final byte[] msgBytes = msg.toByteArray();
    final CountDownLatch latch = new CountDownLatch(this.count);
    ExecutorService es = Executors.newFixedThreadPool(threads);
    final AtomicInteger trans = new AtomicInteger(0);
    final AtomicInteger transOK = new AtomicInteger(0);
    long start = System.currentTimeMillis();
    try {
        for (int i = 0; i < this.count; i++) {
            es.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        long t = System.currentTimeMillis();
                        DubboBenchmark.BenchmarkMessage m = testSay(msgBytes);
                        t = System.currentTimeMillis() - t;
                        stats.addValue(t);
                        trans.incrementAndGet();
                        if (m != null && m.getField1().equals("OK")) {
                            transOK.incrementAndGet();
                        }
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                        // keep the interrupt visible to the executor
                        Thread.currentThread().interrupt();
                    } finally {
                        // always count down, otherwise await() below never returns
                        latch.countDown();
                    }
                }
            });
        }
        latch.await();
    } finally {
        // release the worker threads; without this the non-daemon pool
        // threads outlive the benchmark
        es.shutdown();
    }
    // clamp to 1 ms so a very fast (or empty) run cannot divide by zero
    long elapsedMillis = Math.max(1L, System.currentTimeMillis() - start);
    System.out.printf("sent requests : %d\n", this.count);
    System.out.printf("received requests : %d\n", trans.get());
    System.out.printf("received requests_OK : %d\n", transOK.get());
    // 1000L forces long arithmetic: count * 1000 overflows int for large counts
    System.out.printf("throughput (TPS) : %d\n", this.count * 1000L / elapsedMillis);
    System.out.printf("mean: %f\n", stats.getMean());
    System.out.printf("median: %f\n", stats.getPercentile(50));
    System.out.printf("max: %f\n", stats.getMax());
    System.out.printf("min: %f\n", stats.getMin());
    // the label says 99th percentile, so ask for 99 (was 90)
    System.out.printf("99P: %f\n", stats.getPercentile(99));
}
Aggregations