Search in sources :

Example 6 with SummaryStatistics

use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project GDSC-SMLM by aherbert.

In the class HysteresisFilter, the method getSearchDistanceUsingCandidates:

/**
 * Computes a search distance (in pixels) from the average localisation
 * precision of the candidate results.
 *
 * @param peakResults
 *            the results set providing the calibration (nm per pixel, gain,
 *            EM-CCD flag)
 * @param candidates
 *            the candidate results whose precision is averaged
 * @return the mean candidate precision multiplied by searchDistance and
 *         converted from nm to pixels
 */
private double getSearchDistanceUsingCandidates(MemoryPeakResults peakResults, LinkedList<PeakResult> candidates) {
    // Calibration values needed to compute each candidate's precision.
    final double nmPerPixel = peakResults.getNmPerPixel();
    final double gain = peakResults.getGain();
    final boolean emCCD = peakResults.isEMCCD();
    SummaryStatistics precisionStats = new SummaryStatistics();
    for (PeakResult candidate : candidates) {
        precisionStats.addValue(candidate.getPrecision(nmPerPixel, gain, emCCD));
    }
    // Scale the mean precision (nm) by the search-distance factor and
    // convert to pixel units.
    return precisionStats.getMean() * searchDistance / nmPerPixel;
}
Also used : SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) PeakResult(gdsc.smlm.results.PeakResult)

Example 7 with SummaryStatistics

use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project tika by apache.

In the class LuceneTokenCounter, the method count:

/**
 * Counts the tokens indexed for the given field and stores a
 * {@code TokenStatistics} (unique token count, total token count, top-N most
 * frequent tokens, an entropy value, and summary statistics over token
 * lengths in code points) into {@code fieldStats}.
 *
 * @param field the indexed field to analyze
 * @throws IOException if reading the Lucene index fails
 */
void count(String field) throws IOException {
    // Total number of token occurrences across all documents for this field.
    long tokenCount = leafReader.getSumTotalTermFreq(field);
    if (tokenCount > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("can't handle longs");
    }
    int tokenCountInt = (int) tokenCount;
    int uniqueTokenCount = 0;
    // Accumulates token lengths (one entry per token occurrence).
    SummaryStatistics summStats = new SummaryStatistics();
    // ent accumulates sum(p * log2(p)); normalized (and negated) below.
    double ent = 0.0d;
    double p = 0.0d;
    double base = 2.0;
    Terms terms = leafReader.terms(field);
    if (terms == null) {
        //if there were no terms
        fieldStats.put(field, new TokenStatistics(uniqueTokenCount, tokenCountInt, new TokenIntPair[0], ent, summStats));
        return;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    // Keeps the topN most frequent tokens seen so far.
    TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN);
    while (bytesRef != null) {
        long termFreq = termsEnum.totalTermFreq();
        if (termFreq > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Sorry can't handle longs yet");
        }
        int tf = (int) termFreq;
        //TODO: figure out how to avoid Stringifying this
        //to get codepoint count
        String t = bytesRef.utf8ToString();
        int len = t.codePointCount(0, t.length());
        // Record the token's length once per occurrence. NOTE(review): this
        // is O(tf) per term; SummaryStatistics has no weighted add, so a
        // cheaper alternative would require a different accumulator.
        for (int i = 0; i < tf; i++) {
            summStats.addValue(len);
        }
        // Empirical probability of this term among all token occurrences.
        p = (double) tf / (double) tokenCount;
        ent += p * FastMath.log(base, p);
        // Offer the term to the top-N queue; insertWithOverflow evicts the
        // current minimum when the queue is full.
        if (queue.top() == null || queue.size() < topN || tf >= queue.top().getValue()) {
            queue.insertWithOverflow(new TokenIntPair(t, tf));
        }
        uniqueTokenCount++;
        bytesRef = termsEnum.next();
    }
    if (tokenCountInt > 0) {
        // Negate and divide by the total token count: this yields the Shannon
        // entropy (-sum p*log2 p) divided by n, i.e. an entropy *rate* per
        // token rather than raw entropy — presumably intentional; confirm
        // against consumers of this value.
        ent = (-1.0d / (double) tokenCountInt) * ent;
    }
    fieldStats.put(field, new TokenStatistics(uniqueTokenCount, tokenCountInt, queue.getArray(), ent, summStats));
}
Also used : Terms(org.apache.lucene.index.Terms) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 8 with SummaryStatistics

use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project tika by apache.

In the class TokenStatistics, the method equals:

/**
 * Equality is based on the token counts, entropy, the top-N token array and
 * a subset of the summary statistics (n, geometric mean, max, mean, min,
 * sum, standard deviation). Doubles are compared via {@code doubleEquals}
 * to tolerate rounding differences.
 */
@Override
public boolean equals(Object o) {
    if (this == o)
        return true;
    if (o == null || getClass() != o.getClass())
        return false;
    TokenStatistics that = (TokenStatistics) o;
    if (totalTokens != that.totalTokens)
        return false;
    if (totalUniqueTokens != that.totalUniqueTokens)
        return false;
    if (!doubleEquals(that.entropy, entropy))
        return false;
    // Element-wise, order-sensitive comparison; assumes TokenIntPair
    // overrides equals() — TODO confirm.
    if (!Arrays.equals(topN, that.topN))
        return false;
    // Use the already-cast 'that' instead of re-casting 'o'.
    SummaryStatistics thatS = that.summaryStatistics;
    if (summaryStatistics.getN() != thatS.getN())
        return false;
    //if both have n==0, don't bother with the stats
    if (summaryStatistics.getN() == 0L)
        return true;
    //TODO: consider adding others...
    if (!doubleEquals(summaryStatistics.getGeometricMean(), thatS.getGeometricMean()))
        return false;
    if (!doubleEquals(summaryStatistics.getMax(), thatS.getMax()))
        return false;
    if (!doubleEquals(summaryStatistics.getMean(), thatS.getMean()))
        return false;
    if (!doubleEquals(summaryStatistics.getMin(), thatS.getMin()))
        return false;
    if (!doubleEquals(summaryStatistics.getSum(), thatS.getSum()))
        return false;
    if (!doubleEquals(summaryStatistics.getStandardDeviation(), thatS.getStandardDeviation()))
        return false;
    return true;
}
Also used : SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics)

Example 9 with SummaryStatistics

use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project tika by apache.

In the class AbstractProfiler, the method writeContentData:

/**
     * Checks to see if metadata is null or content is empty (null or only whitespace).
     * If any of these, then this does no processing, and the fileId is not
     * entered into the content table.
     *
     * @param fileId id of the file whose content is being profiled
     * @param m metadata extracted for the file; may be null
     * @param fieldName the metadata field whose content is tokenized
     * @param contentsTable destination table for the content row
     */
protected void writeContentData(String fileId, Metadata m, String fieldName, TableInfo contentsTable) throws IOException {
    if (m == null) {
        return;
    }
    Map<Cols, String> data = new HashMap<>();
    String content = getContent(m, maxContentLength, data);
    if (content == null || content.trim().length() == 0) {
        return;
    }
    // Reset and repopulate the token counts for this field.
    tokenCounter.clear(fieldName);
    tokenCounter.add(fieldName, content);
    data.put(Cols.ID, fileId);
    data.put(Cols.CONTENT_LENGTH, Integer.toString(content.length()));
    langid(m, data);
    String langid = data.get(Cols.LANG_ID_1);
    langid = (langid == null) ? "" : langid;
    writeTokenCounts(data, fieldName, tokenCounter);
    CommonTokenResult commonTokenResult = null;
    try {
        commonTokenResult = commonTokenCountManager.countTokenOverlaps(langid, tokenCounter.getTokens(fieldName));
    } catch (IOException e) {
        LOG.error("{}", e.getMessage(), e);
    }
    // BUGFIX: previously a failed countTokenOverlaps left commonTokenResult
    // null and the puts below threw an NPE. Skip the row instead; the
    // failure has already been logged.
    if (commonTokenResult == null) {
        return;
    }
    data.put(Cols.COMMON_TOKENS_LANG, commonTokenResult.getLangCode());
    data.put(Cols.NUM_COMMON_TOKENS, Integer.toString(commonTokenResult.getCommonTokens()));
    TokenStatistics tokenStatistics = tokenCounter.getTokenStatistics(fieldName);
    data.put(Cols.NUM_UNIQUE_TOKENS, Integer.toString(tokenStatistics.getTotalUniqueTokens()));
    data.put(Cols.NUM_TOKENS, Integer.toString(tokenStatistics.getTotalTokens()));
    data.put(Cols.NUM_ALPHABETIC_TOKENS, Integer.toString(commonTokenResult.getAlphabeticTokens()));
    data.put(Cols.TOKEN_ENTROPY_RATE, Double.toString(tokenStatistics.getEntropy()));
    SummaryStatistics summStats = tokenStatistics.getSummaryStatistics();
    data.put(Cols.TOKEN_LENGTH_SUM, Integer.toString((int) summStats.getSum()));
    data.put(Cols.TOKEN_LENGTH_MEAN, Double.toString(summStats.getMean()));
    data.put(Cols.TOKEN_LENGTH_STD_DEV, Double.toString(summStats.getStandardDeviation()));
    unicodeBlocks(m, data);
    try {
        writer.writeRow(contentsTable, data);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : Cols(org.apache.tika.eval.db.Cols) CommonTokenResult(org.apache.tika.eval.tokens.CommonTokenResult) HashMap(java.util.HashMap) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) TokenStatistics(org.apache.tika.eval.tokens.TokenStatistics) IOException(java.io.IOException)

Example 10 with SummaryStatistics

use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project cassandra by apache.

In the class BootStrapperTest, the method allocateTokensForNode:

/**
 * Allocates {@code vn} tokens for the given node and asserts that the
 * replicated ownership statistics improve after the allocation.
 */
private void allocateTokensForNode(int vn, String ks, TokenMetadata tm, InetAddress addr) {
    // Ownership distribution before the new tokens are assigned.
    SummaryStatistics before = TokenAllocation.replicatedOwnershipStats(tm.cloneOnlyTokenMap(), Keyspace.open(ks).getReplicationStrategy(), addr);
    Collection<Token> allocated = BootStrapper.allocateTokens(tm, addr, ks, vn, 0);
    assertEquals(vn, allocated.size());
    tm.updateNormalTokens(allocated, addr);
    // Ownership distribution after assignment; must be an improvement.
    SummaryStatistics after = TokenAllocation.replicatedOwnershipStats(tm.cloneOnlyTokenMap(), Keyspace.open(ks).getReplicationStrategy(), addr);
    verifyImprovement(before, after);
}
Also used : SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics)

Aggregations

SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics)17 HashMap (java.util.HashMap)3 Map (java.util.Map)3 PeakResult (gdsc.smlm.results.PeakResult)2 Rectangle (java.awt.Rectangle)2 InetAddress (java.net.InetAddress)2 Token (org.apache.cassandra.dht.Token)2 TokenMetadata (org.apache.cassandra.locator.TokenMetadata)2 ClusterPoint (gdsc.core.clustering.ClusterPoint)1 DensityManager (gdsc.core.clustering.DensityManager)1 IJImageSource (gdsc.smlm.ij.IJImageSource)1 ImagePSFModel (gdsc.smlm.model.ImagePSFModel)1 LocalisationModel (gdsc.smlm.model.LocalisationModel)1 LocalisationModelSet (gdsc.smlm.model.LocalisationModelSet)1 Calibration (gdsc.smlm.results.Calibration)1 MemoryPeakResults (gdsc.smlm.results.MemoryPeakResults)1 Trace (gdsc.smlm.results.Trace)1 ImagePlus (ij.ImagePlus)1 ImageStack (ij.ImageStack)1 IOException (java.io.IOException)1