Search in sources :

Example 1 with DimConfig

use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.

the class RandomSamplingFacetsCollector method amortizeFacetCounts.

/**
 * Note: if you use a counting {@link Facets} implementation, you can amortize the
 * sampled counts by calling this method. Uses the {@link FacetsConfig} and
 * the {@link IndexSearcher} to determine the upper bound for each facet value.
 *
 * @param res the sampled facet result to correct; may be {@code null}
 * @param config supplies the {@link DimConfig} (and thus the index field name) for {@code res.dim}
 * @param searcher supplies the {@link IndexReader} used for the per-label document
 *     frequency and for the total live document count
 * @return {@code res} itself when it is {@code null} or when {@code totalHits <= sampleSize};
 *     otherwise a new {@link FacetResult} with the same dim, path and child count whose
 *     label values and total are divided by {@code samplingRate} and capped
 * @throws IOException if the document frequency lookup on the reader fails
 */
public FacetResult amortizeFacetCounts(FacetResult res, FacetsConfig config, IndexSearcher searcher) throws IOException {
    if (res == null || totalHits <= sampleSize) {
        return res;
    }
    final LabelAndValue[] amortized = new LabelAndValue[res.labelValues.length];
    final IndexReader indexReader = searcher.getIndexReader();
    final DimConfig dimConfig = config.getDimConfig(res.dim);

    // Term path layout: [dim, path..., childLabel]; only the last slot changes per label.
    final int depth = res.path.length;
    final String[] termPath = new String[depth + 2];
    termPath[0] = res.dim;
    System.arraycopy(res.path, 0, termPath, 1, depth);
    final int labelSlot = depth + 1;

    int next = 0;
    for (LabelAndValue sampled : res.labelValues) {
        termPath[labelSlot] = sampled.label;
        final String indexedTerm = FacetsConfig.pathToString(termPath, termPath.length);
        // The number of documents containing the term is the upper bound for the label.
        final int upperBound = indexReader.docFreq(new Term(dimConfig.indexFieldName, indexedTerm));
        final int scaled = (int) (sampled.value.doubleValue() / samplingRate);
        amortized[next++] = new LabelAndValue(sampled.label, Math.min(upperBound, scaled));
    }

    // The total is only scaled when positive, and never exceeds the reader's numDocs().
    int amortizedTotal = res.value.intValue();
    if (amortizedTotal > 0) {
        amortizedTotal = Math.min(indexReader.numDocs(), (int) (res.value.doubleValue() / samplingRate));
    }
    return new FacetResult(res.dim, res.path, amortizedTotal, amortized, res.childCount);
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 2 with DimConfig

use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.

the class FloatTaxonomyFacets method rollup.

/**
 * Rolls up any single-valued hierarchical dimensions.
 *
 * <p>For every configured dimension that is hierarchical and not multi-valued, the result of
 * {@code rollup(children[rootOrd])} is added to the value stored at the dimension's root ordinal.
 *
 * @throws IOException if the taxonomy reader fails to resolve a dimension ordinal
 */
protected void rollup() throws IOException {
    for (Map.Entry<String, DimConfig> entry : config.getDimConfigs().entrySet()) {
        final DimConfig dimConfig = entry.getValue();
        // Only single-valued hierarchical dimensions are rolled up here.
        if (!dimConfig.hierarchical || dimConfig.multiValued) {
            continue;
        }
        final int rootOrd = taxoReader.getOrdinal(new FacetLabel(entry.getKey()));
        assert rootOrd > 0;
        values[rootOrd] += rollup(children[rootOrd]);
    }
}
Also used : Map(java.util.Map) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 3 with DimConfig

use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.

the class FloatTaxonomyFacets method getTopChildren.

/**
 * Collects the highest-valued immediate children of {@code dim}/{@code path}.
 *
 * <p>Only children whose value is greater than zero are counted and considered for the
 * result. The returned aggregate value is the sum over those children for single-valued
 * dimensions; for multi-valued dimensions it is the value stored at the parent ordinal when
 * {@code requireDimCount} is set, and {@code -1} otherwise.
 *
 * @param topN maximum number of children to return; must be greater than zero
 * @param dim the dimension to inspect
 * @param path optional path components below the dimension
 * @return the result, or {@code null} if the path has no ordinal in the taxonomy or the sum
 *     of the positive child values is zero
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException if the taxonomy reader fails
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
        throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    final DimConfig dimConfig = verifyDim(dim);
    final FacetLabel parentLabel = new FacetLabel(dim, path);
    final int parentOrd = taxoReader.getOrdinal(parentLabel);
    if (parentOrd == -1) {
        return null;
    }

    final TopOrdAndFloatQueue queue = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
    float threshold = 0;
    float total = 0;
    int positiveChildren = 0;
    TopOrdAndFloatQueue.OrdAndValue spare = null;

    // Walk the sibling chain that starts at the parent's first child.
    for (int childOrd = children[parentOrd];
            childOrd != TaxonomyReader.INVALID_ORDINAL;
            childOrd = siblings[childOrd]) {
        final float childValue = values[childOrd];
        if (childValue > 0) {
            total += childValue;
            positiveChildren++;
            if (childValue > threshold) {
                if (spare == null) {
                    spare = new TopOrdAndFloatQueue.OrdAndValue();
                }
                spare.ord = childOrd;
                spare.value = childValue;
                // Whatever the queue hands back is recycled for the next candidate.
                spare = queue.insertWithOverflow(spare);
                if (queue.size() == topN) {
                    // Queue is full: a candidate must now beat the current top entry.
                    threshold = queue.top().value;
                }
            }
        }
    }

    if (total == 0) {
        return null;
    }

    // For single-valued dimensions the summed value is accurate and is kept as is.
    if (dimConfig.multiValued) {
        if (dimConfig.requireDimCount) {
            total = values[parentOrd];
        } else {
            // Our sum'd count is not correct, in general:
            total = -1;
        }
    }

    // Fill the result array from the back, in the order the queue pops its entries.
    final LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    for (int slot = labelValues.length; slot-- > 0; ) {
        final TopOrdAndFloatQueue.OrdAndValue entry = queue.pop();
        final FacetLabel childLabel = taxoReader.getPath(entry.ord);
        labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
    }
    return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
Also used : TopOrdAndFloatQueue(org.apache.lucene.facet.TopOrdAndFloatQueue) FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 4 with DimConfig

use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.

the class IntTaxonomyFacets method getTopChildren.

/**
 * Collects the highest-valued immediate children of {@code dim}/{@code path}.
 *
 * <p>Only children whose value is greater than zero are counted and considered for the
 * result. The returned aggregate value is the sum over those children for single-valued
 * dimensions; for multi-valued dimensions it is the value stored at the parent ordinal when
 * {@code requireDimCount} is set, and {@code -1} otherwise.
 *
 * @param topN maximum number of children to return; must be greater than zero
 * @param dim the dimension to inspect
 * @param path optional path components below the dimension
 * @return the result, or {@code null} if the path has no ordinal in the taxonomy or the sum
 *     of the positive child values is zero
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException if the taxonomy reader fails
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
        throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    final DimConfig dimConfig = verifyDim(dim);
    final FacetLabel parentLabel = new FacetLabel(dim, path);
    final int parentOrd = taxoReader.getOrdinal(parentLabel);
    if (parentOrd == -1) {
        return null;
    }

    final TopOrdAndIntQueue queue = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
    int threshold = 0;
    int total = 0;
    int positiveChildren = 0;
    TopOrdAndIntQueue.OrdAndValue spare = null;

    // Walk the sibling chain that starts at the parent's first child.
    for (int childOrd = children[parentOrd];
            childOrd != TaxonomyReader.INVALID_ORDINAL;
            childOrd = siblings[childOrd]) {
        final int childValue = values[childOrd];
        if (childValue <= 0) {
            continue;
        }
        total += childValue;
        positiveChildren++;
        if (childValue > threshold) {
            if (spare == null) {
                spare = new TopOrdAndIntQueue.OrdAndValue();
            }
            spare.ord = childOrd;
            spare.value = childValue;
            // Whatever the queue hands back is recycled for the next candidate.
            spare = queue.insertWithOverflow(spare);
            if (queue.size() == topN) {
                // Queue is full: a candidate must now beat the current top entry.
                threshold = queue.top().value;
            }
        }
    }

    if (total == 0) {
        return null;
    }

    // For single-valued dimensions the summed value is accurate and is kept as is.
    if (dimConfig.multiValued) {
        if (dimConfig.requireDimCount) {
            total = values[parentOrd];
        } else {
            // Our sum'd value is not correct, in general:
            total = -1;
        }
    }

    // Fill the result array from the back, in the order the queue pops its entries.
    final LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    for (int slot = labelValues.length; slot-- > 0; ) {
        final TopOrdAndIntQueue.OrdAndValue entry = queue.pop();
        final FacetLabel childLabel = taxoReader.getPath(entry.ord);
        labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
    }
    return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
Also used : TopOrdAndIntQueue(org.apache.lucene.facet.TopOrdAndIntQueue) FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Example 5 with DimConfig

use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.

the class IntTaxonomyFacets method getSpecificValue.

/**
 * Looks up the value stored for exactly {@code dim}/{@code path}.
 *
 * <p>A dimension-level lookup (empty {@code path}) is only permitted when the dimension is
 * hierarchical and single-valued, or when it is multi-valued with {@code requireDimCount} set.
 *
 * @param dim the dimension to look up
 * @param path optional path components below the dimension
 * @return the stored value, or {@code -1} if the label has no ordinal in the taxonomy
 * @throws IllegalArgumentException if {@code path} is empty and the dimension's configuration
 *     does not allow a dimension-level value
 * @throws IOException if the taxonomy reader fails
 */
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
    final DimConfig dimConfig = verifyDim(dim);
    if (path.length == 0) {
        // ok: rolled up at search time
        final boolean rolledUpAtSearchTime = dimConfig.hierarchical && !dimConfig.multiValued;
        // ok: we indexed all ords at index time
        final boolean allOrdsIndexed = dimConfig.requireDimCount && dimConfig.multiValued;
        if (!rolledUpAtSearchTime && !allOrdsIndexed) {
            throw new IllegalArgumentException("cannot return dimension-level value alone; use getTopChildren instead");
        }
    }
    final int labelOrd = taxoReader.getOrdinal(new FacetLabel(dim, path));
    if (labelOrd < 0) {
        return -1;
    }
    return values[labelOrd];
}
Also used : DimConfig(org.apache.lucene.facet.FacetsConfig.DimConfig)

Aggregations

DimConfig (org.apache.lucene.facet.FacetsConfig.DimConfig): 8, FacetResult (org.apache.lucene.facet.FacetResult): 3, Map (java.util.Map): 2, LabelAndValue (org.apache.lucene.facet.LabelAndValue): 2, ArrayList (java.util.ArrayList): 1, FacetsConfig (org.apache.lucene.facet.FacetsConfig): 1, TopOrdAndFloatQueue (org.apache.lucene.facet.TopOrdAndFloatQueue): 1, TopOrdAndIntQueue (org.apache.lucene.facet.TopOrdAndIntQueue): 1, IndexReader (org.apache.lucene.index.IndexReader): 1, Term (org.apache.lucene.index.Term): 1