use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.
the class RandomSamplingFacetsCollector method amortizeFacetCounts.
/**
 * Scales the sampled counts in a {@link FacetResult} back up by the sampling rate.
 * Intended for counting {@link Facets} implementations.
 *
 * <p>Each child count is divided by {@code samplingRate} and then capped at the
 * document frequency of the child's term, read from the {@link IndexSearcher}'s
 * reader in the index field that the {@link FacetsConfig} assigns to the dimension.
 * A positive total is scaled the same way and capped at the reader's
 * {@code numDocs()}.
 *
 * @param res the sampled result; may be {@code null}
 * @param config supplies the {@link DimConfig} (and so the index field) for {@code res.dim}
 * @param searcher supplies the {@link IndexReader} used for the upper bounds
 * @return {@code res} itself when it is {@code null} or when
 *     {@code totalHits <= sampleSize}; otherwise a new {@link FacetResult}
 *     holding the amortized counts
 * @throws IOException propagated from the index reader lookups
 */
public FacetResult amortizeFacetCounts(FacetResult res, FacetsConfig config, IndexSearcher searcher) throws IOException {
  if (res == null || totalHits <= sampleSize) {
    return res;
  }

  final int numLabels = res.labelValues.length;
  final LabelAndValue[] amortized = new LabelAndValue[numLabels];
  final IndexReader indexReader = searcher.getIndexReader();
  final DimConfig dimConfig = config.getDimConfig(res.dim);

  // Term path layout: [dim, path..., childLabel]. Only the last slot changes per child,
  // so a single array is reused for every lookup.
  final int parentDepth = res.path.length;
  final int leafSlot = parentDepth + 1;
  final String[] termPath = new String[parentDepth + 2];
  termPath[0] = res.dim;
  System.arraycopy(res.path, 0, termPath, 1, parentDepth);

  for (int i = 0; i < numLabels; i++) {
    final LabelAndValue sampled = res.labelValues[i];
    termPath[leafSlot] = sampled.label;
    final String termText = FacetsConfig.pathToString(termPath, termPath.length);
    // The term's document frequency is the ceiling for this child's amortized count.
    final int docFreqCap = indexReader.docFreq(new Term(dimConfig.indexFieldName, termText));
    final int scaled = (int) (sampled.value.doubleValue() / samplingRate);
    amortized[i] = new LabelAndValue(sampled.label, Math.min(docFreqCap, scaled));
  }

  // Cap the total count on the total number of non-deleted documents in the reader.
  // Non-positive totals are passed through unchanged.
  int amortizedTotal = res.value.intValue();
  if (amortizedTotal > 0) {
    final int scaledTotal = (int) (res.value.doubleValue() / samplingRate);
    amortizedTotal = Math.min(indexReader.numDocs(), scaledTotal);
  }

  return new FacetResult(res.dim, res.path, amortizedTotal, amortized, res.childCount);
}
use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.
the class FloatTaxonomyFacets method rollup.
/**
 * Rolls up any single-valued hierarchical dimensions.
 *
 * <p>For every dimension in {@code config} that is hierarchical and not
 * multi-valued, the result of {@code rollup(children[dimRootOrd])} is added to
 * the value stored at the dimension's root ordinal.
 *
 * <p>A dimension whose root ordinal is not positive is skipped. The taxonomy
 * lookup reports a missing category with a negative ordinal, and using that
 * as an array index would fail.
 *
 * @throws IOException propagated from the taxonomy lookup or the per-ordinal rollup
 */
protected void rollup() throws IOException {
  // Rollup any necessary dims:
  for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
    String dim = ent.getKey();
    DimConfig ft = ent.getValue();
    if (ft.hierarchical && ft.multiValued == false) {
      int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
      // The lookup can come back negative when the dimension has no entry in the
      // taxonomy (presumably: declared in the config but never indexed). An
      // assert alone would let values[-1] through when assertions are disabled,
      // so guard explicitly and leave such dimensions untouched.
      if (dimRootOrd > 0) {
        values[dimRootOrd] += rollup(children[dimRootOrd]);
      }
    }
  }
}
use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.
the class FloatTaxonomyFacets method getTopChildren.
/**
 * Collects the {@code topN} highest-valued direct children of the category
 * {@code dim}/{@code path}.
 *
 * <p>Only children with a value greater than zero are considered. The returned
 * labels are ordered as they come out of the queue in reverse, i.e. the entry
 * popped last is placed first.
 *
 * @param topN maximum number of children to return; must be positive
 * @param dim the dimension name, checked through {@code verifyDim}
 * @param path optional path components below the dimension
 * @return the result, or {@code null} when the category has no ordinal or the
 *     positive child values sum to zero
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException propagated from the taxonomy reader
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
  if (topN <= 0) {
    throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
  }

  final DimConfig dimConfig = verifyDim(dim);
  final FacetLabel parentLabel = new FacetLabel(dim, path);
  final int parentOrd = taxoReader.getOrdinal(parentLabel);
  if (parentOrd == -1) {
    return null;
  }

  final TopOrdAndFloatQueue topQueue = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
  // Entry handed to the queue; whatever the queue gives back is recycled for the next insert.
  TopOrdAndFloatQueue.OrdAndValue spare = null;
  // Smallest value currently worth inserting; only raised once the queue is full.
  float threshold = 0;
  float total = 0;
  int positiveChildren = 0;

  // Walk the sibling chain that starts at the parent's first child.
  for (int child = children[parentOrd]; child != TaxonomyReader.INVALID_ORDINAL; child = siblings[child]) {
    final float childValue = values[child];
    // Written as a negated '>' so that NaN is skipped along with non-positive values.
    if (!(childValue > 0)) {
      continue;
    }
    total += childValue;
    positiveChildren++;
    if (childValue > threshold) {
      if (spare == null) {
        spare = new TopOrdAndFloatQueue.OrdAndValue();
      }
      spare.ord = child;
      spare.value = childValue;
      spare = topQueue.insertWithOverflow(spare);
      if (topQueue.size() == topN) {
        threshold = topQueue.top().value;
      }
    }
  }

  if (total == 0) {
    return null;
  }

  // For single-valued dims the summed value is accurate and is kept as is.
  // For multi-valued dims it is not correct in general: use the value stored
  // at the dimension ordinal when requireDimCount is set, otherwise report -1.
  if (dimConfig.multiValued) {
    total = dimConfig.requireDimCount ? values[parentOrd] : -1;
  }

  // Fill the array back to front with successive pops.
  final LabelAndValue[] labelValues = new LabelAndValue[topQueue.size()];
  int slot = labelValues.length;
  while (--slot >= 0) {
    final TopOrdAndFloatQueue.OrdAndValue entry = topQueue.pop();
    final FacetLabel childLabel = taxoReader.getPath(entry.ord);
    // The component at index parentLabel.length is the child's own label.
    labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
  }

  return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.
the class IntTaxonomyFacets method getTopChildren.
/**
 * Collects the {@code topN} highest-valued direct children of the category
 * {@code dim}/{@code path}, using integer values.
 *
 * <p>Only children with a value greater than zero are considered. The returned
 * labels are ordered as they come out of the queue in reverse, i.e. the entry
 * popped last is placed first.
 *
 * @param topN maximum number of children to return; must be positive
 * @param dim the dimension name, checked through {@code verifyDim}
 * @param path optional path components below the dimension
 * @return the result, or {@code null} when the category has no ordinal or the
 *     positive child values sum to zero
 * @throws IllegalArgumentException if {@code topN <= 0}
 * @throws IOException propagated from the taxonomy reader
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
  if (topN <= 0) {
    throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
  }

  final DimConfig dimConfig = verifyDim(dim);
  final FacetLabel parentLabel = new FacetLabel(dim, path);
  final int parentOrd = taxoReader.getOrdinal(parentLabel);
  if (parentOrd == -1) {
    return null;
  }

  final TopOrdAndIntQueue topQueue = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
  // Entry handed to the queue; whatever the queue gives back is recycled for the next insert.
  TopOrdAndIntQueue.OrdAndValue spare = null;
  // Smallest value currently worth inserting; only raised once the queue is full.
  int threshold = 0;
  int total = 0;
  int positiveChildren = 0;

  // Walk the sibling chain that starts at the parent's first child.
  for (int child = children[parentOrd]; child != TaxonomyReader.INVALID_ORDINAL; child = siblings[child]) {
    final int childValue = values[child];
    if (childValue <= 0) {
      continue;
    }
    total += childValue;
    positiveChildren++;
    if (childValue > threshold) {
      if (spare == null) {
        spare = new TopOrdAndIntQueue.OrdAndValue();
      }
      spare.ord = child;
      spare.value = childValue;
      spare = topQueue.insertWithOverflow(spare);
      if (topQueue.size() == topN) {
        threshold = topQueue.top().value;
      }
    }
  }

  if (total == 0) {
    return null;
  }

  // For single-valued dims the summed value is accurate and is kept as is.
  // For multi-valued dims it is not correct in general: use the value stored
  // at the dimension ordinal when requireDimCount is set, otherwise report -1.
  if (dimConfig.multiValued) {
    total = dimConfig.requireDimCount ? values[parentOrd] : -1;
  }

  // Fill the array back to front with successive pops.
  final LabelAndValue[] labelValues = new LabelAndValue[topQueue.size()];
  int slot = labelValues.length;
  while (--slot >= 0) {
    final TopOrdAndIntQueue.OrdAndValue entry = topQueue.pop();
    final FacetLabel childLabel = taxoReader.getPath(entry.ord);
    // The component at index parentLabel.length is the child's own label.
    labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], entry.value);
  }

  return new FacetResult(dim, path, total, labelValues, positiveChildren);
}
use of org.apache.lucene.facet.FacetsConfig.DimConfig in project lucene-solr by apache.
the class IntTaxonomyFacets method getSpecificValue.
/**
 * Returns the value stored for the single category {@code dim}/{@code path}.
 *
 * <p>Asking for a bare dimension (empty {@code path}) is only allowed when the
 * dimension is hierarchical and single-valued, or when it is multi-valued with
 * {@code requireDimCount} set.
 *
 * @param dim the dimension name, checked through {@code verifyDim}
 * @param path optional path components below the dimension
 * @return the stored value, or {@code -1} when the category has no ordinal
 * @throws IllegalArgumentException if {@code path} is empty and the dimension
 *     configuration matches neither allowed case
 * @throws IOException propagated from the taxonomy reader
 */
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
  final DimConfig dimConfig = verifyDim(dim);

  if (path.length == 0) {
    // ok: rolled up at search time
    final boolean rolledUpAtSearchTime = dimConfig.hierarchical && !dimConfig.multiValued;
    // ok: we indexed all ords at index time
    final boolean allOrdsIndexed = dimConfig.requireDimCount && dimConfig.multiValued;
    if (!rolledUpAtSearchTime && !allOrdsIndexed) {
      throw new IllegalArgumentException("cannot return dimension-level value alone; use getTopChildren instead");
    }
  }

  final int categoryOrd = taxoReader.getOrdinal(new FacetLabel(dim, path));
  if (categoryOrd < 0) {
    return -1;
  }
  return values[categoryOrd];
}
Aggregations