use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.
the class DocValuesStats method getCounts.
public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
final SchemaField schemaField = statsField.getSchemaField();
assert null != statsField.getSchemaField() : "DocValuesStats requires a StatsField using a SchemaField";
final String fieldName = schemaField.getName();
final FieldType ft = schemaField.getType();
final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
//Initialize facetstats, if facets have been passed in
final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
int upto = 0;
for (String facetField : facet) {
SchemaField fsf = searcher.getSchema().getField(facetField);
if (fsf.multiValued()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
}
SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
}
// TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
// for term lookups only
SortedSetDocValues si;
// for mapping per-segment ords to global ones
OrdinalMap ordinalMap = null;
if (multiValued) {
si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
if (si instanceof MultiSortedSetDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
}
} else {
SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
si = single == null ? null : DocValues.singleton(single);
if (single instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
}
}
if (si == null) {
si = DocValues.emptySortedSet();
}
if (si.getValueCount() >= Integer.MAX_VALUE) {
throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
}
int missingDocCountTotal = 0;
final int nTerms = (int) si.getValueCount();
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = new int[nTerms];
Filter filter = docs.getTopFilter();
List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
LeafReaderContext leaf = leaves.get(subIndex);
// solr docsets already exclude any deleted docs
DocIdSet dis = filter.getDocIdSet(leaf, null);
DocIdSetIterator disi = null;
if (dis != null) {
disi = dis.iterator();
}
if (disi != null) {
int docBase = leaf.docBase;
if (multiValued) {
SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
if (sub == null) {
sub = DocValues.emptySortedSet();
}
SortedDocValues singleton = DocValues.unwrapSingleton(sub);
if (singleton != null) {
// some codecs may optimize SORTED_SET storage for single-valued fields
missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
} else {
missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
}
} else {
SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
if (sub == null) {
sub = DocValues.emptySorted();
}
missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
}
}
}
// add results in index order
for (int ord = 0; ord < counts.length; ord++) {
int count = counts[ord];
if (count > 0) {
final BytesRef value = si.lookupOrd(ord);
res.accumulate(value, count);
for (FieldFacetStats f : facetStats) {
f.accumulateTermNum(ord, value);
}
}
}
res.addMissing(missingDocCountTotal);
if (facetStats.length > 0) {
for (FieldFacetStats f : facetStats) {
Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
f.accumulateMissing();
res.addFacet(f.name, facetStatsValues);
}
}
return res;
}
use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.
the class DocValuesStats method accumSingle.
/** accumulates per-segment single-valued stats */
static int accumSingle(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
int missingDocCount = 0;
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (doc > si.docID()) {
si.advance(doc);
}
if (doc == si.docID()) {
int term = si.ordValue();
if (map != null) {
term = (int) ordMap.get(term);
}
counts[term]++;
for (FieldFacetStats f : facetStats) {
f.facetTermNum(docBase + doc, term);
}
} else {
for (FieldFacetStats f : facetStats) {
f.facetMissingNum(docBase + doc);
}
missingDocCount++;
}
}
return missingDocCount;
}
use of org.apache.solr.handler.component.FieldFacetStats in project lucene-solr by apache.
the class DocValuesStats method accumMulti.
/** accumulates per-segment multi-valued stats */
static int accumMulti(int[] counts, int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
int missingDocCount = 0;
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (doc > si.docID()) {
si.advance(doc);
}
if (doc == si.docID()) {
long ord;
while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
int term = (int) ord;
if (map != null) {
term = (int) ordMap.get(term);
}
counts[term]++;
for (FieldFacetStats f : facetStats) {
f.facetTermNum(docBase + doc, term);
}
}
} else {
for (FieldFacetStats f : facetStats) {
f.facetMissingNum(docBase + doc);
}
missingDocCount++;
}
}
return missingDocCount;
}
Aggregations