Use of org.apache.lucene.search.CollectionStatistics in project elasticsearch by elastic.
The class SearchPhaseController, method aggregateDfs:
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
    ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
    ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
    long aggMaxDoc = 0;
    for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
        final Term[] terms = lEntry.value.terms();
        final TermStatistics[] stats = lEntry.value.termStatistics();
        assert terms.length == stats.length;
        for (int i = 0; i < terms.length; i++) {
            assert terms[i] != null;
            TermStatistics existing = termStatistics.get(terms[i]);
            if (existing != null) {
                assert terms[i].bytes().equals(existing.term());
                // totalTermFreq is an optional statistic: if either side is -1
                // (meaning "not present"), the merged value must also be -1,
                // which optionalSum takes care of.
                termStatistics.put(terms[i], new TermStatistics(existing.term(),
                        existing.docFreq() + stats[i].docFreq(),
                        optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq())));
            } else {
                termStatistics.put(terms[i], stats[i]);
            }
        }
        assert !lEntry.value.fieldStatistics().containsKey(null);
        final Object[] keys = lEntry.value.fieldStatistics().keys;
        final Object[] values = lEntry.value.fieldStatistics().values;
        for (int i = 0; i < keys.length; i++) {
            if (keys[i] != null) {
                String key = (String) keys[i];
                CollectionStatistics value = (CollectionStatistics) values[i];
                assert key != null;
                CollectionStatistics existing = fieldStatistics.get(key);
                if (existing != null) {
                    CollectionStatistics merged = new CollectionStatistics(key,
                            existing.maxDoc() + value.maxDoc(),
                            optionalSum(existing.docCount(), value.docCount()),
                            optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()),
                            optionalSum(existing.sumDocFreq(), value.sumDocFreq()));
                    fieldStatistics.put(key, merged);
                } else {
                    fieldStatistics.put(key, value);
                }
            }
        }
        aggMaxDoc += lEntry.value.maxDoc();
    }
    return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}
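The optionalSum helper used above is not part of this excerpt; a minimal sketch, assuming the -1 "not present" convention described in the comment:

private static long optionalSum(long left, long right) {
    // If either statistic is absent (-1), the merged statistic is absent too;
    // otherwise the shard-local values simply add up.
    return Math.min(left, right) == -1 ? -1 : left + right;
}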
Use of org.apache.lucene.search.CollectionStatistics in project elasticsearch by elastic.
The class DfsSearchResult, method readFieldStats:
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(StreamInput in,
        ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    final int numFieldStatistics = in.readVInt();
    if (fieldStatistics == null) {
        fieldStatistics = HppcMaps.newNoNullKeysMap(numFieldStatistics);
    }
    for (int i = 0; i < numFieldStatistics; i++) {
        final String field = in.readString();
        assert field != null;
        final long maxDoc = in.readVLong();
        // docCount, sumTotalTermFreq and sumDocFreq are optional (-1 when absent);
        // they are shifted by one on the wire so they fit into an unsigned VLong.
        final long docCount = subOne(in.readVLong());
        final long sumTotalTermFreq = subOne(in.readVLong());
        final long sumDocFreq = subOne(in.readVLong());
        CollectionStatistics stats = new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
        fieldStatistics.put(field, stats);
    }
    return fieldStatistics;
}
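Both readFieldStats and writeFieldStats (below) lean on subOne/addOne helpers that this excerpt omits. A minimal sketch of the pair, assuming the shift-by-one wire encoding: a VLong cannot represent -1, so optional statistics travel as value + 1, with 0 meaning absent.

public static long addOne(long value) {
    // -1 (absent) becomes 0, which a VLong can encode.
    assert value + 1 >= 0;
    return value + 1;
}

public static long subOne(long value) {
    // Undo the shift: 0 on the wire decodes back to -1 (absent).
    assert value >= 0;
    return value - 1;
}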
Use of org.apache.lucene.search.CollectionStatistics in project elasticsearch by elastic.
The class DfsSearchResult, method writeFieldStats:
public static void writeFieldStats(StreamOutput out,
        ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
        out.writeString(c.key);
        CollectionStatistics statistics = c.value;
        assert statistics.maxDoc() >= 0;
        out.writeVLong(statistics.maxDoc());
        out.writeVLong(addOne(statistics.docCount()));
        out.writeVLong(addOne(statistics.sumTotalTermFreq()));
        out.writeVLong(addOne(statistics.sumDocFreq()));
    }
}
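A hypothetical round trip through the two methods, assuming Elasticsearch's BytesStreamOutput utility; the variable names are illustrative, not taken from the project:

BytesStreamOutput out = new BytesStreamOutput();
DfsSearchResult.writeFieldStats(out, fieldStatistics);
// Passing null lets readFieldStats allocate a map of the right size.
ObjectObjectHashMap<String, CollectionStatistics> decoded =
        DfsSearchResult.readFieldStats(out.bytes().streamInput(), null);
assert decoded.size() == fieldStatistics.size();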
Use of org.apache.lucene.search.CollectionStatistics in project lucene-solr by apache.
The class SpanWeight, method buildSimWeight:
private Similarity.SimWeight buildSimWeight(SpanQuery query, IndexSearcher searcher,
        Map<Term, TermContext> termContexts, float boost) throws IOException {
    if (termContexts == null || termContexts.size() == 0 || query.getField() == null)
        return null;
    TermStatistics[] termStats = new TermStatistics[termContexts.size()];
    int i = 0;
    for (Term term : termContexts.keySet()) {
        termStats[i] = searcher.termStatistics(term, termContexts.get(term));
        i++;
    }
    CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
    return similarity.computeWeight(boost, collectionStats, termStats);
}
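For context, a sketch of the same Lucene 6.x flow for a single term outside of SpanWeight; the helper name weightForTerm is made up for illustration:

static Similarity.SimWeight weightForTerm(IndexSearcher searcher, String field, String text) throws IOException {
    // Gather per-term and per-field statistics, then ask the searcher's
    // Similarity for a SimWeight, just as buildSimWeight does above.
    Term term = new Term(field, text);
    TermContext context = TermContext.build(searcher.getTopReaderContext(), term);
    TermStatistics termStats = searcher.termStatistics(term, context);
    CollectionStatistics collectionStats = searcher.collectionStatistics(field);
    return searcher.getSimilarity(true).computeWeight(1.0f, collectionStats, termStats);
}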
Use of org.apache.lucene.search.CollectionStatistics in project lucene-solr by apache.
The class TestMemoryIndex, method testSimilarities:
@Test
public void testSimilarities() throws IOException {
    MemoryIndex mi = new MemoryIndex();
    mi.addField("f1", "a long text field that contains many many terms", analyzer);
    IndexSearcher searcher = mi.createSearcher();
    LeafReader reader = (LeafReader) searcher.getIndexReader();
    NumericDocValues norms = reader.getNormValues("f1");
    assertEquals(0, norms.nextDoc());
    float n1 = norms.longValue();
    // Norms are re-computed when we change the Similarity
    mi.setSimilarity(new Similarity() {

        @Override
        public long computeNorm(FieldInvertState state) {
            return 74;
        }

        @Override
        public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
            throw new UnsupportedOperationException();
        }

        @Override
        public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
            throw new UnsupportedOperationException();
        }
    });
    norms = reader.getNormValues("f1");
    assertEquals(0, norms.nextDoc());
    float n2 = norms.longValue();
    assertTrue(n1 != n2);
    TestUtil.checkReader(reader);
}
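Since the replacement Similarity returns a constant 74 from computeNorm, the recomputed norm could additionally be pinned down; a hypothetical extra assertion:

// The recomputed norm is exactly the constant returned by computeNorm above.
assertEquals(74, norms.longValue());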