use of org.apache.lucene.search.TermStatistics in project elasticsearch by elastic.
the class AggregatedDfs method writeTo.
@Override
public void writeTo(final StreamOutput out) throws IOException {
out.writeVInt(termStatistics.size());
for (ObjectObjectCursor<Term, TermStatistics> c : termStatistics()) {
Term term = c.key;
out.writeString(term.field());
out.writeBytesRef(term.bytes());
TermStatistics stats = c.value;
out.writeBytesRef(stats.term());
out.writeVLong(stats.docFreq());
out.writeVLong(DfsSearchResult.addOne(stats.totalTermFreq()));
}
DfsSearchResult.writeFieldStats(out, fieldStatistics);
out.writeVLong(maxDoc);
}
use of org.apache.lucene.search.TermStatistics in project elasticsearch by elastic.
the class DfsSearchResult method readTermStats.
public static TermStatistics[] readTermStats(StreamInput in, Term[] terms) throws IOException {
int termsStatsSize = in.readVInt();
final TermStatistics[] termStatistics;
if (termsStatsSize == 0) {
termStatistics = EMPTY_TERM_STATS;
} else {
termStatistics = new TermStatistics[termsStatsSize];
assert terms.length == termsStatsSize;
for (int i = 0; i < termStatistics.length; i++) {
BytesRef term = terms[i].bytes();
final long docFreq = in.readVLong();
assert docFreq >= 0;
final long totalTermFreq = subOne(in.readVLong());
termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq);
}
}
return termStatistics;
}
use of org.apache.lucene.search.TermStatistics in project elasticsearch by elastic.
the class AggregatedDfs method readFrom.
@Override
public void readFrom(StreamInput in) throws IOException {
int size = in.readVInt();
termStatistics = HppcMaps.newMap(size);
for (int i = 0; i < size; i++) {
Term term = new Term(in.readString(), in.readBytesRef());
TermStatistics stats = new TermStatistics(in.readBytesRef(), in.readVLong(), DfsSearchResult.subOne(in.readVLong()));
termStatistics.put(term, stats);
}
fieldStatistics = DfsSearchResult.readFieldStats(in);
maxDoc = in.readVLong();
}
use of org.apache.lucene.search.TermStatistics in project elasticsearch by elastic.
the class SearchPhaseController method aggregateDfs.
public AggregatedDfs aggregateDfs(AtomicArray<DfsSearchResult> results) {
ObjectObjectHashMap<Term, TermStatistics> termStatistics = HppcMaps.newNoNullKeysMap();
ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
long aggMaxDoc = 0;
for (AtomicArray.Entry<DfsSearchResult> lEntry : results.asList()) {
final Term[] terms = lEntry.value.terms();
final TermStatistics[] stats = lEntry.value.termStatistics();
assert terms.length == stats.length;
for (int i = 0; i < terms.length; i++) {
assert terms[i] != null;
TermStatistics existing = termStatistics.get(terms[i]);
if (existing != null) {
assert terms[i].bytes().equals(existing.term());
// totalTermFrequency is an optional statistic we need to check if either one or both
// are set to -1 which means not present and then set it globally to -1
termStatistics.put(terms[i], new TermStatistics(existing.term(), existing.docFreq() + stats[i].docFreq(), optionalSum(existing.totalTermFreq(), stats[i].totalTermFreq())));
} else {
termStatistics.put(terms[i], stats[i]);
}
}
assert !lEntry.value.fieldStatistics().containsKey(null);
final Object[] keys = lEntry.value.fieldStatistics().keys;
final Object[] values = lEntry.value.fieldStatistics().values;
for (int i = 0; i < keys.length; i++) {
if (keys[i] != null) {
String key = (String) keys[i];
CollectionStatistics value = (CollectionStatistics) values[i];
assert key != null;
CollectionStatistics existing = fieldStatistics.get(key);
if (existing != null) {
CollectionStatistics merged = new CollectionStatistics(key, existing.maxDoc() + value.maxDoc(), optionalSum(existing.docCount(), value.docCount()), optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()), optionalSum(existing.sumDocFreq(), value.sumDocFreq()));
fieldStatistics.put(key, merged);
} else {
fieldStatistics.put(key, value);
}
}
}
aggMaxDoc += lEntry.value.maxDoc();
}
return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
}
use of org.apache.lucene.search.TermStatistics in project lucene-solr by apache.
the class TestMemoryIndex method testSimilarities.
@Test
public void testSimilarities() throws IOException {
MemoryIndex mi = new MemoryIndex();
mi.addField("f1", "a long text field that contains many many terms", analyzer);
IndexSearcher searcher = mi.createSearcher();
LeafReader reader = (LeafReader) searcher.getIndexReader();
NumericDocValues norms = reader.getNormValues("f1");
assertEquals(0, norms.nextDoc());
float n1 = norms.longValue();
// Norms are re-computed when we change the Similarity
mi.setSimilarity(new Similarity() {
@Override
public long computeNorm(FieldInvertState state) {
return 74;
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
});
norms = reader.getNormValues("f1");
assertEquals(0, norms.nextDoc());
float n2 = norms.longValue();
assertTrue(n1 != n2);
TestUtil.checkReader(reader);
}
Aggregations