use of org.apache.lucene.search.Collector in project elasticsearch by elastic.
the class QueryPhaseTests method countTestCase.
private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
TestSearchContext context = new TestSearchContext(null);
context.parsedQuery(new ParsedQuery(query));
context.setSize(0);
context.setTask(new SearchTask(123L, "", "", "", null));
IndexSearcher searcher = new IndexSearcher(reader);
final AtomicBoolean collected = new AtomicBoolean();
IndexSearcher contextSearcher = new IndexSearcher(reader) {
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
collected.set(true);
super.search(leaves, weight, collector);
}
};
final boolean rescore = QueryPhase.execute(context, contextSearcher);
assertFalse(rescore);
assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
assertEquals(shouldCollect, collected.get());
}
use of org.apache.lucene.search.Collector in project lucene-solr by apache.
the class CommandHandler method computeDocSet.
private DocSet computeDocSet(Query query, ProcessedFilter filter, List<Collector> collectors) throws IOException {
int maxDoc = searcher.maxDoc();
final DocSetCollector docSetCollector = new DocSetCollector(maxDoc);
List<Collector> allCollectors = new ArrayList<>(collectors);
allCollectors.add(docSetCollector);
searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
return DocSetUtil.getDocSet(docSetCollector, searcher);
}
use of org.apache.lucene.search.Collector in project lucene-solr by apache.
the class AnalyzingInfixSuggester method lookup.
/**
* This is an advanced method providing the capability to send down to the suggester any
* arbitrary lucene query to be used to filter the result of the suggester
*
* @param key the keyword being looked for
* @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
* @param num number of items to return
* @param allTermsRequired all searched terms must match or not
* @param doHighlight if true, the matching term will be highlighted in the search result
* @return the result of the suggester
* @throws IOException f the is IO exception while reading data from the index
*/
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
if (searcherMgr == null) {
throw new IllegalStateException("suggester was not built");
}
final BooleanClause.Occur occur;
if (allTermsRequired) {
occur = BooleanClause.Occur.MUST;
} else {
occur = BooleanClause.Occur.SHOULD;
}
BooleanQuery.Builder query;
Set<String> matchedTokens;
String prefixToken = null;
try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
//long t0 = System.currentTimeMillis();
ts.reset();
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
String lastToken = null;
query = new BooleanQuery.Builder();
int maxEndOffset = -1;
matchedTokens = new HashSet<>();
while (ts.incrementToken()) {
if (lastToken != null) {
matchedTokens.add(lastToken);
query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
}
lastToken = termAtt.toString();
if (lastToken != null) {
maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
}
}
ts.end();
if (lastToken != null) {
Query lastQuery;
if (maxEndOffset == offsetAtt.endOffset()) {
// Use PrefixQuery (or the ngram equivalent) when
// there was no trailing discarded chars in the
// string (e.g. whitespace), so that if query does
// not end with a space we show prefix matches for
// that token:
lastQuery = getLastTokenQuery(lastToken);
prefixToken = lastToken;
} else {
// Use TermQuery for an exact match if there were
// trailing discarded chars (e.g. whitespace), so
// that if query ends with a space we only show
// exact matches for that term:
matchedTokens.add(lastToken);
lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
}
if (lastQuery != null) {
query.add(lastQuery, occur);
}
}
if (contextQuery != null) {
boolean allMustNot = true;
for (BooleanClause clause : contextQuery.clauses()) {
if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
allMustNot = false;
break;
}
}
if (allMustNot) {
// All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
for (BooleanClause clause : contextQuery.clauses()) {
query.add(clause);
}
} else if (allTermsRequired == false) {
// We must carefully upgrade the query clauses to MUST:
BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
newQuery.add(query.build(), BooleanClause.Occur.MUST);
newQuery.add(contextQuery, BooleanClause.Occur.MUST);
query = newQuery;
} else {
// Add contextQuery as sub-query
query.add(contextQuery, BooleanClause.Occur.MUST);
}
}
}
// TODO: we could allow blended sort here, combining
// weight w/ score. Now we ignore score and sort only
// by weight:
Query finalQuery = finishQuery(query, allTermsRequired);
//System.out.println("finalQuery=" + finalQuery);
// Sort by weight, descending:
TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
List<LookupResult> results = null;
SearcherManager mgr;
IndexSearcher searcher;
synchronized (searcherMgrLock) {
// acquire & release on same SearcherManager, via local reference
mgr = searcherMgr;
searcher = mgr.acquire();
}
try {
//System.out.println("got searcher=" + searcher);
searcher.search(finalQuery, c2);
TopFieldDocs hits = c.topDocs();
// Slower way if postings are not pre-sorted by weight:
// hits = searcher.search(query, null, num, SORT);
results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
} finally {
mgr.release(searcher);
}
return results;
}
use of org.apache.lucene.search.Collector in project lucene-solr by apache.
the class TestGrouping method testRandom.
public void testRandom() throws Exception {
int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
for (int iter = 0; iter < numberOfRuns; iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + iter);
}
final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
//final int numDocs = _TestUtil.nextInt(random, 5, 20);
final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
}
final List<BytesRef> groups = new ArrayList<>();
for (int i = 0; i < numGroups; i++) {
String randomValue;
do {
// B/c of DV based impl we can't see the difference between an empty string and a null value.
// For that reason we don't generate empty string
// groups.
randomValue = TestUtil.randomRealisticUnicodeString(random());
//randomValue = TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
}
final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
if (VERBOSE) {
System.out.println("TEST: create fake content");
}
for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
final StringBuilder sb = new StringBuilder();
sb.append("real").append(random().nextInt(3)).append(' ');
final int fakeCount = random().nextInt(10);
for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
sb.append("fake ");
}
contentStrings[contentIDX] = sb.toString();
if (VERBOSE) {
System.out.println(" content=" + sb.toString());
}
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
Document docNoGroup = new Document();
Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
doc.add(idvGroupField);
docNoGroup.add(idvGroupField);
Field group = newStringField("group", "", Field.Store.NO);
doc.add(group);
Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
doc.add(sort1);
docNoGroup.add(sort1);
Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
doc.add(sort2);
docNoGroup.add(sort2);
Field content = newTextField("content", "", Field.Store.NO);
doc.add(content);
docNoGroup.add(content);
NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
doc.add(idDV);
docNoGroup.add(idDV);
final GroupDoc[] groupDocs = new GroupDoc[numDocs];
for (int i = 0; i < numDocs; i++) {
final BytesRef groupValue;
if (random().nextInt(24) == 17) {
// So we test the "doc doesn't have the group'd
// field" case:
groupValue = null;
} else {
groupValue = groups.get(random().nextInt(groups.size()));
}
final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
if (VERBOSE) {
System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
}
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
group.setStringValue(groupDoc.group.utf8ToString());
idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
} else {
// TODO: not true
// Must explicitly set empty string, else eg if
// the segment has all docs missing the field then
// we get null back instead of empty BytesRef:
idvGroupField.setBytesValue(new BytesRef());
}
sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
content.setStringValue(groupDoc.content);
idDV.setLongValue(groupDoc.id);
if (groupDoc.group == null) {
w.addDocument(docNoGroup);
} else {
w.addDocument(doc);
}
}
final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
final DirectoryReader r = w.getReader();
w.close();
NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
int[] docIDToID = new int[r.maxDoc()];
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, values.nextDoc());
docIDToID[i] = (int) values.longValue();
}
DirectoryReader rBlocks = null;
Directory dirBlocks = null;
final IndexSearcher s = newSearcher(r);
if (VERBOSE) {
System.out.println("\nTEST: searcher=" + s);
}
final ShardState shards = new ShardState(s);
Set<Integer> seenIDs = new HashSet<>();
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
int idValue = docIDToID[hit.doc];
final GroupDoc gd = groupDocs[idValue];
seenIDs.add(idValue);
assertTrue(gd.score == 0.0);
gd.score = hit.score;
assertEquals(gd.id, idValue);
}
}
// make sure all groups were seen across the hits
assertEquals(groupDocs.length, seenIDs.size());
for (GroupDoc gd : groupDocs) {
assertTrue(Float.isFinite(gd.score));
assertTrue(gd.score >= 0.0);
}
// Build 2nd index, where docs are added in blocks by
// group, so we can use single pass collector
dirBlocks = newDirectory();
rBlocks = getDocBlockReader(dirBlocks, groupDocs);
final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
final IndexSearcher sBlocks = newSearcher(rBlocks);
final ShardState shardsBlocks = new ShardState(sBlocks);
// ReaderBlocks only increases maxDoc() vs reader, which
// means a monotonic shift in scores, so we can
// reliably remap them w/ Map:
final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
values = MultiDocValues.getNumericValues(rBlocks, "id");
assertNotNull(values);
int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
for (int i = 0; i < rBlocks.maxDoc(); i++) {
assertEquals(i, values.nextDoc());
docIDToIDBlocks[i] = (int) values.longValue();
}
//System.out.println("fixup score2");
for (int contentID = 0; contentID < 3; contentID++) {
//System.out.println(" term=real" + contentID);
final Map<Float, Float> termScoreMap = new HashMap<>();
scoreMap.put("real" + contentID, termScoreMap);
//System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
//" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
assertTrue(gd.score2 == 0.0);
gd.score2 = hit.score;
assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
//System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
termScoreMap.put(gd.score, gd.score2);
}
}
for (int searchIter = 0; searchIter < 100; searchIter++) {
if (VERBOSE) {
System.out.println("\nTEST: searchIter=" + searchIter);
}
final String searchTerm = "real" + random().nextInt(3);
final boolean fillFields = random().nextBoolean();
boolean getScores = random().nextBoolean();
final boolean getMaxScores = random().nextBoolean();
final Sort groupSort = getRandomSort();
//final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
final Sort docSort = getRandomSort();
getScores |= (groupSort.needsScores() || docSort.needsScores());
final int topNGroups = TestUtil.nextInt(random(), 1, 30);
//final int topNGroups = 10;
final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
//final int groupOffset = 0;
final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
//final int docOffset = 0;
final boolean doCache = random().nextBoolean();
final boolean doAllGroups = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
}
String groupField = "group";
if (VERBOSE) {
System.out.println(" groupField=" + groupField);
}
final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
final CachingCollector cCache;
final Collector c;
final AllGroupsCollector<?> allGroupsCollector;
if (doAllGroups) {
allGroupsCollector = createAllGroupsCollector(c1, groupField);
} else {
allGroupsCollector = null;
}
final boolean useWrappingCollector = random().nextBoolean();
if (doCache) {
final double maxCacheMB = random().nextDouble();
if (VERBOSE) {
System.out.println("TEST: maxCacheMB=" + maxCacheMB);
}
if (useWrappingCollector) {
if (doAllGroups) {
cCache = CachingCollector.create(c1, true, maxCacheMB);
c = MultiCollector.wrap(cCache, allGroupsCollector);
} else {
c = cCache = CachingCollector.create(c1, true, maxCacheMB);
}
} else {
// Collect only into cache, then replay multiple times:
c = cCache = CachingCollector.create(true, maxCacheMB);
}
} else {
cCache = null;
if (doAllGroups) {
c = MultiCollector.wrap(c1, allGroupsCollector);
} else {
c = c1;
}
}
// Search top reader:
final Query query = new TermQuery(new Term("content", searchTerm));
s.search(query, c);
if (doCache && !useWrappingCollector) {
if (cCache.isCached()) {
// Replay for first-pass grouping
cCache.replay(c1);
if (doAllGroups) {
// Replay for all groups:
cCache.replay(allGroupsCollector);
}
} else {
// Replay by re-running search:
s.search(query, c1);
if (doAllGroups) {
s.search(query, allGroupsCollector);
}
}
}
// Get 1st pass top groups
final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
final TopGroups<BytesRef> groupsResult;
if (VERBOSE) {
System.out.println("TEST: first pass topGroups");
if (topGroups == null) {
System.out.println(" null");
} else {
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
}
// Get 1st pass top groups using shards
final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
final TopGroupsCollector<?> c2;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
for (SearchGroup<BytesRef> searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
System.out.println("TEST: cache is intact");
}
cCache.replay(c2);
} else {
if (VERBOSE) {
System.out.println("TEST: cache was too large");
}
s.search(query, c2);
}
} else {
s.search(query, c2);
}
if (doAllGroups) {
TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = getTopGroups(c2, docOffset);
}
} else {
c2 = null;
groupsResult = null;
if (VERBOSE) {
System.out.println("TEST: no results");
}
}
final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
if (VERBOSE) {
if (expectedGroups == null) {
System.out.println("TEST: no expected groups");
} else {
System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
for (ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc + " score=" + sd.score);
}
}
}
if (groupsResult == null) {
System.out.println("TEST: no matched groups");
} else {
System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
for (GroupDocs<BytesRef> gd : groupsResult.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for (ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);
}
}
if (searchIter == 14) {
for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
}
}
}
if (topGroupsShards == null) {
System.out.println("TEST: no matched-merged groups");
} else {
System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for (ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);
}
}
}
}
assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
// Confirm merged shards match:
assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
if (topGroupsShards != null) {
verifyShards(shards.docStarts, topGroupsShards);
}
final boolean needsScores = getScores || getMaxScores || docSort == null;
final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
final AllGroupsCollector<BytesRef> allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
// NOTE: must be "group" and not "group_dv"
// (groupField) because we didn't index doc
// values in the block index:
allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
allGroupsCollector2 = null;
c4 = c3;
}
// Get block grouping result:
sBlocks.search(query, c4);
@SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
final TopGroups<BytesRef> groupsResultBlocks;
if (doAllGroups && tempTopGroupsBlocks != null) {
assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
} else {
groupsResultBlocks = tempTopGroupsBlocks;
}
if (VERBOSE) {
if (groupsResultBlocks == null) {
System.out.println("TEST: no block groups");
} else {
System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
boolean first = true;
for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
for (ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
if (first) {
System.out.println("explain: " + sBlocks.explain(query, sd.doc));
first = false;
}
}
}
}
}
// Get shard'd block grouping result:
final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
if (expectedGroups != null) {
// Fixup scores for reader2
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for (ScoreDoc hit : groupDocsHits.scoreDocs) {
final GroupDoc gd = groupDocsByID[hit.doc];
assertEquals(gd.id, hit.doc);
//System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
hit.score = gd.score2;
}
}
final SortField[] sortFields = groupSort.getSort();
final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
if (groupDocsHits.groupSortValues != null) {
//System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
}
}
}
}
final SortField[] docSortFields = docSort.getSort();
for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
FieldDoc hit = (FieldDoc) _hit;
if (hit.fields != null) {
hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
assertNotNull(hit.fields[docSortIDX]);
}
}
}
}
}
}
assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
}
r.close();
dir.close();
rBlocks.close();
dirBlocks.close();
}
}
use of org.apache.lucene.search.Collector in project lucene-solr by apache.
the class ExpandComponent method process.
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doExpand) {
return;
}
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String field = params.get(ExpandParams.EXPAND_FIELD);
String hint = null;
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
field = cp.getField();
hint = cp.hint;
}
}
}
}
if (field == null) {
throw new IOException("Expand field is null.");
}
String sortParam = params.get(ExpandParams.EXPAND_SORT);
String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
String qs = params.get(ExpandParams.EXPAND_Q);
int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
Sort sort = null;
if (sortParam != null) {
sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
}
Query query;
if (qs == null) {
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
}
}
List<Query> newFilters = new ArrayList<>();
if (fqs == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
newFilters.add(q);
}
}
}
} else {
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
SolrIndexSearcher searcher = req.getSearcher();
LeafReader reader = searcher.getSlowAtomicReader();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
SortedDocValues values = null;
long nullValue = 0L;
if (fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
} else {
//Get the nullValue for the numeric collapse field
String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
final NumberType numType = fieldType.getNumberType();
// we don't need to handle invalid 64-bit field types here.
if (defaultValue != null) {
if (numType == NumberType.INTEGER) {
nullValue = Long.parseLong(defaultValue);
} else if (numType == NumberType.FLOAT) {
nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
}
} else if (NumberType.FLOAT.equals(numType)) {
// Integer case already handled by nullValue defaulting to 0
nullValue = Float.floatToIntBits(0.0f);
}
}
FixedBitSet groupBits = null;
LongHashSet groupSet = null;
DocList docList = rb.getResults().docList;
IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
//Gather the groups for the current page of documents
DocIterator idit = docList.iterator();
int[] globalDocs = new int[docList.size()];
int docsIndex = -1;
while (idit.hasNext()) {
globalDocs[++docsIndex] = idit.nextDoc();
}
Arrays.sort(globalDocs);
Query groupQuery = null;
/*
* This code gathers the group information for the current page.
*/
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
if (contexts.size() == 0) {
//When no context is available we can skip the expanding
return;
}
int currentContext = 0;
int currentDocBase = contexts.get(currentContext).docBase;
int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
IntObjectHashMap<BytesRef> ordBytes = null;
if (values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
if (values instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
int count = 0;
ordBytes = new IntObjectHashMap<>();
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
if (ordinalMap != null) {
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
}
int contextDoc = globalDoc - currentDocBase;
if (ordinalMap != null) {
if (contextDoc > currentValues.docID()) {
currentValues.advance(contextDoc);
}
if (contextDoc == currentValues.docID()) {
int ord = currentValues.ordValue();
++count;
BytesRef ref = currentValues.lookupOrd(ord);
ord = (int) segmentOrdinalMap.get(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
} else {
if (globalDoc > values.docID()) {
values.advance(globalDoc);
}
if (globalDoc == values.docID()) {
int ord = values.ordValue();
++count;
BytesRef ref = values.lookupOrd(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
}
}
if (count > 0 && count < 200) {
try {
groupQuery = getGroupQuery(field, count, ordBytes);
} catch (Exception e) {
throw new IOException(e);
}
}
} else {
groupSet = new LongHashSet(docList.size());
NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
int count = 0;
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
}
int contextDoc = globalDoc - currentDocBase;
int valueDocID = collapseValues.docID();
if (valueDocID < contextDoc) {
valueDocID = collapseValues.advance(contextDoc);
}
long value;
if (valueDocID == contextDoc) {
value = collapseValues.longValue();
} else {
value = 0;
}
if (value != nullValue) {
++count;
groupSet.add(value);
collapsedSet.add(globalDoc);
}
}
if (count > 0 && count < 200) {
if (fieldType.isPointField()) {
groupQuery = getPointGroupQuery(schemaField, count, groupSet);
} else {
groupQuery = getGroupQuery(field, fieldType, count, groupSet);
}
}
}
Collector collector;
if (sort != null)
sort = sort.rewrite(searcher);
Collector groupExpandCollector = null;
if (values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
} else {
groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
}
if (groupQuery != null) {
//Limits the results to documents that are in the same group as the documents in the page.
newFilters.add(groupQuery);
}
SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
if (pfilter.postFilter != null) {
pfilter.postFilter.setLastDelegate(groupExpandCollector);
collector = pfilter.postFilter;
} else {
collector = groupExpandCollector;
}
if (pfilter.filter == null) {
searcher.search(query, collector);
} else {
Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
searcher.search(q, collector);
}
LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
NamedList outMap = new SimpleOrderedMap();
CharsRefBuilder charsRef = new CharsRefBuilder();
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
if (fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int) groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
}
rb.rsp.add("expanded", outMap);
}
Aggregations