use of org.apache.lucene.search.grouping.CollapseTopFieldDocs in project elasticsearch by elastic.
the class SearchPhaseController method getHits.
private SearchHits getHits(ReducedQueryPhase reducedQueryPhase, boolean ignoreFrom, ScoreDoc[] sortedDocs, AtomicArray<? extends QuerySearchResultProvider> fetchResultsArr) {
List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> fetchResults = fetchResultsArr.asList();
boolean sorted = false;
int sortScoreIndex = -1;
if (reducedQueryPhase.oneResult.topDocs() instanceof TopFieldDocs) {
TopFieldDocs fieldDocs = (TopFieldDocs) reducedQueryPhase.oneResult.queryResult().topDocs();
if (fieldDocs instanceof CollapseTopFieldDocs && fieldDocs.fields.length == 1 && fieldDocs.fields[0].getType() == SortField.Type.SCORE) {
sorted = false;
} else {
sorted = true;
for (int i = 0; i < fieldDocs.fields.length; i++) {
if (fieldDocs.fields[i].getType() == SortField.Type.SCORE) {
sortScoreIndex = i;
}
}
}
}
// clean the fetch counter
for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : fetchResults) {
entry.value.fetchResult().initCounter();
}
int from = ignoreFrom ? 0 : reducedQueryPhase.oneResult.queryResult().from();
int numSearchHits = (int) Math.min(reducedQueryPhase.fetchHits - from, reducedQueryPhase.oneResult.size());
// with collapsing we can have more fetch hits than sorted docs
numSearchHits = Math.min(sortedDocs.length, numSearchHits);
// merge hits
List<SearchHit> hits = new ArrayList<>();
if (!fetchResults.isEmpty()) {
for (int i = 0; i < numSearchHits; i++) {
ScoreDoc shardDoc = sortedDocs[i];
QuerySearchResultProvider fetchResultProvider = fetchResultsArr.get(shardDoc.shardIndex);
if (fetchResultProvider == null) {
continue;
}
FetchSearchResult fetchResult = fetchResultProvider.fetchResult();
int index = fetchResult.counterGetAndIncrement();
if (index < fetchResult.hits().internalHits().length) {
SearchHit searchHit = fetchResult.hits().internalHits()[index];
searchHit.score(shardDoc.score);
searchHit.shard(fetchResult.shardTarget());
if (sorted) {
FieldDoc fieldDoc = (FieldDoc) shardDoc;
searchHit.sortValues(fieldDoc.fields, reducedQueryPhase.oneResult.sortValueFormats());
if (sortScoreIndex != -1) {
searchHit.score(((Number) fieldDoc.fields[sortScoreIndex]).floatValue());
}
}
hits.add(searchHit);
}
}
}
return new SearchHits(hits.toArray(new SearchHit[hits.size()]), reducedQueryPhase.totalHits, reducedQueryPhase.maxScore);
}
use of org.apache.lucene.search.grouping.CollapseTopFieldDocs in project elasticsearch by elastic.
the class CollapsingTopDocsCollectorTests method assertSearchCollapse.
private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
final int numDocs = randomIntBetween(1000, 2000);
int maxGroup = randomIntBetween(2, 500);
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Set<T> values = new HashSet<>();
int totalHits = 0;
for (int i = 0; i < numDocs; i++) {
final T value = dvProducers.randomGroup(maxGroup);
values.add(value);
Document doc = new Document();
dvProducers.add(doc, value, multivalued);
doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
doc.add(new NumericDocValuesField("sort2", randomLong()));
w.addDocument(doc);
totalHits++;
}
List<T> valueList = new ArrayList<>(values);
Collections.sort(valueList);
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
final SortField collapseField = dvProducers.sortField(multivalued);
final SortField sort1 = new SortField("sort1", SortField.Type.INT);
final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
Sort sort = new Sort(sort1, sort2, collapseField);
int expectedNumGroups = values.size();
final CollapsingTopDocsCollector collapsingCollector;
if (numeric) {
collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
} else {
collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
}
TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
searcher.search(new MatchAllDocsQuery(), collapsingCollector);
searcher.search(new MatchAllDocsQuery(), topFieldCollector);
CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
TopFieldDocs topDocs = topFieldCollector.topDocs();
assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
assertEquals(totalHits, collapseTopFieldDocs.totalHits);
assertEquals(totalHits, topDocs.scoreDocs.length);
assertEquals(totalHits, topDocs.totalHits);
Set<Object> seen = new HashSet<>();
// collapse field is the last sort
int collapseIndex = sort.getSort().length - 1;
int topDocsIndex = 0;
for (int i = 0; i < expectedNumGroups; i++) {
FieldDoc fieldDoc = null;
for (; topDocsIndex < totalHits; topDocsIndex++) {
fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
break;
}
}
FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
assertNotNull(fieldDoc);
assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]);
}
for (; topDocsIndex < totalHits; topDocsIndex++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
}
// check merge
final IndexReaderContext ctx = searcher.getTopReaderContext();
final SegmentSearcher[] subSearchers;
final int[] docStarts;
if (ctx instanceof LeafReaderContext) {
subSearchers = new SegmentSearcher[1];
docStarts = new int[1];
subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
docStarts[0] = 0;
} else {
final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
final int size = compCTX.leaves().size();
subSearchers = new SegmentSearcher[size];
docStarts = new int[size];
int docBase = 0;
for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
docStarts[searcherIDX] = docBase;
docBase += leave.reader().maxDoc();
}
}
final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
final SegmentSearcher subSearcher = subSearchers[shardIDX];
final CollapsingTopDocsCollector c;
if (numeric) {
c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
} else {
c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
}
subSearcher.search(weight, c);
shardHits[shardIDX] = c.getTopDocs();
}
CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
w.close();
reader.close();
dir.close();
}
use of org.apache.lucene.search.grouping.CollapseTopFieldDocs in project elasticsearch by elastic.
the class SearchPhaseController method sortDocs.
/**
* Returns a score doc array of top N search docs across all shards, followed by top suggest docs for each
* named completion suggestion across all shards. If more than one named completion suggestion is specified in the
* request, the suggest docs for a named suggestion are ordered by the suggestion name.
*
* Note: The order of the sorted score docs depends on the shard index in the result array if the merge process needs to disambiguate
* the result. In oder to obtain stable results the shard index (index of the result in the result array) must be the same.
*
* @param ignoreFrom Whether to ignore the from and sort all hits in each shard result.
* Enabled only for scroll search, because that only retrieves hits of length 'size' in the query phase.
* @param resultsArr Shard result holder
*/
public ScoreDoc[] sortDocs(boolean ignoreFrom, AtomicArray<? extends QuerySearchResultProvider> resultsArr) throws IOException {
List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> results = resultsArr.asList();
if (results.isEmpty()) {
return EMPTY_DOCS;
}
final QuerySearchResult result;
boolean canOptimize = false;
int shardIndex = -1;
if (results.size() == 1) {
canOptimize = true;
result = results.get(0).value.queryResult();
shardIndex = results.get(0).index;
} else {
boolean hasResult = false;
QuerySearchResult resultToOptimize = null;
// lets see if we only got hits from a single shard, if so, we can optimize...
for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : results) {
if (entry.value.queryResult().hasHits()) {
if (hasResult) {
// we already have one, can't really optimize
canOptimize = false;
break;
}
canOptimize = true;
hasResult = true;
resultToOptimize = entry.value.queryResult();
shardIndex = entry.index;
}
}
result = canOptimize ? resultToOptimize : results.get(0).value.queryResult();
assert result != null;
}
if (canOptimize) {
int offset = result.from();
if (ignoreFrom) {
offset = 0;
}
ScoreDoc[] scoreDocs = result.topDocs().scoreDocs;
ScoreDoc[] docs;
int numSuggestDocs = 0;
final Suggest suggest = result.queryResult().suggest();
final List<CompletionSuggestion> completionSuggestions;
if (suggest != null) {
completionSuggestions = suggest.filter(CompletionSuggestion.class);
for (CompletionSuggestion suggestion : completionSuggestions) {
numSuggestDocs += suggestion.getOptions().size();
}
} else {
completionSuggestions = Collections.emptyList();
}
int docsOffset = 0;
if (scoreDocs.length == 0 || scoreDocs.length < offset) {
docs = new ScoreDoc[numSuggestDocs];
} else {
int resultDocsSize = result.size();
if ((scoreDocs.length - offset) < resultDocsSize) {
resultDocsSize = scoreDocs.length - offset;
}
docs = new ScoreDoc[resultDocsSize + numSuggestDocs];
for (int i = 0; i < resultDocsSize; i++) {
ScoreDoc scoreDoc = scoreDocs[offset + i];
scoreDoc.shardIndex = shardIndex;
docs[i] = scoreDoc;
docsOffset++;
}
}
for (CompletionSuggestion suggestion : completionSuggestions) {
for (CompletionSuggestion.Entry.Option option : suggestion.getOptions()) {
ScoreDoc doc = option.getDoc();
doc.shardIndex = shardIndex;
docs[docsOffset++] = doc;
}
}
return docs;
}
final int topN = result.queryResult().size();
final int from = ignoreFrom ? 0 : result.queryResult().from();
final TopDocs mergedTopDocs;
final int numShards = resultsArr.length();
if (result.queryResult().topDocs() instanceof CollapseTopFieldDocs) {
CollapseTopFieldDocs firstTopDocs = (CollapseTopFieldDocs) result.queryResult().topDocs();
final Sort sort = new Sort(firstTopDocs.fields);
final CollapseTopFieldDocs[] shardTopDocs = new CollapseTopFieldDocs[numShards];
fillTopDocs(shardTopDocs, results, new CollapseTopFieldDocs(firstTopDocs.field, 0, new FieldDoc[0], sort.getSort(), new Object[0], Float.NaN));
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs);
} else if (result.queryResult().topDocs() instanceof TopFieldDocs) {
TopFieldDocs firstTopDocs = (TopFieldDocs) result.queryResult().topDocs();
final Sort sort = new Sort(firstTopDocs.fields);
final TopFieldDocs[] shardTopDocs = new TopFieldDocs[resultsArr.length()];
fillTopDocs(shardTopDocs, results, new TopFieldDocs(0, new FieldDoc[0], sort.getSort(), Float.NaN));
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs, true);
} else {
final TopDocs[] shardTopDocs = new TopDocs[resultsArr.length()];
fillTopDocs(shardTopDocs, results, Lucene.EMPTY_TOP_DOCS);
mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs, true);
}
ScoreDoc[] scoreDocs = mergedTopDocs.scoreDocs;
final Map<String, List<Suggestion<CompletionSuggestion.Entry>>> groupedCompletionSuggestions = new HashMap<>();
// group suggestions and assign shard index
for (AtomicArray.Entry<? extends QuerySearchResultProvider> sortedResult : results) {
Suggest shardSuggest = sortedResult.value.queryResult().suggest();
if (shardSuggest != null) {
for (CompletionSuggestion suggestion : shardSuggest.filter(CompletionSuggestion.class)) {
suggestion.setShardIndex(sortedResult.index);
List<Suggestion<CompletionSuggestion.Entry>> suggestions = groupedCompletionSuggestions.computeIfAbsent(suggestion.getName(), s -> new ArrayList<>());
suggestions.add(suggestion);
}
}
}
if (groupedCompletionSuggestions.isEmpty() == false) {
int numSuggestDocs = 0;
List<Suggestion<? extends Entry<? extends Entry.Option>>> completionSuggestions = new ArrayList<>(groupedCompletionSuggestions.size());
for (List<Suggestion<CompletionSuggestion.Entry>> groupedSuggestions : groupedCompletionSuggestions.values()) {
final CompletionSuggestion completionSuggestion = CompletionSuggestion.reduceTo(groupedSuggestions);
assert completionSuggestion != null;
numSuggestDocs += completionSuggestion.getOptions().size();
completionSuggestions.add(completionSuggestion);
}
scoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length + numSuggestDocs];
System.arraycopy(mergedTopDocs.scoreDocs, 0, scoreDocs, 0, mergedTopDocs.scoreDocs.length);
int offset = mergedTopDocs.scoreDocs.length;
Suggest suggestions = new Suggest(completionSuggestions);
for (CompletionSuggestion completionSuggestion : suggestions.filter(CompletionSuggestion.class)) {
for (CompletionSuggestion.Entry.Option option : completionSuggestion.getOptions()) {
scoreDocs[offset++] = option.getDoc();
}
}
}
return scoreDocs;
}
use of org.apache.lucene.search.grouping.CollapseTopFieldDocs in project elasticsearch by elastic.
the class Lucene method writeTopDocs.
public static void writeTopDocs(StreamOutput out, TopDocs topDocs) throws IOException {
if (topDocs instanceof CollapseTopFieldDocs) {
out.writeByte((byte) 2);
CollapseTopFieldDocs collapseDocs = (CollapseTopFieldDocs) topDocs;
out.writeVInt(topDocs.totalHits);
out.writeFloat(topDocs.getMaxScore());
out.writeString(collapseDocs.field);
out.writeVInt(collapseDocs.fields.length);
for (SortField sortField : collapseDocs.fields) {
writeSortField(out, sortField);
}
out.writeVInt(topDocs.scoreDocs.length);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
ScoreDoc doc = collapseDocs.scoreDocs[i];
writeFieldDoc(out, (FieldDoc) doc);
writeSortValue(out, collapseDocs.collapseValues[i]);
}
} else if (topDocs instanceof TopFieldDocs) {
out.writeByte((byte) 1);
TopFieldDocs topFieldDocs = (TopFieldDocs) topDocs;
out.writeVInt(topDocs.totalHits);
out.writeFloat(topDocs.getMaxScore());
out.writeVInt(topFieldDocs.fields.length);
for (SortField sortField : topFieldDocs.fields) {
writeSortField(out, sortField);
}
out.writeVInt(topDocs.scoreDocs.length);
for (ScoreDoc doc : topFieldDocs.scoreDocs) {
writeFieldDoc(out, (FieldDoc) doc);
}
} else {
out.writeByte((byte) 0);
out.writeVInt(topDocs.totalHits);
out.writeFloat(topDocs.getMaxScore());
out.writeVInt(topDocs.scoreDocs.length);
for (ScoreDoc doc : topDocs.scoreDocs) {
writeScoreDoc(out, doc);
}
}
}
use of org.apache.lucene.search.grouping.CollapseTopFieldDocs in project elasticsearch by elastic.
the class Lucene method readTopDocs.
public static TopDocs readTopDocs(StreamInput in) throws IOException {
byte type = in.readByte();
if (type == 0) {
int totalHits = in.readVInt();
float maxScore = in.readFloat();
ScoreDoc[] scoreDocs = new ScoreDoc[in.readVInt()];
for (int i = 0; i < scoreDocs.length; i++) {
scoreDocs[i] = new ScoreDoc(in.readVInt(), in.readFloat());
}
return new TopDocs(totalHits, scoreDocs, maxScore);
} else if (type == 1) {
int totalHits = in.readVInt();
float maxScore = in.readFloat();
SortField[] fields = new SortField[in.readVInt()];
for (int i = 0; i < fields.length; i++) {
fields[i] = readSortField(in);
}
FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
for (int i = 0; i < fieldDocs.length; i++) {
fieldDocs[i] = readFieldDoc(in);
}
return new TopFieldDocs(totalHits, fieldDocs, fields, maxScore);
} else if (type == 2) {
int totalHits = in.readVInt();
float maxScore = in.readFloat();
String field = in.readString();
SortField[] fields = new SortField[in.readVInt()];
for (int i = 0; i < fields.length; i++) {
fields[i] = readSortField(in);
}
int size = in.readVInt();
Object[] collapseValues = new Object[size];
FieldDoc[] fieldDocs = new FieldDoc[size];
for (int i = 0; i < fieldDocs.length; i++) {
fieldDocs[i] = readFieldDoc(in);
collapseValues[i] = readSortValue(in);
}
return new CollapseTopFieldDocs(field, totalHits, fieldDocs, fields, collapseValues, maxScore);
} else {
throw new IllegalStateException("Unknown type " + type);
}
}
Aggregations