use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TestFieldCacheSortRandom method testRandomStringSort.
/**
 * Indexes a random set of string values in the "stringdv" field (about one document in ten gets
 * no value), wraps the reader with {@code UninvertingReader}, and then runs many randomized
 * sorted searches. For each search the sort value returned for every hit is compared against an
 * expected ordering computed by sorting the query's {@code matchValues} in memory.
 *
 * <p>Missing values are represented as {@code null} on both the expected and the actual side.
 *
 * @param type sort field type used for the "stringdv" {@code SortField}
 * @throws Exception if indexing, searching or closing the index fails
 */
private void testRandomStringSort(SortField.Type type) throws Exception {
  Random random = new Random(random().nextLong());

  final int NUM_DOCS = atLeast(100);
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  final boolean allowDups = random.nextBoolean();
  final Set<String> seen = new HashSet<>();
  final int maxLength = TestUtil.nextInt(random, 5, 100);
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
  }

  int numDocs = 0;
  // Value indexed for each document, in docID order; null marks a document without a value.
  final List<BytesRef> docValues = new ArrayList<>();
  // TODO: deletions
  while (numDocs < NUM_DOCS) {
    final Document doc = new Document();

    // 10% of the time, the document is missing the value:
    if (random().nextInt(10) != 7) {
      final String s;
      if (random.nextBoolean()) {
        s = TestUtil.randomSimpleString(random, maxLength);
      } else {
        s = TestUtil.randomUnicodeString(random, maxLength);
      }

      if (!allowDups) {
        if (seen.contains(s)) {
          // Duplicate value: retry without counting this document.
          continue;
        }
        seen.add(s);
      }

      if (VERBOSE) {
        System.out.println(" " + numDocs + ": s=" + s);
      }

      doc.add(new StringField("stringdv", s, Field.Store.NO));
      docValues.add(new BytesRef(s));
    } else {
      if (VERBOSE) {
        System.out.println(" " + numDocs + ": <missing>");
      }
      docValues.add(null);
    }

    doc.add(new IntPoint("id", numDocs));
    doc.add(new StoredField("id", numDocs));
    writer.addDocument(doc);
    numDocs++;

    if (random.nextInt(40) == 17) {
      // force flush
      writer.getReader().close();
    }
  }

  Map<String, UninvertingReader.Type> mapping = new HashMap<>();
  mapping.put("stringdv", Type.SORTED);
  mapping.put("id", Type.INTEGER_POINT);
  final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
  writer.close();
  if (VERBOSE) {
    System.out.println(" reader=" + r);
  }

  final IndexSearcher s = newSearcher(r, false);
  final int ITERS = atLeast(100);
  for (int iter = 0; iter < ITERS; iter++) {
    final boolean reverse = random.nextBoolean();

    final TopFieldDocs hits;
    final SortField sf = new SortField("stringdv", type, reverse);
    final boolean sortMissingLast = random().nextBoolean();
    if (sortMissingLast) {
      sf.setMissingValue(SortField.STRING_LAST);
    }

    final Sort sort;
    if (random.nextBoolean()) {
      sort = new Sort(sf);
    } else {
      sort = new Sort(sf, SortField.FIELD_DOC);
    }
    final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
    final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
    int queryType = random.nextInt(2);
    if (queryType == 0) {
      hits = s.search(new ConstantScoreQuery(f), hitCount, sort, random.nextBoolean(), random.nextBoolean());
    } else {
      hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
    }

    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
    }

    // Compute expected results: natural BytesRef order, with null (missing) values placed
    // first or last according to sortMissingLast.
    Collections.sort(f.matchValues, new Comparator<BytesRef>() {
      @Override
      public int compare(BytesRef a, BytesRef b) {
        if (a == null) {
          if (b == null) {
            return 0;
          }
          if (sortMissingLast) {
            return 1;
          } else {
            return -1;
          }
        } else if (b == null) {
          if (sortMissingLast) {
            return -1;
          } else {
            return 1;
          }
        } else {
          return a.compareTo(b);
        }
      }
    });

    // The whole list is reversed, so for a reversed sort the missing values end up on the
    // opposite side from where the comparator put them.
    if (reverse) {
      Collections.reverse(f.matchValues);
    }
    final List<BytesRef> expected = f.matchValues;

    if (VERBOSE) {
      System.out.println(" expected:");
      for (int idx = 0; idx < expected.size(); idx++) {
        BytesRef br = expected.get(idx);
        System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
        if (idx == hitCount - 1) {
          // Only the top hitCount entries can show up in the actual results.
          break;
        }
      }

      System.out.println(" actual:");
      for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
        final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
        BytesRef br = (BytesRef) fd.fields[0];
        System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
      }
    }

    // The first sort field of every hit must equal the expected value at the same rank;
    // a missing value is null on both sides.
    for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
      final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
      assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]);
    }
  }

  r.close();
  dir.close();
}
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TopGroups method merge.
/** Merges an array of TopGroups, for example obtained
 *  from the second-pass collector across multiple
 *  shards. Each TopGroups must have been sorted by the
 *  same groupSort and docSort, and the top groups passed
 *  to all second-pass collectors must be the same.
 *
 * <b>NOTE</b>: We can't always compute an exact totalGroupCount.
 * Documents belonging to a group may occur on more than
 * one shard and thus the merged totalGroupCount can be
 * higher than the actual totalGroupCount. In this case the
 * totalGroupCount represents an upper bound. If the documents
 * of one group do only reside in one shard then the
 * totalGroupCount is exact.
 *
 * <b>NOTE</b>: the topDocs in each GroupDocs is actually
 * an instance of TopDocsAndShards
 *
 * <p>The per-group and overall maximum scores are true maxima of the shard values, so groups
 * whose scores are all zero or negative are reported correctly. When there are no groups at
 * all, the overall maximum score is {@code Float.NaN}.
 *
 * @param shardGroups per-shard results to merge; every entry must hold the same groups in the
 *        same order
 * @param groupSort sort used for the groups; its sort fields are recorded in the result
 * @param docSort sort used for the documents inside each group
 * @param docOffset number of leading merged documents to drop from each group
 * @param docTopN number of documents to keep per group after {@code docOffset}
 * @param scoreMergeMode how the per-shard group scores are combined: {@code None} yields
 *        {@code NaN}, {@code Total} the sum, and {@code Avg} the sum divided by the group's
 *        total hits ({@code NaN} when the group has no hits)
 * @return the merged groups, or {@code null} if {@code shardGroups} is empty
 * @throws IllegalArgumentException if the shards disagree on the number of groups or on a
 *         group value, or if {@code scoreMergeMode} is not handled
 */
public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode) {

  if (shardGroups.length == 0) {
    return null;
  }

  int totalHitCount = 0;
  int totalGroupedHitCount = 0;
  // Optionally merge the totalGroupCount.
  Integer totalGroupCount = null;

  final int numGroups = shardGroups[0].groups.length;
  for (TopGroups<T> shard : shardGroups) {
    if (numGroups != shard.groups.length) {
      throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
    }
    totalHitCount += shard.totalHitCount;
    totalGroupedHitCount += shard.totalGroupedHitCount;
    if (shard.totalGroupCount != null) {
      if (totalGroupCount == null) {
        totalGroupCount = 0;
      }
      totalGroupCount += shard.totalGroupCount;
    }
  }

  @SuppressWarnings({ "unchecked", "rawtypes" })
  final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];

  // The doc sort does not change during the merge, so decide once which TopDocs flavor to use.
  final boolean sortByRelevance = docSort.equals(Sort.RELEVANCE);
  final TopDocs[] shardTopDocs;
  if (sortByRelevance) {
    shardTopDocs = new TopDocs[shardGroups.length];
  } else {
    // Must really be a TopFieldDocs[] because it is cast to that type for the merge below.
    shardTopDocs = new TopFieldDocs[shardGroups.length];
  }

  // Seed with negative infinity rather than Float.MIN_VALUE: MIN_VALUE is the smallest
  // *positive* float and would win against zero or negative scores.
  float totalMaxScore = Float.NEGATIVE_INFINITY;

  for (int groupIDX = 0; groupIDX < numGroups; groupIDX++) {
    final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
    //System.out.println("  merge groupValue=" + groupValue + " sortValues=" + Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
    // shardGroups is non-empty here, so the shard loop always replaces this seed.
    float maxScore = Float.NEGATIVE_INFINITY;
    int totalHits = 0;
    double scoreSum = 0.0;
    for (int shardIDX = 0; shardIDX < shardGroups.length; shardIDX++) {
      //System.out.println("    shard=" + shardIDX);
      final TopGroups<T> shard = shardGroups[shardIDX];
      final GroupDocs<?> shardGroupDocs = shard.groups[groupIDX];
      if (groupValue == null) {
        if (shardGroupDocs.groupValue != null) {
          throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
        }
      } else if (!groupValue.equals(shardGroupDocs.groupValue)) {
        throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
      }

      if (sortByRelevance) {
        shardTopDocs[shardIDX] = new TopDocs(shardGroupDocs.totalHits, shardGroupDocs.scoreDocs, shardGroupDocs.maxScore);
      } else {
        shardTopDocs[shardIDX] = new TopFieldDocs(shardGroupDocs.totalHits, shardGroupDocs.scoreDocs, docSort.getSort(), shardGroupDocs.maxScore);
      }
      maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
      totalHits += shardGroupDocs.totalHits;
      scoreSum += shardGroupDocs.score;
    }

    final TopDocs mergedTopDocs;
    if (sortByRelevance) {
      mergedTopDocs = TopDocs.merge(docOffset + docTopN, shardTopDocs);
    } else {
      mergedTopDocs = TopDocs.merge(docSort, docOffset + docTopN, (TopFieldDocs[]) shardTopDocs);
    }

    // Slice: drop the first docOffset merged documents.
    final ScoreDoc[] mergedScoreDocs;
    if (docOffset == 0) {
      mergedScoreDocs = mergedTopDocs.scoreDocs;
    } else if (docOffset >= mergedTopDocs.scoreDocs.length) {
      mergedScoreDocs = new ScoreDoc[0];
    } else {
      mergedScoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length - docOffset];
      System.arraycopy(mergedTopDocs.scoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.scoreDocs.length - docOffset);
    }

    final float groupScore;
    switch (scoreMergeMode) {
      case None:
        groupScore = Float.NaN;
        break;
      case Avg:
        if (totalHits > 0) {
          groupScore = (float) (scoreSum / totalHits);
        } else {
          groupScore = Float.NaN;
        }
        break;
      case Total:
        groupScore = (float) scoreSum;
        break;
      default:
        throw new IllegalArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
    }

    //System.out.println("SHARDS=" + Arrays.toString(mergedTopDocs.shardIndex));
    mergedGroupDocs[groupIDX] = new GroupDocs<>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].groups[groupIDX].groupSortValues);
    totalMaxScore = Math.max(totalMaxScore, maxScore);
  }

  if (numGroups == 0) {
    // Nothing was merged, so there is no score to report a maximum of.
    totalMaxScore = Float.NaN;
  }

  final TopGroups<T> merged = new TopGroups<>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
  if (totalGroupCount != null) {
    // At least one shard reported a group count: attach the summed (upper bound) count.
    return new TopGroups<>(merged, totalGroupCount);
  }
  return merged;
}
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TestDemoExpressions method testTwoOfSameBinding.
/**
 * Checks that an expression may reference the same binding twice: sorting by
 * {@code _score + _score} must yield a sort value of twice the hit's score for each of the
 * first three hits.
 */
public void testTwoOfSameBinding() throws Exception {
  final SimpleBindings scoreBindings = new SimpleBindings();
  scoreBindings.add(new SortField("_score", SortField.Type.SCORE));

  final Expression doubledScore = JavascriptCompiler.compile("_score + _score");
  final Sort byDoubledScore = new Sort(doubledScore.getSortField(scoreBindings, true));

  final Query bodyContents = new TermQuery(new Term("body", "contents"));
  final TopFieldDocs topHits = searcher.search(bodyContents, 3, byDoubledScore, true, true);

  for (int rank = 0; rank < 3; rank++) {
    final FieldDoc hit = (FieldDoc) topHits.scoreDocs[rank];
    // The sort value comes back boxed as a Double; compare it at float precision.
    final float sortValue = ((Double) hit.fields[0]).floatValue();
    final float twiceScore = 2 * hit.score;
    assertEquals(twiceScore, sortValue, CheckHits.explainToleranceDelta(twiceScore, sortValue));
  }
}
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TestDemoExpressions method testDynamicExtendedVariableExample.
/**
 * Shows how custom {@code Bindings} can parse a variable name into its pieces with
 * {@code VariableContext.parse} and resolve each form ({@code doc['field'].value},
 * {@code magicarray[n]}, {@code fourtytwo}) to a {@code DoubleValuesSource}. The resulting
 * expression is used as a sort and the sort values of the first three hits are checked.
 */
public void testDynamicExtendedVariableExample() throws Exception {
  Expression popularity = JavascriptCompiler.compile("doc['popularity'].value + magicarray[0] + fourtytwo");

  // The following is an example of how to write bindings which parse the variable name into pieces.
  // Note, however, that this requires a lot of error checking. Each "error case" below should be
  // filled in with proper error messages for a real use case.
  Bindings bindings = new Bindings() {
    @Override
    public DoubleValuesSource getDoubleValuesSource(String name) {
      VariableContext[] var = VariableContext.parse(name);
      assert var[0].type == MEMBER;
      String base = var[0].text;
      if (base.equals("doc")) {
        if (var.length > 1 && var[1].type == STR_INDEX) {
          String field = var[1].text;
          if (var.length > 2 && var[2].type == MEMBER && var[2].text.equals("value")) {
            return DoubleValuesSource.fromIntField(field);
          } else {
            // error case, non/missing "value" member access.
            // var[2] only exists when a member access was actually written, so guard the
            // index: a missing member must produce a test failure, not an
            // ArrayIndexOutOfBoundsException.
            fail("member: " + (var.length > 2 ? var[2].text : "<missing>"));
          }
        } else {
          // error case, doc should be a str indexed array
          fail();
        }
      } else if (base.equals("magicarray")) {
        if (var.length > 1 && var[1].type == INT_INDEX) {
          return DoubleValuesSource.constant(2048);
        } else {
          // error case, magic array isn't an array
          fail();
        }
      } else if (base.equals("fourtytwo")) {
        return DoubleValuesSource.constant(42);
      } else {
        // error case (variable doesn't exist)
        fail();
      }
      // Every error case above has already failed the test; this throw gives the compiler
      // a terminating statement for those paths.
      throw new IllegalArgumentException("Illegal reference '" + name + "'");
    }
  };

  Sort sort = new Sort(popularity.getSortField(bindings, false));
  TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);

  FieldDoc d = (FieldDoc) td.scoreDocs[0];
  assertEquals(2092D, (Double) d.fields[0], 1E-4);

  d = (FieldDoc) td.scoreDocs[1];
  assertEquals(2095D, (Double) d.fields[0], 1E-4);

  d = (FieldDoc) td.scoreDocs[2];
  assertEquals(2110D, (Double) d.fields[0], 1E-4);
}
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TestDemoExpressions method testSortValues.
/**
 * Checks the sort values reported for an expression sort: with {@code sqrt(_score)} as the
 * sort, each of the first three hits must carry the square root of its own score.
 */
public void testSortValues() throws Exception {
  final SimpleBindings scoreBindings = new SimpleBindings();
  scoreBindings.add(new SortField("_score", SortField.Type.SCORE));

  final Expression rootOfScore = JavascriptCompiler.compile("sqrt(_score)");
  final Sort byRootOfScore = new Sort(rootOfScore.getSortField(scoreBindings, true));

  final Query bodyContents = new TermQuery(new Term("body", "contents"));
  final TopFieldDocs topHits = searcher.search(bodyContents, 3, byRootOfScore, true, true);

  for (int rank = 0; rank < 3; rank++) {
    final FieldDoc hit = (FieldDoc) topHits.scoreDocs[rank];
    // The sort value comes back boxed as a Double; compare it at float precision.
    final float sortValue = ((Double) hit.fields[0]).floatValue();
    final float rootScore = (float) Math.sqrt(hit.score);
    assertEquals(rootScore, sortValue, CheckHits.explainToleranceDelta(rootScore, sortValue));
  }
}
Aggregations