use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method testRandomSortsOnLargeIndex.
/**
 * Randomized testing of a non-trivial number of docs using assertFullWalkNoDups.
 *
 * <p>Phase one indexes a random number (100-200) of documents and does a full cursor walk
 * sorted on every name returned by {@code getAllSortFieldNames()}, in both directions, with
 * {@code id} as the tie breaker. Phase two grows the index to {@code totalDocs} documents and
 * walks a handful of randomized sorts and queries.
 *
 * <p>NOTE(review): the size assertions assume the index is empty when this test starts --
 * confirm against the class's setup/cleanup.
 *
 * @throws Exception if indexing or querying fails
 */
public void testRandomSortsOnLargeIndex() throws Exception {
  final Collection<String> allFieldNames = getAllSortFieldNames();
  final int initialDocs = TestUtil.nextInt(random(), 100, 200);
  final int totalDocs = atLeast(500);
  // start with a smallish number of documents, and test that we can do a full walk using a
  // sort on *every* sortable field...
  for (int i = 1; i <= initialDocs; i++) {
    SolrInputDocument doc = buildRandomDocument(i);
    assertU(adoc(doc));
  }
  assertU(commit());
  for (String f : allFieldNames) {
    for (String order : new String[] { " asc", " desc" }) {
      // "id" is appended as a tie breaker unless it is already the primary sort
      String sort = f + order + ("id".equals(f) ? "" : ", id" + order);
      String rows = "" + TestUtil.nextInt(random(), 13, 50);
      // only initialDocs documents have been added so far, so that (not totalDocs) is the
      // tightest valid upper bound on the number of distinct ids the walk may return
      SentinelIntSet ids = assertFullWalkNoDups(initialDocs, params("q", "*:*", "fl", "id", "rows", rows, "sort", sort));
      assertEquals("sort=" + sort, initialDocs, ids.size());
    }
  }
  // now add a lot more docs, and test a handful of randomized sorts
  for (int i = initialDocs + 1; i <= totalDocs; i++) {
    SolrInputDocument doc = buildRandomDocument(i);
    assertU(adoc(doc));
  }
  assertU(commit());
  final int numRandomSorts = atLeast(3);
  for (int i = 0; i < numRandomSorts; i++) {
    final String sort = buildRandomSort(allFieldNames);
    final String rows = "" + TestUtil.nextInt(random(), 63, 113);
    final String fl = random().nextBoolean() ? "id" : "id,score";
    final boolean matchAll = random().nextBoolean();
    final String q = matchAll ? "*:*" : buildRandomQuery();
    SentinelIntSet ids = assertFullWalkNoDups(totalDocs, params("q", q, "fl", fl, "rows", rows, "sort", sort));
    // a random query may match any subset, so the exact size is only known for match-all
    if (matchAll) {
      assertEquals("sort=" + sort, totalDocs, ids.size());
    }
  }
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method assertFullWalkNoDupsWithFacets.
/**
 * Given a set of params, executes a cursor query using {@link CursorMarkParams#CURSOR_MARK_START}
 * and then continuously walks the results, feeding the {@link CursorMarkParams#CURSOR_MARK_NEXT}
 * value of each response back in as the next cursor, as long as a non-0 number of docs are
 * returned. This method records the set of all id's (must be positive ints) encountered and
 * throws an assertion failure if any id is encountered more than once, or if the set grows
 * above maxSize.
 *
 * <p>Also checks that facets are the same with each page, and that they are correct: every
 * label/count pair reported by the server must match the counts tallied from the documents
 * actually returned during the walk.
 *
 * <p>The params must specify exactly one {@code facet.field} and {@code facet.limit=-1}; the
 * facet field is read from each returned doc, so it presumably needs to be included in
 * {@code fl} -- verify against callers.
 *
 * @param maxSize upper bound on the number of distinct ids the walk may return
 * @param params the request params (without a cursor mark; one is added per request)
 * @return the set of all ids seen during the walk
 * @throws Exception if a request fails or its JSON response cannot be parsed
 */
public SentinelIntSet assertFullWalkNoDupsWithFacets(int maxSize, SolrParams params) throws Exception {
  final String facetField = params.get("facet.field");
  assertNotNull("facet.field param not specified", facetField);
  assertFalse("facet.field param contains multiple values", facetField.contains(","));
  assertEquals("facet.limit param not set to -1", "-1", params.get("facet.limit"));
  // facet value (null key == doc has no value for the facet field) -> number of docs seen
  final Map<String, MutableValueInt> facetCounts = new HashMap<>();
  SentinelIntSet ids = new SentinelIntSet(maxSize, -1);
  String cursorMark = CURSOR_MARK_START;
  int docsOnThisPage = Integer.MAX_VALUE;
  List previousFacets = null;
  while (0 < docsOnThisPage) {
    String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark));
    Map rsp = (Map) ObjectBuilder.fromJSON(json);
    assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json, rsp.containsKey(CURSOR_MARK_NEXT));
    String nextCursorMark = (String) rsp.get(CURSOR_MARK_NEXT);
    assertNotNull(CURSOR_MARK_NEXT + " is null", nextCursorMark);
    List<Map<Object, Object>> docs = (List) (((Map) rsp.get("response")).get("docs"));
    docsOnThisPage = docs.size();
    final Integer rows = params.getInt(CommonParams.ROWS);
    if (null != rows) {
      assertTrue("Too many docs on this page: " + rows + " < " + docsOnThisPage, docsOnThisPage <= rows);
    }
    if (0 == docsOnThisPage) {
      // an exhausted cursor must hand back the same mark it was given
      assertEquals("no more docs, but " + CURSOR_MARK_NEXT + " isn't same", cursorMark, nextCursorMark);
    }
    for (Map<Object, Object> doc : docs) {
      int id = ((Long) doc.get("id")).intValue();
      assertFalse("walk already seen: " + id, ids.exists(id));
      ids.put(id);
      assertFalse("id set bigger then max allowed (" + maxSize + "): " + ids.size(), maxSize < ids.size());
      Object facet = doc.get(facetField);
      // null: missing facet value
      String facetString = null == facet ? null : facet.toString();
      MutableValueInt count = facetCounts.get(facetString);
      if (null == count) {
        count = new MutableValueInt();
        facetCounts.put(facetString, count);
      }
      ++count.value;
    }
    cursorMark = nextCursorMark;
    Map facetFields = (Map) ((Map) rsp.get("facet_counts")).get("facet_fields");
    List facets = (List) facetFields.get(facetField);
    if (null != previousFacets) {
      // facets describe the whole result set, so they must not change from page to page
      assertEquals("Facets not the same as on previous page:\nprevious page facets: " + Arrays.toString(previousFacets.toArray(new Object[previousFacets.size()])) + "\ncurrent page facets: " + Arrays.toString(facets.toArray(new Object[facets.size()])), previousFacets, facets);
    }
    previousFacets = facets;
  }
  assertNotNull("previousFacets is null", previousFacets);
  // the facet list is flat: [label0, count0, label1, count1, ...]
  assertEquals("Mismatch in number of facets: ", facetCounts.size(), previousFacets.size() / 2);
  // step exactly one (label, count) pair per iteration so that every facet is verified
  for (int pos = 0; pos < previousFacets.size(); pos += 2) {
    String label = (String) previousFacets.get(pos);
    int expectedCount = ((Number) previousFacets.get(pos + 1)).intValue();
    MutableValueInt count = facetCounts.get(label);
    assertNotNull("Expected facet label #" + (pos / 2) + " not found: '" + label + "'", count);
    assertEquals("Facet count mismatch for label #" + (pos / 2) + " '" + label + "'", expectedCount, count.value);
  }
  return ids;
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class DistribCursorPagingTest method doSimpleTest.
/**
 * Walks cursors page by page over a small, hand-built index and checks every page against
 * hard-coded expectations.
 *
 * <p>Steps, in order: query the (still empty) index; index ten docs with ids 0-9 (id 3 has no
 * "int" value); check a no-match query and a rows=0 query; walk an id sort, int sorts with
 * duplicates (plain, "_first" and "_last" field variants), a string sort and three equivalent
 * float-based sorts; check rows larger than the match count; sanity check assertFullWalkNoDups;
 * finally delete and re-index docs between pages of a single walk.
 *
 * <p>Each page follows the same pattern: run the query with the current cursor mark, assert
 * numFound, start offset and the page's doc list, then take the next cursor mark from the
 * response. A walk ends with an empty page whose next cursor mark equals the one sent.
 *
 * <p>The statement order matters: later expectations depend on the docs indexed/deleted earlier.
 */
private void doSimpleTest() throws Exception {
String cursorMark = CURSOR_MARK_START;
SolrParams params = null;
QueryResponse rsp = null;
// randomly use either the "int" field or its "_dv" suffixed variant for int sorting
final String intsort = "int" + (random().nextBoolean() ? "" : "_dv");
// base name for the "_first" / "_last" suffixed sort fields used further down
final String intmissingsort = intsort;
// trivial base case: ensure cursorMark against an empty index doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "4", "fl", "id", "sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(0, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// don't add in order of either field to ensure we aren't inadvertently
// counting on internal docid ordering
indexDoc(sdoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
indexDoc(sdoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
indexDoc(sdoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
indexDoc(sdoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
indexDoc(sdoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
indexDoc(sdoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
indexDoc(sdoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
// int is missing
indexDoc(sdoc("id", "3", "str", "a", "float", "11.1"));
commit();
// base case: ensure cursorMark that matches no docs doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "id:9999999", "rows", "4", "fl", "id", "sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(0, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// edge case: ensure rows=0 doesn't blow up and gives back same cursor for next
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "0", "fl", "id", "sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// simple id sort
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:6", "rows", "4", "fl", "id", "sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 8, 7, 6);
cursorMark = assertHashNextCursorMark(rsp);
// second page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 3, 2, 1);
cursorMark = assertHashNextCursorMark(rsp);
// third (partial) page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 0);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// NOTE: because field stats and queryNorms can vary amongst shards,
// not all "obvious" score based sorts can be iterated cleanly.
// queries that seem like they should result in an obvious "tie" score
// between two documents (and would tie in a single node case) may actually
// get diff scores for diff docs if they are on diff shards
//
// so here, in this test, we can't assert a hardcoded score ordering -- we trust
// the full walk testing (below)
// int sort with dups, id tie breaker ... and some faceting
// (the same first facet value and count are asserted on every page of this walk)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "facet", "true", "facet.field", "str", "json.nl", "map", "sort", intsort + " asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 0, 3);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// second page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 1, 6);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// third (partial) page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 2);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// int missing first sort with dups, id tie breaker
// (id 3, the doc without an int value, is expected at the very start)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "json.nl", "map", "sort", intmissingsort + "_first asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 3, 7, 0);
cursorMark = assertHashNextCursorMark(rsp);
// second page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 1, 6);
cursorMark = assertHashNextCursorMark(rsp);
// third (partial) page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 2);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// int missing last sort with dups, id tie breaker
// (id 3, the doc without an int value, is expected at the very end)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "json.nl", "map", "sort", intmissingsort + "_last asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 0, 4);
cursorMark = assertHashNextCursorMark(rsp);
// second page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 6, 9);
cursorMark = assertHashNextCursorMark(rsp);
// third (partial) page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 3);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// string sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "6", "fl", "id", "sort", "str asc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 6, 4, 3, 1, 8, 5);
cursorMark = assertHashNextCursorMark(rsp);
// second (partial) page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 0, 9, 7, 2);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// tri-level sort: float (as a plain field, via field(), and via a function) asc,
// then int desc, then id desc
// (order should be the same in all cases)
for (String primarysort : new String[] { "float", "field('float')", "sum(float,42)" }) {
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "2", "fl", "id", "sort", primarysort + " asc, " + intsort + " desc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 9);
cursorMark = assertHashNextCursorMark(rsp);
// second page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 4);
cursorMark = assertHashNextCursorMark(rsp);
// third page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 3, 8);
cursorMark = assertHashNextCursorMark(rsp);
// fourth page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 6);
cursorMark = assertHashNextCursorMark(rsp);
// fifth page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 0);
cursorMark = assertHashNextCursorMark(rsp);
// we've exactly exhausted all the results, but solr had no way of knowing that
// so one more (empty) page is needed to see the cursor stop moving
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
}
// trivial base case: rows bigger than number of matches
cursorMark = CURSOR_MARK_START;
params = params("q", "id:3 id:7", "rows", "111", "fl", "id", "sort", intsort + " asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(2, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 3);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(2, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
// sanity check our full walk method
// (first argument is the max number of ids allowed; the returned set holds the ids seen)
SentinelIntSet ids;
ids = assertFullWalkNoDups(10, params("q", "*:*", "rows", "4", "sort", "id desc"));
assertEquals(10, ids.size());
ids = assertFullWalkNoDups(9, params("q", "*:*", "rows", "1", "fq", "-id:4", "sort", "id asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:4 unexpectedly", ids.exists(4));
ids = assertFullWalkNoDups(9, params("q", "*:*", "rows", "3", "fq", "-id:6", "sort", "float desc, id asc, int asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:6 unexpectedly", ids.exists(6));
// ids 9, 7 and 2 have a negative float, an int other than 7 and an id other than 6,
// so they match none of the three clauses
ids = assertFullWalkNoDups(9, params("q", "float:[0 TO *] int:7 id:6", "rows", "3", "sort", "score desc, id desc"));
assertEquals(7, ids.size());
assertFalse("matched on id:9 unexpectedly", ids.exists(9));
assertFalse("matched on id:7 unexpectedly", ids.exists(7));
assertFalse("matched on id:2 unexpectedly", ids.exists(2));
// strategically delete/add some docs in the middle of walking the cursor
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "2", "fl", "id", "sort", "str asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 3);
cursorMark = assertHashNextCursorMark(rsp);
// delete the last guy we got
del("id:3");
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 6);
cursorMark = assertHashNextCursorMark(rsp);
// delete the next guy we expect
del("id:0");
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 8);
cursorMark = assertHashNextCursorMark(rsp);
// update a doc we've already seen so it repeats
// (id 5 moves from str "b" to str "c", i.e. to a position after the cursor)
indexDoc(sdoc("id", "5", "str", "c"));
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 5);
cursorMark = assertHashNextCursorMark(rsp);
// update the next doc we expect so it's now in the past
// (id 7 moves from str "c" to str "a", i.e. to a position before the cursor)
indexDoc(sdoc("id", "7", "str", "a"));
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertDocList(rsp, 9);
cursorMark = assertHashNextCursorMark(rsp);
// empty page: walk is done, cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed", cursorMark, assertHashNextCursorMark(rsp));
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class DistribCursorPagingTest method doRandomSortsOnLargeIndex.
/**
 * Randomized full-walk testing over a non-trivial number of docs, using assertFullWalkNoDups.
 *
 * <p>First a random number (100-200) of documents is indexed and a complete cursor walk is done
 * for every name from {@code getAllSortFieldNames()}, ascending and descending, with {@code id}
 * as the tie breaker. If a walk does not return every indexed doc, the test fails with a message
 * naming the sort and, when docs are missing, what a direct lookup of each missing id returned.
 * Then the index is grown to {@code totalDocs} documents and a few randomized sorts/queries are
 * walked.
 *
 * @throws Exception if indexing or querying fails
 */
public void doRandomSortsOnLargeIndex() throws Exception {
  final Collection<String> sortFields = getAllSortFieldNames();
  final int numInitialDocs = TestUtil.nextInt(random(), 100, 200);
  final int totalDocs = atLeast(500);

  // Phase 1: a smallish index, walked completely once per sortable field and direction.
  // The input docs are remembered so that missing ones can be reported on failure.
  final List<SolrInputDocument> seedDocs = new ArrayList<>();
  int nextId = 1;
  while (nextId <= numInitialDocs) {
    final SolrInputDocument seed = CursorPagingTest.buildRandomDocument(nextId);
    seedDocs.add(seed);
    indexDoc(seed);
    nextId++;
  }
  commit();

  final String[] directions = { " asc", " desc" };
  for (String field : sortFields) {
    for (String direction : directions) {
      // sorting on "id" alone is already unambiguous; anything else gets an id tie breaker
      final String tieBreaker = "id".equals(field) ? "" : ", id" + direction;
      final String sort = field + direction + tieBreaker;
      final String rows = Integer.toString(TestUtil.nextInt(random(), 13, 50));
      final SentinelIntSet seen = assertFullWalkNoDups(numInitialDocs, params("q", "*:*", "fl", "id," + field, "rows", rows, "sort", sort));
      final int seenCount = seen.size();
      if (numInitialDocs == seenCount) {
        continue;
      }

      // the walk came back with the wrong number of docs: assemble a detailed failure message
      final StringBuilder failure = new StringBuilder();
      failure.append("Expected ").append(numInitialDocs).append(" docs but got ").append(seenCount).append(". ");
      failure.append("sort=").append(sort).append(". ");
      if (seenCount < numInitialDocs) {
        failure.append("Missing doc(s): ");
        for (SolrInputDocument seed : seedDocs) {
          final int id = (Integer) seed.get("id").getValue();
          if (seen.exists(id)) {
            continue;
          }
          // look the doc up directly to tell "not returned by the walk" from "not in the index"
          final QueryResponse lookup = cloudClient.query(params("q", "id:" + id, "rows", "1"));
          if (lookup.getResults().size() == 0) {
            failure.append("<NOT RETRIEVABLE>:");
            failure.append(seed.values());
          } else {
            failure.append(lookup.getResults().get(0).getFieldValueMap().toString());
          }
          failure.append("; ");
        }
      }
      fail(failure.toString());
    }
  }

  // Phase 2: a much larger index and a handful of randomized multi-level sorts
  for (int id = numInitialDocs + 1; id <= totalDocs; id++) {
    indexDoc(CursorPagingTest.buildRandomDocument(id));
  }
  commit();

  final int numRandomSorts = atLeast(3);
  int round = 0;
  while (round < numRandomSorts) {
    final String sort = CursorPagingTest.buildRandomSort(sortFields);
    final String rows = Integer.toString(TestUtil.nextInt(random(), 63, 113));
    final String fl;
    if (random().nextBoolean()) {
      fl = "id";
    } else {
      fl = "id,score";
    }
    final boolean matchAll = random().nextBoolean();
    final String q;
    if (matchAll) {
      q = "*:*";
    } else {
      q = CursorPagingTest.buildRandomQuery();
    }
    final SentinelIntSet seen = assertFullWalkNoDups(totalDocs, params("q", q, "fl", fl, "rows", rows, "sort", sort));
    // the exact result size is only known when every doc matches
    if (matchAll) {
      assertEquals(totalDocs, seen.size());
    }
    round++;
  }
}
Aggregations