use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method assertFullWalkNoDups.
/**
 * Given a set of params, executes a cursor query using {@link CursorMarkParams#CURSOR_MARK_START}
 * and then continuously walks the results, feeding the {@link CursorMarkParams#CURSOR_MARK_NEXT}
 * value of each response back in as the cursor of the next request, for as long as a non-0
 * number of docs are returned. This method records the set of all ids (must be positive ints)
 * encountered and throws an assertion failure if any id is encountered more than once, or if
 * the set grows above maxSize.
 *
 * @param maxSize the largest number of distinct ids the walk is allowed to collect
 * @param params the base request params; the cursor mark param is added to them on every request
 * @return the set of every id seen during the walk
 * @throws Exception if executing a request or parsing its response fails
 */
public SentinelIntSet assertFullWalkNoDups(int maxSize, SolrParams params) throws Exception {
  // -1 is the "empty slot" sentinel of the set, which is why ids must be positive
  final SentinelIntSet ids = new SentinelIntSet(maxSize, -1);
  // the caller's params never change during the walk, so read "rows" once (null when unset)
  final Integer rows = params.getInt(CommonParams.ROWS);
  String cursorMark = CURSOR_MARK_START;
  int docsOnThisPage = Integer.MAX_VALUE;
  while (0 < docsOnThisPage) {
    String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark));
    Map<?, ?> rsp = (Map<?, ?>) ObjectBuilder.fromJSON(json);
    assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json, rsp.containsKey(CURSOR_MARK_NEXT));
    String nextCursorMark = (String) rsp.get(CURSOR_MARK_NEXT);
    assertNotNull(CURSOR_MARK_NEXT + " is null", nextCursorMark);
    List<?> docs = (List<?>) ((Map<?, ?>) rsp.get("response")).get("docs");
    docsOnThisPage = docs.size();
    if (null != rows) {
      assertTrue("Too many docs on this page: " + rows + " < " + docsOnThisPage, docsOnThisPage <= rows);
    }
    if (0 == docsOnThisPage) {
      // an empty page marks the end of the walk; the cursor must not have moved
      assertEquals("no more docs, but " + CURSOR_MARK_NEXT + " isn't same", cursorMark, nextCursorMark);
    }
    for (Object doc : docs) {
      // accept any Number rather than insisting on Long, so the walk does not depend on
      // which boxed type the JSON parser happened to produce for the id
      int id = ((Number) ((Map<?, ?>) doc).get("id")).intValue();
      assertFalse("walk already seen: " + id, ids.exists(id));
      ids.put(id);
      assertFalse("id set bigger then max allowed (" + maxSize + "): " + ids.size(), maxSize < ids.size());
    }
    cursorMark = nextCursorMark;
  }
  return ids;
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method testCacheImpacts.
/**
 * Checks that a full cursor walk affects the searcher caches the way we expect: nothing is
 * inserted into the queryResultCache, while the filterCache gains entries and records hits.
 */
public void testCacheImpacts() throws Exception {
  // Cursor queries can't live in the queryResultCache, but independent filters
  // should still be cached & reused.

  // Docs are deliberately not added in the order of any field, so that we never
  // inadvertently count on internal docid ordering.
  assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
  assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
  assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
  assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
  assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
  assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
  assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
  assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
  assertU(commit());

  final Collection<String> sortableFields = getAllSortFieldNames();

  final MetricsMap fcMetrics =
      (MetricsMap) h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.filterCache");
  assertNotNull(fcMetrics);
  final MetricsMap qrcMetrics =
      (MetricsMap) h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.queryResultCache");
  assertNotNull(qrcMetrics);

  // snapshot the counters before the walk
  final long qrcInsertsBefore = (Long) qrcMetrics.getValue().get("inserts");
  final long fcInsertsBefore = (Long) fcMetrics.getValue().get("inserts");
  final long fcHitsBefore = (Long) fcMetrics.getValue().get("hits");

  // random page size first, then the random sort -- same order the randomness was consumed before
  final String rows = Integer.toString(TestUtil.nextInt(random(), 1, 11));
  final String sort = buildRandomSort(sortableFields);
  final SentinelIntSet seen = assertFullWalkNoDups(10,
      params("q", "*:*",
             "rows", rows,
             "fq", "-id:[1 TO 2]",
             "fq", "-id:[6 TO 7]",
             "fl", "id",
             "sort", sort));
  assertEquals(6, seen.size());

  // snapshot the counters again after the walk
  final long qrcInsertsAfter = (Long) qrcMetrics.getValue().get("inserts");
  final long fcInsertsAfter = (Long) fcMetrics.getValue().get("inserts");
  final long fcHitsAfter = (Long) fcMetrics.getValue().get("hits");

  assertEquals("query cache inserts changed", qrcInsertsBefore, qrcInsertsAfter);

  // NOTE: use of pure negative filters causes "*:*" to be tracked in filterCache
  final long fcInsertDelta = fcInsertsAfter - fcInsertsBefore;
  assertEquals("filter cache did not grow correctly", 3, fcInsertDelta);

  final long fcHitDelta = fcHitsAfter - fcHitsBefore;
  assertTrue("filter cache did not have any new cache hits", fcHitDelta > 0);
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class DistribCursorPagingTest method assertFullWalkNoDups.
/**
 * <p>
 * Given a set of params, executes a cursor query using {@link CursorMarkParams#CURSOR_MARK_START}
 * and then continuously walks the results, feeding the {@link CursorMarkParams#CURSOR_MARK_NEXT}
 * value of each response back in as the cursor of the next request, for as long as a non-0
 * number of docs are returned. This method records the set of all ids (must be positive ints)
 * encountered and throws an assertion failure if any id is encountered more than once, or if
 * the set grows above maxSize.
 * </p>
 *
 * <p>
 * Note that this method explicitly uses the "cloudClient" for executing the queries,
 * instead of relying on the test infrastructure to execute the queries redundantly
 * against both the cloud client as well as a control client. This is because term stat
 * differences in a sharded setup can result in different scores for documents compared
 * to the control index -- which can affect the sorting in some cases and cause false
 * negatives in the response comparisons (even if we don't include "score" in the "fl")
 * </p>
 *
 * @param maxSize the largest number of distinct ids the walk is allowed to collect
 * @param params the base request params; the cursor mark param is added to them on every request
 * @return the set of every id seen during the walk
 * @throws Exception if executing a request fails
 */
public SentinelIntSet assertFullWalkNoDups(int maxSize, SolrParams params) throws Exception {
  // -1 is the "empty slot" sentinel of the set, which is why ids must be positive
  final SentinelIntSet ids = new SentinelIntSet(maxSize, -1);
  // the caller's params never change during the walk, so read "rows" once (null when unset)
  final Integer rows = params.getInt(CommonParams.ROWS);
  String cursorMark = CURSOR_MARK_START;
  int docsOnThisPage = Integer.MAX_VALUE;
  while (0 < docsOnThisPage) {
    final SolrParams p = p(params, CURSOR_MARK_PARAM, cursorMark);
    final QueryResponse rsp = cloudClient.query(p);
    String nextCursorMark = assertHashNextCursorMark(rsp);
    SolrDocumentList docs = extractDocList(rsp);
    docsOnThisPage = docs.size();
    if (null != rows) {
      assertTrue("Too many docs on this page: " + rows + " < " + docsOnThisPage, docsOnThisPage <= rows);
    }
    if (0 == docsOnThisPage) {
      // an empty page marks the end of the walk; the cursor must not have moved
      assertEquals("no more docs, but " + CURSOR_MARK_NEXT + " isn't same", cursorMark, nextCursorMark);
    }
    for (SolrDocument doc : docs) {
      // accept any Number rather than insisting on Integer, so the walk does not depend on
      // which boxed numeric type the id field comes back as
      int id = ((Number) doc.get("id")).intValue();
      if (ids.exists(id)) {
        // duplicate id: gather as much diagnostic detail as possible before failing
        String msg = "(" + p + ") walk already seen: " + id;
        try {
          // a per-shard comparison failure here would explain why the doc showed up twice
          queryAndCompareShards(params("distrib", "false", "q", "id:" + id));
        } catch (AssertionError ae) {
          throw new AssertionError(msg + ", found shard inconsistency that would explain it...", ae);
        }
        // shards agree with each other, so just report what a lookup of the id returns
        final QueryResponse byId = cloudClient.query(params("q", "id:" + id));
        throw new AssertionError(msg + ", don't know why; q=id:" + id + " gives: " + byId.toString());
      }
      ids.put(id);
      assertFalse("id set bigger then max allowed (" + maxSize + "): " + ids.size(), maxSize < ids.size());
    }
    cursorMark = nextCursorMark;
  }
  return ids;
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method testSimple.
/**
 * Simple static test of some carefully crafted docs.
 * <p>
 * Walks a cursor page by page over a fixed set of ten documents for a variety of sorts,
 * asserting the exact docs of every page and that the cursor mark stops changing once the
 * results are exhausted. It finishes by deleting and updating docs in the middle of a walk.
 * The order of the statements below matters: later expectations depend on the index
 * changes made earlier in the method.
 * </p>
 */
public void testSimple() throws Exception {
String cursorMark;
SolrParams params = null;
// randomly sort on either the "int" field or its "int_dv" variant
final String intsort = "int" + (random().nextBoolean() ? "" : "_dv");
// same base name as intsort; the "_first" / "_last" suffix is appended where it is used below
final String intmissingsort = intsort;
// trivial base case: ensure cursorMark against an empty index doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "4", "fl", "id", "sort", "id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==0", "/response/start==0", "/response/docs==[]");
assertEquals(CURSOR_MARK_START, cursorMark);
// don't add in order of any field to ensure we aren't inadvertently
// counting on internal docid ordering
assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
// int is missing
assertU(adoc("id", "3", "str", "a", "float", "11.1"));
assertU(commit());
// base case: ensure cursorMark that matches no docs doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "id:9999999", "rows", "4", "fl", "id", "sort", "id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==0", "/response/start==0", "/response/docs==[]");
assertEquals(CURSOR_MARK_START, cursorMark);
// edge case: ensure rows=0 doesn't blow up and gives back same cursor for next
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "0", "fl", "id", "sort", "id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[]");
assertEquals(CURSOR_MARK_START, cursorMark);
// simple id sort (this request carries no facet params)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:6", "rows", "4", "fl", "id", "sort", "id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==9", "/response/start==0", "/response/docs==[{'id':9},{'id':8},{'id':7},{'id':6}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==9", "/response/start==0", "/response/docs==[{'id':5},{'id':3},{'id':2},{'id':1}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==9", "/response/start==0", "/response/docs==[{'id':0}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==9", "/response/start==0", "/response/docs==[]"));
// simple score sort w/some faceting
cursorMark = CURSOR_MARK_START;
params = params("q", "float:[0 TO *] int:7 id:6", "rows", "4", "fl", "id", "facet", "true", "facet.field", "str", "json.nl", "map", "sort", "score desc, id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==7", "/response/start==0", "/response/docs==[{'id':6},{'id':1},{'id':8},{'id':5}]", "/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==7", "/response/start==0", "/response/docs==[{'id':4},{'id':3},{'id':0}]", "/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==7", "/response/start==0", "/response/docs==[]", "/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"));
// int sort with dups, id tie breaker ... and some faceting
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "facet", "true", "facet.field", "str", "json.nl", "map", "sort", intsort + " asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':7},{'id':0},{'id':3}]", "/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':4},{'id':1},{'id':6}]", "/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':9},{'id':2}]", "/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[]", "/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"));
// int missing first sort with dups, id tie breaker
// (id:3, the doc indexed without an int value, is expected at the very start)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "json.nl", "map", "sort", intmissingsort + "_first asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':3},{'id':7},{'id':0}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':4},{'id':1},{'id':6}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':9},{'id':2}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[]"));
// int missing last sort with dups, id tie breaker
// (id:3, the doc indexed without an int value, is expected at the very end)
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055", "rows", "3", "fl", "id", "json.nl", "map", "sort", intmissingsort + "_last asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':7},{'id':0},{'id':4}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':1},{'id':6},{'id':9}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':2},{'id':3}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[]"));
// string sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "6", "fl", "id", "sort", "str asc, id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':6},{'id':4},{'id':3},{'id':1},{'id':8},{'id':5}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':0},{'id':9},{'id':7},{'id':2}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[]"));
// tri-level sort with more dups of primary than fit on a page
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "2", "fl", "id", "sort", "float asc, " + intsort + " desc, id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':2},{'id':9}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':7},{'id':4}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':3},{'id':8}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':5},{'id':6}]");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':1},{'id':0}]");
// we've exactly exhausted all the results, but solr had no way of knowing that
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[]"));
// trivial base case: rows bigger than number of matches
cursorMark = CURSOR_MARK_START;
params = params("q", "id:3 id:7", "rows", "111", "fl", "id", "sort", intsort + " asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==2", "/response/start==0", "/response/docs==[{'id':7},{'id':3}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==2", "/response/start==0", "/response/docs==[]"));
// sanity check our full walk method
SentinelIntSet ids;
ids = assertFullWalkNoDups(10, params("q", "*:*", "rows", "4", "sort", "id desc"));
assertEquals(10, ids.size());
ids = assertFullWalkNoDups(9, params("q", "*:*", "rows", "1", "fq", "-id:4", "sort", "id asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:4 unexpectedly", ids.exists(4));
ids = assertFullWalkNoDups(9, params("q", "*:*", "rows", "3", "fq", "-id:6", "sort", "float desc, id asc, " + intsort + " asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:6 unexpectedly", ids.exists(6));
ids = assertFullWalkNoDups(9, params("q", "float:[0 TO *] int:7 id:6", "rows", "3", "sort", "score desc, id desc"));
assertEquals(7, ids.size());
assertFalse("matched on id:9 unexpectedly", ids.exists(9));
assertFalse("matched on id:7 unexpectedly", ids.exists(7));
assertFalse("matched on id:2 unexpectedly", ids.exists(2));
// strategically delete/add some docs in the middle of walking the cursor
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*", "rows", "2", "fl", "id", "sort", "str asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==10", "/response/start==0", "/response/docs==[{'id':1},{'id':3}]");
// delete the last guy we got
assertU(delI("3"));
assertU(commit());
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==9", "/response/start==0", "/response/docs==[{'id':4},{'id':6}]");
// delete the next guy we expect
assertU(delI("0"));
assertU(commit());
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':5},{'id':8}]");
// update a doc we've already seen so it repeats
// (id:5 moves from str "b" to str "c", i.e. to a position after the current cursor)
assertU(adoc("id", "5", "str", "c"));
assertU(commit());
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':2},{'id':5}]");
// update the next doc we expect so it's now in the past
// (id:7 moves from str "c" to str "a", i.e. to a position before the current cursor)
assertU(adoc("id", "7", "str", "a"));
assertU(commit());
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[{'id':9}]");
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark, assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark), "/response/numFound==8", "/response/start==0", "/response/docs==[]"));
}
use of org.apache.lucene.util.SentinelIntSet in project lucene-solr by apache.
the class CursorPagingTest method testFacetingWithRandomSorts.
/**
 * Tests faceting combined with deep paging: indexes a random number of random docs, then
 * walks all of them with a cursor using a randomly chosen sort, page size, facet field and
 * facet method, and checks that every doc was seen.
 */
public void testFacetingWithRandomSorts() throws Exception {
  final int numDocs = TestUtil.nextInt(random(), 1000, 3000);
  final String[] facetCandidates = { "int", "long", "str" };
  final String[] methodCandidates = { "enum", "fc", "fcs" };

  // index docs with ids 1..numDocs
  int nextId = 1;
  while (nextId <= numDocs) {
    SolrInputDocument randomDoc = buildRandomDocument(nextId);
    assertU(adoc(randomDoc));
    nextId++;
  }
  assertU(commit());

  final String[] sortCandidates = getAllSortFieldNames().toArray(new String[0]);

  // pick a random primary sort field and direction
  final String primaryField = sortCandidates[TestUtil.nextInt(random(), 0, sortCandidates.length - 1)];
  final String direction;
  if (0 == TestUtil.nextInt(random(), 0, 1)) {
    direction = " asc";
  } else {
    direction = " desc";
  }

  // unless "id" is already the primary sort, add it as the tie breaker (same direction)
  final StringBuilder sortSpec = new StringBuilder(primaryField).append(direction);
  if (!primaryField.equals("id")) {
    sortSpec.append(", id").append(direction);
  }
  final String sort = sortSpec.toString();

  final String rows = Integer.toString(TestUtil.nextInt(random(), 13, 50));
  final String facetField = facetCandidates[TestUtil.nextInt(random(), 0, facetCandidates.length - 1)];
  final String facetMethod = methodCandidates[TestUtil.nextInt(random(), 0, methodCandidates.length - 1)];

  final SentinelIntSet seen = assertFullWalkNoDupsWithFacets(numDocs,
      params("q", "*:*",
             "fl", "id," + facetField,
             "facet", "true",
             "facet.field", facetField,
             "facet.method", facetMethod,
             "facet.missing", "true",
             "facet.limit", "-1", // unlimited
             "rows", rows,
             "sort", sort));
  assertEquals(numDocs, seen.size());
}
Aggregations