use of org.apache.solr.client.solrj.response.PivotField in project lucene-solr by apache.
the class DistributedFacetPivotSmallTest method doTestDeepPivotStats.
/**
 * Runs a two-level pivot facet on {@code place_t,company_t} with per-pivot stats over
 * {@code price_ti} (keyed as {@code avg_price}) and checks the stats attached to three
 * place/company buckets: dublin/microsoft, cardiff/polecat and krakow/fujitsu.
 *
 * @param justMean - only the mean stat is requested/computed
 */
private void doTestDeepPivotStats(boolean justMean) throws Exception {
  SolrParams params = params("q", "*:*", "rows", "0", "facet", "true", "stats", "true",
      "facet.pivot", "{!stats=s1}place_t,company_t",
      "stats.field", ("{!key=avg_price tag=s1 " + (justMean ? "mean=true" : "") + "}price_ti"));
  QueryResponse rsp = query(params);
  List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");

  // dublin -> microsoft
  PivotField dublinPivotField = placePivots.get(0);
  assertEquals("dublin", dublinPivotField.getValue());
  assertEquals(4, dublinPivotField.getCount());
  PivotField microsoftPivotField = dublinPivotField.getPivot().get(0);
  assertEquals("microsoft", microsoftPivotField.getValue());
  assertEquals(4, microsoftPivotField.getCount());
  FieldStatsInfo dublinMicrosoftStatsInfo = microsoftPivotField.getFieldStatsInfo().get("avg_price");
  assertEquals(21.0, (double) dublinMicrosoftStatsInfo.getMean(), 0.1E-7);
  if (justMean) {
    assertOnlyMeanComputed(dublinMicrosoftStatsInfo);
  } else {
    assertEquals(15.0, dublinMicrosoftStatsInfo.getMin());
    assertEquals(29.0, dublinMicrosoftStatsInfo.getMax());
    assertEquals(3, (long) dublinMicrosoftStatsInfo.getCount());
    assertEquals(1, (long) dublinMicrosoftStatsInfo.getMissing());
    assertEquals(63.0, dublinMicrosoftStatsInfo.getSum());
    assertEquals(1427.0, dublinMicrosoftStatsInfo.getSumOfSquares(), 0.1E-7);
    assertEquals(7.211102550927978, dublinMicrosoftStatsInfo.getStddev(), 0.1E-7);
  }

  // cardiff -> polecat
  PivotField cardiffPivotField = placePivots.get(2);
  assertEquals("cardiff", cardiffPivotField.getValue());
  assertEquals(3, cardiffPivotField.getCount());
  PivotField polecatPivotField = cardiffPivotField.getPivot().get(0);
  assertEquals("polecat", polecatPivotField.getValue());
  assertEquals(3, polecatPivotField.getCount());
  FieldStatsInfo cardiffPolecatStatsInfo = polecatPivotField.getFieldStatsInfo().get("avg_price");
  assertEquals(27.0, (double) cardiffPolecatStatsInfo.getMean(), 0.1E-7);
  if (justMean) {
    assertOnlyMeanComputed(cardiffPolecatStatsInfo);
  } else {
    assertEquals(15.0, cardiffPolecatStatsInfo.getMin());
    assertEquals(39.0, cardiffPolecatStatsInfo.getMax());
    assertEquals(2, (long) cardiffPolecatStatsInfo.getCount());
    assertEquals(1, (long) cardiffPolecatStatsInfo.getMissing());
    assertEquals(54.0, cardiffPolecatStatsInfo.getSum());
    assertEquals(1746.0, cardiffPolecatStatsInfo.getSumOfSquares(), 0.1E-7);
    assertEquals(16.97056274847714, cardiffPolecatStatsInfo.getStddev(), 0.1E-7);
  }

  // krakow -> fujitsu: the bucket has one doc, but the stats report count 0 / missing 1,
  // so the mean is expected to be NaN and min/max to be absent
  PivotField krakowPivotField = placePivots.get(3);
  assertEquals("krakow", krakowPivotField.getValue());
  assertEquals(3, krakowPivotField.getCount());
  PivotField fujitsuPivotField = krakowPivotField.getPivot().get(3);
  assertEquals("fujitsu", fujitsuPivotField.getValue());
  assertEquals(1, fujitsuPivotField.getCount());
  FieldStatsInfo krakowFujitsuStatsInfo = fujitsuPivotField.getFieldStatsInfo().get("avg_price");
  assertEquals(Double.NaN, (double) krakowFujitsuStatsInfo.getMean(), 0.1E-7);
  if (justMean) {
    assertOnlyMeanComputed(krakowFujitsuStatsInfo);
  } else {
    assertNull(krakowFujitsuStatsInfo.getMin());
    assertNull(krakowFujitsuStatsInfo.getMax());
    assertEquals(0, (long) krakowFujitsuStatsInfo.getCount());
    assertEquals(1, (long) krakowFujitsuStatsInfo.getMissing());
    assertEquals(0.0, krakowFujitsuStatsInfo.getSum());
    assertEquals(0.0, krakowFujitsuStatsInfo.getSumOfSquares(), 0.1E-7);
    // (the mean was already verified above, before the justMean branch)
    assertEquals(0.0, krakowFujitsuStatsInfo.getStddev(), 0.1E-7);
  }
}

/** Asserts that every stat other than the mean is absent (null) in the given stats. */
private void assertOnlyMeanComputed(FieldStatsInfo stats) {
  assertNull(stats.getMin());
  assertNull(stats.getMax());
  assertNull(stats.getCount());
  assertNull(stats.getMissing());
  assertNull(stats.getSum());
  assertNull(stats.getSumOfSquares());
  assertNull(stats.getStddev());
}
use of org.apache.solr.client.solrj.response.PivotField in project lucene-solr by apache.
the class DistributedFacetPivotWhiteBoxTest method doTestRefinementRequest.
/**
 * Hand-builds the kind of pivot refinement request a shard receives during a distributed
 * query and verifies the response: per-pivot stats must be present, while top level stats
 * must be absent (because they shouldn't be overcounted).
 */
private void doTestRefinementRequest() throws Exception {
  final SolrParams refineParams = params(
      "facet.missing", "true",
      "facet", "true",
      "facet.limit", "4",
      "distrib", "false",
      // "version", "2",
      "rows", "0",
      "facet.sort", "index",
      "fpt0", "~krakow",
      "facet.pivot.mincount", "-1",
      "isShard", "true",
      "facet.pivot", "{!fpt=0 stats=st1}place_t,company_t",
      "stats", "false",
      "stats.field", "{!key=sk1 tag=st1,st2}price_ti");

  // send the request to the first client only
  final QueryResponse shardResponse = clients.get(0).query(new ModifiableSolrParams(refineParams));

  assertNull("pivot refine request should *NOT* include top level stats",
      shardResponse.getFieldStatsInfo());

  final List<PivotField> refinedPlaces = shardResponse.getFacetPivot().get("place_t,company_t");
  assertEquals("asked to refine exactly one place", 1, refinedPlaces.size());

  final PivotField onlyPlace = refinedPlaces.get(0);
  assertFalse("pivot stats should not be empty in refinement request",
      onlyPlace.getFieldStatsInfo().isEmpty());
}
use of org.apache.solr.client.solrj.response.PivotField in project lucene-solr by apache.
the class DistributedFacetPivotLargeTest method test.
/**
 * Indexes the pivot facet documents (via setupDistributedPivotFacetDocuments) and then runs a
 * long series of {@code facet.pivot} queries, asserting the size, value and count of the
 * returned pivots for each combination of limit, offset, sort, mincount, facet.missing,
 * filter exclusion and overrequest settings. Finishes by delegating to
 * doTestDeepPivotStats and doTestPivotRanges.
 */
@Test
@ShardsFixed(num = 4)
public void test() throws Exception {
this.stress = 0;
handle.clear();
// NOTE(review): presumably tells the response comparison to skip these volatile keys -- confirm against the base class
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
setupDistributedPivotFacetDocuments();
// scratch variables, re-assigned by each section below
QueryResponse rsp = null;
List<PivotField> pivots = null;
PivotField firstInt = null;
PivotField firstBool = null;
PivotField firstDate = null;
PivotField firstPlace = null;
PivotField firstCompany = null;
// basic check w/ limit & default sort (count)
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", FacetParams.FACET_LIMIT, "12");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(12, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// trivial mincount=0 check
rsp = query("q", "does_not_exist_s:foo", "rows", "0", "facet", "true", "facet.pivot", "company_t", FacetParams.FACET_LIMIT, "10", FacetParams.FACET_PIVOT_MINCOUNT, "0");
pivots = rsp.getFacetPivot().get("company_t");
assertEquals(10, pivots.size());
for (PivotField p : pivots) {
assertEquals(0, p.getCount());
}
// sanity check limit=0 w/ mincount=0 & missing=true
//
// SOLR-6328: doesn't work for single node, so can't work for distrib either (yet)
//
// PivotFacetField's init of needRefinementAtThisLevel as needing potential change
//
// rsp = query( "q", "*:*",
// "rows", "0",
// "facet","true",
// "f.company_t.facet.limit", "10",
// "facet.pivot","special_s,bogus_s,company_t",
// "facet.missing", "true",
// FacetParams.FACET_LIMIT, "0",
// FacetParams.FACET_PIVOT_MINCOUNT,"0");
// pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
// assertEquals(1, pivots.size()); // only the missing
// assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
// assertEquals(pivots.toString(), 1, pivots.get(0).getPivot());
// assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0));
// // TODO: some asserts on company results
// basic check w/ default sort, limit, & mincount==0
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", FacetParams.FACET_LIMIT, "50", FacetParams.FACET_PIVOT_MINCOUNT, "0");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(50, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// sort=index + offset + limit w/ some variables
for (SolrParams variableParams : new SolrParams[] { // variant 1 (next element): defaults
params(), // variant 2 (next element): force refinement
params(FacetParams.FACET_OVERREQUEST_RATIO, "1", FacetParams.FACET_OVERREQUEST_COUNT, "0") }) {
SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", "facet.sort", "index", "f.place_s.facet.limit", "20", "f.place_s.facet.offset", "40", "facet.pivot", "place_s,company_t"), variableParams);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("place_s,company_t");
// limit
assertEquals(20, pivots.size());
// first 10 places: values ending in "placeholder", each with the same 3 companies
for (int i = 0; i < 10; i++) {
PivotField place = pivots.get(i);
assertTrue(place.toString(), place.getValue().toString().endsWith("placeholder"));
assertEquals(3, place.getPivot().size());
assertPivot("company_t", "bbc", 6, place.getPivot().get(0));
assertPivot("company_t", "microsoft", 6, place.getPivot().get(1));
assertPivot("company_t", "polecat", 6, place.getPivot().get(2));
}
assertPivot("place_s", "cardiff", 257, pivots.get(10));
assertPivot("place_s", "krakaw", 1, pivots.get(11));
assertPivot("place_s", "medical staffing network holdings, inc.", 51, pivots.get(12));
// remaining places: values starting with "placeholder", each with a single "compholder" company
for (int i = 13; i < 20; i++) {
PivotField place = pivots.get(i);
assertTrue(place.toString(), place.getValue().toString().startsWith("placeholder"));
assertEquals(1, place.getPivot().size());
PivotField company = place.getPivot().get(0);
assertTrue(company.toString(), company.getValue().toString().startsWith("compholder"));
assertEquals(company.toString(), 1, company.getCount());
}
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// sort=index + mincount=0
//
// SOLR-6329: facet.pivot.mincount=0 doesn't work well with distrib
//
// broken honda
//
// This is tricky, here's what i think is happening....
// - "company:honda" only exists on twoShard, and only w/ "place:cardiff"
// - twoShard has no other places in its docs
// - twoShard can't return any other places to w/ honda as a count=0 sub-value
// - if we refined all other companies places, would twoShard return honda==0 ?
// ... but there's no refinement since mincount==0
// - would it even matter
//
// should we remove the refinement short circuit?
//
// rsp = query( params( "q", "*:*",
// "rows", "0",
// "facet","true",
// "facet.sort","index",
// "f.place_s.facet.limit", "20",
// "f.place_s.facet.offset", "40",
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
// "facet.pivot", "place_s,company_t") );
// // TODO: more asserts
//
//
// really trivial demonstration of the above problem
//
// rsp = query( params( "q", "*:*",
// "rows", "0",
// "facet","true",
// FacetParams.FACET_PIVOT_MINCOUNT,"0",
// "facet.pivot", "top_s,sub_s") );
//
// facet.missing=true + facet.sort=index + facet.pivot.mincount > 0 (SOLR-7829)
final int expectedNumDocsMissingBool = 111;
for (String facetSort : new String[] { "count", "index" }) {
for (int mincount : new int[] { 1, 20, (expectedNumDocsMissingBool / 2) - 1, (expectedNumDocsMissingBool / 2) + 1, expectedNumDocsMissingBool }) {
SolrParams p = params("q", "*:*",
"fq", // the fq value on the next line simplifies asserts by ruling out true counts
"-real_b:true", "rows", "0", "facet", "true", "facet.pivot", "real_b", "facet.missing", "true", "facet.pivot.mincount", "" + mincount, "facet.sort", facetSort);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("real_b");
// false, missing - in that order, regardless of sort
assertEquals(2, pivots.size());
assertPivot("real_b", false, 300, pivots.get(0));
assertPivot("real_b", null, expectedNumDocsMissingBool, pivots.get(1));
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
}
// basic check w/ limit & index sort
for (SolrParams facetParams : // results should be the same regardless of whether local params are used
new SolrParams[] { // NOTE(review): only a fragment of a commented-out local-param variant remained here; just the top-level param form below is exercised
params("facet.pivot", "place_s,company_t", FacetParams.FACET_LIMIT, "4", "facet.sort", "index") }) {
SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true"), facetParams);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Pivot Faceting (combined with Field Faceting)
for (SolrParams facetParams : // (either way, facet results should be the same)
new SolrParams[] { params("facet.pivot", "place_s,company_t", "facet.field", "place_s"), params("facet.pivot", "{!ex=ok}place_s,company_t", "facet.field", "{!ex=ok}place_s", "fq", "{!tag=ok}place_s:cardiff"), params("facet.pivot", "{!ex=pl,co}place_s,company_t", "fq", "{!tag=pl}place_s:cardiff", "fq", "{!tag=co}company_t:bbc") }) {
// default order (count)
rsp = query(SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", FacetParams.FACET_LIMIT, "4"), facetParams));
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
// Index Order
rsp = query(SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", FacetParams.FACET_LIMIT, "4", "facet.sort", "index"), facetParams));
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
// only 3 values expected here, fewer than the requested limit of 4
assertEquals(3, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
// Field level limits
rsp = query(SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", "f.place_s.facet.limit", "2", "f.company_t.facet.limit", "4"), facetParams));
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
}
// Pivot Faceting Count w/fq (not excluded)
rsp = query("q", "*:*", "rows", "0", "fq", "place_s:cardiff", "facet", "true", "facet.pivot", "place_s,company_t", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(1, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
// Same Pivot - one with exclusion and one w/o
rsp = query("q", "*:*", "rows", "0", "fq", "{!tag=ff}pay_i:[2000 TO *]", "facet", "true", "facet.pivot", "{!key=filt}place_s,company_t", "facet.pivot", "{!key=nofilt ex=ff}place_s,company_t", FacetParams.FACET_LIMIT, "4");
// "filt": the pay_i filter applies
pivots = rsp.getFacetPivot().get("filt");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 105, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
// "nofilt": the pay_i filter is excluded via ex=ff
pivots = rsp.getFacetPivot().get("nofilt");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
// Same Pivot - one in default (count) order and one in index order
//
// Broken: SOLR-6193 - the facet.sort localparam isn't being picked up correctly
//
// rsp = query( "q", "*:*",
// "rows", "0",
// "facet","true",
// "fq","pay_i:[2000 TO *]",
// "facet.pivot","{!key=sc}place_s,company_t",
// "facet.pivot","{!key=si facet.sort=index}place_s,company_t",
// FacetParams.FACET_LIMIT, "4");
// pivots = rsp.getFacetPivot().get("sc");
// assertEquals(4, pivots.size());
// firstPlace = pivots.get(0);
// assertPivot("place_s", "cardiff", 105, firstPlace);
// assertEquals(4, firstPlace.getPivot().size());
// assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
// assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
// //
// pivots = rsp.getFacetPivot().get("si");
// assertEquals(4, pivots.size());
// firstPlace = pivots.get(0);
// assertPivot("place_s", "0placeholder", 6, firstPlace);
// assertEquals(3, firstPlace.getPivot().size()); // only 3 in the data < facet.limit
// assertPivot("company_t", "bbc", 6, firstPlace.getPivot().get(0));
// assertPivot("company_t", "microsoft", 6, firstPlace.getPivot().get(1));
//
// Field level limits and small offset
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", "f.place_s.facet.limit", "2", "f.company_t.facet.limit", "4", "facet.offset", "1");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
// num vals in data < limit==4
assertEquals(2, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 50, firstCompany);
// Field level offsets and limit
rsp = query("q", "*:*", "rows", "0", "fq", "{!tag=pl}place_s:cardiff", "facet", "true", "facet.pivot", "{!ex=pl}place_s,company_t", "f.place_s.facet.offset", "1", "f.company_t.facet.offset", "2", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(4, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
// num vals in data < limit==4
assertEquals(1, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "polecat", 50, firstCompany);
// datetime
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "hiredate_dt,place_s,company_t", "f.hiredate_dt.facet.limit", "2", "f.hiredate_dt.facet.offset", "1", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("hiredate_dt,place_s,company_t");
assertEquals(2, pivots.size());
// 2012-09-01T12:30:00Z
firstDate = pivots.get(0);
assertPivot("hiredate_dt", new Date(1346502600000L), 200, firstDate);
// num vals in data < limit==4
assertEquals(1, firstDate.getPivot().size());
firstPlace = firstDate.getPivot().get(0);
assertPivot("place_s", "cardiff", 200, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 50, firstCompany);
// int
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "pay_i,place_s,company_t", "f.pay_i.facet.limit", "2", "f.pay_i.facet.offset", "1", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("pay_i,place_s,company_t");
assertEquals(2, pivots.size());
firstInt = pivots.get(0);
assertPivot("pay_i", 2000, 50, firstInt);
assertEquals(4, firstInt.getPivot().size());
firstPlace = firstInt.getPivot().get(0);
assertPivot("place_s", "0placeholder", 1, firstPlace);
assertEquals(3, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 1, firstCompany);
// boolean
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "real_b,place_s,company_t", "f.real_b.facet.missing", "true", "f.real_b.facet.limit", "2", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("real_b,place_s,company_t");
assertEquals(3, pivots.size());
firstBool = pivots.get(0);
assertPivot("real_b", false, 300, firstBool);
assertEquals(4, firstBool.getPivot().size());
firstPlace = firstBool.getPivot().get(0);
assertPivot("place_s", "0placeholder", 6, firstPlace);
assertEquals(3, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 6, firstCompany);
// bogus fields
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "doesntexist_t,neitherdoi_i", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
assertEquals(0, pivots.size());
// bogus fields with facet.missing
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "doesntexist_t,neitherdoi_i", "facet.missing", "true", FacetParams.FACET_LIMIT, "4");
pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
assertEquals(1, pivots.size());
assertPivot("doesntexist_t", null, docNumber, pivots.get(0));
assertEquals(1, pivots.get(0).getPivot().size());
assertPivot("neitherdoi_i", null, docNumber, pivots.get(0).getPivot().get(0));
// Negative facet limit
for (SolrParams facetParams : // a local param, or specified as a per-field override for both fields
new SolrParams[] { params(FacetParams.FACET_LIMIT, "-1", "facet.pivot", "place_s,company_t"), // params("facet.pivot","{!facet.limit=-1}place_s,company_t"),
params("f.place_s.facet.limit", "-1", "f.company_t.facet.limit", "-1", "facet.pivot", "place_s,company_t") }) {
SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", "facet.sort", "count"), facetParams);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(103, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(54, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Negative per-field facet limit (outer)
for (SolrParams facetParams : // a global or a local param
new SolrParams[] { // params( "facet.pivot","{!f.id.facet.limit=-1}place_s,id" ),
params("facet.pivot", "place_s,id", "f.id.facet.limit", "-1") }) {
SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", "facet.sort", "count"), facetParams);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("place_s,id");
// place_s has no limit override here, so the default limit applies
assertEquals(100, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(257, firstPlace.getPivot().size());
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Negative per-field facet limit (inner)
for (SolrParams facetParams : // a global or a local param
new SolrParams[] { // params( "facet.pivot","{!f.place_s.facet.limit=-1}place_s,id" ),
params("facet.pivot", "place_s,id", "f.place_s.facet.limit", "-1") }) {
SolrParams p = SolrParams.wrapDefaults(params("q", "*:*", "rows", "0", "facet", "true", "facet.sort", "count"), facetParams);
try {
rsp = query(p);
pivots = rsp.getFacetPivot().get("place_s,id");
assertEquals(103, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
// id has no limit override here, so the default limit applies
assertEquals(100, firstPlace.getPivot().size());
} catch (AssertionFailedError ae) {
// re-throw with the request params appended so the failing variant can be identified
throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
}
}
// Mincount + facet.pivot 2 different ways (swap field order)
rsp = query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", "facet.pivot", "company_t,place_s", FacetParams.FACET_PIVOT_MINCOUNT, "6");
pivots = rsp.getFacetPivot().get("place_s,company_t");
assertEquals(52, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
assertEquals(4, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "bbc", 101, firstCompany);
// same data, pivoted company first
pivots = rsp.getFacetPivot().get("company_t,place_s");
assertEquals(4, pivots.size());
firstCompany = pivots.get(0);
assertPivot("company_t", "bbc", 451, firstCompany);
assertEquals(52, firstCompany.getPivot().size());
firstPlace = firstCompany.getPivot().get(0);
assertPivot("place_s", "cardiff", 101, firstPlace);
// refine on SPECIAL empty string
rsp = query("q", "*:*", "fq", "-place_s:0placeholder", "rows", "0", "facet", "true", "facet.limit", "1", // the overrequest ratio/count that follow force refinement
FacetParams.FACET_OVERREQUEST_RATIO,
"0",
FacetParams.FACET_OVERREQUEST_COUNT,
"1", "facet.pivot", "special_s,company_t");
// all docs but 0place
assertEquals(docNumber - 6, rsp.getResults().getNumFound());
pivots = rsp.getFacetPivot().get("special_s,company_t");
assertEquals(1, pivots.size());
firstPlace = pivots.get(0);
assertPivot("special_s", SPECIAL, 3, firstPlace);
assertEquals(1, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "microsoft", 2, firstCompany);
// TODO test "company_t,special_s" as well
// refine on SPECIAL empty string & facet.missing
// Also proves refinement on non-top elements occurs and allows them to get into the top
rsp = query("q", "*:*", "fq", "-place_s:0placeholder", "rows", "0", "facet", "true", "facet.limit", "1", "facet.missing", "true", // the overrequest ratio/count that follow force refinement
FacetParams.FACET_OVERREQUEST_RATIO,
"0",
FacetParams.FACET_OVERREQUEST_COUNT,
"2", "facet.pivot", "special_s,company_t");
// all docs but 0place
assertEquals(docNumber - 6, rsp.getResults().getNumFound());
pivots = rsp.getFacetPivot().get("special_s,company_t");
assertEquals(2, pivots.size());
firstPlace = pivots.get(0);
assertPivot("special_s", SPECIAL, 3, firstPlace);
assertEquals(1, firstPlace.getPivot().size());
firstCompany = firstPlace.getPivot().get(0);
assertPivot("company_t", "microsoft", 2, firstCompany);
// last is "missing" val
// -0place -SPECIAL -xxx
assertPivot("special_s", null, docNumber - 6 - 3 - 2, pivots.get(1));
// forced refinement on facet.missing
rsp = query("q", "*:*", "rows", "0", "facet", "true", "f.bogus_x_s.facet.missing", "true", "f.bogus_y_s.facet.missing", "true", "facet.pivot", "bogus_x_s,place_s,bogus_y_s,company_t", FacetParams.FACET_LIMIT, "12");
pivots = rsp.getFacetPivot().get("bogus_x_s,place_s,bogus_y_s,company_t");
// just the missing value for bogus_x_s
assertEquals(1, pivots.size());
assertPivot("bogus_x_s", null, docNumber, pivots.get(0));
pivots = pivots.get(0).getPivot();
// places
assertEquals(12, pivots.size());
firstPlace = pivots.get(0);
assertPivot("place_s", "cardiff", 257, firstPlace);
// just the missing value for bogus_y_s
assertEquals(1, firstPlace.getPivot().size());
assertPivot("bogus_y_s", null, 257, firstPlace.getPivot().get(0));
assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0).getPivot().get(0));
// Microsoft will come back wrong if refinement was not done correctly
assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(0).getPivot().get(1));
// Overrequesting a lot (the response is not inspected here)
this.query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", FacetParams.FACET_OVERREQUEST_RATIO, "10", FacetParams.FACET_OVERREQUEST_COUNT, "100");
// Overrequesting off (the response is not inspected here)
this.query("q", "*:*", "rows", "0", "facet", "true", "facet.pivot", "place_s,company_t", FacetParams.FACET_OVERREQUEST_RATIO, "0", FacetParams.FACET_OVERREQUEST_COUNT, "0");
// remaining checks live in their own helper methods
doTestDeepPivotStats();
doTestPivotRanges();
}
use of org.apache.solr.client.solrj.response.PivotField in project lucene-solr by apache.
the class DistributedFacetPivotLongTailTest method test.
@Test
@ShardsFixed(num = 3)
public void test() throws Exception {
final SolrClient shard0 = clients.get(0);
final SolrClient shard1 = clients.get(1);
final SolrClient shard2 = clients.get(2);
// the 5 top foo_s terms have 100 docs each on every shard
for (int i = 0; i < 100; i++) {
for (int j = 0; j < 5; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 13 - i));
shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 3 + i));
shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", i * 7 + j));
}
}
// on both shard0 & shard1 ("bbb_")
for (int i = 0; i < 50; i++) {
for (int j = 0; j < 20; j++) {
shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 0));
shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 1));
}
// distracting term appears only on shard2, 50 times
shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
}
// put "bbb0" on shard2 exactly once to sanity check refinement
shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0", "stat_i", -2));
// foo_s:tail is the only term with bar_s sub-pivot terms
for (int i = 0; i < 45; i++) {
// for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
// but the top 5 terms are ccc(0-4) -- 7 on each shard
// (4 docs each have junk terms)
String sub_term = (i < 35) ? "ccc" + (i % 5) : ((i < 41) ? "tailB" : "junkA");
shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
// shard2's top 5 sub-pivot terms are junk only it has with 8 docs each
// and 5 docs that use "tailB"
// NOTE: none of these get stat_i ! !
sub_term = (i < 40) ? "junkB" + (i % 5) : "tailB";
shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
}
// really long tail uncommon foo_s terms on shard2
for (int i = 0; i < 30; i++) {
shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz" + i));
}
commit();
SolrParams req = params("q", "*:*", "distrib", "false", "facet", "true", "facet.limit", "10", "facet.pivot", "foo_s,bar_s");
// sanity check that our expectations about each shard (non-distrib) are correct
PivotField pivot = null;
List<PivotField> pivots = null;
List<PivotField>[] shardPivots = new List[3];
shardPivots[0] = shard0.query(req).getFacetPivot().get("foo_s,bar_s");
shardPivots[1] = shard1.query(req).getFacetPivot().get("foo_s,bar_s");
shardPivots[2] = shard2.query(req).getFacetPivot().get("foo_s,bar_s");
// top 5 same on all shards
for (int i = 0; i < 3; i++) {
assertEquals(10, shardPivots[i].size());
for (int j = 0; j < 5; j++) {
pivot = shardPivots[i].get(j);
assertEquals(pivot.toString(), "aaa" + j, pivot.getValue());
assertEquals(pivot.toString(), 100, pivot.getCount());
}
}
// top 6-10 same on shard0 & shard1
for (int i = 0; i < 2; i++) {
for (int j = 5; j < 10; j++) {
pivot = shardPivots[i].get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
assertEquals(pivot.toString(), 50, pivot.getCount());
}
}
// 6-10 on shard2
assertEquals("junkA", shardPivots[2].get(5).getValue());
assertEquals(50, shardPivots[2].get(5).getCount());
assertEquals("tail", shardPivots[2].get(6).getValue());
assertEquals(45, shardPivots[2].get(6).getCount());
assertEquals("bbb0", shardPivots[2].get(7).getValue());
assertEquals(1, shardPivots[2].get(7).getCount());
for (int j = 8; j < 10; j++) {
pivot = shardPivots[2].get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
assertEquals(pivot.toString(), 1, pivot.getCount());
}
// check sub-shardPivots on "tail" from shard2
pivots = shardPivots[2].get(6).getPivot();
assertEquals(6, pivots.size());
for (int j = 0; j < 5; j++) {
pivot = pivots.get(j);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
assertEquals(pivot.toString(), 8, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals("tailB", pivot.getValue());
assertEquals(5, pivot.getCount());
// if we disable overrequesting, we don't find the long tail
pivots = queryServer(params("q", "*:*", "shards", getShardsString(), FacetParams.FACET_OVERREQUEST_COUNT, "0", FacetParams.FACET_OVERREQUEST_RATIO, "0", "facet", "true", "facet.limit", "6", "facet.pivot", "{!stats=sxy}foo_s,bar_s", "stats", "true", "stats.field", "{!tag=sxy}stat_i")).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
{
// even w/o the long tail, we should have still asked shard2 to refine bbb0
pivot = pivots.get(5);
assertTrue(pivot.toString(), pivot.getValue().equals("bbb0"));
assertEquals(pivot.toString(), 101, pivot.getCount());
// basic check of refined stats
FieldStatsInfo bbb0Stats = pivot.getFieldStatsInfo().get("stat_i");
assertEquals("stat_i", bbb0Stats.getName());
assertEquals(-2.0, bbb0Stats.getMin());
assertEquals(1.0, bbb0Stats.getMax());
assertEquals(101, (long) bbb0Stats.getCount());
assertEquals(0, (long) bbb0Stats.getMissing());
assertEquals(48.0, bbb0Stats.getSum());
assertEquals(0.475247524752475, (double) bbb0Stats.getMean(), 0.1E-7);
assertEquals(54.0, bbb0Stats.getSumOfSquares(), 0.1E-7);
assertEquals(0.55846323792, bbb0Stats.getStddev(), 0.1E-7);
}
// (even if we disable overrequesting on the sub-pivot)
for (ModifiableSolrParams q : new ModifiableSolrParams[] { params(), params("f.bar_s.facet.overrequest.ratio", "0", "f.bar_s.facet.overrequest.count", "0") }) {
q.add(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "facet.pivot", "foo_s,bar_s"));
pivots = queryServer(q).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(6, pivots.size());
pivot = pivots.get(0);
assertEquals(pivot.toString(), "tailB", pivot.getValue());
assertEquals(pivot.toString(), 17, pivot.getCount());
for (int i = 1; i < 6; i++) {
// ccc(0-4)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
}
// if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
// that we get the correct top5 including "tailB"
pivots = queryServer(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "f.bar_s.facet.limit", "5", "facet.pivot", "foo_s,bar_s")).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(5, pivots.size());
pivot = pivots.get(0);
assertEquals(pivot.toString(), "tailB", pivot.getValue());
assertEquals(pivot.toString(), 17, pivot.getCount());
for (int i = 1; i < 5; i++) {
// ccc(0-3)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
// however with a lower limit and overrequesting disabled,
// we're going to miss out on tailB
pivots = queryServer(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "f.bar_s.facet.overrequest.ratio", "0", "f.bar_s.facet.overrequest.count", "0", "f.bar_s.facet.limit", "5", "facet.pivot", "foo_s,bar_s")).getFacetPivot().get("foo_s,bar_s");
assertEquals(6, pivots.size());
for (int i = 0; i < 5; i++) {
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
assertEquals(pivot.toString(), 300, pivot.getCount());
}
pivot = pivots.get(5);
assertEquals(pivot.toString(), "tail", pivot.getValue());
assertEquals(pivot.toString(), 135, pivot.getCount());
// check the sub pivots
pivots = pivot.getPivot();
assertEquals(5, pivots.size());
for (int i = 0; i < 5; i++) {
// ccc(0-4)
pivot = pivots.get(i);
assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
assertEquals(pivot.toString(), 14, pivot.getCount());
}
doTestDeepPivotStats();
}
use of org.apache.solr.client.solrj.response.PivotField in project lucene-solr by apache.
the class DistributedFacetPivotWhiteBoxTest method doShardTestTopStats.
/**
 * Re-issues the kind of initial per-shard request that a distributed query sends
 * ({@code isShard=true}, {@code distrib=false}) and checks that the response carries
 * both the top level stats and stats on every top level pivot of
 * {@code place_t,company_t}.
 */
private void doShardTestTopStats() throws Exception {
  // NOTE: "wt" and "version" are not set on this request.
  final ModifiableSolrParams shardRequest = new ModifiableSolrParams(params(
      "facet", "true",
      "q", "*:*",
      "facet.pivot", "{!stats=s1}place_t,company_t",
      "start", "0",
      "rows", "0",
      "fsv", "true",
      "fl", "id,score",
      "stats", "true",
      "stats.field", "{!key=avg_price tag=s1}price_ti",
      "f.place_t.facet.limit", "160",
      "f.place_t.facet.pivot.mincount", "0",
      "f.company_t.facet.limit", "160",
      "f.company_t.facet.pivot.mincount", "0",
      "isShard", "true",
      "distrib", "false"));
  final QueryResponse response = queryServer(shardRequest);

  // top level stats must be present and populated
  assertNotNull("initial shard request should include non-null top level stats", response.getFieldStatsInfo());
  assertFalse("initial shard request should include top level stats", response.getFieldStatsInfo().isEmpty());

  // every top level pivot entry must carry its own stats
  final List<PivotField> topLevelPivots = response.getFacetPivot().get("place_t,company_t");
  for (int idx = 0; idx < topLevelPivots.size(); idx++) {
    final PivotField current = topLevelPivots.get(idx);
    assertFalse("pivot stats should not be empty in initial request", current.getFieldStatsInfo().isEmpty());
  }
}
Aggregations