Use of org.apache.solr.client.solrj.response.FieldStatsInfo in the Apache lucene-solr project.
Class DistributedFacetPivotLongTailTest, method test.
/**
 * Indexes a deliberately skewed set of {@code foo_s} / {@code bar_s} terms across three
 * shards and verifies pivot facet results on {@code foo_s,bar_s}.
 *
 * <p>The method first sanity checks each shard individually ({@code distrib=false}), then runs
 * distributed requests with overrequesting disabled, left at its default, and disabled only
 * for the {@code bar_s} sub-pivot, asserting which "long tail" terms do or do not make it into
 * the merged results. It finishes by delegating to {@code doTestDeepPivotStats()}.
 *
 * @throws Exception if indexing, committing or querying fails
 */
@Test
@ShardsFixed(num = 3)
public void test() throws Exception {
  final SolrClient shard0 = clients.get(0);
  final SolrClient shard1 = clients.get(1);
  final SolrClient shard2 = clients.get(2);

  // the 5 top foo_s terms have 100 docs each on every shard
  for (int i = 0; i < 100; i++) {
    for (int j = 0; j < 5; j++) {
      shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 13 - i));
      shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 3 + i));
      shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", i * 7 + j));
    }
  }

  // 20 foo_s terms ("bbb0" .. "bbb19") with 50 docs each
  // on both shard0 & shard1
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 20; j++) {
      shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 0));
      shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 1));
    }
    // distracting foo_s term appears only on shard2, 50 times
    shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
  }
  // put "bbb0" on shard2 exactly once to sanity check refinement
  shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0", "stat_i", -2));

  // the "tail" foo_s term appears in 45 docs on every shard;
  // foo_s:tail is the only term with bar_s sub-pivot terms
  for (int i = 0; i < 45; i++) {
    // for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
    // but the top 5 terms are ccc(0-4) -- 7 on each shard
    // (4 docs each have junk terms)
    String sub_term = (i < 35) ? "ccc" + (i % 5) : ((i < 41) ? "tailB" : "junkA");
    shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
    shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
    // shard2's top 5 sub-pivot terms are junk that only it has, with 8 docs each,
    // and 5 docs that use "tailB"
    // NOTE: none of these get stat_i ! !
    sub_term = (i < 40) ? "junkB" + (i % 5) : "tailB";
    shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
  }

  // really long tail uncommon foo_s terms on shard2
  for (int i = 0; i < 30; i++) {
    shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz" + i));
  }

  commit();

  SolrParams req = params("q", "*:*",
                          "distrib", "false",
                          "facet", "true",
                          "facet.limit", "10",
                          "facet.pivot", "foo_s,bar_s");

  // sanity check that our expectations about each shard (non-distrib) are correct
  PivotField pivot = null;
  List<PivotField> pivots = null;
  List<PivotField>[] shardPivots = new List[3];
  shardPivots[0] = shard0.query(req).getFacetPivot().get("foo_s,bar_s");
  shardPivots[1] = shard1.query(req).getFacetPivot().get("foo_s,bar_s");
  shardPivots[2] = shard2.query(req).getFacetPivot().get("foo_s,bar_s");

  // top 5 same on all shards
  for (int i = 0; i < 3; i++) {
    assertEquals(10, shardPivots[i].size());
    for (int j = 0; j < 5; j++) {
      pivot = shardPivots[i].get(j);
      assertEquals(pivot.toString(), "aaa" + j, pivot.getValue());
      assertEquals(pivot.toString(), 100, pivot.getCount());
    }
  }
  // top 6-10 same on shard0 & shard1
  for (int i = 0; i < 2; i++) {
    for (int j = 5; j < 10; j++) {
      pivot = shardPivots[i].get(j);
      assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
      assertEquals(pivot.toString(), 50, pivot.getCount());
    }
  }
  // 6-10 on shard2
  assertEquals("junkA", shardPivots[2].get(5).getValue());
  assertEquals(50, shardPivots[2].get(5).getCount());
  assertEquals("tail", shardPivots[2].get(6).getValue());
  assertEquals(45, shardPivots[2].get(6).getCount());
  assertEquals("bbb0", shardPivots[2].get(7).getValue());
  assertEquals(1, shardPivots[2].get(7).getCount());
  for (int j = 8; j < 10; j++) {
    pivot = shardPivots[2].get(j);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
    assertEquals(pivot.toString(), 1, pivot.getCount());
  }
  // check sub-shardPivots on "tail" from shard2
  pivots = shardPivots[2].get(6).getPivot();
  assertEquals(6, pivots.size());
  for (int j = 0; j < 5; j++) {
    pivot = pivots.get(j);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
    assertEquals(pivot.toString(), 8, pivot.getCount());
  }
  pivot = pivots.get(5);
  assertEquals("tailB", pivot.getValue());
  assertEquals(5, pivot.getCount());

  // if we disable overrequesting, we don't find the long tail
  pivots = queryServer(params("q", "*:*",
                              "shards", getShardsString(),
                              FacetParams.FACET_OVERREQUEST_COUNT, "0",
                              FacetParams.FACET_OVERREQUEST_RATIO, "0",
                              "facet", "true",
                              "facet.limit", "6",
                              "facet.pivot", "{!stats=sxy}foo_s,bar_s",
                              "stats", "true",
                              "stats.field", "{!tag=sxy}stat_i")
                       ).getFacetPivot().get("foo_s,bar_s");
  assertEquals(6, pivots.size());
  for (int i = 0; i < 5; i++) {
    pivot = pivots.get(i);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
    // 100 docs on each of the 3 shards
    assertEquals(pivot.toString(), 300, pivot.getCount());
  }
  {
    // even w/o the long tail, we should have still asked shard2 to refine bbb0
    pivot = pivots.get(5);
    // assertEquals (rather than assertTrue + equals) so a failure reports both values,
    // matching the other value checks in this method
    assertEquals(pivot.toString(), "bbb0", pivot.getValue());
    assertEquals(pivot.toString(), 101, pivot.getCount());
    // basic check of refined stats: bbb0 was indexed with stat_i=0 on 50 shard0 docs,
    // stat_i=1 on 50 shard1 docs and stat_i=-2 on the single shard2 doc,
    // hence count=101, sum=48 and sumOfSquares=54
    FieldStatsInfo bbb0Stats = pivot.getFieldStatsInfo().get("stat_i");
    assertEquals("stat_i", bbb0Stats.getName());
    assertEquals(-2.0, bbb0Stats.getMin());
    assertEquals(1.0, bbb0Stats.getMax());
    assertEquals(101, (long) bbb0Stats.getCount());
    assertEquals(0, (long) bbb0Stats.getMissing());
    assertEquals(48.0, bbb0Stats.getSum());
    assertEquals(0.475247524752475, (double) bbb0Stats.getMean(), 0.1E-7);
    assertEquals(54.0, bbb0Stats.getSumOfSquares(), 0.1E-7);
    assertEquals(0.55846323792, bbb0Stats.getStddev(), 0.1E-7);
  }

  // without the top-level overrequest overrides, we should find the correct top 6
  // including the long tail and its top sub-pivots
  // (even if we disable overrequesting on the sub-pivot)
  for (ModifiableSolrParams q : new ModifiableSolrParams[] {
      params(),
      params("f.bar_s.facet.overrequest.ratio", "0",
             "f.bar_s.facet.overrequest.count", "0") }) {
    q.add(params("q", "*:*",
                 "shards", getShardsString(),
                 "facet", "true",
                 "facet.limit", "6",
                 "facet.pivot", "foo_s,bar_s"));
    pivots = queryServer(q).getFacetPivot().get("foo_s,bar_s");
    assertEquals(6, pivots.size());
    for (int i = 0; i < 5; i++) {
      pivot = pivots.get(i);
      assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
      assertEquals(pivot.toString(), 300, pivot.getCount());
    }
    pivot = pivots.get(5);
    assertEquals(pivot.toString(), "tail", pivot.getValue());
    // 45 docs on each of the 3 shards
    assertEquals(pivot.toString(), 135, pivot.getCount());
    // check the sub pivots
    pivots = pivot.getPivot();
    assertEquals(6, pivots.size());
    pivot = pivots.get(0);
    assertEquals(pivot.toString(), "tailB", pivot.getValue());
    // 6 (shard0) + 6 (shard1) + 5 (shard2)
    assertEquals(pivot.toString(), 17, pivot.getCount());
    for (int i = 1; i < 6; i++) {
      // ccc(0-4): 7 docs each on shard0 and shard1
      pivot = pivots.get(i);
      assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
      assertEquals(pivot.toString(), 14, pivot.getCount());
    }
  }

  // if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
  // that we get the correct top5 including "tailB"
  pivots = queryServer(params("q", "*:*",
                              "shards", getShardsString(),
                              "facet", "true",
                              "facet.limit", "6",
                              "f.bar_s.facet.limit", "5",
                              "facet.pivot", "foo_s,bar_s")
                       ).getFacetPivot().get("foo_s,bar_s");
  assertEquals(6, pivots.size());
  for (int i = 0; i < 5; i++) {
    pivot = pivots.get(i);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
    assertEquals(pivot.toString(), 300, pivot.getCount());
  }
  pivot = pivots.get(5);
  assertEquals(pivot.toString(), "tail", pivot.getValue());
  assertEquals(pivot.toString(), 135, pivot.getCount());
  // check the sub pivots
  pivots = pivot.getPivot();
  assertEquals(5, pivots.size());
  pivot = pivots.get(0);
  assertEquals(pivot.toString(), "tailB", pivot.getValue());
  assertEquals(pivot.toString(), 17, pivot.getCount());
  for (int i = 1; i < 5; i++) {
    // the remaining 4 slots are "ccc" terms
    pivot = pivots.get(i);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
    assertEquals(pivot.toString(), 14, pivot.getCount());
  }

  // however with a lower limit and overrequesting disabled,
  // we're going to miss out on tailB
  pivots = queryServer(params("q", "*:*",
                              "shards", getShardsString(),
                              "facet", "true",
                              "facet.limit", "6",
                              "f.bar_s.facet.overrequest.ratio", "0",
                              "f.bar_s.facet.overrequest.count", "0",
                              "f.bar_s.facet.limit", "5",
                              "facet.pivot", "foo_s,bar_s")
                       ).getFacetPivot().get("foo_s,bar_s");
  assertEquals(6, pivots.size());
  for (int i = 0; i < 5; i++) {
    pivot = pivots.get(i);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
    assertEquals(pivot.toString(), 300, pivot.getCount());
  }
  pivot = pivots.get(5);
  assertEquals(pivot.toString(), "tail", pivot.getValue());
  assertEquals(pivot.toString(), 135, pivot.getCount());
  // check the sub pivots
  pivots = pivot.getPivot();
  assertEquals(5, pivots.size());
  for (int i = 0; i < 5; i++) {
    // ccc(0-4)
    pivot = pivots.get(i);
    assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
    assertEquals(pivot.toString(), 14, pivot.getCount());
  }

  doTestDeepPivotStats();
}
Use of org.apache.solr.client.solrj.response.FieldStatsInfo in the Apache lucene-solr project.
Class DistributedFacetPivotSmallAdvancedTest, method doTestDeepPivotStatsOnString.
/**
 * Issues a distributed pivot facet request on {@code place_t,company_t} with stats over the
 * string field {@code foo_s} (keyed as {@code avg_price} and linked through tag {@code s1}),
 * then checks value, count and min/max/count/missing stats for three place/company pairs:
 * dublin/microsoft, cardiff/polecat and krakow/fujitsu.
 *
 * @throws Exception if the query fails
 */
private void doTestDeepPivotStatsOnString() throws Exception {
  final String pivotKey = "place_t,company_t";
  final String statsKey = "avg_price";

  final SolrParams request = params(
      "q", "*:*",
      "rows", "0",
      "shards", getShardsString(),
      "facet", "true",
      "stats", "true",
      "facet.pivot", "{!stats=s1}place_t,company_t",
      "stats.field", "{!key=avg_price tag=s1}foo_s");
  final QueryResponse response = queryServer(new ModifiableSolrParams(request));
  final List<PivotField> places = response.getFacetPivot().get(pivotKey);

  // dublin -> microsoft
  final PivotField dublin = places.get(0);
  assertEquals("dublin", dublin.getValue());
  assertEquals(4, dublin.getCount());

  final PivotField dublinMicrosoft = dublin.getPivot().get(0);
  assertEquals("microsoft", dublinMicrosoft.getValue());
  assertEquals(4, dublinMicrosoft.getCount());

  final FieldStatsInfo dublinMicrosoftStats = dublinMicrosoft.getFieldStatsInfo().get(statsKey);
  assertEquals("aaa", dublinMicrosoftStats.getMin());
  assertEquals("bbb", dublinMicrosoftStats.getMax());
  assertEquals(4, (long) dublinMicrosoftStats.getCount());
  assertEquals(0, (long) dublinMicrosoftStats.getMissing());

  // cardiff -> polecat
  final PivotField cardiff = places.get(2);
  assertEquals("cardiff", cardiff.getValue());
  assertEquals(3, cardiff.getCount());

  final PivotField cardiffPolecat = cardiff.getPivot().get(0);
  assertEquals("polecat", cardiffPolecat.getValue());
  assertEquals(3, cardiffPolecat.getCount());

  final FieldStatsInfo cardiffPolecatStats = cardiffPolecat.getFieldStatsInfo().get(statsKey);
  assertEquals("aaa", cardiffPolecatStats.getMin());
  assertEquals("bbb", cardiffPolecatStats.getMax());
  assertEquals(3, (long) cardiffPolecatStats.getCount());
  assertEquals(0, (long) cardiffPolecatStats.getMissing());

  // krakow -> fujitsu (a single doc, so min and max coincide)
  final PivotField krakow = places.get(3);
  assertEquals("krakow", krakow.getValue());
  assertEquals(3, krakow.getCount());

  final PivotField krakowFujitsu = krakow.getPivot().get(3);
  assertEquals("fujitsu", krakowFujitsu.getValue());
  assertEquals(1, krakowFujitsu.getCount());

  final FieldStatsInfo krakowFujitsuStats = krakowFujitsu.getFieldStatsInfo().get(statsKey);
  assertEquals("aaa", krakowFujitsuStats.getMin());
  assertEquals("aaa", krakowFujitsuStats.getMax());
  assertEquals(1, (long) krakowFujitsuStats.getCount());
  assertEquals(0, (long) krakowFujitsuStats.getMissing());
}
Aggregations