Search in sources :

Example 11 with FieldStatsInfo

use of org.apache.solr.client.solrj.response.FieldStatsInfo in project lucene-solr by apache.

From the class DistributedFacetPivotLongTailTest, method test.

/**
 * Exercises distributed pivot faceting on {@code foo_s,bar_s} against data that is
 * deliberately spread unevenly over three shards.
 *
 * <p>The test proceeds in phases:
 * <ol>
 *   <li>index documents on each of the three clients so that some terms are common
 *       everywhere ("aaa*"), some only on two shards ("bbb*"), and some form a
 *       "long tail" ("tail", "zzz*", "junk*");</li>
 *   <li>query each shard with {@code distrib=false} to confirm the per-shard
 *       expectations the rest of the test relies on;</li>
 *   <li>run distributed queries with different {@code facet.limit} and
 *       overrequest settings and assert which terms and counts come back;</li>
 *   <li>delegate to {@code doTestDeepPivotStats()}.</li>
 * </ol>
 *
 * @throws Exception if any of the calls made by this test throws
 */
@Test
@ShardsFixed(num = 3)
public void test() throws Exception {
    // NOTE(review): relies on clients holding at least three entries, presumably
    // provisioned by @ShardsFixed(num = 3) -- confirm against the test base class.
    final SolrClient shard0 = clients.get(0);
    final SolrClient shard1 = clients.get(1);
    final SolrClient shard2 = clients.get(2);
    // the 5 top foo_s terms ("aaa0".."aaa4") have 100 docs each on every shard;
    // stat_i differs per shard and per doc
    for (int i = 0; i < 100; i++) {
        for (int j = 0; j < 5; j++) {
            shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 13 - i));
            shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", j * 3 + i));
            shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa" + j, "stat_i", i * 7 + j));
        }
    }
    // 20 foo_s terms ("bbb0".."bbb19") with 50 docs each on both shard0 & shard1;
    // stat_i is always 0 on shard0 and always 1 on shard1
    for (int i = 0; i < 50; i++) {
        for (int j = 0; j < 20; j++) {
            shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 0));
            shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb" + j, "stat_i", 1));
        }
        // distracting term "junkA" appears only on shard2, 50 times (no stat_i)
        shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
    }
    // put "bbb0" on shard2 exactly once to sanity check refinement
    shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0", "stat_i", -2));
    // foo_s:tail is the only term with bar_s sub-pivot terms
    for (int i = 0; i < 45; i++) {
        // for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
        // but the top 5 terms are ccc(0-4) -- 7 on each shard
        // (4 docs each have junk terms)
        String sub_term = (i < 35) ? "ccc" + (i % 5) : ((i < 41) ? "tailB" : "junkA");
        shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
        shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term, "stat_i", i));
        // shard2's top 5 sub-pivot terms are junk ("junkB0".."junkB4") that only it
        // has, with 8 docs each, plus 5 docs that use "tailB"
        // NOTE: none of these get stat_i ! !
        sub_term = (i < 40) ? "junkB" + (i % 5) : "tailB";
        shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
    }
    // really long tail uncommon foo_s terms on shard2 ("zzz0".."zzz29", one doc each)
    for (int i = 0; i < 30; i++) {
        shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz" + i));
    }
    commit();
    SolrParams req = params("q", "*:*", "distrib", "false", "facet", "true", "facet.limit", "10", "facet.pivot", "foo_s,bar_s");
    // sanity check that our expectations about each shard (non-distrib) are correct
    // (pivot and pivots are scratch variables reused throughout the rest of the test)
    PivotField pivot = null;
    List<PivotField> pivots = null;
    List<PivotField>[] shardPivots = new List[3];
    shardPivots[0] = shard0.query(req).getFacetPivot().get("foo_s,bar_s");
    shardPivots[1] = shard1.query(req).getFacetPivot().get("foo_s,bar_s");
    shardPivots[2] = shard2.query(req).getFacetPivot().get("foo_s,bar_s");
    // top 5 same on all shards
    for (int i = 0; i < 3; i++) {
        assertEquals(10, shardPivots[i].size());
        for (int j = 0; j < 5; j++) {
            pivot = shardPivots[i].get(j);
            assertEquals(pivot.toString(), "aaa" + j, pivot.getValue());
            assertEquals(pivot.toString(), 100, pivot.getCount());
        }
    }
    // top 6-10 same on shard0 & shard1
    for (int i = 0; i < 2; i++) {
        for (int j = 5; j < 10; j++) {
            pivot = shardPivots[i].get(j);
            assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
            assertEquals(pivot.toString(), 50, pivot.getCount());
        }
    }
    // 6-10 on shard2
    assertEquals("junkA", shardPivots[2].get(5).getValue());
    assertEquals(50, shardPivots[2].get(5).getCount());
    assertEquals("tail", shardPivots[2].get(6).getValue());
    assertEquals(45, shardPivots[2].get(6).getCount());
    assertEquals("bbb0", shardPivots[2].get(7).getValue());
    assertEquals(1, shardPivots[2].get(7).getCount());
    for (int j = 8; j < 10; j++) {
        pivot = shardPivots[2].get(j);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
        assertEquals(pivot.toString(), 1, pivot.getCount());
    }
    // check sub-shardPivots on "tail" from shard2
    pivots = shardPivots[2].get(6).getPivot();
    assertEquals(6, pivots.size());
    for (int j = 0; j < 5; j++) {
        pivot = pivots.get(j);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
        assertEquals(pivot.toString(), 8, pivot.getCount());
    }
    pivot = pivots.get(5);
    assertEquals("tailB", pivot.getValue());
    assertEquals(5, pivot.getCount());
    // if we disable overrequesting, we don't find the long tail
    pivots = queryServer(params("q", "*:*", "shards", getShardsString(), FacetParams.FACET_OVERREQUEST_COUNT, "0", FacetParams.FACET_OVERREQUEST_RATIO, "0", "facet", "true", "facet.limit", "6", "facet.pivot", "{!stats=sxy}foo_s,bar_s", "stats", "true", "stats.field", "{!tag=sxy}stat_i")).getFacetPivot().get("foo_s,bar_s");
    assertEquals(6, pivots.size());
    for (int i = 0; i < 5; i++) {
        pivot = pivots.get(i);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
        // 300 = 100 docs on each of the 3 shards
        assertEquals(pivot.toString(), 300, pivot.getCount());
    }
    {
        // even w/o the long tail, we should have still asked shard2 to refine bbb0
        pivot = pivots.get(5);
        assertTrue(pivot.toString(), pivot.getValue().equals("bbb0"));
        // 101 = 50 (shard0) + 50 (shard1) + 1 (shard2)
        assertEquals(pivot.toString(), 101, pivot.getCount());
        // basic check of refined stats
        // indexed stat_i values for bbb0: 50 x 0 (shard0), 50 x 1 (shard1), 1 x -2 (shard2)
        FieldStatsInfo bbb0Stats = pivot.getFieldStatsInfo().get("stat_i");
        assertEquals("stat_i", bbb0Stats.getName());
        assertEquals(-2.0, bbb0Stats.getMin());
        assertEquals(1.0, bbb0Stats.getMax());
        assertEquals(101, (long) bbb0Stats.getCount());
        assertEquals(0, (long) bbb0Stats.getMissing());
        // sum = 50*0 + 50*1 + (-2) = 48
        assertEquals(48.0, bbb0Stats.getSum());
        // mean = 48 / 101
        assertEquals(0.475247524752475, (double) bbb0Stats.getMean(), 0.1E-7);
        // sum of squares = 50*0 + 50*1 + 4 = 54
        assertEquals(54.0, bbb0Stats.getSumOfSquares(), 0.1E-7);
        assertEquals(0.55846323792, bbb0Stats.getStddev(), 0.1E-7);
    }
    // with default overrequesting on foo_s, "tail" is found
    // (even if we disable overrequesting on the sub-pivot)
    for (ModifiableSolrParams q : new ModifiableSolrParams[] { params(), params("f.bar_s.facet.overrequest.ratio", "0", "f.bar_s.facet.overrequest.count", "0") }) {
        q.add(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "facet.pivot", "foo_s,bar_s"));
        pivots = queryServer(q).getFacetPivot().get("foo_s,bar_s");
        assertEquals(6, pivots.size());
        for (int i = 0; i < 5; i++) {
            pivot = pivots.get(i);
            assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
            assertEquals(pivot.toString(), 300, pivot.getCount());
        }
        pivot = pivots.get(5);
        assertEquals(pivot.toString(), "tail", pivot.getValue());
        // 135 = 45 "tail" docs on each of the 3 shards
        assertEquals(pivot.toString(), 135, pivot.getCount());
        // check the sub pivots
        pivots = pivot.getPivot();
        assertEquals(6, pivots.size());
        pivot = pivots.get(0);
        assertEquals(pivot.toString(), "tailB", pivot.getValue());
        // 17 = 6 (shard0) + 6 (shard1) + 5 (shard2)
        assertEquals(pivot.toString(), 17, pivot.getCount());
        for (int i = 1; i < 6; i++) {
            // ccc(0-4): 14 = 7 (shard0) + 7 (shard1), none on shard2
            pivot = pivots.get(i);
            assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
            assertEquals(pivot.toString(), 14, pivot.getCount());
        }
    }
    // if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
    // that we get the correct top5 including "tailB"
    pivots = queryServer(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "f.bar_s.facet.limit", "5", "facet.pivot", "foo_s,bar_s")).getFacetPivot().get("foo_s,bar_s");
    assertEquals(6, pivots.size());
    for (int i = 0; i < 5; i++) {
        pivot = pivots.get(i);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
        assertEquals(pivot.toString(), 300, pivot.getCount());
    }
    pivot = pivots.get(5);
    assertEquals(pivot.toString(), "tail", pivot.getValue());
    assertEquals(pivot.toString(), 135, pivot.getCount());
    // check the sub pivots
    pivots = pivot.getPivot();
    assertEquals(5, pivots.size());
    pivot = pivots.get(0);
    assertEquals(pivot.toString(), "tailB", pivot.getValue());
    assertEquals(pivot.toString(), 17, pivot.getCount());
    for (int i = 1; i < 5; i++) {
        // ccc(0-3)
        pivot = pivots.get(i);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
        assertEquals(pivot.toString(), 14, pivot.getCount());
    }
    // however with a lower limit and overrequesting disabled,
    // we're going to miss out on tailB
    pivots = queryServer(params("q", "*:*", "shards", getShardsString(), "facet", "true", "facet.limit", "6", "f.bar_s.facet.overrequest.ratio", "0", "f.bar_s.facet.overrequest.count", "0", "f.bar_s.facet.limit", "5", "facet.pivot", "foo_s,bar_s")).getFacetPivot().get("foo_s,bar_s");
    assertEquals(6, pivots.size());
    for (int i = 0; i < 5; i++) {
        pivot = pivots.get(i);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
        assertEquals(pivot.toString(), 300, pivot.getCount());
    }
    pivot = pivots.get(5);
    assertEquals(pivot.toString(), "tail", pivot.getValue());
    assertEquals(pivot.toString(), 135, pivot.getCount());
    // check the sub pivots
    pivots = pivot.getPivot();
    assertEquals(5, pivots.size());
    for (int i = 0; i < 5; i++) {
        // ccc(0-4)
        pivot = pivots.get(i);
        assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
        assertEquals(pivot.toString(), 14, pivot.getCount());
    }
    doTestDeepPivotStats();
}
Also used : FieldStatsInfo(org.apache.solr.client.solrj.response.FieldStatsInfo) SolrClient(org.apache.solr.client.solrj.SolrClient) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) PivotField(org.apache.solr.client.solrj.response.PivotField) List(java.util.List) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 12 with FieldStatsInfo

use of org.apache.solr.client.solrj.response.FieldStatsInfo in project lucene-solr by apache.

From the class DistributedFacetPivotSmallAdvancedTest, method doTestDeepPivotStatsOnString.

/**
 * Runs a distributed {@code place_t,company_t} pivot with stats on the string
 * field {@code foo_s} (exposed under the key {@code avg_price}) and checks the
 * min / max / count / missing values attached to three nested pivot entries:
 * dublin/microsoft, cardiff/polecat and krakow/fujitsu.
 *
 * @throws Exception if building or executing the query throws
 */
private void doTestDeepPivotStatsOnString() throws Exception {
    final String pivotKey = "place_t,company_t";
    final String statsKey = "avg_price";

    final SolrParams request = params(
        "q", "*:*",
        "rows", "0",
        "shards", getShardsString(),
        "facet", "true",
        "stats", "true",
        "facet.pivot", "{!stats=s1}place_t,company_t",
        "stats.field", "{!key=avg_price tag=s1}foo_s");
    final QueryResponse response = queryServer(new ModifiableSolrParams(request));
    final List<PivotField> byPlace = response.getFacetPivot().get(pivotKey);

    {
        // dublin (first place) -> microsoft (its first company)
        final PivotField place = byPlace.get(0);
        assertEquals("dublin", place.getValue());
        assertEquals(4, place.getCount());

        final PivotField company = place.getPivot().get(0);
        assertEquals("microsoft", company.getValue());
        assertEquals(4, company.getCount());

        final FieldStatsInfo stats = company.getFieldStatsInfo().get(statsKey);
        assertEquals("aaa", stats.getMin());
        assertEquals("bbb", stats.getMax());
        assertEquals(4, (long) stats.getCount());
        assertEquals(0, (long) stats.getMissing());
    }

    {
        // cardiff (third place) -> polecat (its first company)
        final PivotField place = byPlace.get(2);
        assertEquals("cardiff", place.getValue());
        assertEquals(3, place.getCount());

        final PivotField company = place.getPivot().get(0);
        assertEquals("polecat", company.getValue());
        assertEquals(3, company.getCount());

        final FieldStatsInfo stats = company.getFieldStatsInfo().get(statsKey);
        assertEquals("aaa", stats.getMin());
        assertEquals("bbb", stats.getMax());
        assertEquals(3, (long) stats.getCount());
        assertEquals(0, (long) stats.getMissing());
    }

    {
        // krakow (fourth place) -> fujitsu (its fourth company); a single doc,
        // so min and max are expected to coincide
        final PivotField place = byPlace.get(3);
        assertEquals("krakow", place.getValue());
        assertEquals(3, place.getCount());

        final PivotField company = place.getPivot().get(3);
        assertEquals("fujitsu", company.getValue());
        assertEquals(1, company.getCount());

        final FieldStatsInfo stats = company.getFieldStatsInfo().get(statsKey);
        assertEquals("aaa", stats.getMin());
        assertEquals("aaa", stats.getMax());
        assertEquals(1, (long) stats.getCount());
        assertEquals(0, (long) stats.getMissing());
    }
}
Also used : FieldStatsInfo(org.apache.solr.client.solrj.response.FieldStatsInfo) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) PivotField(org.apache.solr.client.solrj.response.PivotField) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams)

Aggregations

FieldStatsInfo (org.apache.solr.client.solrj.response.FieldStatsInfo) 12; QueryResponse (org.apache.solr.client.solrj.response.QueryResponse) 9; PivotField (org.apache.solr.client.solrj.response.PivotField) 8; ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams) 7; SolrParams (org.apache.solr.common.params.SolrParams) 6; Test (org.junit.Test) 4; HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient) 3; ArrayList (java.util.ArrayList) 2; List (java.util.List) 2; SolrClient (org.apache.solr.client.solrj.SolrClient) 2; ErrorTrackingConcurrentUpdateSolrClient (org.apache.solr.client.solrj.embedded.SolrExampleStreamingTest.ErrorTrackingConcurrentUpdateSolrClient) 2; SolrInputDocument (org.apache.solr.common.SolrInputDocument) 2; NamedList (org.apache.solr.common.util.NamedList) 2; StringContains.containsString (org.junit.internal.matchers.StringContains.containsString) 2; Iterator (java.util.Iterator) 1; Map (java.util.Map) 1; JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner) 1; EnumFieldValue (org.apache.solr.common.EnumFieldValue) 1; SolrDocumentList (org.apache.solr.common.SolrDocumentList) 1; SolrException (org.apache.solr.common.SolrException) 1