Search in sources:

Example 26 with LindenResult

use of com.xiaomi.linden.thrift.common.LindenResult in project linden by XiaoMi.

the class TestLindenMMSeg4jAnalyzer method testQueryString.

/**
 * Runs term, phrase and snippet queries against the {@code title} field and checks the total
 * hit count of each one, plus the highlighted snippet of the top hit for the snippet queries.
 */
@Test
public void testQueryString() throws IOException {
    // Unquoted term query.
    String termBql = "select * from linden by query is \"title:刘华清\"";
    LindenResult termResult = lindenCore.search(bqlCompiler.compile(termBql).getSearchRequest());
    Assert.assertEquals(3, termResult.getTotalHits());

    // Phrase queries: the query text is double-quoted inside the field clause.
    String shortPhraseBql = "select * from linden by query is 'title:\"刘华清\"'";
    LindenResult shortPhraseResult = lindenCore.search(bqlCompiler.compile(shortPhraseBql).getSearchRequest());
    Assert.assertEquals(2, shortPhraseResult.getTotalHits());

    String longPhraseBql = "select * from linden by query is 'title:\"海军上将刘华清\"'";
    LindenResult longPhraseResult = lindenCore.search(bqlCompiler.compile(longPhraseBql).getSearchRequest());
    Assert.assertEquals(2, longPhraseResult.getTotalHits());

    // Snippet queries: matched fragments of the title come back wrapped in <b>...</b>.
    String firstSnippetBql = "select * from linden by query is 'title:(上将刘华清中国龙)' snippet title";
    LindenResult firstSnippetResult = lindenCore.search(bqlCompiler.compile(firstSnippetBql).getSearchRequest());
    Assert.assertEquals(3, firstSnippetResult.getTotalHits());
    Assert.assertEquals(
        "海军<b>上将</b><b>刘</b><b>华</b><b>清</b>!!! <b>中国</b> 人民 李玉洁 尚铁<b>龙</b> 胡晓光",
        firstSnippetResult.getHits().get(0).getSnippets().get("title").getSnippet());

    String secondSnippetBql = "select * from linden by query is 'title:(海军中国铁龙)' snippet title";
    LindenResult secondSnippetResult = lindenCore.search(bqlCompiler.compile(secondSnippetBql).getSearchRequest());
    Assert.assertEquals(2, secondSnippetResult.getTotalHits());
    Assert.assertEquals(
        "<b>海军</b>上将刘华清!!! <b>中国</b> 人民 李玉洁 尚<b>铁</b><b>龙</b> 胡晓光",
        secondSnippetResult.getHits().get(0).getSnippets().get("title").getSnippet());
}
Also used : LindenResult(com.xiaomi.linden.thrift.common.LindenResult) LindenSearchRequest(com.xiaomi.linden.thrift.common.LindenSearchRequest) Test(org.junit.Test)

Example 27 with LindenResult

use of com.xiaomi.linden.thrift.common.LindenResult in project linden by XiaoMi.

the class TestFlexibleQuery method testDynamicField.

/**
 * Checks flexible queries that address fields with an explicit type suffix
 * ({@code text.STRING}, {@code text.string^2}, {@code title.STRING^2}):
 * <ul>
 *   <li>without a boost, the hits equal those of the plain {@code text:(hello world)} query;</li>
 *   <li>with a field boost of 2, each of the four hit scores is twice the baseline score;</li>
 *   <li>with {@code title} boosted by 2, the hits come back in the id order 0, 2, 1, 3.</li>
 * </ul>
 */
@Test
public void testDynamicField() throws IOException {
    // Tail shared by every flexible query below: a model that sums the score of each matched
    // (field, term) pair, followed by the END and Limit clauses.
    final String sumModelTail = "   float sum = 0f;\n"
        + "    for (int i = 0; i < getFieldLength(); ++i) {\n"
        + "        for (int j = 0; j < getTermLength(); ++j) {\n"
        + "            if (isMatched(i, j)) {\n"
        + "                sum += getScore(i, j);\n"
        + "            }\n"
        + "        } \n"
        + "    } \n"
        + "    return sum;\n"
        + "END\n"
        + "Limit 0, 10\n";

    // Baseline: ordinary query on the text field.
    String baselineBql = "SELECT * FROM LINDEN BY query is 'text:(hello world)'";
    LindenResult baseline = lindenCore.search(bqlCompiler.compile(baselineBql).getSearchRequest());
    Assert.assertEquals(4, baseline.getHitsSize());

    // Same terms through flexible_query on the typed field name: hits must be equal.
    String plainBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world\" in (text.STRING) USING MODEL simplest BEGIN\n"
        + sumModelTail;
    LindenResult flexible = lindenCore.search(bqlCompiler.compile(plainBql).getSearchRequest());
    Assert.assertEquals(4, flexible.getHitsSize());
    Assert.assertEquals(baseline.getHits(), flexible.getHits());

    // Field boost is 2: every score doubles relative to the baseline.
    String boostedBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world\" in (text.string^2) USING MODEL fieldBoost1 BEGIN\n"
        + sumModelTail;
    flexible = lindenCore.search(bqlCompiler.compile(boostedBql).getSearchRequest());
    Assert.assertEquals(4, flexible.getHitsSize());
    for (int rank = 0; rank < 4; ++rank) {
        Assert.assertEquals(
            baseline.getHits().get(rank).getScore() * 2,
            flexible.getHits().get(rank).getScore(),
            0.001);
    }

    // Two fields, title boosted by 2: check the resulting hit order.
    String twoFieldBql = "SELECT * FROM LINDEN BY flexible_query is \"hello lucene\" in (text.STRING, title.STRING^2) USING MODEL fieldBoost2 BEGIN\n"
        + sumModelTail;
    flexible = lindenCore.search(bqlCompiler.compile(twoFieldBql).getSearchRequest());
    Assert.assertEquals(4, flexible.getHitsSize());
    String[] expectedIds = {"0", "2", "1", "3"};
    for (int rank = 0; rank < expectedIds.length; ++rank) {
        Assert.assertEquals(expectedIds[rank], flexible.getHits().get(rank).id);
    }
}
Also used : LindenResult(com.xiaomi.linden.thrift.common.LindenResult) LindenSearchRequest(com.xiaomi.linden.thrift.common.LindenSearchRequest) Test(org.junit.Test)

Example 28 with LindenResult

use of com.xiaomi.linden.thrift.common.LindenResult in project linden by XiaoMi.

the class TestFlexibleQuery method testMatchRatio.

/**
 * Checks how the match-ratio clause of a flexible query on the {@code title} field limits
 * the hits:
 * <ul>
 *   <li>{@code full_match} for "hello lucene" returns exactly one hit, id 0;</li>
 *   <li>{@code match 1} for "hello world lucene" returns no hits;</li>
 *   <li>{@code match 0.5} for "hello world lucene" returns two hits, ids 0 then 1.</li>
 * </ul>
 */
@Test
public void testMatchRatio() throws IOException {
    // Tail shared by the three queries: a model summing the score of every matched
    // (field, term) pair, followed by the END and Limit clauses.
    final String sumModelTail = "   float sum = 0f;\n"
        + "    for (int i = 0; i < getFieldLength(); ++i) {\n"
        + "        for (int j = 0; j < getTermLength(); ++j) {\n"
        + "            if (isMatched(i, j)) {\n"
        + "                sum += getScore(i, j);\n"
        + "            }\n"
        + "        } \n"
        + "    } \n"
        + "    return sum;\n"
        + "END\n"
        + "Limit 0, 10\n";

    String bql = "SELECT * FROM LINDEN BY flexible_query is \"hello lucene\" full_match in (title) USING MODEL simplest BEGIN\n"
        + sumModelTail;
    LindenSearchRequest request = bqlCompiler.compile(bql).getSearchRequest();
    LindenResult result = lindenCore.search(request);
    Assert.assertEquals(1, result.getHitsSize());
    Assert.assertEquals("0", result.getHits().get(0).id);

    bql = "SELECT * FROM LINDEN BY flexible_query is \"hello world lucene\" match 1 in (title) USING MODEL simplest BEGIN\n"
        + sumModelTail;
    request = bqlCompiler.compile(bql).getSearchRequest();
    result = lindenCore.search(request);
    Assert.assertEquals(0, result.getHitsSize());

    // The original printed this query to stdout here; that debug output was dropped because
    // the assertions below already identify a failing query.
    bql = "SELECT * FROM LINDEN BY flexible_query is \"hello world lucene\" match 0.5 in (title) USING MODEL simplest BEGIN\n"
        + sumModelTail;
    request = bqlCompiler.compile(bql).getSearchRequest();
    result = lindenCore.search(request);
    Assert.assertEquals(2, result.getHitsSize());
    Assert.assertEquals("0", result.getHits().get(0).id);
    Assert.assertEquals("1", result.getHits().get(1).id);
}
Also used : LindenResult(com.xiaomi.linden.thrift.common.LindenResult) LindenSearchRequest(com.xiaomi.linden.thrift.common.LindenSearchRequest) Test(org.junit.Test)

Example 29 with LindenResult

use of com.xiaomi.linden.thrift.common.LindenResult in project linden by XiaoMi.

the class TestFlexibleQuery method testBasic.

/**
 * End-to-end checks of flexible queries against the plain query syntax:
 * <ol>
 *   <li>an unboosted flexible query on {@code text} returns the same hits as the plain query;</li>
 *   <li>a field boost of 2 doubles each of the four hit scores;</li>
 *   <li>boosting {@code title} in a two-field query changes the hit order (0, 2, 1, 3)
 *       compared with the unboosted two-field query (0, 1, 2, 3);</li>
 *   <li>a term boost ({@code world^3}) gives the same hits in both syntaxes;</li>
 *   <li>a model adding 0.5 per continuous match reorders the hits and raises the scores by
 *       the expected amounts, with and without explanations enabled.</li>
 * </ol>
 * The explanation of each hit of the last query is printed at the end.
 */
@Test
public void testBasic() throws IOException {
    // Tail shared by the simple flexible queries: a model summing the score of every matched
    // (field, term) pair, followed by the END and Limit clauses.
    final String sumModelTail = "   float sum = 0f;\n"
        + "    for (int i = 0; i < getFieldLength(); ++i) {\n"
        + "        for (int j = 0; j < getTermLength(); ++j) {\n"
        + "            if (isMatched(i, j)) {\n"
        + "                sum += getScore(i, j);\n"
        + "            }\n"
        + "        } \n"
        + "    } \n"
        + "    return sum;\n"
        + "END\n"
        + "Limit 0, 10\n";

    // --- 1. Flexible query vs. plain query --------------------------------------------------
    String plainBql = "SELECT * FROM LINDEN BY query is 'text:(hello world)'";
    LindenResult reference = lindenCore.search(bqlCompiler.compile(plainBql).getSearchRequest());
    Assert.assertEquals(4, reference.getHitsSize());

    String flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world\" in (text) USING MODEL simplest BEGIN\n"
        + sumModelTail;
    LindenResult actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    Assert.assertEquals(reference.getHits(), actual.getHits());

    // --- 2. Field boost is 2 ----------------------------------------------------------------
    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world\" in (text^2) USING MODEL fieldBoost1 BEGIN\n"
        + sumModelTail;
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    for (int rank = 0; rank < 4; ++rank) {
        Assert.assertEquals(
            reference.getHits().get(rank).getScore() * 2,
            actual.getHits().get(rank).getScore(),
            0.001);
    }

    // --- 3. Two fields, with and without a boost on title -----------------------------------
    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello lucene\" in (text, title^2) USING MODEL fieldBoost2 BEGIN\n"
        + sumModelTail;
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    String[] boostedOrder = {"0", "2", "1", "3"};
    for (int rank = 0; rank < boostedOrder.length; ++rank) {
        Assert.assertEquals(boostedOrder[rank], actual.getHits().get(rank).id);
    }

    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello lucene\" in (text, title) USING MODEL noBoost BEGIN\n"
        + sumModelTail;
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    String[] unboostedOrder = {"0", "1", "2", "3"};
    for (int rank = 0; rank < unboostedOrder.length; ++rank) {
        Assert.assertEquals(unboostedOrder[rank], actual.getHits().get(rank).id);
    }

    // --- 4. Term "text:world" boost is 3 ----------------------------------------------------
    plainBql = "SELECT * FROM LINDEN BY query is 'text:(hello world^3)'";
    reference = lindenCore.search(bqlCompiler.compile(plainBql).getSearchRequest());
    Assert.assertEquals(4, reference.getHitsSize());

    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world^3\" in (text) USING MODEL termBoost BEGIN\n"
        + sumModelTail;
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    Assert.assertEquals(reference.getHits(), actual.getHits());

    // --- 5. Continuous-match bonus ----------------------------------------------------------
    plainBql = "SELECT * FROM LINDEN BY query is 'text:(hello world lucene)'";
    reference = lindenCore.search(bqlCompiler.compile(plainBql).getSearchRequest());
    Assert.assertEquals(4, reference.getHitsSize());
    String[] referenceOrder = {"1", "3", "0", "2"};
    for (int rank = 0; rank < referenceOrder.length; ++rank) {
        Assert.assertEquals(referenceOrder[rank], reference.getHits().get(rank).id);
    }

    // Expectations shared by the two continuous-match queries below. The hit at rank r of the
    // flexible result is compared with the hit at referenceRank[r] of the plain result, and
    // the score difference must equal expectedGain[r].
    String[] bonusOrder = {"3", "1", "0", "2"};
    int[] referenceRank = {1, 0, 2, 3};
    double[] expectedGain = {1.0, 0.5, 0.5, 0};

    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world lucene\" in (text) USING MODEL continuousMatchBoost BEGIN\n"
        + "   float sum = 0f;\n"
        + "   int continuousMatches = 0;\n"
        + "     for (int i = 0; i < getFieldLength(); ++i) {\n"
        + "      int lastMatechedTermIdx = Integer.MIN_VALUE;\n"
        + "      int[] lastPositions = null;\n"
        + "      int[] curPositions;\n"
        + "      for (int j = 0; j < getTermLength(); ++j) {\n"
        + "        if (isMatched(i, j)) {\n"
        + "          curPositions = positions(i, j);\n"
        + "          if (lastMatechedTermIdx + 1 == j) {\n"
        + "            for (int ii = 0; ii < lastPositions.length; ++ii)\n"
        + "              for (int jj = 0; jj < curPositions.length; ++jj) {\n"
        + "                if (lastPositions[ii] + 1 == curPositions[jj]) {\n"
        + "                  ++continuousMatches;\n"
        + "                }\n"
        + "              }\n"
        + "          }\n"
        + "          lastMatechedTermIdx = j;\n"
        + "          lastPositions = curPositions;\n"
        + "          sum += getScore(i, j);\n"
        + "        }\n"
        + "      }\n"
        + "    }\n"
        + "    sum  += continuousMatches * 0.5;\n"
        + "    return sum;\n"
        + "END\n"
        + "Limit 0, 10\n";
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    for (int rank = 0; rank < bonusOrder.length; ++rank) {
        Assert.assertEquals(bonusOrder[rank], actual.getHits().get(rank).id);
    }
    for (int rank = 0; rank < expectedGain.length; ++rank) {
        Assert.assertEquals(
            expectedGain[rank],
            actual.getHits().get(rank).score - reference.getHits().get(referenceRank[rank]).score,
            0.001);
    }

    // Same scoring, but the model also records explanations and the query asks for EXPLAIN.
    flexibleBql = "SELECT * FROM LINDEN BY flexible_query is \"hello world lucene\" in (text) USING MODEL continuousMatchBoostExplained BEGIN\n"
        + "   setRootExpl(\"Explanation of continuousMatchBoostExplained model:\");\n"
        + "   float sum = 0f;\n"
        + "   for (int i = 0; i < getFieldLength(); ++i) {\n"
        + "      int continuousMatches = 0;\n"
        + "      float fieldScore = 0f;\n"
        + "      int lastMatechedTermIdx = Integer.MIN_VALUE;\n"
        + "      int[] lastPositions = null;\n"
        + "      int[] curPositions;\n"
        + "      for (int j = 0; j < getTermLength(); ++j) {\n"
        + "        if (isMatched(i, j)) {\n"
        + "          curPositions = positions(i, j);\n"
        + "          if (lastMatechedTermIdx + 1 == j) {\n"
        + "            for (int ii = 0; ii < lastPositions.length; ++ii)\n"
        + "              for (int jj = 0; jj < curPositions.length; ++jj) {\n"
        + "                if (lastPositions[ii] + 1 == curPositions[jj]) {\n"
        + "                  ++continuousMatches;\n"
        + "                }\n"
        + "              }\n"
        + "          }\n"
        + "          lastMatechedTermIdx = j;\n"
        + "          lastPositions = curPositions;\n"
        + "          float termScore = getScore(i, j);\n"
        + "          fieldScore += termScore;\n"
        + "          addTermExpl(i, j, termScore, getExpl(\"%s is matched in %s field, positions are %s\", text(i, j), field(i, j), curPositions));\n"
        + "        }\n"
        + "      }\n"
        + "      fieldScore += continuousMatches * 0.5;\n"
        + "      addFieldExpl(i, fieldScore, getExpl(\"%d continuous matches in %s field\", continuousMatches, field(i, 0)));\n"
        + "      sum += fieldScore;\n"
        + "    }\n"
        + "    return sum;\n"
        + "END\n"
        + "EXPLAIN Limit 0, 10\n";
    actual = lindenCore.search(bqlCompiler.compile(flexibleBql).getSearchRequest());
    Assert.assertEquals(4, actual.getHitsSize());
    for (int rank = 0; rank < bonusOrder.length; ++rank) {
        Assert.assertEquals(bonusOrder[rank], actual.getHits().get(rank).id);
    }
    for (int rank = 0; rank < expectedGain.length; ++rank) {
        Assert.assertEquals(
            expectedGain[rank],
            actual.getHits().get(rank).score - reference.getHits().get(referenceRank[rank]).score,
            0.001);
    }

    // Dump the explanation of each of the four hits for manual inspection.
    for (int rank = 0; rank < 4; ++rank) {
        System.out.println("\nExplanation of Doc " + actual.getHits().get(rank).id);
        OutputExplanation(actual.getHits().get(rank).getExplanation(), "*");
    }
}
Also used : LindenResult(com.xiaomi.linden.thrift.common.LindenResult) LindenSearchRequest(com.xiaomi.linden.thrift.common.LindenSearchRequest) Test(org.junit.Test)

Example 30 with LindenResult

use of com.xiaomi.linden.thrift.common.LindenResult in project linden by XiaoMi.

the class TestGlobalIDF method testGlobalIdfV2.

/**
 * Checks the explanation produced for three {@code global_idf} variants of a flexible query
 * for "lucene" on {@code field1}. For each variant the test reads the description of the
 * first nested explanation detail of the top hit, and compares the text before the first
 * {@code " * "} with an expected number:
 * <ul>
 *   <li>{@code global_idf of (title, field1)}: "0.82"</li>
 *   <li>{@code global_idf of (field1, field2)}: "1.22"</li>
 *   <li>{@code global_idf} with no field list: "1.51"</li>
 * </ul>
 * NOTE(review): the leading number is presumably the idf factor; confirm against the
 * explanation format.
 */
@Test
public void testGlobalIdfV2() throws IOException {
    // Tail shared by the three queries: a constant-score model with explanations enabled.
    final String constantModelTail = "using model test\n"
        + "begin\n"
        + "    return 1;\n"
        + "end\n"
        + "source explain;";

    // JUnit's assertEquals takes (expected, actual); the expected literal goes first so a
    // failure reports the two values the right way round.
    String bql1 = "select * from linden by flexible_query is 'lucene' global_idf of (title, field1) in (field1)\n"
        + constantModelTail;
    LindenSearchRequest request1 = bqlCompiler.compile(bql1).getSearchRequest();
    LindenResult result1 = lindenCore.search(request1);
    String detail1 = result1.getHits().get(0).getExplanation().getDetails().get(0).getDetails().get(0).getDescription();
    Assert.assertEquals("0.82", detail1.split(" \\* ")[0]);

    String bql2 = "select * from linden by flexible_query is 'lucene' global_idf of (field1, field2) in (field1)\n"
        + constantModelTail;
    LindenSearchRequest request2 = bqlCompiler.compile(bql2).getSearchRequest();
    LindenResult result2 = lindenCore.search(request2);
    String detail2 = result2.getHits().get(0).getExplanation().getDetails().get(0).getDetails().get(0).getDescription();
    Assert.assertEquals("1.22", detail2.split(" \\* ")[0]);

    String bql3 = "select * from linden by flexible_query is 'lucene' global_idf in (field1)\n"
        + constantModelTail;
    LindenSearchRequest request3 = bqlCompiler.compile(bql3).getSearchRequest();
    LindenResult result3 = lindenCore.search(request3);
    String detail3 = result3.getHits().get(0).getExplanation().getDetails().get(0).getDetails().get(0).getDescription();
    Assert.assertEquals("1.51", detail3.split(" \\* ")[0]);
}
Also used : LindenResult(com.xiaomi.linden.thrift.common.LindenResult) LindenSearchRequest(com.xiaomi.linden.thrift.common.LindenSearchRequest) Test(org.junit.Test)

Aggregations

LindenResult (com.xiaomi.linden.thrift.common.LindenResult): 79; LindenSearchRequest (com.xiaomi.linden.thrift.common.LindenSearchRequest): 69; Test (org.junit.Test): 69; IOException (java.io.IOException): 7; LindenQuery (com.xiaomi.linden.thrift.common.LindenQuery): 5; JSONObject (com.alibaba.fastjson.JSONObject): 4; Stopwatch (com.google.common.base.Stopwatch): 4; MultiLindenCoreImpl (com.xiaomi.linden.core.search.MultiLindenCoreImpl): 3; LindenFlexibleQueryBuilder (com.xiaomi.linden.thrift.builder.query.LindenFlexibleQueryBuilder): 3; LindenDeleteRequest (com.xiaomi.linden.thrift.common.LindenDeleteRequest): 3; LindenHit (com.xiaomi.linden.thrift.common.LindenHit): 3; LindenRequest (com.xiaomi.linden.thrift.common.LindenRequest): 3; ArrayList (java.util.ArrayList): 3; List (java.util.List): 3; FlexibleQuery (com.xiaomi.linden.lucene.query.flexiblequery.FlexibleQuery): 2; Response (com.xiaomi.linden.thrift.common.Response): 2; BooleanQuery (org.apache.lucene.search.BooleanQuery): 2; DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery): 2; FilteredQuery (org.apache.lucene.search.FilteredQuery): 2; Query (org.apache.lucene.search.Query): 2