Search in sources :

Example 16 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class TokenizerTestCase method testExactMatchHeuristics.

/**
 * Tokenizes a series of query strings against a session that holds two indexes,
 * "testexact1" and "testexact2", on both of which {@code setExact(true, ...)} has been
 * called (with {@code null} and {@code "()/aa*::*&"} as the second argument, respectively),
 * and checks the leading tokens produced for each query.
 */
@Test
public void testExactMatchHeuristics() {
    Index exactWithNull = new Index("testexact1");
    exactWithNull.setExact(true, null);
    Index exactWithString = new Index("testexact2");
    exactWithString.setExact(true, "()/aa*::*&");

    IndexFacts indexFacts = new IndexFacts();
    indexFacts.addIndex("testsd", exactWithNull);
    indexFacts.addIndex("testsd", exactWithString);
    IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet());
    Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());

    // Mix of an unknown index prefix ("a:b"), parentheses and both configured indexes.
    assertLeadingTokens(tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "a"),
            new Token(COLON, ":"),
            new Token(WORD, "b"),
            new Token(SPACE, " "),
            new Token(LBRACE, "("),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(RBRACE, ")"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));

    // '*', '"' and '!' embedded in the middle of a testexact1 term.
    assertLeadingTokens(tokenizer.tokenize("testexact1:a*teens", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "a*teens"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo\"bar", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo\"bar"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo!bar", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo!bar"));

    // Trailing '!', '!!', '!100' and '*!100' followed by a space.
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo! ", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(EXCLAMATION, "!"),
            new Token(SPACE, " "));
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo!! ", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(EXCLAMATION, "!"),
            new Token(EXCLAMATION, "!"),
            new Token(SPACE, " "));
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo!100 ", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "100"),
            new Token(SPACE, " "));
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo*!100 ", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(STAR, "*"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "100"),
            new Token(SPACE, " "));

    // A space after the colon, with '*' on both sides of the term and a "!100" suffix.
    assertLeadingTokens(tokenizer.tokenize("testexact1: *\"foo bar\"*!100 ", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(STAR, "*"),
            new Token(WORD, "foo bar"),
            new Token(STAR, "*"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "100"),
            new Token(SPACE, " "));
    assertLeadingTokens(tokenizer.tokenize("testexact1: *\"foo bar\"*!100", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(STAR, "*"),
            new Token(WORD, "foo bar"),
            new Token(STAR, "*"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "100"));
    assertLeadingTokens(tokenizer.tokenize("testexact1: *foobar*!100", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(STAR, "*"),
            new Token(WORD, "foobar"),
            new Token(STAR, "*"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "100"));
    assertLeadingTokens(tokenizer.tokenize("testexact1: *foobar*!100!", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(STAR, "*"),
            new Token(WORD, "foobar*!100"),
            new Token(EXCLAMATION, "!"));

    // Parentheses inside the term, then quoted and space-prefixed variants of "foo".
    assertLeadingTokens(tokenizer.tokenize("testexact1:foo(bar)", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo(bar)"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:\"foo\"", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"));
    assertLeadingTokens(tokenizer.tokenize("testexact1: foo", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"));
    assertLeadingTokens(tokenizer.tokenize("testexact1: \"foo\"", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"));
    // NOTE(review): same input as the previous case - possibly meant to be a variant; confirm.
    assertLeadingTokens(tokenizer.tokenize("testexact1: \"foo\"", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"));

    // Two index-prefixed terms in one query, unquoted, quoted and parenthesized.
    assertLeadingTokens(tokenizer.tokenize("testexact1:vespa testexact2:resolved", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "vespa"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "resolved"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:\"news search\" testexact2:resolved", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "news search"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "resolved"));
    assertLeadingTokens(tokenizer.tokenize("(testexact1:\"news search\" testexact1:vespa)", facts),
            new Token(LBRACE, "("),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "news search"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "vespa"),
            new Token(RBRACE, ")"));

    // '*' and "!200" directly after unquoted and quoted terms.
    assertLeadingTokens(tokenizer.tokenize("testexact1:news*", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "news"),
            new Token(STAR, "*"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:\"news\"*", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "news"),
            new Token(STAR, "*"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:\"news search\"!200", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "news search"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "200"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:vespa!200", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "vespa"),
            new Token(EXCLAMATION, "!"),
            new Token(NUMBER, "200"));
    assertLeadingTokens(tokenizer.tokenize("testexact1:*\"news\"*", facts),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(STAR, "*"),
            new Token(WORD, "news"),
            new Token(STAR, "*"));

    // A testexact1 term surrounded by ordinary terms, parentheses and a testexact2 term.
    assertLeadingTokens(tokenizer.tokenize("normal(testexact1:foo) testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(LBRACE, "("),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(RBRACE, ")"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
    assertLeadingTokens(tokenizer.tokenize("normal testexact1:(foo testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "(foo"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
    assertLeadingTokens(tokenizer.tokenize("normal testexact1:foo! testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(EXCLAMATION, "!"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
    assertLeadingTokens(tokenizer.tokenize("normal testexact1:foo* testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(STAR, "*"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
    assertLeadingTokens(tokenizer.tokenize("normal testexact1: foo* testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, "foo"),
            new Token(STAR, "*"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
    assertLeadingTokens(tokenizer.tokenize("normal testexact1:\" foo\"* testexact2:bar", facts),
            new Token(WORD, "normal"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact1"),
            new Token(COLON, ":"),
            new Token(WORD, " foo"),
            new Token(STAR, "*"),
            new Token(SPACE, " "),
            new Token(WORD, "testexact2"),
            new Token(COLON, ":"),
            new Token(WORD, "bar"));
}

/**
 * Asserts that {@code actual} starts with the given tokens, in order.
 * Only the positions covered by {@code expected} are compared; any further
 * elements of {@code actual} are not checked.
 */
private static void assertLeadingTokens(List<?> actual, Token... expected) {
    for (int position = 0; position < expected.length; position++) {
        assertEquals(expected[position], actual.get(position));
    }
}
Also used : SimpleLinguistics(com.yahoo.language.simple.SimpleLinguistics) IndexFacts(com.yahoo.prelude.IndexFacts) Index(com.yahoo.prelude.Index) Token(com.yahoo.prelude.query.parser.Token) Tokenizer(com.yahoo.prelude.query.parser.Tokenizer) Test(org.junit.Test)

Example 17 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class IndexCombinatorTestCase method setUp.

/**
 * Creates the searcher under test and an {@link IndexFacts} instance in which the
 * name "one" has the entry "z" plus an index named "default" carrying the command
 * "match-group a i".
 */
@Before
public void setUp() throws Exception {
    transformer = new IndexCombinatorSearcher();

    Index defaultIndex = new Index("default");
    defaultIndex.addCommand("match-group a i");

    f = new IndexFacts();
    f.addIndex("one", "z");
    f.addIndex("one", defaultIndex);
}
Also used : IndexFacts(com.yahoo.prelude.IndexFacts) IndexCombinatorSearcher(com.yahoo.prelude.querytransform.IndexCombinatorSearcher) Index(com.yahoo.prelude.Index) Before(org.junit.Before)

Example 18 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class NGramSearcherTestCase method getMixedSetup.

/**
 * Builds an {@link IndexFacts} with a "default" index registered under both "music"
 * and "song"; only the one under "music" has {@code setNGram(true, 1)} applied.
 * Three clusters are then set: "musicOnly", "songOnly" and "musicAndSong", each
 * mapped to the corresponding list of those two names.
 *
 * @return the populated index facts
 */
private IndexFacts getMixedSetup() {
    final String music = "music";
    final String song = "song";

    IndexFacts facts = new IndexFacts();

    Index nGramDefault = new Index("default");
    nGramDefault.setNGram(true, 1);
    facts.addIndex(music, nGramDefault);

    facts.addIndex(song, new Index("default"));

    Map<String, List<String>> clusterMembers = new HashMap<>();
    clusterMembers.put("musicOnly", Arrays.asList(music));
    clusterMembers.put("songOnly", Arrays.asList(song));
    clusterMembers.put("musicAndSong", Arrays.asList(music, song));
    facts.setClusters(clusterMembers);

    return facts;
}
Also used : IndexFacts(com.yahoo.prelude.IndexFacts) HashMap(java.util.HashMap) Index(com.yahoo.prelude.Index) List(java.util.List) XMLString(com.yahoo.prelude.hitfield.XMLString) JSONString(com.yahoo.prelude.hitfield.JSONString)

Example 19 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class NGramSearcherTestCase method setUp.

/**
 * Creates the {@link NGramSearcher} under test and registers five indexes under
 * the name "default": "default" (n-gram 3), "test" (no n-gram call), "gram2" (n-gram 2),
 * "gram3" (n-gram 3) and "gram14" (n-gram 14). Each index additionally gets either
 * {@code setDynamicSummary(true)} or {@code setHighlightSummary(true)}.
 */
@Override
public void setUp() {
    searcher = new NGramSearcher(new SimpleLinguistics());
    indexFacts = new IndexFacts();

    Index defaultGram = newNGramIndex("default", 3);
    defaultGram.setDynamicSummary(true);
    indexFacts.addIndex("default", defaultGram);

    // The only index here on which setNGram is not called.
    Index plain = new Index("test");
    plain.setHighlightSummary(true);
    indexFacts.addIndex("default", plain);

    Index twoGram = newNGramIndex("gram2", 2);
    twoGram.setDynamicSummary(true);
    indexFacts.addIndex("default", twoGram);

    Index threeGram = newNGramIndex("gram3", 3);
    threeGram.setHighlightSummary(true);
    indexFacts.addIndex("default", threeGram);

    Index fourteenGram = newNGramIndex("gram14", 14);
    fourteenGram.setDynamicSummary(true);
    indexFacts.addIndex("default", fourteenGram);
}

/** Creates an index with the given name and calls {@code setNGram(true, gramSize)} on it. */
private static Index newNGramIndex(String name, int gramSize) {
    Index index = new Index(name);
    index.setNGram(true, gramSize);
    return index;
}
Also used : SimpleLinguistics(com.yahoo.language.simple.SimpleLinguistics) IndexFacts(com.yahoo.prelude.IndexFacts) Index(com.yahoo.prelude.Index) NGramSearcher(com.yahoo.search.querytransform.NGramSearcher)

Example 20 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class QueryTestCase method assertDetectionText.

/**
 * Parses {@code queryString} with a {@code MockLinguistics} instance attached and asserts
 * that the text last recorded by its detector equals {@code expectedDetectionText}.
 *
 * @param expectedDetectionText the value expected in {@code detector.lastDetectionText}
 * @param queryString           appended to "/?query=" and passed through {@code httpEncode}
 * @param indexSpecs            specs that are split on ':'; the second part is used as the
 *                              index name, and {@code setPlainTokens(true)} is called on the
 *                              index when the first part equals "text"
 */
private void assertDetectionText(String expectedDetectionText, String queryString, String... indexSpecs) {
    Query query = new Query(httpEncode("/?query=" + queryString));

    IndexFacts facts = new IndexFacts();
    for (String spec : indexSpecs) {
        String[] parts = spec.split(":");
        Index index = new Index(parts[1]);
        if ("text".equals(parts[0])) {
            index.setPlainTokens(true);
        }
        facts.addIndex("testSearchDefinition", index);
    }

    MockLinguistics linguistics = new MockLinguistics();
    query.getModel().setExecution(
            new Execution(Execution.Context.createContextStub(null, facts, linguistics)));
    // cause parsing
    query.getModel().getQueryTree();

    assertEquals(expectedDetectionText, linguistics.detector.lastDetectionText);
}
Also used : Execution(com.yahoo.search.searchchain.Execution) Query(com.yahoo.search.Query) IndexFacts(com.yahoo.prelude.IndexFacts) Index(com.yahoo.prelude.Index) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString)

Aggregations

Index (com.yahoo.prelude.Index): 36, IndexFacts (com.yahoo.prelude.IndexFacts): 23, Test (org.junit.Test): 13, SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics): 7, Query (com.yahoo.search.Query): 6, SearchDefinition (com.yahoo.prelude.SearchDefinition): 5, Token (com.yahoo.prelude.query.parser.Token): 5, Tokenizer (com.yahoo.prelude.query.parser.Tokenizer): 5, Execution (com.yahoo.search.searchchain.Execution): 4, JSONString (com.yahoo.prelude.hitfield.JSONString): 2, XMLString (com.yahoo.prelude.hitfield.XMLString): 2, CompositeItem (com.yahoo.prelude.query.CompositeItem): 2, Hit (com.yahoo.search.result.Hit): 2, List (java.util.List): 2, Before (org.junit.Before): 2, Chain (com.yahoo.component.chain.Chain): 1, Language (com.yahoo.language.Language): 1, StemMode (com.yahoo.language.process.StemMode): 1, IndexModel (com.yahoo.prelude.IndexModel): 1, FastHit (com.yahoo.prelude.fastsearch.FastHit): 1