Use of com.yahoo.prelude.Index in project vespa by vespa-engine.
From the class TokenizerTestCase, method testExactMatchHeuristics.
/**
 * Tokenizes a series of queries against two indexes marked as exact
 * ("testexact1" with a null second argument to setExact, "testexact2" with
 * "()/aa*::*&") and checks the leading tokens produced for each query.
 */
@Test
public void testExactMatchHeuristics() {
    Index exactIndex1 = new Index("testexact1");
    exactIndex1.setExact(true, null);
    Index exactIndex2 = new Index("testexact2");
    exactIndex2.setExact(true, "()/aa*::*&");

    IndexFacts indexFacts = new IndexFacts();
    indexFacts.addIndex("testsd", exactIndex1);
    indexFacts.addIndex("testsd", exactIndex2);
    IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet());
    Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics());

    // Tokens which recur in nearly every expectation below
    Token exact1 = new Token(WORD, "testexact1");
    Token exact2 = new Token(WORD, "testexact2");
    Token normal = new Token(WORD, "normal");
    Token foo = new Token(WORD, "foo");
    Token bar = new Token(WORD, "bar");
    Token colon = new Token(COLON, ":");
    Token space = new Token(SPACE, " ");
    Token star = new Token(STAR, "*");
    Token bang = new Token(EXCLAMATION, "!");
    Token open = new Token(LBRACE, "(");
    Token close = new Token(RBRACE, ")");

    // Mix of regular and exact indexes, with the exact term inside parentheses
    assertTokenPrefix(tokenizer.tokenize("normal a:b (normal testexact1:foo) testexact2:bar", facts),
                      normal, space, new Token(WORD, "a"), colon, new Token(WORD, "b"), space,
                      open, normal, space, exact1, colon, foo, close, space,
                      exact2, colon, bar);

    // Characters which are normally special are kept inside the exact term
    assertTokenPrefix(tokenizer.tokenize("testexact1:a*teens", facts),
                      exact1, colon, new Token(WORD, "a*teens"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo\"bar", facts),
                      exact1, colon, new Token(WORD, "foo\"bar"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo!bar", facts),
                      exact1, colon, new Token(WORD, "foo!bar"));

    // Trailing '!' (optionally followed by a number) and '*' become separate tokens
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo! ", facts),
                      exact1, colon, foo, bang, space);
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo!! ", facts),
                      exact1, colon, foo, bang, bang, space);
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo!100 ", facts),
                      exact1, colon, foo, bang, new Token(NUMBER, "100"), space);
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo*!100 ", facts),
                      exact1, colon, foo, star, bang, new Token(NUMBER, "100"), space);
    assertTokenPrefix(tokenizer.tokenize("testexact1: *\"foo bar\"*!100 ", facts),
                      exact1, colon, star, new Token(WORD, "foo bar"), star, bang,
                      new Token(NUMBER, "100"), space);
    assertTokenPrefix(tokenizer.tokenize("testexact1: *\"foo bar\"*!100", facts),
                      exact1, colon, star, new Token(WORD, "foo bar"), star, bang,
                      new Token(NUMBER, "100"));
    assertTokenPrefix(tokenizer.tokenize("testexact1: *foobar*!100", facts),
                      exact1, colon, star, new Token(WORD, "foobar"), star, bang,
                      new Token(NUMBER, "100"));
    assertTokenPrefix(tokenizer.tokenize("testexact1: *foobar*!100!", facts),
                      exact1, colon, star, new Token(WORD, "foobar*!100"), bang);

    // Parentheses inside the term, quoting, and a space after the colon
    assertTokenPrefix(tokenizer.tokenize("testexact1:foo(bar)", facts),
                      exact1, colon, new Token(WORD, "foo(bar)"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:\"foo\"", facts),
                      exact1, colon, foo);
    assertTokenPrefix(tokenizer.tokenize("testexact1: foo", facts),
                      exact1, colon, foo);
    assertTokenPrefix(tokenizer.tokenize("testexact1: \"foo\"", facts),
                      exact1, colon, foo);
    // Same query as the previous case, tokenized a second time
    assertTokenPrefix(tokenizer.tokenize("testexact1: \"foo\"", facts),
                      exact1, colon, foo);

    // An exact term followed by another indexed term
    assertTokenPrefix(tokenizer.tokenize("testexact1:vespa testexact2:resolved", facts),
                      exact1, colon, new Token(WORD, "vespa"), space,
                      exact2, colon, new Token(WORD, "resolved"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:\"news search\" testexact2:resolved", facts),
                      exact1, colon, new Token(WORD, "news search"), space,
                      exact2, colon, new Token(WORD, "resolved"));
    assertTokenPrefix(tokenizer.tokenize("(testexact1:\"news search\" testexact1:vespa)", facts),
                      open, exact1, colon, new Token(WORD, "news search"), space,
                      exact1, colon, new Token(WORD, "vespa"), close);

    // Prefix/suffix stars and term weights around plain and quoted exact terms
    assertTokenPrefix(tokenizer.tokenize("testexact1:news*", facts),
                      exact1, colon, new Token(WORD, "news"), star);
    assertTokenPrefix(tokenizer.tokenize("testexact1:\"news\"*", facts),
                      exact1, colon, new Token(WORD, "news"), star);
    assertTokenPrefix(tokenizer.tokenize("testexact1:\"news search\"!200", facts),
                      exact1, colon, new Token(WORD, "news search"), bang, new Token(NUMBER, "200"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:vespa!200", facts),
                      exact1, colon, new Token(WORD, "vespa"), bang, new Token(NUMBER, "200"));
    assertTokenPrefix(tokenizer.tokenize("testexact1:*\"news\"*", facts),
                      exact1, colon, star, new Token(WORD, "news"), star);

    // Exact terms embedded in longer queries
    assertTokenPrefix(tokenizer.tokenize("normal(testexact1:foo) testexact2:bar", facts),
                      normal, open, exact1, colon, foo, close, space, exact2, colon, bar);
    assertTokenPrefix(tokenizer.tokenize("normal testexact1:(foo testexact2:bar", facts),
                      normal, space, exact1, colon, new Token(WORD, "(foo"), space, exact2, colon, bar);
    assertTokenPrefix(tokenizer.tokenize("normal testexact1:foo! testexact2:bar", facts),
                      normal, space, exact1, colon, foo, bang, space, exact2, colon, bar);
    assertTokenPrefix(tokenizer.tokenize("normal testexact1:foo* testexact2:bar", facts),
                      normal, space, exact1, colon, foo, star, space, exact2, colon, bar);
    assertTokenPrefix(tokenizer.tokenize("normal testexact1: foo* testexact2:bar", facts),
                      normal, space, exact1, colon, foo, star, space, exact2, colon, bar);
    assertTokenPrefix(tokenizer.tokenize("normal testexact1:\" foo\"* testexact2:bar", facts),
                      normal, space, exact1, colon, new Token(WORD, " foo"), star, space,
                      exact2, colon, bar);
}

/**
 * Asserts that the leading elements of {@code actual} equal {@code expected}, position by position.
 * Only the first {@code expected.length} elements are compared; the total size of
 * {@code actual} is not checked.
 */
private static void assertTokenPrefix(List<?> actual, Token... expected) {
    for (int position = 0; position < expected.length; position++) {
        assertEquals(expected[position], actual.get(position));
    }
}
Use of com.yahoo.prelude.Index in project vespa by vespa-engine.
From the class IndexCombinatorTestCase, method setUp.
/**
 * Creates the searcher under test and an IndexFacts holding two indexes
 * under "one": "z", and "default" carrying the command "match-group a i".
 */
@Before
public void setUp() throws Exception {
    // Prepare the "default" index before touching the fixture fields
    Index defaultIndex = new Index("default");
    defaultIndex.addCommand("match-group a i");

    transformer = new IndexCombinatorSearcher();

    f = new IndexFacts();
    f.addIndex("one", "z");
    f.addIndex("one", defaultIndex);
}
Use of com.yahoo.prelude.Index in project vespa by vespa-engine.
From the class NGramSearcherTestCase, method getMixedSetup.
/**
 * Builds an IndexFacts in which "music" and "song" each have an index named
 * "default", but only the "music" one has n-gram enabled (gram size 1).
 * Three clusters are registered: one per document type and one containing both.
 *
 * @return the configured IndexFacts
 */
private IndexFacts getMixedSetup() {
    final String music = "music";
    final String song = "song";

    Index ngramDefault = new Index("default");
    ngramDefault.setNGram(true, 1);
    Index plainDefault = new Index("default");

    IndexFacts facts = new IndexFacts();
    facts.addIndex(music, ngramDefault);
    facts.addIndex(song, plainDefault);

    // Cluster name -> document types searchable in that cluster
    Map<String, List<String>> clusterToDoctypes = new HashMap<>();
    clusterToDoctypes.put("musicOnly", Arrays.asList(music));
    clusterToDoctypes.put("songOnly", Arrays.asList(song));
    clusterToDoctypes.put("musicAndSong", Arrays.asList(music, song));
    facts.setClusters(clusterToDoctypes);

    return facts;
}
Use of com.yahoo.prelude.Index in project vespa by vespa-engine.
From the class NGramSearcherTestCase, method setUp.
/**
 * Creates the NGramSearcher under test and registers five indexes under "default":
 * "default" (gram size 3, dynamic summary), "test" (highlight summary, no n-gram),
 * "gram2" (gram size 2, dynamic summary), "gram3" (gram size 3, highlight summary)
 * and "gram14" (gram size 14, dynamic summary).
 */
@Override
public void setUp() {
    searcher = new NGramSearcher(new SimpleLinguistics());

    Index trigramDefault = new Index("default");
    trigramDefault.setNGram(true, 3);
    trigramDefault.setDynamicSummary(true);

    Index highlightOnly = new Index("test");
    highlightOnly.setHighlightSummary(true);

    Index bigram = new Index("gram2");
    bigram.setNGram(true, 2);
    bigram.setDynamicSummary(true);

    Index trigram = new Index("gram3");
    trigram.setNGram(true, 3);
    trigram.setHighlightSummary(true);

    Index fourteenGram = new Index("gram14");
    fourteenGram.setNGram(true, 14);
    fourteenGram.setDynamicSummary(true);

    // Register everything under "default", in the same order as the indexes were declared
    indexFacts = new IndexFacts();
    Index[] inRegistrationOrder = { trigramDefault, highlightOnly, bigram, trigram, fourteenGram };
    for (Index index : inRegistrationOrder) {
        indexFacts.addIndex("default", index);
    }
}
Use of com.yahoo.prelude.Index in project vespa by vespa-engine.
From the class QueryTestCase, method assertDetectionText.
/**
 * Parses the given query with a mock linguistics instance and asserts which text
 * was last handed to its detector.
 *
 * @param expectedDetectionText the text expected in the mock detector's lastDetectionText
 * @param queryString the value of the "query" request parameter (http-encoded by this method)
 * @param indexSpecs index specifications of the form "kind:name"; an index called "name"
 *                   is added for each, and marked as plain tokens when kind is "text"
 */
private void assertDetectionText(String expectedDetectionText, String queryString, String... indexSpecs) {
    IndexFacts facts = new IndexFacts();
    for (String spec : indexSpecs) {
        String[] parts = spec.split(":");
        Index specIndex = new Index(parts[1]);
        if ("text".equals(parts[0])) {
            specIndex.setPlainTokens(true);
        }
        facts.addIndex("testSearchDefinition", specIndex);
    }

    MockLinguistics linguistics = new MockLinguistics();
    Query query = new Query(httpEncode("/?query=" + queryString));
    query.getModel().setExecution(
            new Execution(Execution.Context.createContextStub(null, facts, linguistics)));

    // Fetching the query tree forces the query to be parsed
    query.getModel().getQueryTree();

    assertEquals(expectedDetectionText, linguistics.detector.lastDetectionText);
}
Aggregations