Example use of com.yahoo.prelude.query.parser.Tokenizer in the vespa project by vespa-engine.
Taken from the class TokenizerTestCase, method testSpecialTokenNonMatch.
/**
 * Tokenizes a string mixing terms such as {@code c++}, {@code as/400},
 * {@code i/o} and {@code .net} with look-alikes ({@code c+}, {@code i/ooo},
 * {@code ap.net}) after installing the special tokens returned by
 * {@code createSpecialTokens()}, and checks the first 16 resulting tokens.
 *
 * Only the leading 16 positions are compared; the total length of the
 * token list is not asserted.
 */
@Test
public void testSpecialTokenNonMatch() {
    Tokenizer specialAware = new Tokenizer(new SimpleLinguistics());
    specialAware.setSpecialTokens(createSpecialTokens());

    List<?> actual = specialAware.tokenize("c++ c+ aS/400 i/o .net i/ooo ap.net");

    // Expected tokens, listed in the order they must appear in the result.
    Token[] expected = {
        new Token(WORD, "c++"),
        new Token(SPACE, " "),
        new Token(WORD, "c+"),
        new Token(SPACE, " "),
        // the input spells this "aS/400"; the expected image is lower-cased
        new Token(WORD, "as/400"),
        new Token(SPACE, " "),
        new Token(WORD, "i/o"),
        new Token(SPACE, " "),
        new Token(WORD, ".net"),
        new Token(SPACE, " "),
        // "i/ooo" is expected as three tokens, not as "i/o" plus a remainder
        new Token(WORD, "i"),
        new Token(NOISE, "<NOISE>"),
        new Token(WORD, "ooo"),
        new Token(SPACE, " "),
        // "ap.net" is expected to split into "ap" followed by ".net"
        new Token(WORD, "ap"),
        new Token(WORD, ".net"),
    };

    for (int position = 0; position < expected.length; position++) {
        assertEquals(expected[position], actual.get(position));
    }
}
Example use of com.yahoo.prelude.query.parser.Tokenizer in the vespa project by vespa-engine.
Taken from the class TokenizerTestCase, method testExactMatchTokenization.
/**
 * Registers two indexes marked exact ({@code testexact1} and
 * {@code testexact2}) under the {@code testsd} search definition, tokenizes a
 * query that addresses both of them alongside ordinary terms, and checks the
 * first 22 tokens plus the {@code isSpecial()} flag on selected tokens.
 */
@Test
public void testExactMatchTokenization() {
    // First exact index: second setExact argument is null.
    Index firstExact = new Index("testexact1");
    firstExact.setExact(true, null);

    // Second exact index: second setExact argument is "()/aa*::*&". The word
    // asserted for this index below stops right before that string in the
    // input - presumably it acts as a terminator; confirm against Index.setExact.
    Index secondExact = new Index("testexact2");
    secondExact.setExact(true, "()/aa*::*&");

    IndexFacts indexFacts = new IndexFacts();
    indexFacts.addIndex("testsd", firstExact);
    indexFacts.addIndex("testsd", secondExact);
    IndexFacts.Session factsSession = indexFacts.newSession(Collections.emptySet(), Collections.emptySet());

    Tokenizer exactAware = new Tokenizer(new SimpleLinguistics());
    List<?> actual = exactAware.tokenize(
            "normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c",
            "default",
            factsSession);

    // Expected tokens, listed in the order they must appear in the result.
    Token[] expected = {
        new Token(WORD, "normal"),
        new Token(SPACE, " "),
        new Token(WORD, "a"),
        new Token(COLON, ":"),
        new Token(WORD, "b"),
        new Token(SPACE, " "),
        new Token(LBRACE, "("),
        new Token(WORD, "normal"),
        new Token(SPACE, " "),
        new Token(WORD, "testexact1"),
        new Token(COLON, ":"),
        // index 11: the punctuation run after "testexact1:" is one WORD
        new Token(WORD, "/,%#%&+-+"),
        new Token(SPACE, " "),
        new Token(RBRACE, ")"),
        new Token(SPACE, " "),
        new Token(WORD, "testexact2"),
        new Token(COLON, ":"),
        // index 17: the term after "testexact2:", without the trailing "()/aa*::*&"
        new Token(WORD, "ho_/&%&/"),
        new Token(SPACE, " "),
        new Token(WORD, "b"),
        new Token(COLON, ":"),
        new Token(WORD, "c"),
    };

    for (int position = 0; position < expected.length; position++) {
        assertEquals(expected[position], actual.get(position));
    }

    // The two exact-index terms must be flagged special; the index name itself must not.
    Token firstExactTerm = (Token) actual.get(11);
    assertTrue(firstExactTerm.isSpecial());
    Token secondIndexName = (Token) actual.get(15);
    assertFalse(secondIndexName.isSpecial());
    Token secondExactTerm = (Token) actual.get(17);
    assertTrue(secondExactTerm.isSpecial());
}
Aggregations