use of com.yahoo.prelude.query.WordItem in project vespa by vespa-engine.
the class LowercasingTestCase method smoke.
/**
 * Runs a three-term AND query through the search chain and checks the casing of
 * each term in the resulting query tree: the first and third terms come back
 * lowercased, while the second keeps its original casing.
 */
@Test
public void smoke() {
    AndItem conjunction = new AndItem();
    // One term per index: BAMSE, TEDDY and the empty index name.
    // NOTE(review): presumably only TEDDY is configured to skip lowercasing - confirm against the test setup.
    conjunction.addItem(new WordItem("Gnuff", BAMSE, true));
    conjunction.addItem(new WordItem("Blaff", TEDDY, true));
    conjunction.addItem(new WordItem("Blyant", "", true));

    Query query = new Query();
    query.getModel().getQueryTree().setRoot(conjunction);

    Result result = execution.search(query);

    // Read the terms back from the query attached to the result.
    AndItem processed = (AndItem) result.getQuery().getModel().getQueryTree().getRoot();
    WordItem first = (WordItem) processed.getItem(0);
    WordItem second = (WordItem) processed.getItem(1);
    WordItem third = (WordItem) processed.getItem(2);

    assertEquals("gnuff", first.getWord());
    assertEquals("Blaff", second.getWord());
    assertEquals("blyant", third.getWord());
}
use of com.yahoo.prelude.query.WordItem in project vespa by vespa-engine.
the class YqlParser method instantiateWordItem.
/**
 * Creates the query item representing a single word of input.
 *
 * <p>The item type is chosen in this order: exact match (the {@code exactMatch} argument),
 * then the prefix, suffix and substring annotations on {@code ast}, and finally a plain
 * word item or a resegmented item depending on {@code segmentPolicy}.
 *
 * @param field         the field the word is matched against; passed on to the resegmenting and word preparation helpers
 * @param rawWord       the word as given in the input, before optional NFKC normalization
 * @param ast           the operator node whose annotations control normalization, match mode and leaf settings
 * @param parent        passed through to {@code resegment}
 * @param segmentPolicy whether the word is never, possibly or always resegmented
 * @param exactMatch    if true an {@code ExactStringItem} is created regardless of the other match annotations
 * @param language      the language to mark on the item, unless it is {@code Language.ENGLISH}
 * @return the created item, after leaf style settings have been applied
 * @throws IllegalArgumentException if more than one of prefix, substring and suffix is set,
 *                                  or if the segment policy is not a known value
 */
@NonNull
private Item instantiateWordItem(String field, String rawWord, OperatorNode<ExpressionOperator> ast, Class<?> parent, SegmentWhen segmentPolicy, boolean exactMatch, Language language) {
    // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data
    // during tokenization/segmentation, as that is always turned on also on the indexing side.
    boolean normalizeInput = getAnnotation(ast, NFKC, Boolean.class, Boolean.FALSE, "setting for whether to NFKC normalize input data");
    String text = normalizeInput ? normalizer.normalize(rawWord) : rawWord;

    boolean isFromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
    boolean prefixMatch = getAnnotation(ast, PREFIX, Boolean.class, Boolean.FALSE, "setting for whether to use prefix match of input data");
    boolean suffixMatch = getAnnotation(ast, SUFFIX, Boolean.class, Boolean.FALSE, "setting for whether to use suffix match of input data");
    boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE, "setting for whether to use substring match of input data");

    // The three partial match modes are mutually exclusive.
    int partialMatchModes = 0;
    if (prefixMatch) {
        partialMatchModes++;
    }
    if (substrMatch) {
        partialMatchModes++;
    }
    if (suffixMatch) {
        partialMatchModes++;
    }
    Preconditions.checkArgument(partialMatchModes < 2, "Only one of prefix, substring and suffix can be set.");

    @NonNull final TaggableItem created;
    if (exactMatch) {
        created = new ExactStringItem(text, isFromQuery);
    } else if (prefixMatch) {
        created = new PrefixItem(text, isFromQuery);
    } else if (suffixMatch) {
        created = new SuffixItem(text, isFromQuery);
    } else if (substrMatch) {
        created = new SubstringItem(text, isFromQuery);
    } else {
        // No explicit match mode: the segment policy decides between a plain word and resegmenting.
        switch (segmentPolicy) {
            case NEVER:
                created = new WordItem(text, isFromQuery);
                break;
            case POSSIBLY:
                if (!shouldResegmentWord(field, isFromQuery)) {
                    created = new WordItem(text, isFromQuery);
                } else {
                    created = resegment(field, ast, text, isFromQuery, parent, language);
                }
                break;
            case ALWAYS:
                created = resegment(field, ast, text, isFromQuery, parent, language);
                break;
            default:
                throw new IllegalArgumentException("Unexpected segmenting rule: " + segmentPolicy);
        }
    }

    // Only WordItem instances go through prepareWord.
    if (created instanceof WordItem) {
        prepareWord(field, ast, isFromQuery, (WordItem) created);
    }

    // mark the language used, unless it's the default
    if (language != Language.ENGLISH) {
        ((Item) created).setLanguage(language);
    }

    return (Item) leafStyleSettings(ast, created);
}
use of com.yahoo.prelude.query.WordItem in project vespa by vespa-engine.
the class ItemEncodingTestCase method testNoRankedNoPositionDataWordItemEncoding.
/**
 * Encodes a word item with ranking and position data both switched off and
 * verifies the serialized bytes: the type header, one extra byte, a zero-length
 * index name, and the length-prefixed word.
 */
@Test
public void testNoRankedNoPositionDataWordItemEncoding() {
    String term = "test";
    WordItem item = new WordItem(term);
    item.setRanked(false);
    item.setPositionData(false);

    ByteBuffer encoded = ByteBuffer.allocate(128);
    int itemCount = item.encode(encoded);
    // Switch the buffer from writing to reading.
    encoded.flip();

    assertEquals("Serialization count", 1, itemCount);
    assertType(encoded, 4, 4);
    // NOTE(review): 0x05 presumably carries the "not ranked" and "no position data" flags - confirm against the item encoding.
    assertEquals(0x05, encoded.get());
    assertEquals("Index length", 0, encoded.get());
    assertEquals("Word length", 4, encoded.get());
    // Exactly the word bytes must be left in the buffer.
    assertEquals("Word length", 4, encoded.remaining());
    for (char expected : term.toCharArray()) {
        assertEquals(expected, encoded.get());
    }
}
use of com.yahoo.prelude.query.WordItem in project vespa by vespa-engine.
the class ItemEncodingTestCase method testEndHostMarkerEncoding.
/**
 * Encodes the end-of-host marker word and verifies the serialized bytes: the
 * type header, a zero-length index name, and the 7-byte word "EnDhOsT".
 */
@Test
public void testEndHostMarkerEncoding() {
    WordItem marker = MarkerWordItem.createEndOfHost();

    ByteBuffer encoded = ByteBuffer.allocate(128);
    int itemCount = marker.encode(encoded);
    // Switch the buffer from writing to reading.
    encoded.flip();

    assertEquals("Serialization count", 1, itemCount);
    assertType(encoded, 4, 0);
    assertEquals("Index length", 0, encoded.get());
    assertEquals("Word length", 7, encoded.get());
    // Exactly the word bytes must be left in the buffer.
    assertEquals("Word length", 7, encoded.remaining());
    for (char expected : "EnDhOsT".toCharArray()) {
        assertEquals(expected, encoded.get());
    }
}
use of com.yahoo.prelude.query.WordItem in project vespa by vespa-engine.
the class ItemEncodingTestCase method testStartHostMarkerEncoding.
/**
 * Encodes the start-of-host marker word and verifies the serialized bytes: the
 * type header, a zero-length index name, and the 9-byte word "StArThOsT".
 */
@Test
public void testStartHostMarkerEncoding() {
    WordItem marker = MarkerWordItem.createStartOfHost();

    ByteBuffer encoded = ByteBuffer.allocate(128);
    int itemCount = marker.encode(encoded);
    // Switch the buffer from writing to reading.
    encoded.flip();

    assertEquals("Serialization count", 1, itemCount);
    assertType(encoded, 4, 0);
    assertEquals("Index length", 0, encoded.get());
    assertEquals("Word length", 9, encoded.get());
    // Exactly the word bytes must be left in the buffer.
    assertEquals("Word length", 9, encoded.remaining());
    for (char expected : "StArThOsT".toCharArray()) {
        assertEquals(expected, encoded.get());
    }
}
Aggregations