Use of com.yahoo.language.Linguistics in project vespa by vespa-engine.
From class LinguisticsAnnotatorTestCase, method newLinguistics.
/**
 * Builds a Mockito mock of {@link Linguistics} whose {@code getTokenizer()} call is stubbed
 * to return a {@code MyTokenizer} constructed from the given arguments.
 *
 * @param tokens           the tokens handed to the {@code MyTokenizer} constructor
 * @param replacementTerms the replacement-term map handed to the {@code MyTokenizer} constructor
 * @return the stubbed mock
 */
private static Linguistics newLinguistics(List<? extends Token> tokens, Map<String, String> replacementTerms) {
    Linguistics mocked = Mockito.mock(Linguistics.class);
    MyTokenizer stubTokenizer = new MyTokenizer(tokens, replacementTerms);
    Mockito.when(mocked.getTokenizer()).thenReturn(stubTokenizer);
    return mocked;
}
Use of com.yahoo.language.Linguistics in project vespa by vespa-engine.
From class LinguisticsAnnotatorTestCase, method requireThatExistingAnnotationsAreKept.
/**
 * Attaches a LINGUISTICS span tree carrying a single TERM annotation to a string value,
 * runs the annotator over that value twice, and verifies that the second run reports
 * {@code true} and that the value's LINGUISTICS tree still equals the tree attached up front.
 */
@Test
public void requireThatExistingAnnotationsAreKept() {
    // Span tree that is already present on the value before any annotator runs.
    SpanTree existingTree = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation termAnnotation = new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz"));
    existingTree.spanList().span(0, 3).annotate(termAnnotation);

    StringFieldValue value = new StringFieldValue("foo");
    value.setSpanTree(existingTree);

    Linguistics linguistics = newLinguistics(
            Arrays.asList(newToken("foo", "bar", TokenType.ALPHABETIC, false)),
            Collections.<String, String>emptyMap());

    // Each run uses its own annotator instance.
    new LinguisticsAnnotator(linguistics, CONFIG).annotate(value);
    boolean secondRunResult = new LinguisticsAnnotator(linguistics, CONFIG).annotate(value);

    assertTrue(secondRunResult);
    assertEquals(existingTree, value.getSpanTree(SpanTrees.LINGUISTICS));
}
Use of com.yahoo.language.Linguistics in project vespa by vespa-engine.
From class NGramTestCase, method requireThatAccessorsWork.
/**
 * Verifies that {@code NGramExpression} hands back the same {@link Linguistics} instance
 * and the same gram size it was constructed with.
 */
@Test
public void requireThatAccessorsWork() {
    final int gramSize = 69;
    Linguistics expectedLinguistics = new SimpleLinguistics();

    NGramExpression expression = new NGramExpression(expectedLinguistics, gramSize);

    assertSame(expectedLinguistics, expression.getLinguistics());
    assertEquals(gramSize, expression.getGramSize());
}
Use of com.yahoo.language.Linguistics in project vespa by vespa-engine.
From class Model, method getParsingLanguage.
/**
 * Determines the language to use for parsing, trying each source in turn:
 * <ol>
 *   <li>the language explicitly set in this model, if any;</li>
 *   <li>the language derived from the encoding, unless that is {@code UNKNOWN};</li>
 *   <li>if a query tree has already been produced, the language of the first node that specifies
 *       one, as encountered in a depth first, left to right search;</li>
 *   <li>if a linguistics instance is available from the execution context, the language its
 *       detector guesses from the given text;</li>
 *   <li>English, as the default when none of the above yields an actual language.</li>
 * </ol>
 *
 * @param languageDetectionText the text passed to the language detector when detection is needed
 * @return the language determined, never null
 */
// TODO: We can support multiple languages per query by changing searchers which call this
// to look up the query to use at each point from item.getLanguage
// with this as fallback for query branches where no parent item specifies language
public Language getParsingLanguage(String languageDetectionText) {
    Language explicitLanguage = getLanguage();
    if (explicitLanguage != null) {
        return explicitLanguage;
    }

    Language fromEncoding = Language.fromEncoding(encoding);
    if (fromEncoding != Language.UNKNOWN) {
        return fromEncoding;
    }

    if (queryTree != null) {
        Language fromTree = languageBelow(queryTree);
        if (fromTree != Language.UNKNOWN) {
            return fromTree;
        }
    }

    Linguistics linguistics = execution.context().getLinguistics();
    if (linguistics != null) {
        // TODO: Set language if detected
        Language detected = linguistics.getDetector().detect(languageDetectionText, null).getLanguage();
        if (detected != Language.UNKNOWN) {
            return detected;
        }
    }

    return Language.ENGLISH;
}
Use of com.yahoo.language.Linguistics in project vespa by vespa-engine.
From class QueryTestCase, method testSimpleFunctionality.
@Test
public void testSimpleFunctionality() {
Query q = new Query(QueryTestCase.httpEncode("/sdfsd.html?query=this is a simple query&aParameter"));
assertEquals("this is a simple query", q.getModel().getQueryString());
assertNotNull(q.getModel().getQueryTree());
assertNull(q.getModel().getDefaultIndex());
assertEquals("", q.properties().get("aParameter"));
assertNull(q.properties().get("notSetParameter"));
Query query = q;
String body = "a bb. ccc??!";
Linguistics linguistics = new SimpleLinguistics();
AndItem and = new AndItem();
for (Token token : linguistics.getTokenizer().tokenize(body, Language.ENGLISH, StemMode.SHORTEST, true)) {
if (token.isIndexable())
and.addItem(new WordItem(token.getTokenString(), "body"));
}
query.getModel().getQueryTree().setRoot(and);
System.out.println(query);
}
Aggregations