Use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.
From the class TextMatch, method findAnnotatorConfig.
/**
 * Builds the annotator configuration to use for the given field.
 *
 * <p>The stem mode comes from the field's own stemming setting, falling back to the
 * stemming of the search definition when the field has none. Accent removal follows the
 * field's normalizing setting. The maximum token length is only set when the field has a
 * matching setting that specifies one.</p>
 *
 * @param search the search definition consulted for stemming when the field has none
 * @param field  the field to build a configuration for
 * @return a new configuration populated from the field and search settings
 */
private AnnotatorConfig findAnnotatorConfig(Search search, SDField field) {
    AnnotatorConfig annotatorConfig = new AnnotatorConfig();

    // A field-level stemming setting takes precedence over the search-level one.
    Stemming stemming = field.getStemming();
    if (stemming == null) {
        stemming = search.getStemming();
    }
    annotatorConfig.setStemMode(stemming.toStemMode());
    annotatorConfig.setRemoveAccents(field.getNormalizing().doRemoveAccents());

    // Without an explicit max length the configuration keeps whatever default it was created with.
    if ((field.getMatching() == null) || (field.getMatching().maxLength() == null)) {
        return annotatorConfig;
    }
    annotatorConfig.setMaxTokenLength(field.getMatching().maxLength());
    return annotatorConfig;
}
Use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.
From the class IndexingOperation, method fromStream.
/**
 * Parses an indexing operation from the given character stream.
 *
 * <p>When {@code multiLine} is true the input is parsed as a script; otherwise a single
 * statement is parsed and wrapped in a script expression.</p>
 *
 * @param input       the stream to read the indexing language text from
 * @param multiLine   whether the input should be parsed as a script rather than a single statement
 * @param linguistics the linguistics instance handed to the parser context
 * @return the indexing operation wrapping the parsed script expression
 * @throws ParseException if the indexing language parser reports an error; the parser's
 *                        exception is attached as the cause
 */
public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine, Linguistics linguistics) throws ParseException {
    ScriptParserContext parserContext = new ScriptParserContext(linguistics);
    parserContext.setAnnotatorConfig(new AnnotatorConfig());
    parserContext.setInputStream(input);

    ScriptExpression script;
    try {
        script = multiLine
                ? ScriptExpression.newInstance(parserContext)
                : new ScriptExpression(StatementExpression.newInstance(parserContext));
    } catch (com.yahoo.vespa.indexinglanguage.parser.ParseException parserFailure) {
        // Translate the indexing language parser's exception into this method's ParseException type.
        ParseException translated = new ParseException("Error reported by IL parser: " + parserFailure.getMessage());
        translated.initCause(parserFailure);
        throw translated;
    }
    return new IndexingOperation(script);
}
Use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.
From the class ExpressionVisitorTestCase, method requireThatAllExpressionsAreVisited.
/**
 * Calls {@code assertCount} once for an instance of each expression type, passing the
 * expected count and the expression under test.
 *
 * <p>NOTE(review): {@code assertCount} is defined outside this snippet. Judging by the test
 * name it presumably checks how many expressions a visitor sees; the expected values below
 * are consistent with "the expression itself plus every expression nested inside it"
 * (e.g. 1 for leaf expressions, 3 for an arithmetic expression with two operands) - confirm
 * against the helper.</p>
 *
 * <p>NOTE(review): the {@code unchecked} suppression is presumably needed for the generic
 * {@code Pair<String, Expression>} arguments passed to {@code SelectInputExpression} - confirm.</p>
 */
@SuppressWarnings("unchecked")
@Test
public void requireThatAllExpressionsAreVisited() {
// Composite: the arithmetic expression plus its two operands.
assertCount(3, new ArithmeticExpression(new InputExpression("foo"), ArithmeticExpression.Operator.ADD, new InputExpression("bar")));
assertCount(1, new AttributeExpression("foo"));
assertCount(1, new Base64DecodeExpression());
assertCount(1, new Base64EncodeExpression());
// Composite: the cat expression plus its two parts.
assertCount(3, new CatExpression(new InputExpression("foo"), new IndexExpression("bar")));
assertCount(1, new ClearStateExpression());
assertCount(1, new EchoExpression());
// Wrapper: the for-each expression plus the expression it wraps.
assertCount(2, new ForEachExpression(new IndexExpression("foo")));
assertCount(1, new GetFieldExpression("foo"));
assertCount(1, new GetVarExpression("foo"));
// Wrapper: the guard expression plus the expression it wraps.
assertCount(2, new GuardExpression(new IndexExpression("foo")));
assertCount(1, new HexDecodeExpression());
assertCount(1, new HexEncodeExpression());
assertCount(1, new HostNameExpression());
// Composite: the if-then expression plus its four nested expressions.
assertCount(5, new IfThenExpression(new InputExpression("foo"), IfThenExpression.Comparator.EQ, new InputExpression("bar"), new IndexExpression("baz"), new IndexExpression("cox")));
assertCount(1, new IndexExpression("foo"));
assertCount(1, new InputExpression("foo"));
assertCount(1, new JoinExpression("foo"));
assertCount(1, new LowerCaseExpression());
assertCount(1, new NormalizeExpression(new SimpleLinguistics()));
assertCount(1, new NowExpression());
assertCount(1, new OptimizePredicateExpression());
// Wrapper: the parenthesis expression plus the expression it wraps.
assertCount(2, new ParenthesisExpression(new InputExpression("foo")));
assertCount(1, new RandomExpression(69));
// Two levels of nesting: script, statement, and the innermost input expression.
assertCount(3, new ScriptExpression(new StatementExpression(new InputExpression("foo"))));
// Composite: the select-input expression plus one expression per case.
assertCount(3, new SelectInputExpression(new Pair<String, Expression>("foo", new IndexExpression("bar")), new Pair<String, Expression>("bar", new IndexExpression("foo"))));
assertCount(1, new SetLanguageExpression());
assertCount(1, new SetValueExpression(new IntegerFieldValue(69)));
assertCount(1, new SetVarExpression("foo"));
assertCount(1, new SplitExpression("foo"));
// Wrapper: the statement expression plus the expression it contains.
assertCount(2, new StatementExpression(new InputExpression("foo")));
assertCount(1, new SummaryExpression("foo"));
assertCount(1, new SubstringExpression(6, 9));
// Composite: the switch expression, its single case, and the second constructor argument.
assertCount(3, new SwitchExpression(Collections.singletonMap("foo", (Expression) new IndexExpression("bar")), new InputExpression("baz")));
assertCount(1, new ThisExpression());
assertCount(1, new ToArrayExpression());
assertCount(1, new ToByteExpression());
assertCount(1, new ToDoubleExpression());
assertCount(1, new ToFloatExpression());
assertCount(1, new ToIntegerExpression());
assertCount(1, new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig()));
assertCount(1, new ToLongExpression());
assertCount(1, new ToPositionExpression());
assertCount(1, new ToStringExpression());
assertCount(1, new ToWsetExpression(false, false));
assertCount(1, new TrimExpression());
assertCount(1, new ZCurveExpression());
}
Use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.
From the class TokenizeExpression, method doExecute.
/**
 * Replaces the current value of the context with a clone of itself and annotates that clone.
 *
 * <p>The annotation uses a per-execution copy of this expression's annotator configuration.
 * If the context resolves a language, that language is set on the copy, leaving the
 * configuration held by this expression untouched by this method.</p>
 *
 * @param context the execution context whose current value is cast to a string field value
 */
@Override
protected void doExecute(ExecutionContext context) {
    // Annotate a clone, not the value object that was read from the context.
    StringFieldValue annotated = ((StringFieldValue) context.getValue()).clone();
    context.setValue(annotated);

    AnnotatorConfig effectiveConfig = new AnnotatorConfig(config);
    Language resolved = context.resolveLanguage(linguistics);
    if (resolved != null) {
        effectiveConfig.setLanguage(resolved);
    }

    new LinguisticsAnnotator(linguistics, effectiveConfig).annotate(annotated);
}
Use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.
From the class TokenizeTestCase, method requireThatHashCodeAndEqualsAreImplemented.
/**
 * Checks {@code equals} and {@code hashCode} of {@code TokenizeExpression}: it must differ
 * from an unrelated object and from expressions built with a different linguistics and/or
 * annotator configuration, and must equal (with the same hash code) an expression built
 * from a fresh {@code SimpleLinguistics} and the same configuration.
 */
@Test
public void requireThatHashCodeAndEqualsAreImplemented() {
    AnnotatorConfig arabicConfig = new AnnotatorConfig().setLanguage(Language.ARABIC);
    Expression arabic = new TokenizeExpression(new SimpleLinguistics(), arabicConfig);

    // Not equal to an arbitrary object.
    assertFalse(arabic.equals(new Object()));

    // Not equal when both the linguistics and the configured language differ.
    Expression mockedSpanish = new TokenizeExpression(Mockito.mock(Linguistics.class),
                                                      new AnnotatorConfig().setLanguage(Language.SPANISH));
    assertFalse(arabic.equals(mockedSpanish));

    // Not equal when only the configured language differs.
    Expression simpleSpanish = new TokenizeExpression(new SimpleLinguistics(),
                                                      new AnnotatorConfig().setLanguage(Language.SPANISH));
    assertFalse(arabic.equals(simpleSpanish));

    // Equal, with matching hash codes, when built from the same configuration.
    Expression sameAsArabic = new TokenizeExpression(new SimpleLinguistics(), arabicConfig);
    assertEquals(arabic, sameAsArabic);

    Expression hashTwin = new TokenizeExpression(new SimpleLinguistics(), arabicConfig);
    assertEquals(arabic.hashCode(), hashTwin.hashCode());
}
Aggregations