Search in sources :

Example 1 with AnnotatorConfig

use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.

the class TextMatch method findAnnotatorConfig.

/**
 * Builds the {@link AnnotatorConfig} to use for the given field.
 *
 * <p>The stem mode comes from the field's own stemming setting, or from the
 * search-level setting when the field has none. Accent removal follows the
 * field's normalizing setting, and the maximum token length is only set when
 * the field's matching declares one.
 *
 * @param search the search definition supplying the fallback stemming
 * @param field  the field whose settings are read
 * @return a newly created annotator config
 */
private AnnotatorConfig findAnnotatorConfig(Search search, SDField field) {
    AnnotatorConfig annotatorConfig = new AnnotatorConfig();

    // A stemming setting on the field wins; otherwise use the search-level one.
    Stemming stemming = field.getStemming();
    if (stemming == null) {
        stemming = search.getStemming();
    }
    annotatorConfig.setStemMode(stemming.toStemMode());

    boolean removeAccents = field.getNormalizing().doRemoveAccents();
    annotatorConfig.setRemoveAccents(removeAccents);

    // Leave the max token length untouched unless matching explicitly provides one.
    if (field.getMatching() != null) {
        if (field.getMatching().maxLength() != null) {
            annotatorConfig.setMaxTokenLength(field.getMatching().maxLength());
        }
    }
    return annotatorConfig;
}
Also used : AnnotatorConfig(com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig) Stemming(com.yahoo.searchdefinition.document.Stemming)

Example 2 with AnnotatorConfig

use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.

the class IndexingOperation method fromStream.

/**
 * Creates an {@link IndexingOperation} from indexing language text read through
 * the given character stream.
 *
 * @param input       the stream handed to the indexing language parser context
 * @param multiLine   if true the input is turned into a {@link ScriptExpression}
 *                    directly; otherwise a single {@link StatementExpression} is
 *                    created and wrapped in a script
 * @param linguistics the linguistics used to create the parser context
 * @return the operation wrapping the resulting script expression
 * @throws ParseException if the indexing language parser reports an error; the
 *                        parser's exception is attached as the cause
 */
public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine, Linguistics linguistics) throws ParseException {
    ScriptParserContext parserContext = new ScriptParserContext(linguistics);
    parserContext.setAnnotatorConfig(new AnnotatorConfig());
    parserContext.setInputStream(input);

    ScriptExpression script;
    try {
        script = multiLine
                ? ScriptExpression.newInstance(parserContext)
                : new ScriptExpression(StatementExpression.newInstance(parserContext));
    } catch (com.yahoo.vespa.indexinglanguage.parser.ParseException parserFailure) {
        // Translate the parser's exception type into this API's ParseException, keeping the cause.
        ParseException wrapped = new ParseException("Error reported by IL parser: " + parserFailure.getMessage());
        wrapped.initCause(parserFailure);
        throw wrapped;
    }
    return new IndexingOperation(script);
}
Also used : AnnotatorConfig(com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig) ParseException(com.yahoo.searchdefinition.parser.ParseException) ScriptParserContext(com.yahoo.vespa.indexinglanguage.ScriptParserContext) ScriptExpression(com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression)

Example 3 with AnnotatorConfig

use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.

the class ExpressionVisitorTestCase method requireThatAllExpressionsAreVisited.

/**
 * Checks the count reported by {@code assertCount} for one instance of each
 * expression type. The assertions are grouped by expected count; within a
 * group they are independent of each other.
 */
@SuppressWarnings("unchecked")
@Test
public void requireThatAllExpressionsAreVisited() {
    // Expected count 1: expressions constructed without any nested expression.
    assertCount(1, new AttributeExpression("foo"));
    assertCount(1, new Base64DecodeExpression());
    assertCount(1, new Base64EncodeExpression());
    assertCount(1, new ClearStateExpression());
    assertCount(1, new EchoExpression());
    assertCount(1, new GetFieldExpression("foo"));
    assertCount(1, new GetVarExpression("foo"));
    assertCount(1, new HexDecodeExpression());
    assertCount(1, new HexEncodeExpression());
    assertCount(1, new HostNameExpression());
    assertCount(1, new IndexExpression("foo"));
    assertCount(1, new InputExpression("foo"));
    assertCount(1, new JoinExpression("foo"));
    assertCount(1, new LowerCaseExpression());
    assertCount(1, new NormalizeExpression(new SimpleLinguistics()));
    assertCount(1, new NowExpression());
    assertCount(1, new OptimizePredicateExpression());
    assertCount(1, new RandomExpression(69));
    assertCount(1, new SetLanguageExpression());
    assertCount(1, new SetValueExpression(new IntegerFieldValue(69)));
    assertCount(1, new SetVarExpression("foo"));
    assertCount(1, new SplitExpression("foo"));
    assertCount(1, new SummaryExpression("foo"));
    assertCount(1, new SubstringExpression(6, 9));
    assertCount(1, new ThisExpression());
    assertCount(1, new ToArrayExpression());
    assertCount(1, new ToByteExpression());
    assertCount(1, new ToDoubleExpression());
    assertCount(1, new ToFloatExpression());
    assertCount(1, new ToIntegerExpression());
    assertCount(1, new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig()));
    assertCount(1, new ToLongExpression());
    assertCount(1, new ToPositionExpression());
    assertCount(1, new ToStringExpression());
    assertCount(1, new ToWsetExpression(false, false));
    assertCount(1, new TrimExpression());
    assertCount(1, new ZCurveExpression());

    // Expected count 2: a wrapper around a single nested expression.
    assertCount(2, new ForEachExpression(new IndexExpression("foo")));
    assertCount(2, new GuardExpression(new IndexExpression("foo")));
    assertCount(2, new ParenthesisExpression(new InputExpression("foo")));
    assertCount(2, new StatementExpression(new InputExpression("foo")));

    // Expected count 3: an outer expression plus two nested ones (or two levels of nesting).
    assertCount(3, new ArithmeticExpression(new InputExpression("foo"), ArithmeticExpression.Operator.ADD, new InputExpression("bar")));
    assertCount(3, new CatExpression(new InputExpression("foo"), new IndexExpression("bar")));
    assertCount(3, new ScriptExpression(new StatementExpression(new InputExpression("foo"))));
    assertCount(3, new SelectInputExpression(new Pair<String, Expression>("foo", new IndexExpression("bar")), new Pair<String, Expression>("bar", new IndexExpression("foo"))));
    assertCount(3, new SwitchExpression(Collections.singletonMap("foo", (Expression) new IndexExpression("bar")), new InputExpression("baz")));

    // Expected count 5: the if-then expression with its four nested expressions.
    assertCount(5, new IfThenExpression(new InputExpression("foo"), IfThenExpression.Comparator.EQ, new InputExpression("bar"), new IndexExpression("baz"), new IndexExpression("cox")));
}
Also used : SummaryExpression(com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression) SwitchExpression(com.yahoo.vespa.indexinglanguage.expressions.SwitchExpression) AnnotatorConfig(com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig) StatementExpression(com.yahoo.vespa.indexinglanguage.expressions.StatementExpression) IntegerFieldValue(com.yahoo.document.datatypes.IntegerFieldValue) OptimizePredicateExpression(com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression) SplitExpression(com.yahoo.vespa.indexinglanguage.expressions.SplitExpression) ToPositionExpression(com.yahoo.vespa.indexinglanguage.expressions.ToPositionExpression) ThisExpression(com.yahoo.vespa.indexinglanguage.expressions.ThisExpression) ToByteExpression(com.yahoo.vespa.indexinglanguage.expressions.ToByteExpression) TokenizeExpression(com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression) IfThenExpression(com.yahoo.vespa.indexinglanguage.expressions.IfThenExpression) ToStringExpression(com.yahoo.vespa.indexinglanguage.expressions.ToStringExpression) SubstringExpression(com.yahoo.vespa.indexinglanguage.expressions.SubstringExpression) EchoExpression(com.yahoo.vespa.indexinglanguage.expressions.EchoExpression) SetVarExpression(com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression) HexDecodeExpression(com.yahoo.vespa.indexinglanguage.expressions.HexDecodeExpression) SelectInputExpression(com.yahoo.vespa.indexinglanguage.expressions.SelectInputExpression) InputExpression(com.yahoo.vespa.indexinglanguage.expressions.InputExpression) AttributeExpression(com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression) ScriptExpression(com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression) SimpleLinguistics(com.yahoo.language.simple.SimpleLinguistics) ToArrayExpression(com.yahoo.vespa.indexinglanguage.expressions.ToArrayExpression) SetLanguageExpression(com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression) 
LowerCaseExpression(com.yahoo.vespa.indexinglanguage.expressions.LowerCaseExpression) ToIntegerExpression(com.yahoo.vespa.indexinglanguage.expressions.ToIntegerExpression) ForEachExpression(com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression) SelectInputExpression(com.yahoo.vespa.indexinglanguage.expressions.SelectInputExpression) ToWsetExpression(com.yahoo.vespa.indexinglanguage.expressions.ToWsetExpression) Base64DecodeExpression(com.yahoo.vespa.indexinglanguage.expressions.Base64DecodeExpression) IndexExpression(com.yahoo.vespa.indexinglanguage.expressions.IndexExpression) GetFieldExpression(com.yahoo.vespa.indexinglanguage.expressions.GetFieldExpression) RandomExpression(com.yahoo.vespa.indexinglanguage.expressions.RandomExpression) TrimExpression(com.yahoo.vespa.indexinglanguage.expressions.TrimExpression) JoinExpression(com.yahoo.vespa.indexinglanguage.expressions.JoinExpression) HexEncodeExpression(com.yahoo.vespa.indexinglanguage.expressions.HexEncodeExpression) Pair(com.yahoo.collections.Pair) SetValueExpression(com.yahoo.vespa.indexinglanguage.expressions.SetValueExpression) NowExpression(com.yahoo.vespa.indexinglanguage.expressions.NowExpression) ToDoubleExpression(com.yahoo.vespa.indexinglanguage.expressions.ToDoubleExpression) ToLongExpression(com.yahoo.vespa.indexinglanguage.expressions.ToLongExpression) ToFloatExpression(com.yahoo.vespa.indexinglanguage.expressions.ToFloatExpression) Base64EncodeExpression(com.yahoo.vespa.indexinglanguage.expressions.Base64EncodeExpression) CatExpression(com.yahoo.vespa.indexinglanguage.expressions.CatExpression) GuardExpression(com.yahoo.vespa.indexinglanguage.expressions.GuardExpression) ParenthesisExpression(com.yahoo.vespa.indexinglanguage.expressions.ParenthesisExpression) NormalizeExpression(com.yahoo.vespa.indexinglanguage.expressions.NormalizeExpression) ArithmeticExpression(com.yahoo.vespa.indexinglanguage.expressions.ArithmeticExpression) 
HostNameExpression(com.yahoo.vespa.indexinglanguage.expressions.HostNameExpression) GetVarExpression(com.yahoo.vespa.indexinglanguage.expressions.GetVarExpression) ClearStateExpression(com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression) ZCurveExpression(com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression) Test(org.junit.Test)

Example 4 with AnnotatorConfig

use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.

the class TokenizeExpression method doExecute.

/**
 * Replaces the context's current string value with a clone of it and runs a
 * {@link LinguisticsAnnotator} over that clone.
 *
 * <p>The annotator is given a new {@link AnnotatorConfig} created from this
 * expression's {@code config}; when the context resolves a non-null language,
 * that language is set on the new config first.
 *
 * @param context the execution context holding the value to annotate
 */
@Override
protected void doExecute(ExecutionContext context) {
    StringFieldValue original = (StringFieldValue) context.getValue();

    // Work on a clone so the value originally held by the context is not annotated in place.
    StringFieldValue annotated = original.clone();
    context.setValue(annotated);

    AnnotatorConfig effectiveConfig = new AnnotatorConfig(config);
    Language resolved = context.resolveLanguage(linguistics);
    if (resolved != null) {
        effectiveConfig.setLanguage(resolved);
    }

    new LinguisticsAnnotator(linguistics, effectiveConfig).annotate(annotated);
}
Also used : AnnotatorConfig(com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig) Language(com.yahoo.language.Language) StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) LinguisticsAnnotator(com.yahoo.vespa.indexinglanguage.linguistics.LinguisticsAnnotator)

Example 5 with AnnotatorConfig

use of com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig in project vespa by vespa-engine.

the class TokenizeTestCase method requireThatHashCodeAndEqualsAreImplemented.

/**
 * Exercises {@code equals} and {@code hashCode} of {@link TokenizeExpression}
 * against an unrelated object, expressions built with different arguments,
 * and expressions built with the same config.
 */
@Test
public void requireThatHashCodeAndEqualsAreImplemented() {
    AnnotatorConfig arabic = new AnnotatorConfig().setLanguage(Language.ARABIC);
    Expression subject = new TokenizeExpression(new SimpleLinguistics(), arabic);

    // Not equal to an object of an unrelated type.
    assertFalse(subject.equals(new Object()));

    // Not equal when built from a mocked linguistics and a Spanish config.
    Expression mockedSpanish = new TokenizeExpression(Mockito.mock(Linguistics.class), new AnnotatorConfig().setLanguage(Language.SPANISH));
    assertFalse(subject.equals(mockedSpanish));

    // Not equal when only the configured language differs.
    Expression simpleSpanish = new TokenizeExpression(new SimpleLinguistics(), new AnnotatorConfig().setLanguage(Language.SPANISH));
    assertFalse(subject.equals(simpleSpanish));

    // Equal, with matching hash codes, when built with the same config.
    Expression sameConfig = new TokenizeExpression(new SimpleLinguistics(), arabic);
    assertEquals(subject, sameConfig);
    Expression sameConfigForHash = new TokenizeExpression(new SimpleLinguistics(), arabic);
    assertEquals(subject.hashCode(), sameConfigForHash.hashCode());
}
Also used : SimpleLinguistics(com.yahoo.language.simple.SimpleLinguistics) AnnotatorConfig(com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig) Linguistics(com.yahoo.language.Linguistics) Test(org.junit.Test)

Aggregations

AnnotatorConfig (com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig)9 SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics)6 Test (org.junit.Test)5 ScriptExpression (com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression)3 Pair (com.yahoo.collections.Pair)2 IntegerFieldValue (com.yahoo.document.datatypes.IntegerFieldValue)2 StringFieldValue (com.yahoo.document.datatypes.StringFieldValue)2 Linguistics (com.yahoo.language.Linguistics)2 ArithmeticExpression (com.yahoo.vespa.indexinglanguage.expressions.ArithmeticExpression)2 AttributeExpression (com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression)2 Base64DecodeExpression (com.yahoo.vespa.indexinglanguage.expressions.Base64DecodeExpression)2 Base64EncodeExpression (com.yahoo.vespa.indexinglanguage.expressions.Base64EncodeExpression)2 CatExpression (com.yahoo.vespa.indexinglanguage.expressions.CatExpression)2 ClearStateExpression (com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression)2 EchoExpression (com.yahoo.vespa.indexinglanguage.expressions.EchoExpression)2 ForEachExpression (com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression)2 GetFieldExpression (com.yahoo.vespa.indexinglanguage.expressions.GetFieldExpression)2 GetVarExpression (com.yahoo.vespa.indexinglanguage.expressions.GetVarExpression)2 GuardExpression (com.yahoo.vespa.indexinglanguage.expressions.GuardExpression)2 HexDecodeExpression (com.yahoo.vespa.indexinglanguage.expressions.HexDecodeExpression)2