Search in sources :

Example 1 with Substring

Use of Substring in project vespa by vespa-engine.

the class YqlParser method wordStyleSettings.

/**
 * Reads the word-level annotations of {@code ast}: the origin substring plus the
 * USE_POSITION_DATA, STEM, NORMALIZE_CASE, ACCENT_DROP and AND_SEGMENTING flags.
 * Every flag is looked up with {@code null} as the fourth argument (presumably the
 * default value - confirm against getAnnotation) and is only acted on when the
 * lookup returns a non-null value.
 *
 * NOTE(review): this excerpt is truncated. The bodies of the {@code if} statements
 * and all closing braces are missing, so how each value is applied to {@code out}
 * cannot be determined from here.
 *
 * @param ast the node whose annotations are read
 * @param out the word item being configured; its use is not visible in this excerpt
 */
private void wordStyleSettings(OperatorNode<ExpressionOperator> ast, WordItem out) {
    Substring origin = getOrigin(ast);
    // The origin is only used when one was found.
    if (origin != null) {
    Boolean usePositionData = getAnnotation(ast, USE_POSITION_DATA, Boolean.class, null, USE_POSITION_DATA_DESCRIPTION);
    if (usePositionData != null) {
    Boolean stem = getAnnotation(ast, STEM, Boolean.class, null, STEM_DESCRIPTION);
    if (stem != null) {
    Boolean normalizeCase = getAnnotation(ast, NORMALIZE_CASE, Boolean.class, null, NORMALIZE_CASE_DESCRIPTION);
    if (normalizeCase != null) {
    Boolean accentDrop = getAnnotation(ast, ACCENT_DROP, Boolean.class, null, ACCENT_DROP_DESCRIPTION);
    if (accentDrop != null) {
    Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, null, "setting for whether to force using AND for segments on and off");
    if (andSegmenting != null) {
        // Three-way handling: absent (skipped above), true, or false.
        if (andSegmenting) {
        } else {
Also used : Substring(

Example 2 with Substring

Use of Substring in project vespa by vespa-engine.

the class YqlParser method instantiateWordAlternativesItem.

/**
 * Builds a {@code WordAlternativesItem} for {@code field} from the MAP node found
 * as the first element of argument 1 of {@code ast}. Each map key becomes an
 * alternative term and the matching value, which must be a LITERAL node, supplies
 * that term's exactness as a double.
 *
 * NOTE(review): the closing braces of the loop and of the method are missing in
 * this excerpt.
 *
 * @param field the field name passed to the created item
 * @param ast   the node holding the alternatives map and the annotations
 * @return the result of passing the new item through {@code leafStyleSettings}
 */
private Item instantiateWordAlternativesItem(String field, OperatorNode<ExpressionOperator> ast) {
    List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1);
    // Require at least one argument, and require the first one to be a MAP.
    Preconditions.checkArgument(args.size() >= 1, "Expected 1 or more arguments, got %s.", args.size());
    Preconditions.checkArgument(args.get(0).getOperator() == ExpressionOperator.MAP, "Expected MAP, got %s.", args.get(0).getOperator());
    List<WordAlternativesItem.Alternative> terms = new ArrayList<>();
    // Argument 0 of the MAP holds the keys, argument 1 the value nodes.
    List<String> keys = args.get(0).getArgument(0);
    List<OperatorNode<ExpressionOperator>> values = args.get(0).getArgument(1);
    // Assumes values has at least as many entries as keys - TODO confirm.
    for (int i = 0; i < keys.size(); ++i) {
        OperatorNode<ExpressionOperator> value = values.get(i);
        // Only literal values are accepted as exactness.
        if (value.getOperator() != ExpressionOperator.LITERAL)
            throw newUnexpectedArgumentException(value.getOperator(), ExpressionOperator.LITERAL);
        String term = keys.get(i);
        double exactness = value.getArgument(0, Double.class);
        terms.add(new WordAlternativesItem.Alternative(term, exactness));
    Substring origin = getOrigin(ast);
    // IMPLICIT_TRANSFORMS is requested with Boolean.TRUE as the fourth argument
    // (presumably the value used when the annotation is absent - confirm).
    Boolean isFromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION);
    return leafStyleSettings(ast, new WordAlternativesItem(field, isFromQuery, origin, terms));
Also used : Substring( ArrayList(java.util.ArrayList) WordAlternativesItem(

Example 3 with Substring

Use of Substring in project vespa by vespa-engine.

the class YqlParser method instantiatePhraseSegmentItem.

/**
 * Creates a segment item for {@code ast}: a {@code PhraseSegmentItem} when
 * {@code forcePhrase} is set or AND segmenting is off, otherwise an
 * {@code AndSegmentItem}. The item is then filled either with one {@code WordItem}
 * per segment produced by the segmenter, or with one word item per child node of
 * argument 1 of {@code ast}.
 *
 * NOTE(review): closing braces are missing in this excerpt, and other statements
 * may have been lost with them.
 *
 * NOTE(review): {@code origin} is dereferenced below without a null check, while
 * other methods in this listing check the result of {@code getOrigin} for null
 * before using it. Confirm that an origin is guaranteed for these nodes.
 *
 * @param field       the field name given to the created word items
 * @param ast         the node describing the phrase segment
 * @param forcePhrase when true, a phrase segment is built regardless of the
 *                    AND_SEGMENTING annotation
 * @return the populated segment item
 */
private Item instantiatePhraseSegmentItem(String field, OperatorNode<ExpressionOperator> ast, boolean forcePhrase) {
    Substring origin = getOrigin(ast);
    Boolean stem = getAnnotation(ast, STEM, Boolean.class, Boolean.TRUE, STEM_DESCRIPTION);
    Boolean andSegmenting = getAnnotation(ast, AND_SEGMENTING, Boolean.class, Boolean.FALSE, "setting for whether to force using AND for segments on and off");
    SegmentItem phrase;
    List<String> words = null;
    // Both constructors receive the negated stem flag as their last boolean.
    if (forcePhrase || !andSegmenting) {
        phrase = new PhraseSegmentItem(origin.getValue(), origin.getValue(), true, !stem, origin);
    } else {
        phrase = new AndSegmentItem(origin.getValue(), true, !stem);
    // Re-segment the origin text only when the 'resegment' flag (defined outside
    // this excerpt) is set and the IMPLICIT_TRANSFORMS annotation is true.
    if (resegment && getAnnotation(ast, IMPLICIT_TRANSFORMS, Boolean.class, Boolean.TRUE, IMPLICIT_TRANSFORMS_DESCRIPTION)) {
        words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
    // Use the fresh segments when there are any; otherwise fall back to the
    // word nodes already present in the AST.
    if (words != null && words.size() > 0) {
        for (String word : words) {
            phrase.addItem(new WordItem(word, field, true));
    } else {
        for (OperatorNode<ExpressionOperator> word : ast.<List<OperatorNode<ExpressionOperator>>>getArgument(1)) {
            phrase.addItem(instantiateWordItem(field, word, phrase.getClass(), SegmentWhen.NEVER));
    // Leaf style settings are applied only when the item is a TaggableItem.
    if (phrase instanceof TaggableItem) {
        leafStyleSettings(ast, (TaggableItem) phrase);
    return phrase;
Also used : Substring( TaggableItem( AndSegmentItem( List(java.util.List) ArrayList(java.util.ArrayList) WordItem( PhraseSegmentItem( PhraseSegmentItem( AndSegmentItem( SegmentItem( NonNull(edu.umd.cs.findbugs.annotations.NonNull)

Example 4 with Substring

Use of Substring in project vespa by vespa-engine.

the class YqlParser method resegment.

/**
 * Segments the text of a word and wraps the result in a single item.
 * The text segmented is the origin value of {@code ast} when an origin exists,
 * otherwise {@code wordData}.
 *
 * Outcome by number of segments:
 * - none: a {@code WordItem} holding the unsegmented {@code wordData};
 * - exactly one, or {@code phraseArgumentSupported(parent)} is false: a
 *   {@code WordItem} holding the first segment only;
 * - otherwise: a {@code PhraseSegmentItem} on {@code field} holding one prepared
 *   {@code WordItem} per segment, locked before it is returned.
 *
 * NOTE(review): closing braces are missing in this excerpt.
 *
 * @param field     the index name set on the phrase and passed to prepareWord
 * @param ast       the node supplying the origin and passed to prepareWord
 * @param wordData  the word text, used when there is no origin or no segments
 * @param fromQuery flag forwarded to every created item
 * @param parent    the type checked by {@code phraseArgumentSupported}
 * @param language  the segmentation language; when null the language of
 *                  {@code currentlyParsing} is used
 * @return the word or phrase segment item built from the segments
 */
private TaggableItem resegment(String field, OperatorNode<ExpressionOperator> ast, String wordData, boolean fromQuery, Class<?> parent, Language language) {
    String toSegment = wordData;
    Substring s = getOrigin(ast);
    Language usedLanguage = language == null ? currentlyParsing.getLanguage() : language;
    // Prefer the origin text over wordData when an origin is present.
    if (s != null) {
        toSegment = s.getValue();
    List<String> words = segmenter.segment(toSegment, usedLanguage);
    TaggableItem wordItem;
    if (words.size() == 0) {
        // No segments: keep the original word data as is.
        wordItem = new WordItem(wordData, fromQuery);
    } else if (words.size() == 1 || !phraseArgumentSupported(parent)) {
        // Any segments after the first are not used in this branch.
        wordItem = new WordItem(words.get(0), fromQuery);
    } else {
        wordItem = new PhraseSegmentItem(toSegment, fromQuery, false);
        ((PhraseSegmentItem) wordItem).setIndexName(field);
        for (String w : words) {
            WordItem segment = new WordItem(w, fromQuery);
            prepareWord(field, ast, fromQuery, segment);
            ((PhraseSegmentItem) wordItem).addItem(segment);
        // lock() is called once all segments have been added.
        ((PhraseSegmentItem) wordItem).lock();
    return wordItem;
Also used : Substring( TaggableItem( Language( WordItem( PhraseSegmentItem( NonNull(edu.umd.cs.findbugs.annotations.NonNull)

Example 5 with Substring

Use of Substring in project vespa by vespa-engine.

the class YqlParserTestCase method testTermAnnotations.

/**
 * Checks that annotations attached to a term in a YQL query string are exposed by
 * the root word returned from {@code getRootWord}: label, custom annotation,
 * significance, unique id, weight, position-data flag, filter flag, ranked flag
 * and origin.
 *
 * NOTE(review): the closing brace of the method is missing in this excerpt.
 */
public void testTermAnnotations() {
    assertEquals("merkelapp", getRootWord("select foo from bar where baz contains " + "([ {\"label\": \"merkelapp\"} ]\"colors\");").getLabel());
    assertEquals("another", getRootWord("select foo from bar where baz contains " + "([ {\"annotations\": {\"cox\": \"another\"}} ]\"colors\");").getAnnotation("cox"));
    // Significance is compared as a double, with 1E-6 as the third argument.
    assertEquals(23.0, getRootWord("select foo from bar where baz contains " + "([ {\"significance\": 23.0} ]\"colors\");").getSignificance(), 1E-6);
    assertEquals(23, getRootWord("select foo from bar where baz contains " + "([ {\"id\": 23} ]\"colors\");").getUniqueID());
    assertEquals(150, getRootWord("select foo from bar where baz contains " + "([ {\"weight\": 150} ]\"colors\");").getWeight());
    assertFalse(getRootWord("select foo from bar where baz contains " + "([ {\"usePositionData\": false} ]\"colors\");").usePositionData());
    assertTrue(getRootWord("select foo from bar where baz contains " + "([ {\"filter\": true} ]\"colors\");").isFilter());
    assertFalse(getRootWord("select foo from bar where baz contains " + "([ {\"ranked\": false} ]\"colors\");").isRanked());
    // The origin annotation gives original "abc", offset 1 and length 2; these are
    // expected as string "abc", start 1 and end 3 (presumably end = offset + length).
    Substring origin = getRootWord("select foo from bar where baz contains " + "([ {\"origin\": {\"original\": \"abc\", \"offset\": 1, \"length\": 2}} ]" + "\"colors\");").getOrigin();
    assertEquals("abc", origin.string);
    assertEquals(1, origin.start);
    assertEquals(3, origin.end);
Also used : Substring( Test(org.junit.Test)


Substring ( WordAlternativesItem ( WordItem ( ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 PhraseSegmentItem ( TaggableItem ( NonNull (edu.umd.cs.findbugs.annotations.NonNull)2 Language ( AndItem ( AndSegmentItem ( ExactStringItem ( IndexedItem ( Item ( PhraseItem ( PrefixItem ( RegExpItem ( SegmentItem ( SubstringItem ( SuffixItem (