Search in sources :

Example 1 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class LiteralBoostSearcher method addLiterals.

/**
 * Recursively walks the query tree rooted at {@code item} and, for each term whose index
 * reports literal boost, adds a lowercased copy of the term's raw word to {@code rankTerms},
 * targeting the index named {@code <index name>_literal}.
 *
 * @param rankTerms  the rank item receiving the generated literal terms
 * @param item       the root of the (sub)tree to inspect; {@code null} is ignored
 * @param indexFacts the session used to look up the index of each term
 */
private void addLiterals(RankItem rankTerms, Item item, IndexFacts.Session indexFacts) {
    // NotItem is tested before CompositeItem so that only its positive item is visited
    if (item instanceof NotItem) {
        addLiterals(rankTerms, ((NotItem) item).getPositiveItem(), indexFacts);
        return;
    }
    if (item instanceof CompositeItem) {
        Iterator<Item> children = ((CompositeItem) item).getItemIterator();
        while (children.hasNext()) {
            addLiterals(rankTerms, children.next(), indexFacts);
        }
        return;
    }
    if (!(item instanceof TermItem)) {
        // Covers null as well as any item kind that is not a term
        return;
    }
    TermItem term = (TermItem) item;
    Index termIndex = indexFacts.getIndex(term.getIndexName());
    if (termIndex.getLiteralBoost()) {
        String lowercasedWord = toLowerCase(term.getRawWord());
        String literalIndexName = termIndex.getName() + "_literal";
        rankTerms.addItem(new WordItem(lowercasedWord, literalIndexName));
    }
}
Also used: CompositeItem(com.yahoo.prelude.query.CompositeItem), NotItem(com.yahoo.prelude.query.NotItem), Iterator(java.util.Iterator), Index(com.yahoo.prelude.Index), WordItem(com.yahoo.prelude.query.WordItem), TermItem(com.yahoo.prelude.query.TermItem)

Example 2 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class StemmingSearcher method checkBlock.

/**
 * Returns the stemmed form of the given block when it qualifies for stemming,
 * and the block itself otherwise.
 * <p>
 * A block is handed to {@code stem} only when it is not a {@code PrefixItem}, consists of words,
 * comes from the query, is not already stemmed, and its index has a stem mode other than
 * {@code StemMode.NONE}.
 *
 * @param b                   the block to inspect
 * @param isCJK               forwarded unchanged to {@code stem}
 * @param language            forwarded unchanged to {@code stem}
 * @param indexFacts          the session used to look up the index of the block
 * @param reverseConnectivity forwarded unchanged to {@code stem}
 * @return the result of {@code stem}, or {@code b} cast to {@code Item}
 */
private Item checkBlock(BlockItem b, boolean isCJK, Language language, IndexFacts.Session indexFacts, Map<Item, TaggableItem> reverseConnectivity) {
    if (b instanceof PrefixItem) {
        return (Item) b;
    }
    if (!b.isWords()) {
        return (Item) b;
    }
    if (!b.isFromQuery() || b.isStemmed()) {
        return (Item) b;
    }
    Index blockIndex = indexFacts.getIndex(b.getIndexName());
    if (blockIndex.getStemMode() == StemMode.NONE) {
        return (Item) b;
    }
    return stem(b, isCJK, language, reverseConnectivity, blockIndex);
}
Also used : Index(com.yahoo.prelude.Index) StemMode(com.yahoo.language.process.StemMode)

Example 3 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class JuniperSearcher method highlight.

/**
 * Inserts highlighting tags into the fields of the given hits whose index is configured
 * with a dynamic or highlight summary.
 * <p>
 * Hits are skipped when they are not {@code FastHit}s, when {@code summaryClass} is non-null and
 * the hit is not filled with it, or when the hit has no {@code MAGIC_FIELD} value. The string form
 * of that field value is passed to the index lookup together with each field name.
 *
 * @param bolding         forwarded unchanged to {@code insertTags}
 * @param hitsToHighlight the hits to process; the iterator is consumed
 * @param summaryClass    the summary class a hit must be filled with, or {@code null} for no such check
 * @param indexFacts      the session used to look up the index of each field
 */
private void highlight(boolean bolding, Iterator<Hit> hitsToHighlight, String summaryClass, IndexFacts.Session indexFacts) {
    while (hitsToHighlight.hasNext()) {
        Hit candidate = hitsToHighlight.next();
        boolean eligible = candidate instanceof FastHit
                && (summaryClass == null || ((FastHit) candidate).isFilled(summaryClass));
        if (!eligible) {
            continue;
        }
        FastHit fastHit = (FastHit) candidate;
        Object definitionField = fastHit.getField(MAGIC_FIELD);
        if (definitionField != null) {
            String definitionName = definitionField.toString();
            for (String fieldName : fastHit.fields().keySet()) {
                Index fieldIndex = indexFacts.getIndex(fieldName, definitionName);
                if (!fieldIndex.getDynamicSummary() && !fieldIndex.getHighlightSummary()) {
                    continue;
                }
                insertTags(fastHit.buildHitField(fieldName, true, true), bolding, fieldIndex.getDynamicSummary());
            }
        }
    }
}
Also used : FastHit(com.yahoo.prelude.fastsearch.FastHit) Hit(com.yahoo.search.result.Hit) Index(com.yahoo.prelude.Index)

Example 4 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class AbstractParser method generateLanguageDetectionTextFrom.

/**
 * Do a best-effort attempt at creating a single string for language detection from only the relevant
 * subset of tokens.
 * The relevant tokens are text tokens which follow names of indexes which are tokenized.
 * <p>
 * Three shapes are recognized, starting at each word token:
 * <ul>
 *   <li>{@code word} - attributed to the default index</li>
 *   <li>{@code word1:word2} - {@code word2} is attributed to the index named {@code word1}</li>
 *   <li>{@code word1:"w2 w3 ..."} - the quoted words are attributed to the index named {@code word1}</li>
 * </ul>
 * If {@code word1} does not name a known index, it is treated as text itself and everything is
 * attributed to the default index. Text is included only when its index has plain tokens.
 *
 * This method does not modify the position of the given token stream.
 *
 * @param tokens       the token stream to read; its position is restored before returning
 * @param indexFacts   the session used to look up indexes by name
 * @param defaultIndex the name of the index to use for text without a (known) index prefix
 * @return the collected words separated (and followed) by single spaces, possibly empty
 */
private String generateLanguageDetectionTextFrom(TokenPosition tokens, IndexFacts.Session indexFacts, String defaultIndex) {
    StringBuilder detectionText = new StringBuilder();
    int initialPosition = tokens.getPosition();
    while (tokens.hasNext()) {
        // look for occurrences of text, text:text and text:"text text"
        while (!tokens.currentIs(Token.Kind.WORD) && tokens.hasNext()) { // skip nonwords
            tokens.next();
        }
        if (!tokens.hasNext())
            break;
        String queryText;
        Index index;
        Token word1 = tokens.next();
        if (is(Token.Kind.COLON, tokens.currentNoIgnore())) {
            // The quoted form must be tested before the plain word:word form: both start with a
            // colon, so testing the plain form first makes the quoted form unreachable.
            if (is(Token.Kind.QUOTE, tokens.currentNoIgnore(1))) {
                // colon
                tokens.next();
                // opening quote
                tokens.next();
                StringBuilder quotedContent = new StringBuilder();
                while (!tokens.currentIs(Token.Kind.QUOTE) && tokens.hasNext()) {
                    Token token = tokens.next();
                    if (is(Token.Kind.WORD, token))
                        quotedContent.append(token.image).append(" ");
                }
                // closing quote (or whatever is current if the phrase was never closed)
                tokens.next();
                queryText = quotedContent.toString();
            } else {
                // colon
                tokens.next();
                Token word2 = tokens.next();
                queryText = is(Token.Kind.WORD, word2) ? word2.image : "";
            }
            index = indexFacts.getIndex(word1.image);
            if (index.isNull()) {
                // word1 is not an index name: interpret it as a word too
                index = indexFacts.getIndex(defaultIndex);
                queryText = word1.image + " " + queryText;
            }
        } else {
            index = indexFacts.getIndex(defaultIndex);
            queryText = word1.image;
        }
        if (queryText != null && index.hasPlainTokens())
            detectionText.append(queryText).append(" ");
    }
    tokens.setPosition(initialPosition);
    return detectionText.toString();
}
Also used : Index(com.yahoo.prelude.Index)

Example 5 with Index

use of com.yahoo.prelude.Index in project vespa by vespa-engine.

the class Tokenizer method tokenize.

/**
 * Resets this tokenizer and create tokens from the given string.
 * <p>
 * The string is scanned one position at a time. Word and number runs, exact-index content and
 * special tokens are consumed by dedicated helpers; whitespace and the recognized punctuation
 * characters (ASCII and the listed Unicode variants) each become a single token with a
 * normalized ASCII image; anything else becomes a {@code NOISE} token. An {@code EOF} token is
 * always appended last.
 *
 * @param string the string to tokenize
 * @param defaultIndexName the name of the index to use as default
 * @param indexFacts information about the indexes we will search
 * @return a read-only list of tokens. This list can only be used by this thread
 */
@SuppressWarnings({ "deprecation" }) // To avoid this we need to pass an IndexFacts.session down instead - easily done but not without breaking API's
public List<Token> tokenize(String string, String defaultIndexName, IndexFacts.Session indexFacts) {
    this.source = string;
    tokens.clear();
    parensToEat = 0;
    Index defaultIndex = indexFacts.getIndex(defaultIndexName);
    Index topLevelIndex = (defaultIndexName == null) ? Index.nullIndex : defaultIndex;
    Index currentIndex = topLevelIndex;
    for (int i = 0; i < source.length(); i++) {
        if (currentIndex.isExact()) {
            // currentIndex may change after seeing a colon below
            i = consumeExact(i, currentIndex);
            currentIndex = topLevelIndex;
        } else {
            i = consumeSpecialToken(i);
        }
        if (i >= source.length()) {
            break;
        }
        int c = source.codePointAt(i);
        // The two predicate checks stay ahead of the character table so they keep precedence
        if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
            i = consumeWordOrNumber(i, currentIndex);
        } else if (Character.isWhitespace(c)) {
            addToken(SPACE, " ", i, i + 1);
        } else {
            switch (c) {
                case '"':
                case '\u201C':
                case '\u201D':
                case '\u201E':
                case '\u201F':
                case '\u2039':
                case '\u203A':
                case '\u00AB':
                case '\u00BB':
                case '\u301D':
                case '\u301E':
                case '\u301F':
                case '\uFF02':
                    addToken(QUOTE, "\"", i, i + 1);
                    break;
                case '-':
                case '\uFF0D':
                    addToken(MINUS, "-", i, i + 1);
                    break;
                case '+':
                case '\uFF0B':
                    addToken(PLUS, "+", i, i + 1);
                    break;
                case '.':
                case '\uFF0E':
                    addToken(DOT, ".", i, i + 1);
                    break;
                case ',':
                case '\uFF0C':
                    addToken(COMMA, ",", i, i + 1);
                    break;
                case ':':
                case '\uFF1A':
                    // The index in effect is re-evaluated before the colon token is added
                    currentIndex = determineCurrentIndex(defaultIndex, indexFacts);
                    addToken(COLON, ":", i, i + 1);
                    break;
                case '(':
                case '\uFF08':
                    addToken(LBRACE, "(", i, i + 1);
                    parensToEat++;
                    break;
                case ')':
                case '\uFF09':
                    addToken(RBRACE, ")", i, i + 1);
                    // Never let the counter go below zero on unbalanced closing parentheses
                    if (--parensToEat < 0) {
                        parensToEat = 0;
                    }
                    break;
                case '[':
                case '\uFF3B':
                    addToken(LSQUAREBRACKET, "[", i, i + 1);
                    break;
                case ']':
                case '\uFF3D':
                    addToken(RSQUAREBRACKET, "]", i, i + 1);
                    break;
                case ';':
                case '\uFF1B':
                    addToken(SEMICOLON, ";", i, i + 1);
                    break;
                case '>':
                case '\uFF1E':
                    addToken(GREATER, ">", i, i + 1);
                    break;
                case '<':
                case '\uFF1C':
                    addToken(SMALLER, "<", i, i + 1);
                    break;
                case '!':
                case '\uFF01':
                    addToken(EXCLAMATION, "!", i, i + 1);
                    break;
                case '_':
                case '\uFF3F':
                    addToken(UNDERSCORE, "_", i, i + 1);
                    break;
                case '^':
                case '\uFF3E':
                    addToken(HAT, "^", i, i + 1);
                    break;
                case '*':
                case '\uFF0A':
                    addToken(STAR, "*", i, i + 1);
                    break;
                case '$':
                case '\uFF04':
                    addToken(DOLLAR, "$", i, i + 1);
                    break;
                default:
                    addToken(NOISE, "<NOISE>", i, i + 1);
                    break;
            }
        }
    }
    addToken(EOF, "<EOF>", source.length(), source.length());
    source = null;
    return tokens;
}
Also used : Index(com.yahoo.prelude.Index)

Aggregations

Index (com.yahoo.prelude.Index) 36, IndexFacts (com.yahoo.prelude.IndexFacts) 23, Test (org.junit.Test) 13, SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics) 7, Query (com.yahoo.search.Query) 6, SearchDefinition (com.yahoo.prelude.SearchDefinition) 5, Token (com.yahoo.prelude.query.parser.Token) 5, Tokenizer (com.yahoo.prelude.query.parser.Tokenizer) 5, Execution (com.yahoo.search.searchchain.Execution) 4, JSONString (com.yahoo.prelude.hitfield.JSONString) 2, XMLString (com.yahoo.prelude.hitfield.XMLString) 2, CompositeItem (com.yahoo.prelude.query.CompositeItem) 2, Hit (com.yahoo.search.result.Hit) 2, List (java.util.List) 2, Before (org.junit.Before) 2, Chain (com.yahoo.component.chain.Chain) 1, Language (com.yahoo.language.Language) 1, StemMode (com.yahoo.language.process.StemMode) 1, IndexModel (com.yahoo.prelude.IndexModel) 1, FastHit (com.yahoo.prelude.fastsearch.FastHit) 1