Search in sources :

Example 1 with TextField

use of org.apache.solr.schema.TextField in project lucene-solr by apache.

the class SolrQueryParserBase method getFieldQuery.

// Builds the query for a single field / query-text pair.
// NOTE: when raw==true the returned query may be a RawQuery, which the caller has to
// transform further before it can actually be used.
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError {
    checkNullField(field);
    if (!field.equals(lastFieldName)) {
        // A leading underscore may denote a "magic" field name; when one is recognised (and a
        // parser is available) the text is handed to the matching sub-parser instead.
        if (field.charAt(0) == '_' && parser != null) {
            MagicFieldName magicName = MagicFieldName.get(field);
            if (magicName != null) {
                subQParser = parser.subQuery(queryText, magicName.subParser);
                return subQParser.getQuery();
            }
        }
        // The SchemaField is only looked up when the field name changes; this avoids repeated
        // lookups/allocations for dynamic fields in large queries such as
        // foo_i:(1 2 3 4 5 6 7 8 9 10), where "foo_i" is passed for every term.
        lastFieldName = field;
        lastField = schema.getFieldOrNull(field);
    }
    SchemaField schemaField = lastField;

    if (schemaField == null) {
        // field is not in the schema: default to a normal field query
        return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true);
    }

    FieldType fieldType = schemaField.getType();
    if (!(fieldType.isTokenized() && schemaField.indexed())) {
        // everything except tokenized, indexed fields is delegated to the field type
        // (or deferred as a RawQuery when the caller asked for raw queries)
        if (raw) {
            return new RawQuery(schemaField, queryText);
        }
        return fieldType.getFieldQuery(parser, schemaField, queryText);
    }

    // tokenized + indexed: analyze the text here, honouring the TextField-specific switches
    boolean autoGeneratePhraseQueries = false;
    boolean enableGraphQueries = false;
    if (fieldType instanceof TextField) {
        TextField textType = (TextField) fieldType;
        autoGeneratePhraseQueries = textType.getAutoGeneratePhraseQueries();
        enableGraphQueries = textType.getEnableGraphQueries();
    }
    return newFieldQuery(getAnalyzer(), field, queryText, quoted, autoGeneratePhraseQueries, enableGraphQueries);
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) TextField(org.apache.solr.schema.TextField) FieldType(org.apache.solr.schema.FieldType)

Example 2 with TextField

use of org.apache.solr.schema.TextField in project lucene-solr by apache.

the class SolrQueryParserBase method getFieldQuery.

// Multi-term variant of getFieldQuery.
// Assumption: quoted is always false for this overload.
// When raw==true the result may be a RawQuery that still needs to be transformed by the caller.
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
    checkNullField(field);
    if (!field.equals(lastFieldName)) {
        // A leading underscore may denote a "magic" field name; when one is recognised (and a
        // parser is available) the joined terms are handed to the matching sub-parser instead.
        if (field.charAt(0) == '_' && parser != null) {
            MagicFieldName magicName = MagicFieldName.get(field);
            if (magicName != null) {
                subQParser = parser.subQuery(String.join(" ", queryTerms), magicName.subParser);
                return subQParser.getQuery();
            }
        }
        // The SchemaField is only looked up when the field name changes; this avoids repeated
        // lookups/allocations for dynamic fields in large queries such as
        // foo_i:(1 2 3 4 5 6 7 8 9 10), where "foo_i" is passed for every term.
        lastFieldName = field;
        lastField = schema.getFieldOrNull(field);
    }
    SchemaField schemaField = lastField;

    if (schemaField == null) {
        // field is not in the schema: default to a normal field query over the joined terms
        String joinedTerms = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
        return newFieldQuery(getAnalyzer(), field, joinedTerms, false, false, true);
    }

    FieldType fieldType = schemaField.getType();
    if (fieldType.isTokenized() && schemaField.indexed()) {
        // tokenized + indexed: analyze the joined text here
        String joinedTerms = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
        boolean autoGeneratePhraseQueries = false;
        boolean enableGraphQueries = false;
        if (fieldType instanceof TextField) {
            TextField textType = (TextField) fieldType;
            autoGeneratePhraseQueries = textType.getAutoGeneratePhraseQueries();
            enableGraphQueries = textType.getEnableGraphQueries();
        }
        return newFieldQuery(getAnalyzer(), field, joinedTerms, false, autoGeneratePhraseQueries, enableGraphQueries);
    }

    // everything else is delegated to the field type (or deferred as a RawQuery)
    if (raw) {
        return new RawQuery(schemaField, queryTerms);
    }
    if (queryTerms.size() == 1) {
        return fieldType.getFieldQuery(parser, schemaField, queryTerms.get(0));
    }

    List<Query> termQueries = new ArrayList<>();
    for (String term : queryTerms) {
        try {
            termQueries.add(fieldType.getFieldQuery(parser, schemaField, term));
        } catch (Exception ignored) {
            // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
            // for edismax: parsing failures of individual terms are deliberately ignored
        }
    }
    if (termQueries.size() == 1) {
        return termQueries.get(0);
    }

    // only build a boolean query when there is not exactly one usable sub-query
    final BooleanClause.Occur occur;
    if (operator == AND_OPERATOR) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    }
    BooleanQuery.Builder combined = newBooleanQuery();
    for (Query termQuery : termQueries) {
        combined.add(termQuery, occur);
    }
    return combined.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FilterQuery(org.apache.solr.query.FilterQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ArrayList(java.util.ArrayList) SolrException(org.apache.solr.common.SolrException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause) TextField(org.apache.solr.schema.TextField)

Example 3 with TextField

use of org.apache.solr.schema.TextField in project lucene-solr by apache.

the class TestValueSource method parseTerm.

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Parses two arguments (field name, then term value) from the function parser and fills a
// TInfo with the field, the raw value, and the indexed form of the value.
private static TInfo parseTerm(FunctionQParser fp) throws SyntaxError {
    TInfo info = new TInfo();
    String fieldName = fp.parseArg();
    info.field = fieldName;
    info.indexedField = fieldName;
    info.val = fp.parseArg();
    info.indexedBytes = new BytesRefBuilder();

    FieldType type = fp.getReq().getSchema().getFieldTypeNoEx(info.field);
    if (type == null) {
        // no type known for this field name: fall back to a plain string type
        type = new StrField();
    }

    if (!(type instanceof TextField)) {
        // non-text types convert the readable value to its indexed form directly
        type.readableToIndexed(info.val, info.indexedBytes);
        return info;
    }

    // text fields: run the value through the field type's query construction so that
    // analysis is applied to the term
    String analyzedVal = info.val;
    Query fieldQuery = type.getFieldQuery(fp, fp.getReq().getSchema().getFieldOrNull(info.field), info.val);
    if (fieldQuery instanceof TermQuery) {
        // a single resulting term: take over its field and text
        Term analyzedTerm = ((TermQuery) fieldQuery).getTerm();
        info.indexedField = analyzedTerm.field();
        analyzedVal = analyzedTerm.text();
    }
    info.indexedBytes.copyChars(analyzedVal);
    return info;
}
Also used : StrField(org.apache.solr.schema.StrField) TermQuery(org.apache.lucene.search.TermQuery) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BoostedQuery(org.apache.lucene.queries.function.BoostedQuery) TextField(org.apache.solr.schema.TextField) Term(org.apache.lucene.index.Term) FieldType(org.apache.solr.schema.FieldType)

Example 4 with TextField

use of org.apache.solr.schema.TextField in project lucene-solr by apache.

the class FieldAnalysisRequestHandlerTest method testCustomAttribute.

// See SOLR-8460.
// Analyzes a value with a TextField whose index analyzer is built from a custom tokenizer and a
// custom token filter, then checks the flags attribute reported for the filter stage.
@Test
public void testCustomAttribute() throws Exception {
    FieldAnalysisRequest analysisRequest = new FieldAnalysisRequest();
    analysisRequest.addFieldType("skutype1");
    analysisRequest.setFieldValue("hi, 3456-12 a Test");
    analysisRequest.setShowMatch(false);

    FieldType textType = new TextField();

    TokenizerFactory tokenizerFactory = new TokenizerFactory(Collections.emptyMap()) {

        @Override
        public Tokenizer create(AttributeFactory factory) {
            return new CustomTokenizer(factory);
        }
    };
    TokenFilterFactory filterFactory = new TokenFilterFactory(Collections.emptyMap()) {

        @Override
        public TokenStream create(TokenStream input) {
            return new CustomTokenFilter(input);
        }
    };
    Analyzer customAnalyzer = new TokenizerChain(tokenizerFactory, new TokenFilterFactory[] { filterFactory });
    textType.setIndexAnalyzer(customAnalyzer);

    NamedList<NamedList> analysis = handler.analyzeValues(analysisRequest, textType, "fieldNameUnused");

    // look at the tokens reported for the CustomTokenFilter stage of the index-time analysis
    String filterStage = CustomTokenFilter.class.getName();
    List<NamedList> tokens = (List<NamedList>) analysis.findRecursive("index", filterStage);

    // expected flags value: '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl
    Object flags = tokens.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags");
    assertEquals(901, flags);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) NamedList(org.apache.solr.common.util.NamedList) AttributeFactory(org.apache.lucene.util.AttributeFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) FieldType(org.apache.solr.schema.FieldType) TokenizerChain(org.apache.solr.analysis.TokenizerChain) TextField(org.apache.solr.schema.TextField) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) FieldAnalysisRequest(org.apache.solr.client.solrj.request.FieldAnalysisRequest) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Test(org.junit.Test)

Example 5 with TextField

use of org.apache.solr.schema.TextField in project lucene-solr by apache.

the class SolrQueryParserBase method getBooleanQuery.

/**
   * Factory method for generating query, given a set of clauses.
   * By default creates a boolean query composed of clauses passed in.
   *
   * Can be overridden by extending classes, to modify query being
   * returned.
   *
   * <p>{@code RawQuery} clauses are handled specially: non-optional ones are converted via
   * {@code rawToNormal}; optional ({@code SHOULD}) ones may be collected per field and later
   * emitted either as individual optional clauses or as a single set query obtained from the
   * field type.
   *
   * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances
   *    to join.
   *
   * @return Resulting {@link org.apache.lucene.search.Query} object, or {@code null} when
   *    {@code clauses} is empty.
   * @throws SyntaxError declared by this method; presumably propagated from the query
   *    construction helpers it calls
   */
protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
    if (clauses.size() == 0) {
        // all clause words were filtered away by the analyzer.
        return null;
    }
    SchemaField sfield = null;
    List<RawQuery> fieldValues = null;
    boolean onlyRawQueries = true;
    int allRawQueriesTermCount = 0;
    // First pass: count the terms carried by raw queries and note whether every clause is raw.
    for (BooleanClause clause : clauses) {
        if (clause.getQuery() instanceof RawQuery) {
            allRawQueriesTermCount += ((RawQuery) clause.getQuery()).getTermCount();
        } else {
            onlyRawQueries = false;
        }
    }
    // Set queries are only considered in filter context and above the term-count threshold.
    boolean useTermsQuery = (flags & QParser.FLAG_FILTER) != 0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
    BooleanQuery.Builder booleanBuilder = newBooleanQuery();
    Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
    // Second pass: add ordinary clauses directly; collect optional raw queries per field.
    for (BooleanClause clause : clauses) {
        Query subq = clause.getQuery();
        if (subq instanceof RawQuery) {
            if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
                // We only collect optional terms for set queries.  Since this isn't optional,
                // convert the raw query to a normal query and handle as usual.
                clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
            } else {
                // Optional raw query.
                RawQuery rawq = (RawQuery) subq;
                // only look up fmap and type info on a field change
                if (sfield != rawq.sfield) {
                    sfield = rawq.sfield;
                    fieldValues = fmap.get(sfield);
                    // Collect this value if the field isn't indexed, or if we want to use a set
                    // query (the "useTermQuery" check). Only create and register a new list when
                    // none exists yet for this field: registering a fresh list over an existing
                    // one would discard raw queries collected earlier for the same field when
                    // clauses of different fields are interleaved (e.g. a:1 b:2 a:3).
                    if (fieldValues == null && (useTermsQuery || !sfield.indexed())) {
                        fieldValues = new ArrayList<>(2);
                        fmap.put(sfield, fieldValues);
                    }
                }
                if (fieldValues != null) {
                    fieldValues.add(rawq);
                    continue;
                }
                // not collecting for this field: treat it like a normal optional clause
                clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
            }
        }
        booleanBuilder.add(clause);
    }
    // Third pass: turn each per-field collection into clauses or a single set query.
    for (Map.Entry<SchemaField, List<RawQuery>> entry : fmap.entrySet()) {
        sfield = entry.getKey();
        fieldValues = entry.getValue();
        FieldType ft = sfield.getType();
        // TODO: pull more of this logic out to FieldType?  We would need to be able to add clauses to our existing booleanBuilder.
        int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
        if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
            // use boolean query instead
            for (RawQuery rawq : fieldValues) {
                Query subq;
                if (ft.isTokenized() && sfield.indexed()) {
                    boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
                    boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
                    subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
                    booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
                } else {
                    for (String externalVal : rawq.getExternalVals()) {
                        subq = ft.getFieldQuery(this.parser, sfield, externalVal);
                        booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
                    }
                }
            }
        } else {
            List<String> externalVals = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
            Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
            // if this is everything, don't wrap in a boolean query
            if (onlyRawQueries && termCount == allRawQueriesTermCount)
                return subq;
            booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
        }
    }
    BooleanQuery bq = booleanBuilder.build();
    if (bq.clauses().size() == 1) {
        // Unwrap single SHOULD query
        BooleanClause clause = bq.clauses().iterator().next();
        if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            return clause.getQuery();
        }
    }
    return bq;
}
Also used : Query(org.apache.lucene.search.Query) QParser(org.apache.solr.search.QParser) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) FieldType(org.apache.solr.schema.FieldType) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) QueryBuilder(org.apache.lucene.util.QueryBuilder) SolrException(org.apache.solr.common.SolrException) SchemaField(org.apache.solr.schema.SchemaField) RegexpQuery(org.apache.lucene.search.RegexpQuery) Operations(org.apache.lucene.util.automaton.Operations) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SyntaxError(org.apache.solr.search.SyntaxError) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Map(java.util.Map) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) EnumSet(java.util.EnumSet) Automata(org.apache.lucene.util.automaton.Automata) ReversedWildcardFilterFactory(org.apache.solr.analysis.ReversedWildcardFilterFactory) TokenizerChain(org.apache.solr.analysis.TokenizerChain) TextField(org.apache.solr.schema.TextField) Automaton(org.apache.lucene.util.automaton.Automaton) Analyzer(org.apache.lucene.analysis.Analyzer) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Collectors(java.util.stream.Collectors) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanClause(org.apache.lucene.search.BooleanClause) IndexSchema(org.apache.solr.schema.IndexSchema) List(java.util.List) FilterQuery(org.apache.solr.query.FilterQuery) StringReader(java.io.StringReader) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) Collections(java.util.Collections) ReverseStringFilter(org.apache.lucene.analysis.reverse.ReverseStringFilter) 
Operator(org.apache.solr.parser.QueryParser.Operator) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FilterQuery(org.apache.solr.query.FilterQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause) TextField(org.apache.solr.schema.TextField) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

TextField (org.apache.solr.schema.TextField)6 FieldType (org.apache.solr.schema.FieldType)5 SchemaField (org.apache.solr.schema.SchemaField)4 ArrayList (java.util.ArrayList)3 Query (org.apache.lucene.search.Query)3 List (java.util.List)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory)2 Term (org.apache.lucene.index.Term)2 AutomatonQuery (org.apache.lucene.search.AutomatonQuery)2 BooleanClause (org.apache.lucene.search.BooleanClause)2 BooleanQuery (org.apache.lucene.search.BooleanQuery)2 BoostQuery (org.apache.lucene.search.BoostQuery)2 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)2 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)2 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)2 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)2 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)2 PhraseQuery (org.apache.lucene.search.PhraseQuery)2 RegexpQuery (org.apache.lucene.search.RegexpQuery)2