Search in sources:

Example 66 with FieldType

Use of org.apache.solr.schema.FieldType in the project lucene-solr by Apache.

The class FieldAnalysisRequestHandlerTest, method testCustomAttribute.

//See SOLR-8460
@Test
public void testCustomAttribute() throws Exception {
    // Analyze a literal value against the "skutype1" field type, without match highlighting.
    FieldAnalysisRequest analysisRequest = new FieldAnalysisRequest();
    analysisRequest.addFieldType("skutype1");
    analysisRequest.setFieldValue("hi, 3456-12 a Test");
    analysisRequest.setShowMatch(false);

    // Tokenizer factory that produces the custom tokenizer under test.
    TokenizerFactory tokenizerFactory = new TokenizerFactory(Collections.emptyMap()) {

        @Override
        public Tokenizer create(AttributeFactory factory) {
            return new CustomTokenizer(factory);
        }
    };
    // Filter factory that wraps the stream in the custom token filter.
    TokenFilterFactory filterFactory = new TokenFilterFactory(Collections.emptyMap()) {

        @Override
        public TokenStream create(TokenStream input) {
            return new CustomTokenFilter(input);
        }
    };

    FieldType customType = new TextField();
    customType.setIndexAnalyzer(new TokenizerChain(tokenizerFactory, new TokenFilterFactory[] { filterFactory }));

    NamedList<NamedList> analysis = handler.analyzeValues(analysisRequest, customType, "fieldNameUnused");
    // just test that we see "900" in the flags attribute here
    List<NamedList> tokenInfo = (List<NamedList>) analysis.findRecursive("index", CustomTokenFilter.class.getName());
    // '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
    assertEquals(901, tokenInfo.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) NamedList(org.apache.solr.common.util.NamedList) AttributeFactory(org.apache.lucene.util.AttributeFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) FieldType(org.apache.solr.schema.FieldType) TokenizerChain(org.apache.solr.analysis.TokenizerChain) TextField(org.apache.solr.schema.TextField) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) FieldAnalysisRequest(org.apache.solr.client.solrj.request.FieldAnalysisRequest) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Test(org.junit.Test)

Example 67 with FieldType

Use of org.apache.solr.schema.FieldType in the project lucene-solr by Apache.

The class SolrCore, method initCodec.

/**
 * Creates the {@link Codec} used by this core.
 *
 * <p>If a {@code codecFactory} plugin is configured in solrconfig.xml it is instantiated and
 * initialized; otherwise Lucene's default codec is used. A factory that is not
 * {@link SolrCoreAware} cannot honor per-fieldtype postings/docValues formats, so such
 * configurations are rejected up front instead of being silently ignored.
 *
 * @param solrConfig the core's configuration, consulted for a codecFactory plugin
 * @param schema the index schema, used for plugin loading and format validation
 * @return the codec produced by the (possibly default) factory
 * @throws SolrException SERVER_ERROR when a field type declares a postings or docValues
 *         format that the non-core-aware factory cannot support
 */
private Codec initCodec(SolrConfig solrConfig, final IndexSchema schema) {
    final PluginInfo info = solrConfig.getPluginInfo(CodecFactory.class.getName());
    final CodecFactory factory;
    if (info != null) {
        factory = schema.getResourceLoader().newInstance(info.className, CodecFactory.class);
        factory.init(info.initArgs);
    } else {
        // No codecFactory configured: fall back to Lucene's default codec.
        factory = new CodecFactory() {

            @Override
            public Codec getCodec() {
                return Codec.getDefault();
            }
        };
    }
    if (factory instanceof SolrCoreAware) {
        // CodecFactory needs SolrCore before inform() is called on all registered
        // SolrCoreAware listeners, at the end of the SolrCore constructor
        ((SolrCoreAware) factory).inform(this);
    } else {
        // A non-core-aware factory cannot apply per-fieldtype formats; fail fast
        // rather than letting the configuration be silently dropped.
        for (FieldType ft : schema.getFieldTypes().values()) {
            if (null != ft.getPostingsFormat()) {
                rejectUnsupportedFormat(ft, "postings", factory);
            }
            if (null != ft.getDocValuesFormat()) {
                rejectUnsupportedFormat(ft, "docValues", factory);
            }
        }
    }
    return factory.getCodec();
}

/**
 * Logs and throws a SERVER_ERROR for a field type that declares a format
 * ({@code formatName}: "postings" or "docValues") the codec cannot support.
 */
private static void rejectUnsupportedFormat(FieldType ft, String formatName, CodecFactory factory) {
    String msg = "FieldType '" + ft.getTypeName() + "' is configured with a " + formatName + " format, but the codec does not support it: " + factory.getClass();
    log.error(msg);
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
}
Also used : Codec(org.apache.lucene.codecs.Codec) SolrCoreAware(org.apache.solr.util.plugin.SolrCoreAware) SolrException(org.apache.solr.common.SolrException) FieldType(org.apache.solr.schema.FieldType)

Example 68 with FieldType

Use of org.apache.solr.schema.FieldType in the project lucene-solr by Apache.

The class SimpleFacets, method getGroupedCounts.

/**
 * Computes facet counts for {@code field} where each group (as defined by the request's
 * grouping specification) contributes at most once per term, rather than each document.
 *
 * @param termFilter optional predicate applied to each candidate term; terms failing it are skipped
 * @return term -&gt; grouped count, with an optional trailing null entry for missing-value counts
 * @throws SolrException BAD_REQUEST when no group.field was specified on the request
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter) throws IOException {
    final GroupingSpecification groupingSpec = rb.getGroupingSpec();
    final String groupField = (groupingSpec == null) ? null : groupingSpec.getFields()[0];
    if (groupField == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter");
    }
    final BytesRef prefixBytes = (prefix == null) ? null : new BytesRef(prefix);
    final TermGroupFacetCollector groupFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytes, 128);
    // When GroupedFacetCollector can handle numerics we can remove the wrapped collectors
    final Collector wrappedCollector = getInsanityWrapper(field, getInsanityWrapper(groupField, groupFacetCollector));
    searcher.search(base.getTopFilter(), wrappedCollector);

    final boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
    // A negative limit means "unbounded"; otherwise fetch enough entries to cover the offset.
    final int mergeLimit = (limit < 0) ? Integer.MAX_VALUE : (offset + limit);
    final TermGroupFacetCollector.GroupedFacetResult facetResult = groupFacetCollector.mergeSegmentResults(mergeLimit, mincount, orderByCount);

    final CharsRefBuilder readable = new CharsRefBuilder();
    final FieldType facetFieldType = searcher.getSchema().getFieldType(field);
    final NamedList<Integer> counts = new NamedList<>();
    final int entryLimit = (limit < 0) ? Integer.MAX_VALUE : limit;
    for (TermGroupFacetCollector.FacetEntry entry : facetResult.getFacetEntries(offset, entryLimit)) {
        //:TODO:can we filter earlier than this to make it more efficient?
        if (termFilter != null && !termFilter.test(entry.getValue())) {
            continue;
        }
        facetFieldType.indexedToReadable(entry.getValue(), readable);
        counts.add(readable.toString(), entry.getCount());
    }
    if (missing) {
        counts.add(null, facetResult.getTotalMissingCount());
    }
    return counts;
}
Also used : TermGroupFacetCollector(org.apache.lucene.search.grouping.TermGroupFacetCollector) NamedList(org.apache.solr.common.util.NamedList) FieldType(org.apache.solr.schema.FieldType) AllGroupHeadsCollector(org.apache.lucene.search.grouping.AllGroupHeadsCollector) LeafCollector(org.apache.lucene.search.LeafCollector) AllGroupsCollector(org.apache.lucene.search.grouping.AllGroupsCollector) FilterCollector(org.apache.lucene.search.FilterCollector) TermGroupFacetCollector(org.apache.lucene.search.grouping.TermGroupFacetCollector) Collector(org.apache.lucene.search.Collector) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) GroupingSpecification(org.apache.solr.search.grouping.GroupingSpecification) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Example 69 with FieldType

Use of org.apache.solr.schema.FieldType in the project lucene-solr by Apache.

The class SimpleFacets, method getTermCounts.

/**
   * Term counts for use in field faceting that respects the specified mincount -
   * if mincount is null, the "zeros" param is consulted for the appropriate backcompat
   * default
   *
   * @param field the schema field to facet on
   * @param mincount minimum count a term must have to be included; may be null (see above)
   * @param parsed pre-parsed request parameters, doc set, and thread count
   * @return facet term counts for the field, in the requested sort order
   * @see FacetParams#FACET_ZEROS
   */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
    final SolrParams params = parsed.params;
    final DocSet docs = parsed.docs;
    final int threads = parsed.threads;
    int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    // limit == 0 means the caller wants no buckets at all; short-circuit with an empty list.
    if (limit == 0)
        return new NamedList<>();
    if (mincount == null) {
        Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
        // mincount = (zeros!=null && zeros) ? 0 : 1;
        mincount = (zeros != null && !zeros) ? 1 : 0;
    // current default is to include zeros.
    }
    boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
    // default to sorting if there is a limit.
    String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
    String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
    final Predicate<BytesRef> termFilter = newBytesRefFilter(field, params);
    boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
    NamedList<Integer> counts;
    SchemaField sf = searcher.getSchema().getField(field);
    // Point fields can only be faceted through docValues; reject early with a clear error.
    if (sf.getType().isPointField() && !sf.hasDocValues()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't facet on a PointField without docValues");
    }
    FieldType ft = sf.getType();
    // determine what type of faceting method to use
    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
    final FacetMethod requestedMethod;
    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
        requestedMethod = FacetMethod.ENUM;
    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
        requestedMethod = FacetMethod.FCS;
    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
        requestedMethod = FacetMethod.FC;
    } else if (FacetParams.FACET_METHOD_uif.equals(methodStr)) {
        requestedMethod = FacetMethod.UIF;
    } else {
        requestedMethod = null;
    }
    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
    // The requested method may be overridden based on field/index characteristics.
    FacetMethod appliedFacetMethod = selectFacetMethod(field, sf, requestedMethod, mincount, exists);
    RTimer timer = null;
    if (fdebug != null) {
        fdebug.putInfoItem("requestedMethod", requestedMethod == null ? "not specified" : requestedMethod.name());
        fdebug.putInfoItem("appliedMethod", appliedFacetMethod.name());
        fdebug.putInfoItem("inputDocSetSize", docs.size());
        fdebug.putInfoItem("field", field);
        timer = new RTimer();
    }
    // group.facet does its own per-group counting; otherwise dispatch on the selected method.
    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
        counts = getGroupedCounts(searcher, docs, field, multiToken, offset, limit, mincount, missing, sort, prefix, termFilter);
    } else {
        assert appliedFacetMethod != null;
        switch(appliedFacetMethod) {
            case ENUM:
                assert TrieField.getMainValuePrefix(ft) == null;
                counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, exists);
                break;
            case FCS:
                assert ft.isPointField() || !multiToken;
                if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
                    // Numeric single-valued path: prefix and term filters have no meaning here.
                    if (prefix != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
                    }
                    if (termFilter != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters (" + FacetParams.FACET_CONTAINS + ", " + FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
                    }
                    //            We should do this, but mincount=0 is currently the default
                    //            if (ft.isPointField() && mincount <= 0) {
                    //              throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_MINCOUNT + " <= 0 is not supported on point types");
                    //            }
                    counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
                } else {
                    // Per-segment faceting, optionally parallelized across `threads` workers.
                    PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
                    Executor executor = threads == 0 ? directExecutor : facetExecutor;
                    ps.setNumThreads(threads);
                    counts = ps.getFacetCounts(executor);
                }
                break;
            case UIF:
                //Emulate the JSON Faceting structure so we can use the same parsing classes
                Map<String, Object> jsonFacet = new HashMap<>(13);
                jsonFacet.put("type", "terms");
                jsonFacet.put("field", field);
                jsonFacet.put("offset", offset);
                jsonFacet.put("limit", limit);
                jsonFacet.put("mincount", mincount);
                jsonFacet.put("missing", missing);
                jsonFacet.put("prefix", prefix);
                jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
                jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
                jsonFacet.put("method", "uif");
                jsonFacet.put("cacheDf", 0);
                jsonFacet.put("perSeg", false);
                // Map legacy sort aliases onto the canonical JSON facet sort values.
                final String sortVal;
                switch(sort) {
                    case FacetParams.FACET_SORT_COUNT_LEGACY:
                        sortVal = FacetParams.FACET_SORT_COUNT;
                        break;
                    case FacetParams.FACET_SORT_INDEX_LEGACY:
                        sortVal = FacetParams.FACET_SORT_INDEX;
                        break;
                    default:
                        sortVal = sort;
                }
                jsonFacet.put(SORT, sortVal);
                Map<String, Object> topLevel = new HashMap<>();
                topLevel.put(field, jsonFacet);
                topLevel.put("processEmpty", true);
                FacetProcessor fproc = // rb.getResults().docSet
                FacetProcessor.createProcessor(// rb.getResults().docSet
                rb.req, // rb.getResults().docSet
                topLevel, docs);
                //TODO do we handle debug?  Should probably already be handled by the legacy code
                fproc.process();
                //Go through the response to build the expected output for SimpleFacets
                Object res = fproc.getResponse();
                counts = new NamedList<Integer>();
                if (res != null) {
                    SimpleOrderedMap<Object> som = (SimpleOrderedMap<Object>) res;
                    SimpleOrderedMap<Object> asdf = (SimpleOrderedMap<Object>) som.get(field);
                    List<SimpleOrderedMap<Object>> buckets = (List<SimpleOrderedMap<Object>>) asdf.get("buckets");
                    for (SimpleOrderedMap<Object> b : buckets) {
                        counts.add(b.get("val").toString(), (Integer) b.get("count"));
                    }
                    if (missing) {
                        SimpleOrderedMap<Object> missingCounts = (SimpleOrderedMap<Object>) asdf.get("missing");
                        counts.add(null, (Integer) missingCounts.get("count"));
                    }
                }
                break;
            case FC:
                counts = DocValuesFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, fdebug);
                break;
            default:
                throw new AssertionError();
        }
    }
    // Record elapsed time for facet debug output.
    if (fdebug != null) {
        long timeElapsed = (long) timer.getTime();
        fdebug.setElapse(timeElapsed);
    }
    return counts;
}
Also used : IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) FacetProcessor(org.apache.solr.search.facet.FacetProcessor) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Executor(java.util.concurrent.Executor) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) BytesRef(org.apache.lucene.util.BytesRef) SolrException(org.apache.solr.common.SolrException) RTimer(org.apache.solr.util.RTimer) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) RequiredSolrParams(org.apache.solr.common.params.RequiredSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 70 with FieldType

Use of org.apache.solr.schema.FieldType in the project lucene-solr by Apache.

The class SolrQueryParserBase, method rawToNormal.

// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
Query rawToNormal(Query q) {
    if (!(q instanceof RawQuery)) {
        return q;
    }
    RawQuery raw = (RawQuery) q;
    FieldType type = raw.sfield.getType();
    if (type.isTokenized()) {
        // Tokenized fields analyze the joined external value as a single string.
        return type.getFieldQuery(parser, raw.sfield, raw.getJoinedExternalVal());
    }
    if (raw.getTermCount() == 1) {
        // Single untokenized term: build the field query directly.
        return type.getFieldQuery(this.parser, raw.sfield, raw.getExternalVals().get(0));
    }
    // Multiple terms on an untokenized field: OR the per-value queries together.
    BooleanQuery.Builder disjunction = newBooleanQuery();
    for (String externalVal : raw.getExternalVals()) {
        Query termQuery = type.getFieldQuery(this.parser, raw.sfield, externalVal);
        disjunction.add(termQuery, BooleanClause.Occur.SHOULD);
    }
    return disjunction.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FilterQuery(org.apache.solr.query.FilterQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) FieldType(org.apache.solr.schema.FieldType)

Aggregations

FieldType (org.apache.solr.schema.FieldType)93 SchemaField (org.apache.solr.schema.SchemaField)37 SolrException (org.apache.solr.common.SolrException)29 ArrayList (java.util.ArrayList)23 BytesRef (org.apache.lucene.util.BytesRef)23 NamedList (org.apache.solr.common.util.NamedList)23 IOException (java.io.IOException)18 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)15 IndexSchema (org.apache.solr.schema.IndexSchema)14 Query (org.apache.lucene.search.Query)13 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)13 Analyzer (org.apache.lucene.analysis.Analyzer)12 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)10 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)10 StrField (org.apache.solr.schema.StrField)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 DocIterator (org.apache.solr.search.DocIterator)7 DocList (org.apache.solr.search.DocList)7