
Example 6 with FieldInfos

Use of org.apache.lucene.index.FieldInfos in project stanbol by apache.

The class IndexConfiguration, method processFstConfig.

/**
     * This method combines the {@link #fstConfig} with the data present in the
     * {@link SolrCore}.
     * <p>
     * Information about a field is only available once the field has actually
     * been used by a document stored in the index, so the index needs to be
     * re-inspected after every change.
     * <p>
     * An empty Solr index will result in an empty {@link #corpusInfos} map.
     * The first document with a value for the English field will cause a
     * {@link CorpusInfo} for the English language to be created. As soon as
     * the last document with a label for a given language is deleted, the
     * {@link CorpusInfo} for that language will also disappear.
     * <p>
     * This method acquires a write lock on {@link #corpusInfoLock} while it
     * inspects the Solr index.
     * @param indexVersion the current version of the {@link #index} to process
     * the FST config for.
     * @param indexReader the {@link AtomicReader} with access to the actual
     * fields present in the {@link SolrCore}. It is used to compare the field
     * configurations in the {@link #fstConfig} with the fields present in the
     * Solr {@link #index}.
     * @return <code>true</code> if any {@link CorpusInfo FST configuration} was
     * found while inspecting the Solr {@link #index}
     */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
    //first check if the Solr index was updated
    corpusInfoLock.readLock().lock();
    try {
        if (indexVersion == this.indexVersion) {
            //nothing to do
            return !corpusInfos.isEmpty();
        }
    } finally {
        corpusInfoLock.readLock().unlock();
    }
    log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
    boolean foundCorpus = false;
    corpusInfoLock.writeLock().lock();
    try {
        this.indexVersion = indexVersion;
        IndexSchema schema = index.getLatestSchema();
        Map<String, CorpusInfo> corpusInfosCopy;
        if (corpusInfos == null) {
            //first call
            //init the field
            corpusInfos = new HashMap<String, CorpusInfo>();
            corpusInfosCopy = new HashMap<String, CorpusInfo>();
        } else {
            corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
            //clear the old data
            corpusInfos.clear();
        }
        //(0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String, String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if (storeField == null) {
            //default to the configured indexField (which may itself still be null here)
            storeField = indexField;
        }
        if (indexField == null) {
            //apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if (fstName == null) {
            //use default
            fstName = getDefaultFstFileName(indexField);
        }
        //These are all fields actually present in the index (as opposed to
        //those merely defined in the schema). This also includes actual
        //instances of dynamic field definitions in the schema.
        //we need this twice
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        //NOTE: this only needs to be done if wildcards are enabled in the fstConfig
        if (fstConfig.useWildcard()) {
            //(1.a) search for present FST files in the FST directory
            Map<String, File> presentFstFiles = new HashMap<String, File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while (fstFiles.hasNext()) {
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                //files are named like "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            //(1.b) iterate over the fields in the Solr index and search for 
            //      matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
            for (FieldInfo fieldInfo : fieldInfos) {
                //try to match the field names against the wildcard
                if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
                    //for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
                    if (language != null //successfully parsed language
                            && fstConfig.isLanguage(language) //the language is enabled
                            && !fstConfig.getExplicitlyIncluded().contains(language)) { //no explicit configuration for this language
                        //generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if (!language.isEmpty()) {
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory, fstFileName.toString());
                        //get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if (fieldType != null) {
                            //the fieldType is present
                            if (runtimeGeneration || fstFile.isFile()) {
                                //the FST file is present or can be created at runtime;
                                //we also need to check that the stored field with
                                //the labels is present in the index
                                String storeFieldName;
                                if (storeField == null) {
                                    //storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else {
                                    // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if (storedFieldInfos == null) {
                                        log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                                        storeFieldName = null;
                                    }
                                }
                                if (storeFieldName != null) {
                                    // == valid configuration
                                    CorpusInfo fstInfo = corpusInfosCopy.get(language);
                                    if (fstInfo == null //new one
                                            || !fstInfo.indexedField.equals(fieldInfo.name) //index field changed
                                            || !fstInfo.storedField.equals(storeFieldName)) { //store field changed
                                        CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
                                        log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
                                        addCorpusInfo(newFstInfo);
                                        corpusInfosCopy.put(language, newFstInfo);
                                    } else {
                                        //no change in the SolrIndex ... use the existing CorpusInfo
                                        addCorpusInfo(fstInfo);
                                    }
                                    foundCorpus = true;
                                }
                            } else {
                                log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
                            }
                        } else {
                            log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                        }
                    }
                    //else the field matched the wildcard but did not pass the
                    //encoding test
                }
                //else the Solr field does not match the field definition in the config
            } // end of iteration over all fields in the SolrIndex
        }
        //(2) process explicit configuration for configured languages
        for (String language : fstConfig.getExplicitlyIncluded()) {
            //(2.a) get the language specific config (with fallback to default)
            Map<String, String> config = fstConfig.getParameters(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if (langIndexField != null) {
                //also consider explicit field names as default for the fst name
                if (langFstFileName == null) {
                    StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
                    if (!language.isEmpty()) {
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if (langStoreField == null) {
                //fallbacks
                if (storeField != null) {
                    //first to default store field
                    langStoreField = storeField;
                } else {
                    //else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if (langFstFileName == null) {
                //no fstFileName config
                // ... use the default
                langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
            }
            if (langAllowCreationString != null) {
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                langAllowCreation = runtimeGeneration;
            }
            //(2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if (langIndexFieldInfo != null) {
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if (langStoreFieldInfo != null) {
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if (fieldType != null) {
                        //(2.c) check the FST file
                        File langFstFile = new File(fstDirectory, langFstFileName);
                        if (langFstFile.isFile() || langAllowCreation) {
                            CorpusInfo langFstInfo = corpusInfosCopy.get(language);
                            if (langFstInfo == null //new one
                                    || !langFstInfo.indexedField.equals(encodedLangIndexField) //index field changed
                                    || !langFstInfo.storedField.equals(encodedLangStoreField)) { //store field changed
                                CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
                                log.debug("   ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
                                addCorpusInfo(newLangFstInfo);
                            } else {
                                //we can use the existing instance
                                addCorpusInfo(langFstInfo);
                            }
                            foundCorpus = true;
                        } else {
                            log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
                        }
                    } else {
                        log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
                    }
                } else {
                    log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
                }
            } else {
                log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
            }
        }
    } finally {
        corpusInfoLock.writeLock().unlock();
    }
    return foundCorpus;
}
Also used:
- HashMap (java.util.HashMap)
- WildcardFileFilter (org.apache.commons.io.filefilter.WildcardFileFilter)
- FieldType (org.apache.solr.schema.FieldType)
- FieldInfos (org.apache.lucene.index.FieldInfos)
- IndexSchema (org.apache.solr.schema.IndexSchema)
- File (java.io.File)
- FieldInfo (org.apache.lucene.index.FieldInfo)
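
The interesting part of processFstConfig above is its locking discipline: a cheap version comparison under the read lock, and the expensive re-inspection under the write lock only when the version changed. Below is a minimal standalone sketch of that double-checked pattern; the names VersionedCache and rebuild are hypothetical and not part of Stanbol.

import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class VersionedCache {

    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private long version = -1L;

    public boolean refresh(long newVersion) {
        //fast path: only a read lock is needed to compare versions,
        //so concurrent readers are never blocked when nothing changed
        lock.readLock().lock();
        try {
            if (newVersion == this.version) {
                return false; //nothing to do
            }
        } finally {
            lock.readLock().unlock();
        }
        //slow path: take the write lock and rebuild the cached state
        lock.writeLock().lock();
        try {
            this.version = newVersion;
            rebuild();
            return true;
        } finally {
            lock.writeLock().unlock();
        }
    }

    private void rebuild() {
        //placeholder for the expensive re-inspection work
    }
}

Like processFstConfig, this keeps the read-mostly path contention-free; two threads racing past the version check simply rebuild sequentially under the write lock.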

Example 7 with FieldInfos

Use of org.apache.lucene.index.FieldInfos in project elasticsearch-skywalker by jprante.

The class DocumentReconstructor, method reconstruct.

/**
     * Reconstructs the documents of an index shard as JSON.
     *
     * @param shardId the id of the shard being reconstructed
     * @return a content builder holding the reconstructed documents
     * @throws IOException if reading the index fails
     */
public XContentBuilder reconstruct(int shardId) throws IOException {
    XContentBuilder builder = jsonBuilder();
    builder.startObject().field("shardId", shardId).field("numDeletions", reader.numDeletedDocs());
    builder.startArray("docs");
    FieldInfos fieldInfos = reader.getFieldInfos();
    Bits live = MultiFields.getLiveDocs(reader);
    for (int docNum = 0; docNum < reader.maxDoc(); docNum++) {
        Document doc = reader.document(docNum);
        if (live != null && !live.get(docNum)) {
            // deleted document (a cleared bit in the live docs), skip it
            continue;
        }
        builder.startObject().startArray("fields");
        if (fieldInfos != null) {
            for (FieldInfo fi : fieldInfos) {
                String name = fi.name;
                IndexableField[] fs = doc.getFields(name);
                if (fs != null && fs.length > 0) {
                    for (IndexableField f : fs) {
                        IndexableFieldToXContent x = new IndexableFieldToXContent().field(f);
                        x.toXContent(builder, ToXContent.EMPTY_PARAMS);
                    }
                }
            }
        }
        builder.endArray();
        builder.startArray("terms");
        if (fieldInfos != null) {
            TermsEnum te = null;
            DocsAndPositionsEnum dpe = null;
            for (FieldInfo fi : fieldInfos) {
                Terms terms = MultiFields.getTerms(reader, fi.name);
                if (terms == null) {
                    // no terms in this field
                    continue;
                }
                te = terms.iterator(te);
                while (te.next() != null) {
                    DocsAndPositionsEnum newDpe = te.docsAndPositions(live, dpe, 0);
                    if (newDpe == null) {
                        // no position info for this field
                        break;
                    }
                    dpe = newDpe;
                    int num = dpe.advance(docNum);
                    if (num != docNum) {
                        // no data for this term in this doc
                        continue;
                    }
                    String text = te.term().utf8ToString();
                    List<Integer> positions = new ArrayList<Integer>();
                    List<Integer> starts = new ArrayList<Integer>();
                    List<Integer> ends = new ArrayList<Integer>();
                    for (int k = 0; k < dpe.freq(); k++) {
                        int pos = dpe.nextPosition();
                        positions.add(pos);
                        starts.add(dpe.startOffset());
                        ends.add(dpe.endOffset());
                    }
                    builder.startObject().field("text", text).field("positions", positions).field("starts", starts).field("ends", ends).field("count", dpe.freq()).endObject();
                }
            }
        }
        builder.endArray();
        builder.endObject();
    }
    builder.endArray();
    builder.endObject();
    return builder;
}
Also used:
- Terms (org.apache.lucene.index.Terms)
- ArrayList (java.util.ArrayList)
- Document (org.apache.lucene.document.Document)
- TermsEnum (org.apache.lucene.index.TermsEnum)
- FieldInfos (org.apache.lucene.index.FieldInfos)
- IndexableField (org.apache.lucene.index.IndexableField)
- IndexableFieldToXContent (org.xbib.elasticsearch.action.skywalker.support.IndexableFieldToXContent)
- DocsAndPositionsEnum (org.apache.lucene.index.DocsAndPositionsEnum)
- Bits (org.apache.lucene.util.Bits)
- XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)
- FieldInfo (org.apache.lucene.index.FieldInfo)
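
The same FieldInfos iteration works against any index, not just a shard reader. A minimal, self-contained sketch (assuming the Lucene 4.x API level of the example above, where DocsAndPositionsEnum and FSDirectory.open(File) exist; the index path /tmp/index is hypothetical):

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ListFields {

    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(new File("/tmp/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            //merge the per-segment FieldInfos into one index-wide view
            FieldInfos infos = MultiFields.getMergedFieldInfos(reader);
            for (FieldInfo fi : infos) {
                System.out.printf("%s (indexed=%b, docValues=%s)%n",
                        fi.name, fi.isIndexed(), fi.getDocValuesType());
            }
        }
    }
}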

Example 8 with FieldInfos

Use of org.apache.lucene.index.FieldInfos in project jackrabbit-oak by apache.

The class LucenePropertyIndex, method query.

@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {

        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();

        private final Set<String> seenPaths = Sets.newHashSet();

        private ScoreDoc lastDoc;

        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;

        private boolean noDocs = false;

        private IndexSearcher indexSearcher;

        private int indexNodeId = -1;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            releaseSearcher();
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into using the field cache for retrieving the path
            //instead of reading via the reader if the number of docs in the index is limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);
                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }
                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the Lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = getCurrentSearcher(indexNode);
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }
                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();
                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();
                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }
                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }
                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }
                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }
                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);
                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;
                    List<Lookup.LookupResult> lookupResults = SuggestHelper.getSuggestions(indexNode.getLookup(), suggestQuery);
                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer());
                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private IndexSearcher getCurrentSearcher(IndexNode indexNode) {
            //obtain a fresh searcher if the index node changed since the last batch
            if (indexNodeId != indexNode.getIndexNodeId()) {
                //if already initialized then log about change
                if (indexNodeId > 0) {
                    LOG.debug("Change in index version detected. Query would be performed without offset");
                }
                //TODO Add testcase for this scenario
                indexSearcher = indexNode.getSearcher();
                indexNodeId = indexNode.getIndexNodeId();
                lastDoc = null;
            }
            return indexSearcher;
        }

        private void releaseSearcher() {
            //For now nullifying it.
            indexSearcher = null;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {

        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
Also used:
- IndexSearcher (org.apache.lucene.search.IndexSearcher)
- PlanResult (org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult)
- Set (java.util.Set)
- Facets (org.apache.lucene.facet.Facets)
- Query (org.apache.lucene.search.Query)
- MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)
- WildcardQuery (org.apache.lucene.search.WildcardQuery)
- NumericRangeQuery (org.apache.lucene.search.NumericRangeQuery)
- CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery)
- PrefixQuery (org.apache.lucene.search.PrefixQuery)
- TermQuery (org.apache.lucene.search.TermQuery)
- BooleanQuery (org.apache.lucene.search.BooleanQuery)
- TermRangeQuery (org.apache.lucene.search.TermRangeQuery)
- QueryEngineSettings (org.apache.jackrabbit.oak.query.QueryEngineSettings)
- Analyzer (org.apache.lucene.analysis.Analyzer)
- Document (org.apache.lucene.document.Document)
- ScoreDoc (org.apache.lucene.search.ScoreDoc)
- TopDocs (org.apache.lucene.search.TopDocs)
- Sort (org.apache.lucene.search.Sort)
- Lookup (org.apache.lucene.search.suggest.Lookup)
- TotalHitCountCollector (org.apache.lucene.search.TotalHitCountCollector)
- AbstractIterator (com.google.common.collect.AbstractIterator)
- PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)
- QueryScorer (org.apache.lucene.search.highlight.QueryScorer)
- SuggestHelper (org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper)
- IOException (java.io.IOException)
- Deque (java.util.Deque)
- QueryNodeException (org.apache.lucene.queryparser.flexible.core.QueryNodeException)
- ParseException (org.apache.lucene.queryparser.classic.ParseException)
- InvalidTokenOffsetsException (org.apache.lucene.search.highlight.InvalidTokenOffsetsException)
- FieldInfos (org.apache.lucene.index.FieldInfos)
- StandardQueryParser (org.apache.lucene.queryparser.flexible.standard.StandardQueryParser)
- QueryParser (org.apache.lucene.queryparser.classic.QueryParser)
- Filter (org.apache.jackrabbit.oak.spi.query.Filter)
- IndexReader (org.apache.lucene.index.IndexReader)
- SuggestWord (org.apache.lucene.search.spell.SuggestWord)
- SpellcheckHelper (org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper)
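
The loadDocs method above is built on Lucene's searchAfter deep paging: remember the last ScoreDoc of each batch and hand it back to the searcher for the next one. A minimal sketch of just that loop, detached from Oak (the batch sizes are illustrative, not Oak's LUCENE_QUERY_BATCH_SIZE):

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

public class DeepPaging {

    public static void pageThrough(IndexSearcher searcher, Query query) throws IOException {
        int batchSize = 50;
        ScoreDoc lastDoc = null;
        while (true) {
            //the first batch is a plain search; later batches resume
            //after the last ScoreDoc seen so far
            TopDocs docs = (lastDoc == null)
                    ? searcher.search(query, batchSize)
                    : searcher.searchAfter(lastDoc, query, batchSize);
            if (docs.scoreDocs.length == 0) {
                break; //result set exhausted
            }
            for (ScoreDoc doc : docs.scoreDocs) {
                //process doc.doc / doc.score here
                lastDoc = doc;
            }
            batchSize = Math.min(batchSize * 2, 100_000); //widen later batches
        }
    }
}

Doubling the batch size, as the Oak iterator also does, keeps the first response fast while amortizing the per-call overhead on deep result sets.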

Example 9 with FieldInfos

Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

The class TestHalfAndHalfDocValues, method testHalfAndHalfDocValues.

public void testHalfAndHalfDocValues() throws Exception {
    // Insert two docs without docvalues
    String fieldname = "string_add_dv_later";
    assertU(adoc("id", "3", fieldname, "c"));
    assertU(commit());
    assertU(adoc("id", "1", fieldname, "a"));
    assertU(commit());
    try (SolrCore core = h.getCoreInc()) {
        assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
        // Add docvalues to the field type
        IndexSchema schema = core.getLatestSchema();
        SchemaField oldField = schema.getField(fieldname);
        int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
        SchemaField sf = new SchemaField(fieldname, oldField.getType(), newProperties, null);
        schema.getFields().put(fieldname, sf);
        // Insert a new doc with docvalues
        assertU(adoc("id", "2", fieldname, "b"));
        assertU(commit());
        // Check there are a mix of segments with and without docvalues
        final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
        final SolrIndexSearcher searcher = searcherRef.get();
        try {
            final DirectoryReader topReader = searcher.getRawReader();
            //Assert no merges
            assertEquals(3, topReader.numDocs());
            assertEquals(3, topReader.leaves().size());
            final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
            //The global field type should have docValues because a document with dvs was added
            assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
            for (LeafReaderContext ctx : topReader.leaves()) {
                LeafReader r = ctx.reader();
                //Make sure there were no merges
                assertEquals(1, r.numDocs());
                Document doc = r.document(0);
                String id = doc.getField("id").stringValue();
                if (id.equals("1") || id.equals("3")) {
                    assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
                } else {
                    assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
                }
            }
        } finally {
            searcherRef.decref();
        }
    }
    // Assert sort order is correct
    assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"), "//*[@numFound='3']", "//result/doc[1]/int[@name='id'][.=1]", "//result/doc[2]/int[@name='id'][.=2]", "//result/doc[3]/int[@name='id'][.=3]");
}
Also used:
- FieldInfos (org.apache.lucene.index.FieldInfos)
- LeafReader (org.apache.lucene.index.LeafReader)
- DirectoryReader (org.apache.lucene.index.DirectoryReader)
- SolrCore (org.apache.solr.core.SolrCore)
- LeafReaderContext (org.apache.lucene.index.LeafReaderContext)
- SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)
- Document (org.apache.lucene.document.Document)
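
The key call in the test is MultiFields.getMergedFieldInfos, which folds every segment's FieldInfos into one index-wide view; the per-segment view can still differ, which is exactly what the assertions check. A minimal sketch of the same merged-versus-per-segment comparison, assuming the Lucene API level used by the test:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;

public class DocValuesMixReport {

    //prints the merged, index-wide docvalues type of a field next to
    //each segment's own view of it
    public static void report(DirectoryReader reader, String field) {
        FieldInfos merged = MultiFields.getMergedFieldInfos(reader);
        FieldInfo mergedInfo = merged.fieldInfo(field);
        System.out.println("merged: "
                + (mergedInfo == null ? DocValuesType.NONE : mergedInfo.getDocValuesType()));
        for (LeafReaderContext ctx : reader.leaves()) {
            FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
            DocValuesType type = (fi == null) ? DocValuesType.NONE : fi.getDocValuesType();
            System.out.println("segment " + ctx.ord + ": " + type);
        }
    }
}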

Example 10 with FieldInfos

Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

The class DocValuesMultiTest, method testDocValues.

@Test
public void testDocValues() throws IOException {
    assertU(adoc("id", "1", "floatdv", "4.5", "intdv", "-1", "intdv", "3", "stringdv", "value1", "stringdv", "value2", "booldv", "false", "booldv", "true"));
    assertU(commit());
    try (SolrCore core = h.getCoreInc()) {
        final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
        final SolrIndexSearcher searcher = searcherRef.get();
        try {
            final LeafReader reader = searcher.getSlowAtomicReader();
            assertEquals(1, reader.numDocs());
            final FieldInfos infos = reader.getFieldInfos();
            assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("stringdv").getDocValuesType());
            assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("booldv").getDocValuesType());
            assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("floatdv").getDocValuesType());
            assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("intdv").getDocValuesType());
            SortedSetDocValues dv = reader.getSortedSetDocValues("stringdv");
            assertEquals(0, dv.nextDoc());
            assertEquals(0, dv.nextOrd());
            assertEquals(1, dv.nextOrd());
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
            dv = reader.getSortedSetDocValues("booldv");
            assertEquals(0, dv.nextDoc());
            assertEquals(0, dv.nextOrd());
            assertEquals(1, dv.nextOrd());
            assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
        } finally {
            searcherRef.decref();
        }
    }
}
Also used:
- FieldInfos (org.apache.lucene.index.FieldInfos)
- LeafReader (org.apache.lucene.index.LeafReader)
- SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)
- SolrCore (org.apache.solr.core.SolrCore)
- SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)
- Test (org.junit.Test)
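
The nextDoc/nextOrd calls asserted above follow Lucene's iterator-style docvalues API, where SortedSetDocValues extends DocIdSetIterator (Lucene 7 and later). A minimal sketch of draining such an iterator and resolving the ords back to their terms:

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;

public class SortedSetDump {

    //advances the docvalues iterator doc by doc, drains the ords of each
    //document and resolves them to their string form
    public static void dump(LeafReader reader, String field) throws IOException {
        SortedSetDocValues dv = reader.getSortedSetDocValues(field);
        if (dv == null) {
            return; //field has no SORTED_SET docvalues in this segment
        }
        for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
            StringBuilder terms = new StringBuilder();
            for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
                terms.append(' ').append(dv.lookupOrd(ord).utf8ToString());
            }
            System.out.println("doc " + doc + ":" + terms);
        }
    }
}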

Aggregations

- FieldInfos (org.apache.lucene.index.FieldInfos): 15 usages
- FieldInfo (org.apache.lucene.index.FieldInfo): 8 usages
- LeafReader (org.apache.lucene.index.LeafReader): 6 usages
- Document (org.apache.lucene.document.Document): 5 usages
- DocValuesType (org.apache.lucene.index.DocValuesType): 4 usages
- LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4 usages
- IndexOptions (org.apache.lucene.index.IndexOptions): 3 usages
- SortedDocValues (org.apache.lucene.index.SortedDocValues): 3 usages
- SolrCore (org.apache.solr.core.SolrCore): 3 usages
- SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher): 3 usages
- File (java.io.File): 2 usages
- ArrayList (java.util.ArrayList): 2 usages
- HashMap (java.util.HashMap): 2 usages
- CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 2 usages
- ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 2 usages
- AbstractIterator (com.google.common.collect.AbstractIterator): 1 usage
- IOException (java.io.IOException): 1 usage
- Date (java.util.Date): 1 usage
- Deque (java.util.Deque): 1 usage
- Random (java.util.Random): 1 usage