Example 1 with DelegatingAnalyzerWrapper

Use of org.apache.lucene.analysis.DelegatingAnalyzerWrapper in project elasticsearch by elastic.

From the class PercolateQueryBuilder, the method doToQuery:

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Call nowInMillis() so that this query becomes un-cacheable since we
    // can't be sure that it doesn't use now or scripts
    context.nowInMillis();
    if (indexedDocumentIndex != null || indexedDocumentType != null || indexedDocumentId != null) {
        throw new IllegalStateException("query builder must be rewritten first");
    }
    if (document == null) {
        throw new IllegalStateException("no document to percolate");
    }
    MapperService mapperService = context.getMapperService();
    DocumentMapperForType docMapperForType = mapperService.documentMapperWithAutoCreate(documentType);
    DocumentMapper docMapper = docMapperForType.getDocumentMapper();
    ParsedDocument doc = docMapper.parse(source(context.index().getName(), documentType, "_temp_id", document, documentXContentType));
    FieldNameAnalyzer fieldNameAnalyzer = (FieldNameAnalyzer) docMapper.mappers().indexAnalyzer();
    // Need this custom impl because FieldNameAnalyzer is strict and the percolator sometimes isn't when
    // 'index.percolator.map_unmapped_fields_as_string' is enabled:
    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {

        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
            if (analyzer != null) {
                return analyzer;
            } else {
                return context.getIndexAnalyzers().getDefaultIndexAnalyzer();
            }
        }
    };
    final IndexSearcher docSearcher;
    if (doc.docs().size() > 1) {
        assert docMapper.hasNestedObjects();
        docSearcher = createMultiDocumentSearcher(analyzer, doc);
    } else {
        MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc.rootDoc(), analyzer, true, false);
        docSearcher = memoryIndex.createSearcher();
        docSearcher.setQueryCache(null);
    }
    Version indexVersionCreated = context.getIndexSettings().getIndexVersionCreated();
    boolean mapUnmappedFieldsAsString = context.getIndexSettings().getValue(PercolatorFieldMapper.INDEX_MAP_UNMAPPED_FIELDS_AS_STRING_SETTING);
    // We have to make a copy of the QueryShardContext here so we can have an unfrozen version for parsing the legacy
    // percolator queries
    QueryShardContext percolateShardContext = new QueryShardContext(context);
    MappedFieldType fieldType = context.fieldMapper(field);
    if (fieldType == null) {
        throw new QueryShardException(context, "field [" + field + "] does not exist");
    }
    if (!(fieldType instanceof PercolatorFieldMapper.FieldType)) {
        throw new QueryShardException(context, "expected field [" + field + "] to be of type [percolator], but is of type [" + fieldType.typeName() + "]");
    }
    PercolatorFieldMapper.FieldType pft = (PercolatorFieldMapper.FieldType) fieldType;
    PercolateQuery.QueryStore queryStore = createStore(pft, percolateShardContext, mapUnmappedFieldsAsString);
    return pft.percolateQuery(documentType, queryStore, document, docSearcher);
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), FieldNameAnalyzer (org.elasticsearch.index.analysis.FieldNameAnalyzer), DocumentMapper (org.elasticsearch.index.mapper.DocumentMapper), Analyzer (org.apache.lucene.analysis.Analyzer), MappedFieldType (org.elasticsearch.index.mapper.MappedFieldType), MemoryIndex (org.apache.lucene.index.memory.MemoryIndex), ParsedDocument (org.elasticsearch.index.mapper.ParsedDocument), DelegatingAnalyzerWrapper (org.apache.lucene.analysis.DelegatingAnalyzerWrapper), Version (org.elasticsearch.Version), DocumentMapperForType (org.elasticsearch.index.mapper.DocumentMapperForType), QueryShardContext (org.elasticsearch.index.query.QueryShardContext), QueryShardException (org.elasticsearch.index.query.QueryShardException), MapperService (org.elasticsearch.index.mapper.MapperService)
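
Both examples follow the same pattern: subclass DelegatingAnalyzerWrapper, pass Analyzer.PER_FIELD_REUSE_STRATEGY to the super constructor, and override getWrappedAnalyzer to pick a delegate per field. Unlike a general AnalyzerWrapper, a DelegatingAnalyzerWrapper does not allow wrapping the delegate's components or reader, which keeps per-field delegation safe to combine with the per-field reuse strategy. A minimal standalone sketch of the pattern follows; the class name PerFieldDelegatingAnalyzer and the field names are hypothetical, not taken from either project.

import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Hypothetical illustration of the pattern shared by both examples above.
public class PerFieldDelegatingAnalyzer extends DelegatingAnalyzerWrapper {

    private final Map<String, Analyzer> fieldAnalyzers;
    private final Analyzer fallback;

    public PerFieldDelegatingAnalyzer(Map<String, Analyzer> fieldAnalyzers, Analyzer fallback) {
        // PER_FIELD_REUSE_STRATEGY caches TokenStreamComponents per field,
        // so each delegate's components are reused only for its own field
        super(Analyzer.PER_FIELD_REUSE_STRATEGY);
        this.fieldAnalyzers = fieldAnalyzers;
        this.fallback = fallback;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        // Mirrors the percolator's fallback to the default index analyzer above
        return fieldAnalyzers.getOrDefault(fieldName, fallback);
    }

    public static void main(String[] args) {
        // "exact_field" is whitespace-tokenized; every other field gets StandardAnalyzer
        Analyzer analyzer = new PerFieldDelegatingAnalyzer(
                Map.of("exact_field", new WhitespaceAnalyzer()),
                new StandardAnalyzer());
        System.out.println(analyzer);
    }
}

In Example 1 the wrapper then feeds MemoryIndex.fromDocument(...), which builds a throwaway in-memory index of the single percolated document so the stored percolator queries can be executed against it.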

Example 2 with DelegatingAnalyzerWrapper

Use of org.apache.lucene.analysis.DelegatingAnalyzerWrapper in project lucene-solr by apache.

From the class FastVectorHighlighterTest, the method matchedFieldsTestCase:

private void matchedFieldsTestCase(boolean useMatchedFields, boolean fieldMatch, String fieldValue, String expected, Query... queryClauses) throws IOException {
    Document doc = new Document();
    FieldType stored = new FieldType(TextField.TYPE_STORED);
    stored.setStoreTermVectorOffsets(true);
    stored.setStoreTermVectorPositions(true);
    stored.setStoreTermVectors(true);
    stored.freeze();
    FieldType matched = new FieldType(TextField.TYPE_NOT_STORED);
    matched.setStoreTermVectorOffsets(true);
    matched.setStoreTermVectorPositions(true);
    matched.setStoreTermVectors(true);
    matched.freeze();
    // Whitespace tokenized with English stop words
    doc.add(new Field("field", fieldValue, stored));
    // Whitespace tokenized without stop words
    doc.add(new Field("field_exact", fieldValue, matched));
    // Whitespace tokenized without toLower
    doc.add(new Field("field_super_exact", fieldValue, matched));
    // Each letter is a token
    doc.add(new Field("field_characters", fieldValue, matched));
    // Every three letters is a token
    doc.add(new Field("field_tripples", fieldValue, matched));
    doc.add(new Field("field_sliced", // Sliced at 10 chars then analyzed just like field
    fieldValue.substring(// Sliced at 10 chars then analyzed just like field
    0, Math.min(fieldValue.length() - 1, 10)), matched));
    doc.add(new Field("field_der_red", new // Hacky field containing "der" and "red" at pos = 0
    CannedTokenStream(token("der", 1, 0, 3), token("red", 0, 0, 3)), matched));
    final Map<String, Analyzer> fieldAnalyzers = new TreeMap<>();
    fieldAnalyzers.put("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET));
    fieldAnalyzers.put("field_exact", new MockAnalyzer(random()));
    fieldAnalyzers.put("field_super_exact", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    fieldAnalyzers.put("field_characters", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp(".").toAutomaton()), true));
    fieldAnalyzers.put("field_tripples", new MockAnalyzer(random(), new CharacterRunAutomaton(new RegExp("...").toAutomaton()), true));
    fieldAnalyzers.put("field_sliced", fieldAnalyzers.get("field"));
    // This is required even though we provide a token stream
    fieldAnalyzers.put("field_der_red", fieldAnalyzers.get("field"));
    Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {

        public Analyzer getWrappedAnalyzer(String fieldName) {
            return fieldAnalyzers.get(fieldName);
        }
    };
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    writer.addDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    FragListBuilder fragListBuilder = new SimpleFragListBuilder();
    FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    IndexReader reader = DirectoryReader.open(writer);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    Encoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    for (Query clause : queryClauses) {
        query.add(clause, Occur.MUST);
    }
    FieldQuery fieldQuery = new FieldQuery(query.build(), reader, true, fieldMatch);
    String[] bestFragments;
    if (useMatchedFields) {
        Set<String> matchedFields = new HashSet<>();
        matchedFields.add("field");
        matchedFields.add("field_exact");
        matchedFields.add("field_super_exact");
        matchedFields.add("field_characters");
        matchedFields.add("field_tripples");
        matchedFields.add("field_sliced");
        matchedFields.add("field_der_red");
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", matchedFields, 25, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    } else {
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 25, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    }
    assertEquals(expected, bestFragments[0]);
    reader.close();
    writer.close();
    dir.close();
}
Also used: BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), CommonTermsQuery (org.apache.lucene.queries.CommonTermsQuery), PhraseQuery (org.apache.lucene.search.PhraseQuery), CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery), TermQuery (org.apache.lucene.search.TermQuery), SynonymQuery (org.apache.lucene.search.SynonymQuery), BoostQuery (org.apache.lucene.search.BoostQuery), CharacterRunAutomaton (org.apache.lucene.util.automaton.CharacterRunAutomaton), Document (org.apache.lucene.document.Document), Analyzer (org.apache.lucene.analysis.Analyzer), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), StoredField (org.apache.lucene.document.StoredField), Field (org.apache.lucene.document.Field), TextField (org.apache.lucene.document.TextField), Encoder (org.apache.lucene.search.highlight.Encoder), DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder), CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream), Directory (org.apache.lucene.store.Directory), HashSet (java.util.HashSet), RegExp (org.apache.lucene.util.automaton.RegExp), TreeMap (java.util.TreeMap), FieldType (org.apache.lucene.document.FieldType), DelegatingAnalyzerWrapper (org.apache.lucene.analysis.DelegatingAnalyzerWrapper), IndexWriter (org.apache.lucene.index.IndexWriter), IndexReader (org.apache.lucene.index.IndexReader)
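
Two details worth noting about Example 2. First, the matched-fields mode of FastVectorHighlighter requires every field involved to store term vectors with positions and offsets, which is why both FieldType instances above enable all three term vector options before freeze(). Second, the token(...) helper used to build "field_der_red" is not shown in this excerpt; a plausible reconstruction, using the Token class from Lucene's test framework (an assumption for illustration, not verified against the original test file), would be:

import org.apache.lucene.analysis.Token;

// Hypothetical reconstruction of the test's token(...) helper: builds a token
// with an explicit position increment and start/end offsets for CannedTokenStream.
private static Token token(String term, int posInc, int startOffset, int endOffset) {
    Token t = new Token(term, startOffset, endOffset);
    t.setPositionIncrement(posInc);
    return t;
}

A position increment of 0, as in token("red", 0, 0, 3), stacks "red" at the same position as "der", which is what the "Hacky field" comment refers to.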

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer): 2
DelegatingAnalyzerWrapper (org.apache.lucene.analysis.DelegatingAnalyzerWrapper): 2
HashSet (java.util.HashSet): 1
TreeMap (java.util.TreeMap): 1
CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 1
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1
Document (org.apache.lucene.document.Document): 1
Field (org.apache.lucene.document.Field): 1
FieldType (org.apache.lucene.document.FieldType): 1
StoredField (org.apache.lucene.document.StoredField): 1
TextField (org.apache.lucene.document.TextField): 1
IndexReader (org.apache.lucene.index.IndexReader): 1
IndexWriter (org.apache.lucene.index.IndexWriter): 1
MemoryIndex (org.apache.lucene.index.memory.MemoryIndex): 1
CommonTermsQuery (org.apache.lucene.queries.CommonTermsQuery): 1
CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery): 1
BooleanQuery (org.apache.lucene.search.BooleanQuery): 1
BoostQuery (org.apache.lucene.search.BoostQuery): 1
IndexSearcher (org.apache.lucene.search.IndexSearcher): 1
PhraseQuery (org.apache.lucene.search.PhraseQuery): 1