Search in sources :

Example 1 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class TopHitsAggregatorTests method testTopLevel.

/**
 * Runs a top-hits aggregation named "_name" through one of two randomly chosen paths: a match-all
 * query with an explicit descending sort on the "string" field, or a boosted query parsed with a
 * {@link KeywordAnalyzer}. Either way, three hits are expected, with ids "3", "2", "1" in that order
 * and type "type".
 */
public void testTopLevel() throws Exception {
    final Aggregation result;
    if (randomBoolean()) {
        result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
    } else {
        QueryParser parser = new QueryParser("string", new KeywordAnalyzer());
        Query boostedQuery = parser.parse("d^1000 c^100 b^10 a^1");
        result = testCase(boostedQuery, topHits("_name"));
    }

    SearchHits hits = ((TopHits) result).getHits();
    assertEquals(3L, hits.getTotalHits());

    // Expected hit order, checked position by position.
    String[] expectedIds = { "3", "2", "1" };
    for (int rank = 0; rank < expectedIds.length; rank++) {
        assertEquals(expectedIds[rank], hits.getAt(rank).getId());
        assertEquals("type", hits.getAt(rank).getType());
    }
}
Also used : Aggregation(org.elasticsearch.search.aggregations.Aggregation) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) SearchHits(org.elasticsearch.search.SearchHits)

Example 2 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class FreqTermsEnumTests method setUp.

/**
 * Builds a randomized index and the three reference frequency maps
 * ({@code referenceAll}, {@code referenceNotDeleted}, {@code referenceFilter}).
 *
 * <p>Steps, in order: create the writer, generate random terms, build documents that each carry an
 * "id" field plus a random subset of the terms (each repeated 1-3 times), index them with occasional
 * commits, delete a random subset by id, then walk the reader to fill the reference maps and collect
 * the ids used for the {@code filter} query. The order of the randomized calls is significant for
 * reproducibility with a fixed seed.
 */
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    referenceAll = new HashMap<>();
    referenceNotDeleted = new HashMap<>();
    referenceFilter = new HashMap<>();

    Directory directory = newDirectory();
    // Keyword analyzer: we rely on the stored field holding the exact term.
    IndexWriterConfig writerConfig = newIndexWriterConfig(new KeywordAnalyzer());
    if (frequently()) {
        // No merges wanted here, so deletes are not expunged.
        writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    }
    iw = new IndexWriter(directory, writerConfig);

    terms = new String[scaledRandomIntBetween(10, 300)];
    for (int termIdx = 0; termIdx < terms.length; termIdx++) {
        terms[termIdx] = randomAsciiOfLength(5);
    }

    Document[] docs = new Document[scaledRandomIntBetween(30, 300)];
    for (int docIdx = 0; docIdx < docs.length; docIdx++) {
        Document current = new Document();
        current.add(new StringField("id", Integer.toString(docIdx), Field.Store.YES));
        for (String term : terms) {
            if (randomBoolean() == false) {
                // This term is part of the document; add it between one and three times.
                int occurrences = randomIntBetween(1, 3);
                for (int n = 0; n < occurrences; n++) {
                    current.add(new TextField("field", term, Field.Store.YES));
                }
            }
        }
        docs[docIdx] = current;
    }

    for (Document toIndex : docs) {
        iw.addDocument(toIndex);
        if (rarely()) {
            iw.commit();
        }
    }

    Set<String> deletedIds = new HashSet<>();
    for (Document candidate : docs) {
        if (randomInt(5) == 2) {
            Term deletion = new Term("id", candidate.getField("id").stringValue());
            deletedIds.add(deletion.text());
            iw.deleteDocuments(deletion);
        }
    }

    for (String term : terms) {
        referenceAll.put(term, new FreqHolder());
        referenceFilter.put(term, new FreqHolder());
        referenceNotDeleted.put(term, new FreqHolder());
    }

    // Walk every document in the reader, building the references and the filter terms.
    reader = DirectoryReader.open(iw);
    List<BytesRef> filterTerms = new ArrayList<>();
    final int maxDoc = reader.maxDoc();
    for (int docId = 0; docId < maxDoc; docId++) {
        Document stored = reader.document(docId);
        addFreqs(stored, referenceAll);
        String id = stored.getField("id").stringValue();
        if (deletedIds.contains(id)) {
            continue;
        }
        addFreqs(stored, referenceNotDeleted);
        if (randomBoolean()) {
            filterTerms.add(new BytesRef(id));
            addFreqs(stored, referenceFilter);
        }
    }
    filter = new TermInSetQuery("id", filterTerms);
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Before(org.junit.Before)

Example 3 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class IndexFieldDataServiceTests method testFieldDataCacheListener.

/**
 * Checks that the field-data cache listener is notified exactly once on load and exactly once on
 * removal after the reader is closed.
 *
 * <p>The reader is randomly either wrapped in an {@link ElasticsearchDirectoryReader} carrying a
 * shard id or left unwrapped; the listener asserts that it receives that shard id in the first case
 * and {@code null} in the second.
 */
public void testFieldDataCacheListener() throws Exception {
    final IndexService indexService = createIndex("test");
    final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
    // Build our own copy of the field-data service: the listener can be set only once.
    final IndexFieldDataService ifdService = new IndexFieldDataService(
        indexService.getIndexSettings(),
        indicesService.getIndicesFieldDataCache(),
        indicesService.getCircuitBreakerService(),
        indexService.mapperService());
    final BuilderContext builderContext = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1));
    final MappedFieldType textFieldType = new TextFieldMapper.Builder("s").fielddata(true).build(builderContext).fieldType();

    final IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
    Document singleDoc = new Document();
    singleDoc.add(new StringField("s", "thisisastring", Store.NO));
    writer.addDocument(singleDoc);
    DirectoryReader directoryReader = DirectoryReader.open(writer);

    final boolean wrap = randomBoolean();
    // Shard id the listener should see; only exists when the reader is wrapped.
    final ShardId expectedShardId;
    final IndexReader reader;
    if (wrap) {
        expectedShardId = new ShardId("test", "_na_", 1);
        reader = ElasticsearchDirectoryReader.wrap(directoryReader, expectedShardId);
    } else {
        expectedShardId = null;
        reader = directoryReader;
    }

    final AtomicInteger onCacheCalled = new AtomicInteger();
    final AtomicInteger onRemovalCalled = new AtomicInteger();
    ifdService.setListener(new IndexFieldDataCache.Listener() {

        @Override
        public void onCache(ShardId shardId, String fieldName, Accountable ramUsage) {
            if (wrap) {
                assertEquals(expectedShardId, shardId);
            } else {
                assertNull(shardId);
            }
            onCacheCalled.incrementAndGet();
        }

        @Override
        public void onRemoval(ShardId shardId, String fieldName, boolean wasEvicted, long sizeInBytes) {
            if (wrap) {
                assertEquals(expectedShardId, shardId);
            } else {
                assertNull(shardId);
            }
            onRemovalCalled.incrementAndGet();
        }
    });

    IndexFieldData<?> fieldData = ifdService.getForField(textFieldType);
    LeafReaderContext firstLeaf = reader.getContext().leaves().get(0);
    AtomicFieldData loaded = fieldData.load(firstLeaf);
    // Loading must have populated the cache once, with nothing removed yet.
    assertEquals(1, onCacheCalled.get());
    assertEquals(0, onRemovalCalled.get());

    reader.close();
    loaded.close();
    writer.close();
    // After closing, exactly one removal is expected and no further cache events.
    assertEquals(1, onCacheCalled.get());
    assertEquals(1, onRemovalCalled.get());
    ifdService.clear();
}
Also used : IndexService(org.elasticsearch.index.IndexService) Matchers.containsString(org.hamcrest.Matchers.containsString) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Accountable(org.apache.lucene.util.Accountable) IndicesService(org.elasticsearch.indices.IndicesService) ContentPath(org.elasticsearch.index.mapper.ContentPath) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriter(org.apache.lucene.index.IndexWriter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) BuilderContext(org.elasticsearch.index.mapper.Mapper.BuilderContext) TextFieldMapper(org.elasticsearch.index.mapper.TextFieldMapper) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 4 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project orientdb by orientechnologies.

the class OLuceneTxChangesMultiRid method isDeleted.

/**
 * Tells whether {@code document} matches at least one of the delete queries recorded for
 * {@code value} in {@code deleted}.
 *
 * <p>The document's fields are loaded into an in-memory index using a {@link KeywordAnalyzer}, and
 * each recorded entry is turned into a query via {@code engine.deleteQuery} and run against it.
 * The document is indexed only once, because its contents do not depend on the query being tested,
 * and the search stops at the first matching query.
 *
 * @param document the Lucene document to test
 * @param key      not used by this implementation
 * @param value    the record whose identity selects the recorded delete entries
 * @return {@code true} if any recorded delete query scores above zero against the document;
 *         {@code false} if none does or nothing is recorded for {@code value}
 */
public boolean isDeleted(Document document, Object key, OIdentifiable value) {
    List<String> strings = deleted.get(value.getIdentity().toString());
    if (strings == null || strings.isEmpty()) {
        return false;
    }
    // One analyzer for all fields; closed when done so its per-thread resources are released.
    try (KeywordAnalyzer analyzer = new KeywordAnalyzer()) {
        MemoryIndex memoryIndex = new MemoryIndex();
        for (IndexableField field : document.getFields()) {
            memoryIndex.addField(field.name(), field.stringValue(), analyzer);
        }
        for (String string : strings) {
            Query q = engine.deleteQuery(string, value);
            if (memoryIndex.search(q) > 0.0f) {
                // A single match is enough; no need to evaluate the remaining queries.
                return true;
            }
        }
    }
    return false;
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) MemoryIndex(org.apache.lucene.index.memory.MemoryIndex) Query(org.apache.lucene.search.Query)

Example 5 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch-opennlp-plugin by spinscale.

the class OpenNlpMappingTest method setupMapperParser.

/**
 * Prepares {@code mapperParser} for the tests: registers a "keyword" analyzer backed by
 * {@link KeywordAnalyzer}, creates the document mapper parser for index "test", starts an
 * {@link OpenNlpService} configured with the name, date and location model files under
 * {@code src/test/resources/models}, and registers the OpenNLP type parser.
 */
@Before
public void setupMapperParser() {
    Index index = new Index("test");

    Map<String, AnalyzerProviderFactory> analyzerFactoryFactories = Maps.newHashMap();
    PreBuiltAnalyzerProviderFactory keywordFactory =
        new PreBuiltAnalyzerProviderFactory("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
    analyzerFactoryFactories.put("keyword", keywordFactory);
    AnalysisService analysisService =
        new AnalysisService(index, ImmutableSettings.Builder.EMPTY_SETTINGS, null, analyzerFactoryFactories, null, null, null);

    PostingsFormatService postingsFormats = new PostingsFormatService(index);
    SimilarityLookupService similarities = new SimilarityLookupService(index, ImmutableSettings.Builder.EMPTY_SETTINGS);
    mapperParser = new DocumentMapperParser(index, analysisService, postingsFormats, similarities);

    // Model files used by the OpenNLP service, one per entity kind.
    Settings settings = settingsBuilder()
        .put("opennlp.models.name.file", "src/test/resources/models/en-ner-person.bin")
        .put("opennlp.models.date.file", "src/test/resources/models/en-ner-date.bin")
        .put("opennlp.models.location.file", "src/test/resources/models/en-ner-location.bin")
        .build();
    LogConfigurator.configure(settings);

    OpenNlpService openNlpService = new OpenNlpService(settings);
    openNlpService.start();

    OpenNlpMapper.TypeParser typeParser = new OpenNlpMapper.TypeParser(analysisService, openNlpService);
    mapperParser.putTypeParser(OpenNlpMapper.CONTENT_TYPE, typeParser);
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) PostingsFormatService(org.elasticsearch.index.codec.postingsformat.PostingsFormatService) PreBuiltAnalyzerProviderFactory(org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory) Index(org.elasticsearch.index.Index) Matchers.containsString(org.hamcrest.Matchers.containsString) AnalyzerProviderFactory(org.elasticsearch.index.analysis.AnalyzerProviderFactory) DocumentMapperParser(org.elasticsearch.index.mapper.DocumentMapperParser) OpenNlpService(org.elasticsearch.service.opennlp.OpenNlpService) SimilarityLookupService(org.elasticsearch.index.similarity.SimilarityLookupService) OpenNlpMapper(org.elasticsearch.index.mapper.opennlp.OpenNlpMapper) AnalysisService(org.elasticsearch.index.analysis.AnalysisService) ImmutableSettings(org.elasticsearch.common.settings.ImmutableSettings) Settings(org.elasticsearch.common.settings.Settings) Before(org.junit.Before)

Aggregations

KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer): 24, Analyzer (org.apache.lucene.analysis.Analyzer): 12, HashMap (java.util.HashMap): 11, StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 11, Document (org.apache.lucene.document.Document): 5, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 5, Test (org.junit.Test): 5, ArrayList (java.util.ArrayList): 4, HashSet (java.util.HashSet): 4, UnitTest (org.apache.geode.test.junit.categories.UnitTest): 4, StringField (org.apache.lucene.document.StringField): 4, IndexWriter (org.apache.lucene.index.IndexWriter): 4, ResultCollector (org.apache.geode.cache.execute.ResultCollector): 3, InternalCache (org.apache.geode.internal.cache.InternalCache): 3, CommandResult (org.apache.geode.management.internal.cli.result.CommandResult): 3, TabularResultData (org.apache.geode.management.internal.cli.result.TabularResultData): 3, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3, QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 3, Set (java.util.Set): 2, LuceneService (org.apache.geode.cache.lucene.LuceneService): 2