Search in sources :

Example 11 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project geode by apache.

the class LuceneIndexCreationProfileJUnitTest method getTwoAnalyzersLuceneIndexCreationProfile.

/**
 * Builds an index creation profile covering {@code field1} and {@code field2}, where each field
 * is mapped to its own {@link KeywordAnalyzer} instance.
 *
 * @return a profile for {@code INDEX_NAME} on {@code REGION_NAME} with two keyword-analyzed fields
 */
private LuceneIndexCreationProfile getTwoAnalyzersLuceneIndexCreationProfile() {
    final String[] fieldNames = { "field1", "field2" };
    final Map<String, Analyzer> analyzersByField = new HashMap<>();
    for (String fieldName : fieldNames) {
        // A separate analyzer instance per field, as opposed to one shared instance.
        analyzersByField.put(fieldName, new KeywordAnalyzer());
    }
    return new LuceneIndexCreationProfile(
        INDEX_NAME,
        REGION_NAME,
        fieldNames,
        getPerFieldAnalyzerWrapper(analyzersByField),
        analyzersByField);
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) HashMap(java.util.HashMap) Analyzer(org.apache.lucene.analysis.Analyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)

Example 12 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project geode by apache.

the class LuceneIndexCommandsDUnitTest method createIndex.

/**
 * Creates the Lucene index {@code INDEX_NAME} on {@code REGION_NAME} inside the given VM, then
 * creates the region there.
 *
 * <p>The index is defined with three fields: {@code field1} uses a {@link StandardAnalyzer},
 * {@code field2} uses a {@link KeywordAnalyzer}, and {@code field3} is registered with a
 * {@code null} analyzer.
 *
 * @param vm1 the VM in which the index and region are created
 */
private void createIndex(final VM vm1) {
    vm1.invoke(() -> {
        LuceneService luceneService = LuceneServiceProvider.get(getCache());
        // Diamond operator rather than the raw HashMap type, so the assignment is type-checked.
        Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
        fieldAnalyzers.put("field1", new StandardAnalyzer());
        fieldAnalyzers.put("field2", new KeywordAnalyzer());
        // NOTE(review): a null analyzer presumably means "use the default analyzer" - confirm
        // against the LuceneIndexFactory contract.
        fieldAnalyzers.put("field3", null);
        // The index is defined before the region is created.
        luceneService.createIndexFactory().setFields(fieldAnalyzers).create(INDEX_NAME, REGION_NAME);
        createRegion();
    });
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) HashMap(java.util.HashMap) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) LuceneService(org.apache.geode.cache.lucene.LuceneService)

Example 13 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class LuceneTests method testAsSequentialAccessBits.

/**
 * Checks the {@link Bits} view returned by {@code Lucene.asSequentialAccessBits} over a scorer
 * for the term {@code foo:bar}.
 *
 * <p>Three documents are indexed: documents 0 and 2 carry {@code foo=bar}, document 1 is empty.
 * The test asserts that out-of-range indices throw {@link IndexOutOfBoundsException}, that the
 * same index may be queried repeatedly, and that asking for an earlier index after moving
 * forward throws {@link IllegalArgumentException}.
 *
 * <p>The directory, writer and reader are managed by try-with-resources so they are released
 * even when an assertion fails; they are closed in the order reader, writer, directory.
 */
public void testAsSequentialAccessBits() throws Exception {
    try (Directory dir = newDirectory();
            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()))) {
        // Doc 0: matches foo:bar.
        Document doc = new Document();
        doc.add(new StringField("foo", "bar", Store.NO));
        w.addDocument(doc);
        // Doc 1: no fields, so it does not match.
        doc = new Document();
        w.addDocument(doc);
        // Doc 2: matches foo:bar.
        doc = new Document();
        doc.add(new StringField("foo", "bar", Store.NO));
        w.addDocument(doc);
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = newSearcher(reader);
            Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false);
            // The assertions below address documents by leaf-local id, so a single leaf is required.
            assertEquals(1, reader.leaves().size());
            LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(0);
            Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext));
            // Indices outside [0, maxDoc) are rejected.
            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1));
            expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc()));
            // Repeating the current index is allowed.
            assertTrue(bits.get(0));
            assertTrue(bits.get(0));
            assertFalse(bits.get(1));
            assertFalse(bits.get(1));
            // Going backwards (0 after 1) is rejected.
            expectThrows(IllegalArgumentException.class, () -> bits.get(0));
            assertTrue(bits.get(2));
            assertTrue(bits.get(2));
            // Going backwards (1 after 2) is rejected.
            expectThrows(IllegalArgumentException.class, () -> bits.get(1));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Weight(org.apache.lucene.search.Weight) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 14 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class TermsSliceQueryTests method testSearch.

/**
 * Verifies that {@code TermsSliceQuery} partitions documents by the hash of their {@code uuid}
 * term.
 *
 * <p>A random number of documents is indexed, each with a unique {@code uuid}; the expected slice
 * of a document is {@code floorMod(BytesRef(uuid).hashCode(), sliceCount)}. For every slice the
 * test checks the hit count against the expected count and removes each collected uuid from the
 * set of indexed uuids, asserting it was still present. The set must be empty at the end.
 */
public void testSearch() throws Exception {
    final int docCount = randomIntBetween(100, 200);
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new KeywordAnalyzer());
    final int sliceCount = randomIntBetween(2, 10);
    final int[] expectedPerSlice = new int[sliceCount];
    final Set<String> unseen = new HashSet<>();

    int indexed = 0;
    while (indexed < docCount) {
        final String uuid = UUIDs.base64UUID();
        final int slice = Math.floorMod(new BytesRef(uuid).hashCode(), sliceCount);
        expectedPerSlice[slice] += 1;

        final Document document = new Document();
        document.add(new StringField("uuid", uuid, Field.Store.YES));
        writer.addDocument(document);
        unseen.add(uuid);
        indexed++;
    }

    final IndexReader reader = writer.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    for (int slice = 0; slice < sliceCount; slice++) {
        final TermsSliceQuery sliceQuery = new TermsSliceQuery("uuid", slice, sliceCount);
        assertThat(searcher.count(sliceQuery), equalTo(expectedPerSlice[slice]));
        searcher.search(sliceQuery, new Collector() {

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
                return new LeafCollector() {

                    @Override
                    public void collect(int doc) throws IOException {
                        // Load only the stored "uuid" field of the hit.
                        final Document hit = context.reader().document(doc, Collections.singleton("uuid"));
                        // remove() returns true only if the uuid was still present, which
                        // covers both the membership check and the removal.
                        assertThat(unseen.remove(hit.get("uuid")), equalTo(true));
                    }

                    @Override
                    public void setScorer(Scorer scorer) throws IOException {
                        // Scores are not needed.
                    }
                };
            }
        });
    }
    // Every indexed uuid must have been collected by some slice.
    assertThat(unseen.size(), equalTo(0));
    writer.close();
    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) Scorer(org.apache.lucene.search.Scorer) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) LeafCollector(org.apache.lucene.search.LeafCollector) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 15 with KeywordAnalyzer

use of org.apache.lucene.analysis.core.KeywordAnalyzer in project elasticsearch by elastic.

the class TopHitsAggregatorTests method testInsideTerms.

/**
 * Runs {@code top_hits} as a sub-aggregation of {@code terms}. This is not strictly a unit test,
 * but nesting {@code top_hits} under another aggregation is a common usage and this serves as an
 * example of it.
 *
 * <p>Randomly picks one of two setups: a match-all query with {@code top_hits} sorted descending
 * on {@code string}, or a boosted term query ({@code d^1000 c^100 b^10 a^1}) with unsorted
 * {@code top_hits}. The same per-bucket hits are asserted in both cases.
 */
public void testInsideTerms() throws Exception {
    final Aggregation result;
    if (randomBoolean()) {
        result = testCase(
            new MatchAllDocsQuery(),
            terms("term").field("string").subAggregation(topHits("top").sort("string", SortOrder.DESC)));
    } else {
        final Query boosted = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
        result = testCase(boosted, terms("term").field("string").subAggregation(topHits("top")));
    }
    final Terms terms = (Terms) result;

    // Bucket "a": two hits, ids 2 then 1.
    final TopHits topA = (TopHits) terms.getBucketByKey("a").getAggregations().get("top");
    final SearchHits hitsA = topA.getHits();
    assertEquals(2L, hitsA.getTotalHits());
    assertEquals("2", hitsA.getAt(0).getId());
    assertEquals("1", hitsA.getAt(1).getId());

    // Bucket "b": two hits, ids 3 then 1.
    final TopHits topB = (TopHits) terms.getBucketByKey("b").getAggregations().get("top");
    final SearchHits hitsB = topB.getHits();
    assertEquals(2L, hitsB.getTotalHits());
    assertEquals("3", hitsB.getAt(0).getId());
    assertEquals("1", hitsB.getAt(1).getId());

    // Bucket "c": a single hit, id 2.
    final TopHits topC = (TopHits) terms.getBucketByKey("c").getAggregations().get("top");
    final SearchHits hitsC = topC.getHits();
    assertEquals(1L, hitsC.getTotalHits());
    assertEquals("2", hitsC.getAt(0).getId());

    // Bucket "d": a single hit, id 3.
    final TopHits topD = (TopHits) terms.getBucketByKey("d").getAggregations().get("top");
    final SearchHits hitsD = topD.getHits();
    assertEquals(1L, hitsD.getTotalHits());
    assertEquals("3", hitsD.getAt(0).getId());
}
Also used : Aggregation(org.elasticsearch.search.aggregations.Aggregation) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchHits(org.elasticsearch.search.SearchHits) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery)

Aggregations

KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer): 24; Analyzer (org.apache.lucene.analysis.Analyzer): 12; HashMap (java.util.HashMap): 11; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 11; Document (org.apache.lucene.document.Document): 5; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 5; Test (org.junit.Test): 5; ArrayList (java.util.ArrayList): 4; HashSet (java.util.HashSet): 4; UnitTest (org.apache.geode.test.junit.categories.UnitTest): 4; StringField (org.apache.lucene.document.StringField): 4; IndexWriter (org.apache.lucene.index.IndexWriter): 4; ResultCollector (org.apache.geode.cache.execute.ResultCollector): 3; InternalCache (org.apache.geode.internal.cache.InternalCache): 3; CommandResult (org.apache.geode.management.internal.cli.result.CommandResult): 3; TabularResultData (org.apache.geode.management.internal.cli.result.TabularResultData): 3; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3; QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 3; Set (java.util.Set): 2; LuceneService (org.apache.geode.cache.lucene.LuceneService): 2