Search in sources :

Example 1 with FilterIntersectionProvider

use of com.greplin.lucene.util.FilterIntersectionProvider in project greplin-lucene-utils by Cue.

The method main of the class PhraseFilterBenchmark.

/**
 * Benchmarks a {@link PhraseFilter} restricted by a {@link FilterIntersectionProvider} against a
 * boolean query that combines a {@link TermQuery} with a {@link PhraseQuery}.
 *
 * <p>An in-memory index is filled with randomly generated documents, written in
 * {@code NUMBER_OF_SEGMENTS} separate commits that together hold {@code TOTAL_DOCS} documents.
 * {@code TOTAL_QUERIES} random phrases are then executed {@code ROUNDS} times with each
 * approach, and the elapsed wall-clock time and total hit count of each approach are printed.
 *
 * <p>The writer, reader and searcher are closed in {@code finally} blocks so that they are
 * released even when indexing or searching fails. An {@link IOException} is reported on
 * standard error and ends the benchmark.
 *
 * @param argv command-line arguments; not used
 */
public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        try {
            int done = 0;
            for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
                int remaining = NUMBER_OF_SEGMENTS - i;
                int numberOfDocs;
                if (remaining == 1) {
                    // The last batch receives every document that is still unassigned.
                    numberOfDocs = TOTAL_DOCS - done;
                } else {
                    // Random batch size of at least one document; subtracting 'remaining' from
                    // the bound leaves documents over for the batches that follow.
                    numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
                }
                done += numberOfDocs;
                System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
                for (int d = 0; d < numberOfDocs; d++) {
                    // Word count drawn from [AVERAGE - DEVIATION, AVERAGE + DEVIATION).
                    int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
                    Document doc = new Document();
                    // NOTE(review): the text is indexed under the literal "f" while the queries
                    // below use FIELD - presumably FIELD is "f"; confirm.
                    doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES, Field.Index.ANALYZED));
                    // Marks the document "yes" when a random value in [0, 100) falls below
                    // SECOND_FIELD_MATCH_PERCENTAGE, otherwise "no".
                    doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no", Field.Store.NO, Field.Index.ANALYZED));
                    writer.addDocument(doc);
                }
                // One commit per batch of documents.
                writer.commit();
            }
        } finally {
            // Release the writer even when indexing fails part-way through.
            writer.close();
        }
        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                // Build each random phrase once, both as raw words (for the filter) and as
                // Term objects (for the query), so that both approaches run the same phrases.
                String[][] queries = new String[TOTAL_QUERIES][];
                Term[][] terms = new Term[TOTAL_QUERIES][];
                for (int q = 0; q < TOTAL_QUERIES; q++) {
                    queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
                    terms[q] = new Term[queries[q].length];
                    for (int qw = 0; qw < queries[q].length; qw++) {
                        terms[q][qw] = new Term(FIELD, queries[q][qw]);
                    }
                }
                // Warm up.
                new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
                for (int round = 0; round < ROUNDS; round++) {
                    System.out.println();
                    String name1 = "filter";
                    String name2 = "query";
                    long ms1 = 0, ms2 = 0;
                    for (int step = 0; step < 2; step++) {
                        // Request garbage collection before each timed section.
                        System.gc();
                        System.gc();
                        System.gc();
                        // Alternate which approach runs first: the filter goes first on even
                        // rounds, the query goes first on odd rounds.
                        if (step == (round & 1)) {
                            long millis = System.currentTimeMillis();
                            long hits = 0;
                            for (String[] queryWords : queries) {
                                PhraseFilter pf = new PhraseFilter(new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))), FIELD, queryWords);
                                hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                            }
                            ms1 = System.currentTimeMillis() - millis;
                            System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                        } else {
                            long millis = System.currentTimeMillis();
                            long hits = 0;
                            for (Term[] queryTerms : terms) {
                                PhraseQuery pq = new PhraseQuery();
                                for (Term term : queryTerms) {
                                    pq.add(term);
                                }
                                Query query = BooleanQueryBuilder.builder().must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                                hits += searcher.search(query, 1).totalHits;
                            }
                            ms2 = System.currentTimeMillis() - millis;
                            System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                        }
                    }
                    // Floating-point division: a zero ms2 does not throw, but the printed
                    // percentage is then meaningless.
                    System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
                }
            } finally {
                // The searcher was built on an externally supplied reader, so the reader is
                // closed separately below.
                searcher.close();
            }
        } finally {
            reader.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) FilteredQuery(org.apache.lucene.search.FilteredQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Document(org.apache.lucene.document.Document) FilteredQuery(org.apache.lucene.search.FilteredQuery) Field(org.apache.lucene.document.Field) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) WhitespaceAnalyzer(org.apache.lucene.analysis.WhitespaceAnalyzer) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) FilterIntersectionProvider(com.greplin.lucene.util.FilterIntersectionProvider) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with FilterIntersectionProvider

use of com.greplin.lucene.util.FilterIntersectionProvider in project greplin-lucene-utils by Cue.

The method testIntersectionWithFilter of the class PhraseFilterTest.

/**
 * Checks that a {@link PhraseFilter} built with a {@link FilterIntersectionProvider} yields
 * different bits from the same phrase filter built without one, for both a single-word and a
 * two-word phrase.
 *
 * @throws Exception if creating the sample reader or evaluating a filter fails
 */
@Test
public void testIntersectionWithFilter() throws Exception {
    final IndexReader sampleReader = createReaderWithSampleDocuments();

    // Restriction built from the terms name:one and name:two.
    final Term nameOne = new Term("name", "one");
    final Term nameTwo = new Term("name", "two");
    final IntersectionProvider nameRestriction =
            new FilterIntersectionProvider(TermsFilter.from(nameOne, nameTwo));

    // Single-word phrase: unrestricted first, then with the restriction applied.
    final PhraseFilter worldUnrestricted = new PhraseFilter("f", "world");
    assertFilterBitsEqual(sampleReader, worldUnrestricted, true, true, true);
    final PhraseFilter worldRestricted = new PhraseFilter(nameRestriction, "f", "world");
    assertFilterBitsEqual(sampleReader, worldRestricted, true, true, false);

    // Two-word phrase: unrestricted first, then with the restriction applied.
    final PhraseFilter helloWorldUnrestricted = new PhraseFilter("f", "hello", "world");
    assertFilterBitsEqual(sampleReader, helloWorldUnrestricted, true, false, true);
    final PhraseFilter helloWorldRestricted = new PhraseFilter(nameRestriction, "f", "hello", "world");
    assertFilterBitsEqual(sampleReader, helloWorldRestricted, true, false, false);
}
Also used : FilterIntersectionProvider(com.greplin.lucene.util.FilterIntersectionProvider) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) BitsProviderIntersectionProvider(com.greplin.lucene.util.BitsProviderIntersectionProvider) FilterIntersectionProvider(com.greplin.lucene.util.FilterIntersectionProvider) IntersectionProvider(com.greplin.lucene.util.IntersectionProvider) Test(org.junit.Test)

Aggregations

FilterIntersectionProvider (com.greplin.lucene.util.FilterIntersectionProvider)2 IndexReader (org.apache.lucene.index.IndexReader)2 Term (org.apache.lucene.index.Term)2 BitsProviderIntersectionProvider (com.greplin.lucene.util.BitsProviderIntersectionProvider)1 IntersectionProvider (com.greplin.lucene.util.IntersectionProvider)1 IOException (java.io.IOException)1 WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 FilteredQuery (org.apache.lucene.search.FilteredQuery)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)1 PhraseQuery (org.apache.lucene.search.PhraseQuery)1 Query (org.apache.lucene.search.Query)1 TermQuery (org.apache.lucene.search.TermQuery)1 Directory (org.apache.lucene.store.Directory)1 RAMDirectory (org.apache.lucene.store.RAMDirectory)1 Test (org.junit.Test)1