Search in sources :

Example 1 with FilteredQuery

use of org.apache.lucene.search.FilteredQuery in project greplin-lucene-utils by Cue.

the class PhraseFilterBenchmark method main.

public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        int done = 0;
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
            for (int d = 0; d < numberOfDocs; d++) {
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no", Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.commit();
        }
        writer.close();
        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }
        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";
            long ms1 = 0, ms2 = 0;
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();
                if (step == (round & 1)) {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))), FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder().must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) FilteredQuery(org.apache.lucene.search.FilteredQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Document(org.apache.lucene.document.Document) FilteredQuery(org.apache.lucene.search.FilteredQuery) Field(org.apache.lucene.document.Field) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) WhitespaceAnalyzer(org.apache.lucene.analysis.WhitespaceAnalyzer) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) FilterIntersectionProvider(com.greplin.lucene.util.FilterIntersectionProvider) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

FilterIntersectionProvider (com.greplin.lucene.util.FilterIntersectionProvider)1 IOException (java.io.IOException)1 WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 Term (org.apache.lucene.index.Term)1 FilteredQuery (org.apache.lucene.search.FilteredQuery)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)1 PhraseQuery (org.apache.lucene.search.PhraseQuery)1 Query (org.apache.lucene.search.Query)1 TermQuery (org.apache.lucene.search.TermQuery)1 Directory (org.apache.lucene.store.Directory)1 RAMDirectory (org.apache.lucene.store.RAMDirectory)1