
Example 1 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.

From the class TermsForFieldTest, method setUp:

@Before
public void setUp() throws Exception {
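    // Build a small two-document in-memory index: an analyzed, stored "stored" field,
    // an analyzed, unstored "notStored" field, and a stored-but-unindexed "noIndex" field.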
    Directory d = new RAMDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
    Document doc1 = new Document();
    doc1.add(new Field("stored", "1", Field.Store.YES, Field.Index.ANALYZED));
    doc1.add(new Field("stored", "2", Field.Store.YES, Field.Index.ANALYZED));
    doc1.add(new Field("notStored", "a", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc1);
    Document doc2 = new Document();
    doc2.add(new Field("stored", "3", Field.Store.YES, Field.Index.ANALYZED));
    doc2.add(new Field("notStored", "b", Field.Store.NO, Field.Index.ANALYZED));
    doc2.add(new Field("noIndex", "?", Field.Store.YES, Field.Index.NO));
    w.addDocument(doc2);
    w.close();
    this.reader = IndexReader.open(d);
}
Also used: WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer), Field (org.apache.lucene.document.Field), IndexWriter (org.apache.lucene.index.IndexWriter), Document (org.apache.lucene.document.Document), RAMDirectory (org.apache.lucene.store.RAMDirectory), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig), Before (org.junit.Before)
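
The TermsForField API under test is not shown on this page. As a rough sketch of what the fixture above enables, the same terms can be enumerated with the stock Lucene 3.x TermEnum API (org.apache.lucene.index.TermEnum), using the reader field initialized in setUp; this is an illustration, not the project's implementation:

TermEnum termEnum = reader.terms(new Term("stored", ""));
try {
    do {
        Term term = termEnum.term();
        // Stop once the enumeration moves past the "stored" field.
        if (term == null || !"stored".equals(term.field())) {
            break;
        }
        System.out.println(term.text());  // prints 1, 2, 3 for the index built in setUp
    } while (termEnum.next());
} finally {
    termEnum.close();
}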

Example 2 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.

From the class PhraseFilterBenchmark, method main:

public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        int done = 0;
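        // Spread TOTAL_DOCS randomly across NUMBER_OF_SEGMENTS batches, committing after each one.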
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
            for (int d = 0; d < numberOfDocs; d++) {
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no", Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.commit();
        }
        writer.close();
        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }
        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";
            long ms1 = 0, ms2 = 0;
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();
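                // Alternate which implementation is measured first in each round to reduce ordering bias.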
                if (step == (round & 1)) {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))), FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder().must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), Query (org.apache.lucene.search.Query), FilteredQuery (org.apache.lucene.search.FilteredQuery), PhraseQuery (org.apache.lucene.search.PhraseQuery), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), TermQuery (org.apache.lucene.search.TermQuery), Document (org.apache.lucene.document.Document), Field (org.apache.lucene.document.Field), RAMDirectory (org.apache.lucene.store.RAMDirectory), Directory (org.apache.lucene.store.Directory), WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer), FilterIntersectionProvider (com.greplin.lucene.util.FilterIntersectionProvider), Term (org.apache.lucene.index.Term), IOException (java.io.IOException), IndexWriter (org.apache.lucene.index.IndexWriter), IndexReader (org.apache.lucene.index.IndexReader), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
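
The benchmark relies on constants and a words(int) helper that are defined elsewhere in PhraseFilterBenchmark and are not shown on this page. A minimal sketch of plausible declarations follows; the values and vocabulary are illustrative assumptions, not the project's actual settings:

// Illustrative assumptions only; the real benchmark defines its own values.
// Requires java.util.Random.
private static final Random RANDOM = new Random(42);
private static final String FIELD = "f";
private static final int NUMBER_OF_SEGMENTS = 10;
private static final int TOTAL_DOCS = 100000;
private static final int AVERAGE_WORDS_PER_DOC = 100;
private static final int WORDS_PER_DOC_DEVIATION = 50;
private static final int SECOND_FIELD_MATCH_PERCENTAGE = 50;
private static final int TOTAL_QUERIES = 1000;
private static final int[] WORDS_PER_QUERY = {2, 3, 4};
private static final int ROUNDS = 5;
private static final String[] VOCABULARY = {"alpha", "beta", "gamma", "delta", "epsilon"};

// Returns 'count' random words drawn from the illustrative vocabulary.
private static String[] words(int count) {
    String[] result = new String[count];
    for (int i = 0; i < count; i++) {
        result[i] = VOCABULARY[RANDOM.nextInt(VOCABULARY.length)];
    }
    return result;
}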

Example 3 with WhitespaceAnalyzer

Use of org.apache.lucene.analysis.WhitespaceAnalyzer in the project greplin-lucene-utils by Cue.

From the class PredicateBonusQueryTest, method testBasics:

@Test
public void testBasics() throws Exception {
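    // Index a single document with value=5, then verify that PredicateBonusQuery adds its bonus
    // only when the predicate matches, and never turns a non-matching query into a hit.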
    IndexWriter writer = new IndexWriter(this.directory, new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35)));
    writer.addDocument(new DocumentBuilder().add("value", "5").build());
    writer.close();
    IndexReader reader = IndexReader.open(this.directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new ConstantScoreQuery(new TermQuery(new Term("value", "5")));
    Assert.assertEquals(1.0, searcher.search(query, 1).getMaxScore(), 0.00001);
    Query noBonus = new PredicateBonusQuery(query, Predicates.NONE, 10.0f);
    Assert.assertEquals(1.0, searcher.search(noBonus, 1).getMaxScore(), 0.00001);
    Query bonus = new PredicateBonusQuery(query, Predicates.ALL, 100.0f);
    Assert.assertEquals(101.0, searcher.search(bonus, 1).getMaxScore(), 0.00001);
    Query noMatch = new TermQuery(new Term("value", "not5"));
    Assert.assertEquals(Double.NaN, searcher.search(noMatch, 1).getMaxScore(), 0.00001);
    Query noMatchNoBonus = new PredicateBonusQuery(noMatch, Predicates.NONE, 10.0f);
    Assert.assertEquals(Double.NaN, searcher.search(noMatchNoBonus, 1).getMaxScore(), 0.00001);
    Query noMatchIgnoresBonus = new PredicateBonusQuery(noMatch, Predicates.ALL, 100.0f);
    Assert.assertEquals(Double.NaN, searcher.search(noMatchIgnoresBonus, 1).getMaxScore(), 0.00001);
}
Also used: WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer), IndexSearcher (org.apache.lucene.search.IndexSearcher), Query (org.apache.lucene.search.Query), TermQuery (org.apache.lucene.search.TermQuery), ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery), IndexWriter (org.apache.lucene.index.IndexWriter), DocumentBuilder (com.greplin.lucene.document.DocumentBuilder), IndexReader (org.apache.lucene.index.IndexReader), Term (org.apache.lucene.index.Term), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig), Test (org.junit.Test)
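
DocumentBuilder is a greplin-lucene-utils helper whose field defaults are not shown here. With plain Lucene 3.x, the indexed document is roughly equivalent to the following; the Store and Index options are assumptions:

// Rough equivalent of new DocumentBuilder().add("value", "5").build();
// field options are assumed, DocumentBuilder's actual defaults may differ.
Document doc = new Document();
doc.add(new Field("value", "5", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);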

Aggregations

WhitespaceAnalyzer (org.apache.lucene.analysis.WhitespaceAnalyzer): 3
IndexWriter (org.apache.lucene.index.IndexWriter): 3
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 3
Document (org.apache.lucene.document.Document): 2
Field (org.apache.lucene.document.Field): 2
IndexReader (org.apache.lucene.index.IndexReader): 2
Term (org.apache.lucene.index.Term): 2
IndexSearcher (org.apache.lucene.search.IndexSearcher): 2
Query (org.apache.lucene.search.Query): 2
TermQuery (org.apache.lucene.search.TermQuery): 2
Directory (org.apache.lucene.store.Directory): 2
RAMDirectory (org.apache.lucene.store.RAMDirectory): 2
DocumentBuilder (com.greplin.lucene.document.DocumentBuilder): 1
FilterIntersectionProvider (com.greplin.lucene.util.FilterIntersectionProvider): 1
IOException (java.io.IOException): 1
ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 1
FilteredQuery (org.apache.lucene.search.FilteredQuery): 1
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 1
PhraseQuery (org.apache.lucene.search.PhraseQuery): 1
Before (org.junit.Before): 1