Search in sources :

Example 96 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project greplin-lucene-utils by Cue.

the class PhraseFilterBenchmark method main.

/**
 * Benchmarks {@code PhraseFilter} against an equivalent {@code PhraseQuery}
 * on a randomly generated in-memory index.
 * <p>
 * The index receives {@code TOTAL_DOCS} documents, written in
 * {@code NUMBER_OF_SEGMENTS} batches with a commit after each batch. Every one
 * of the {@code ROUNDS} rounds then runs the same {@code TOTAL_QUERIES} random
 * phrases once through the filter path and once through the query path, both
 * restricted to documents whose "second" field is "yes", and prints the elapsed
 * time of each path plus their ratio.
 *
 * @param argv ignored
 */
public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        try {
            int done = 0;
            for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
                int remaining = NUMBER_OF_SEGMENTS - i;
                int numberOfDocs;
                if (remaining == 1) {
                    // The last batch takes whatever is left so the total is exactly TOTAL_DOCS.
                    numberOfDocs = TOTAL_DOCS - done;
                } else {
                    // Random batch size of at least 1; the upper bound leaves at least one
                    // document for each of the batches still to come.
                    numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
                }
                done += numberOfDocs;
                System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
                for (int d = 0; d < numberOfDocs; d++) {
                    // Word count in [AVERAGE - DEVIATION, AVERAGE + DEVIATION).
                    int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC - WORDS_PER_DOC_DEVIATION;
                    Document doc = new Document();
                    doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no", Field.Store.NO, Field.Index.ANALYZED));
                    writer.addDocument(doc);
                }
                writer.commit();
            }
        } finally {
            // Close the writer even if indexing fails part-way through.
            writer.close();
        }
        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                // Pre-generate the random phrases, both as raw words (filter path)
                // and as Term arrays (query path), so both paths see identical input.
                String[][] queries = new String[TOTAL_QUERIES][];
                Term[][] terms = new Term[TOTAL_QUERIES][];
                for (int q = 0; q < TOTAL_QUERIES; q++) {
                    queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
                    terms[q] = new Term[queries[q].length];
                    for (int qw = 0; qw < queries[q].length; qw++) {
                        terms[q][qw] = new Term(FIELD, queries[q][qw]);
                    }
                }
                // Warm up.
                new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);
                for (int round = 0; round < ROUNDS; round++) {
                    System.out.println();
                    String name1 = "filter";
                    String name2 = "query";
                    long ms1 = 0, ms2 = 0;
                    for (int step = 0; step < 2; step++) {
                        System.gc();
                        System.gc();
                        System.gc();
                        // Alternate which path runs first from round to round.
                        if (step == (round & 1)) {
                            long millis = System.currentTimeMillis();
                            long hits = 0;
                            for (String[] queryWords : queries) {
                                PhraseFilter pf = new PhraseFilter(new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))), FIELD, queryWords);
                                hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                            }
                            ms1 = System.currentTimeMillis() - millis;
                            System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                        } else {
                            long millis = System.currentTimeMillis();
                            long hits = 0;
                            for (Term[] queryTerms : terms) {
                                PhraseQuery pq = new PhraseQuery();
                                for (Term term : queryTerms) {
                                    pq.add(term);
                                }
                                Query query = BooleanQueryBuilder.builder().must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                                hits += searcher.search(query, 1).totalHits;
                            }
                            ms2 = System.currentTimeMillis() - millis;
                            System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                        }
                    }
                    if (ms2 > 0) {
                        System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
                    } else {
                        // A 0ms measurement would make the ratio Infinity/NaN, which the int cast
                        // turns into a meaningless Integer.MAX_VALUE or 0 percentage.
                        System.out.println(name2 + " finished in under 1ms; cannot compare it with " + name1);
                    }
                }
            } finally {
                searcher.close();
            }
        } finally {
            // The searcher was built on an externally supplied reader, so the reader
            // is closed here explicitly as well.
            reader.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) FilteredQuery(org.apache.lucene.search.FilteredQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) WhitespaceAnalyzer(org.apache.lucene.analysis.WhitespaceAnalyzer) FilterIntersectionProvider(com.greplin.lucene.util.FilterIntersectionProvider) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 97 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project greplin-lucene-utils by Cue.

the class TermsForFieldTest method setUp.

/**
 * Builds a two-document in-memory index and opens {@code this.reader} on it.
 * <p>
 * The first document carries two values ("1" and "2") for the stored field
 * "stored" plus an unstored "notStored" value; the second carries one value for
 * each of those fields and an additional stored-but-not-indexed "noIndex" field.
 */
@Before
public void setUp() throws Exception {
    final Directory directory = new RAMDirectory();
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_32);
    final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_32, analyzer);
    final IndexWriter writer = new IndexWriter(directory, config);

    // First document: a multi-valued stored field and one unstored field.
    final Document first = new Document();
    first.add(new Field("stored", "1", Field.Store.YES, Field.Index.ANALYZED));
    first.add(new Field("stored", "2", Field.Store.YES, Field.Index.ANALYZED));
    first.add(new Field("notStored", "a", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(first);

    // Second document: adds a field that is stored but never indexed.
    final Document second = new Document();
    second.add(new Field("stored", "3", Field.Store.YES, Field.Index.ANALYZED));
    second.add(new Field("notStored", "b", Field.Store.NO, Field.Index.ANALYZED));
    second.add(new Field("noIndex", "?", Field.Store.YES, Field.Index.NO));
    writer.addDocument(second);

    writer.close();
    this.reader = IndexReader.open(directory);
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.WhitespaceAnalyzer) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 98 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project ddf by codice.

the class TestGeoNamesQueryLuceneIndex method initializeIndex.

/**
 * Replaces {@code directory} with a new in-memory index containing the
 * documents built from {@code GEO_ENTRY_1}, {@code GEO_ENTRY_2} and
 * {@code GEO_ENTRY_3}.
 *
 * @throws IOException if the index writer cannot be opened, written to or closed
 */
private void initializeIndex() throws IOException {
    directory = new RAMDirectory();
    final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new StandardAnalyzer());
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    // try-with-resources closes the writer even when an addDocument call throws.
    try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
        indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_1));
        indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_2));
        indexWriter.addDocument(createDocumentFromGeoEntry(GEO_ENTRY_3));
    }
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 99 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project lucene-skos by behas.

the class SKOSStandardQueryParserTest method setUp.

/**
 * Prepares the fixture: a mock SKOS engine holding seven concepts (each with a
 * preferred label and one or two alternative labels), a new in-memory
 * directory, a SKOS analyzer configured with {@code ExpansionType.LABEL}, and
 * an index writer that uses that analyzer.
 */
@Before
public void setUp() throws Exception {
    final String concept = "http://example.com/concept/";

    // Test thesaurus: PREF is the preferred label, ALT lists alternative labels.
    skosEngine = new SKOSEngineMock();
    skosEngine.addEntry(concept + "1", SKOSType.PREF, "jumps");
    skosEngine.addEntry(concept + "1", SKOSType.ALT, "leaps", "hops");
    skosEngine.addEntry(concept + "2", SKOSType.PREF, "quick");
    skosEngine.addEntry(concept + "2", SKOSType.ALT, "fast", "speedy");
    skosEngine.addEntry(concept + "3", SKOSType.PREF, "over");
    skosEngine.addEntry(concept + "3", SKOSType.ALT, "above");
    skosEngine.addEntry(concept + "4", SKOSType.PREF, "lazy");
    skosEngine.addEntry(concept + "4", SKOSType.ALT, "apathic", "sluggish");
    skosEngine.addEntry(concept + "5", SKOSType.PREF, "dog");
    skosEngine.addEntry(concept + "5", SKOSType.ALT, "canine", "pooch");
    skosEngine.addEntry(concept + "6", SKOSType.PREF, "united nations");
    skosEngine.addEntry(concept + "6", SKOSType.ALT, "UN");
    skosEngine.addEntry(concept + "7", SKOSType.PREF, "lazy dog");
    skosEngine.addEntry(concept + "7", SKOSType.ALT, "Odie");

    // Index infrastructure backed by the mock engine.
    directory = new RAMDirectory();
    skosAnalyzer = new SKOSAnalyzer(skosEngine, ExpansionType.LABEL);
    final IndexWriterConfig writerConfig = new IndexWriterConfig(skosAnalyzer);
    writer = new IndexWriter(directory, writerConfig);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) SKOSEngineMock(at.ac.univie.mminf.luceneSKOS.skos.engine.mock.SKOSEngineMock) RAMDirectory(org.apache.lucene.store.RAMDirectory) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 100 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project lucene-skos by behas.

the class LabelbasedTermExpansionTest method labelBasedTermExpansion.

/**
 * Checks label-based term expansion against one indexed sample metadata
 * record (a Lucene document) with "title", "description" and "subject" fields.
 * <p>
 * Only the "subject" field is analyzed with the SKOS analyzer. A search for
 * "arms" across all three fields is expected to return the record because
 * "arms" is defined as an alternative label for "weapons", the term stored in
 * the subject field. A search for "military equipment" on the subject field is
 * expected to return the record as well.
 *
 * @throws IOException if the SKOS analyzer or the index cannot be set up or searched
 */
@Test
public void labelBasedTermExpansion() throws IOException {
    // The record to index.
    String descriptionText = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";
    Document record = new Document();
    record.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    record.add(new Field("description", descriptionText, TextField.TYPE_NOT_STORED));
    record.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    // SKOS analyzer in LABEL expansion mode, built from the sample thesaurus file.
    Analyzer labelExpander = new SKOSAnalyzer("build/", "src/test/resources/skos_samples/ukat_examples.n3", ExpansionType.LABEL);

    // The SKOS analyzer handles "subject"; every other field uses SimpleAnalyzer.
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("subject", labelExpander);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), perField);

    // Index the record into a fresh in-memory directory.
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));
    writer.addDocument(record);

    // "arms" in any of the three fields.
    BooleanQuery.Builder anyField = new BooleanQuery.Builder();
    for (String fieldName : new String[] { "title", "description", "subject" }) {
        anyField.add(new TermQuery(new Term(fieldName, "arms")), BooleanClause.Occur.SHOULD);
    }

    // Search through a reader opened directly on the writer.
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs hits = searcher.search(anyField.build(), 10);
    // The record matches because "arms" is among the expanded terms.
    assertEquals(1, hits.totalHits);

    // A subject search for "military equipment" must find the record too.
    hits = searcher.search(new TermQuery(new Term("subject", "military equipment")), 10);
    assertEquals(1, hits.totalHits);
}
Also used : HashMap(java.util.HashMap) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Analyzer(org.apache.lucene.analysis.Analyzer) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

RAMDirectory (org.apache.lucene.store.RAMDirectory): 183; Directory (org.apache.lucene.store.Directory): 101; IndexWriter (org.apache.lucene.index.IndexWriter): 82; Document (org.apache.lucene.document.Document): 75; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 73; IndexSearcher (org.apache.lucene.search.IndexSearcher): 43; IndexReader (org.apache.lucene.index.IndexReader): 41; Test (org.junit.Test): 35; TextField (org.apache.lucene.document.TextField): 33; Field (org.apache.lucene.document.Field): 29; Term (org.apache.lucene.index.Term): 25; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 21; Before (org.junit.Before): 21; IOException (java.io.IOException): 19; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 19; Analyzer (org.apache.lucene.analysis.Analyzer): 18; TopDocs (org.apache.lucene.search.TopDocs): 16; DirectoryReader (org.apache.lucene.index.DirectoryReader): 15; FilterDirectory (org.apache.lucene.store.FilterDirectory): 15; FieldType (org.apache.lucene.document.FieldType): 13