Search in sources :

Example 11 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testValueSourceEmptyReader.

/**
 * Checks that a {@code DocumentValueSourceDictionary} built over an index that
 * contains no documents hands out an iterator with no entries.
 */
@Test
public void testValueSourceEmptyReader() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    // Commit without adding any document so the reader below has an (empty) index to open.
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    assertNull(inputIterator.next());
    // assertEquals takes (expected, actual); keeping that order makes a failure message read correctly.
    assertEquals(0, inputIterator.weight());
    assertNull(inputIterator.payload());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 12 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentDictionaryTest method testWithDeletions.

/**
 * Indexes a generated set of documents, deletes a random subset of them by term,
 * and checks that a {@code DocumentDictionary} over the resulting index returns
 * exactly the surviving documents that are not listed as invalid.
 */
@Test
public void testWithDeletions() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    Random rand = random();
    List<String> termsToDel = new ArrayList<>();
    for (Document doc : docs.values()) {
        IndexableField f = doc.getField(FIELD_NAME);
        // Randomly pick documents to delete later, skipping those without the field
        // and those whose term is listed as invalid.
        if (rand.nextBoolean() && f != null && !invalidDocTerms.contains(f.stringValue())) {
            termsToDel.add(doc.get(FIELD_NAME));
        }
        writer.addDocument(doc);
    }
    writer.commit();
    for (String termToDel : termsToDel) {
        writer.deleteDocuments(new Term(FIELD_NAME, termToDel));
    }
    writer.commit();
    writer.close();
    // Mirror the deletions in the expected-documents map.
    for (String termToDel : termsToDel) {
        assertNotNull(docs.remove(termToDel));
    }
    IndexReader ir = DirectoryReader.open(dir);
    // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
    assertEquals(docs.size(), ir.numDocs());
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef term;
    while ((term = inputIterator.next()) != null) {
        Document doc = docs.remove(term.utf8ToString());
        assertNotNull("dictionary returned a term with no matching expected document", doc);
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), term);
        IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
        long expectedWeight = (weightField != null) ? weightField.numericValue().longValue() : 0;
        assertEquals(expectedWeight, inputIterator.weight());
        assertNull(inputIterator.payload());
    }
    // Whatever the dictionary did not return must be exactly the invalid documents.
    for (String invalidTerm : invalidDocTerms) {
        assertNotNull(docs.remove(invalidTerm));
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) IndexReader(org.apache.lucene.index.IndexReader) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 13 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentDictionaryTest method testEmptyReader.

/**
 * Checks that a {@code DocumentDictionary} built over an index that contains
 * no documents hands out an iterator with no entries.
 */
@Test
public void testEmptyReader() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    // Commit without adding any document so the reader below has an (empty) index to open.
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    assertNull(inputIterator.next());
    // assertEquals takes (expected, actual); keeping that order makes a failure message read correctly.
    assertEquals(0, inputIterator.weight());
    assertNull(inputIterator.payload());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 14 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class TestHighFrequencyDictionary method testEmpty.

/**
 * Checks that a {@code HighFrequencyDictionary} over an index with no documents
 * returns an iterator whose first {@code next()} call is {@code null}.
 */
public void testEmpty() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    // Commit without adding any document so the reader below has an (empty) index to open.
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
    BytesRefIterator tf = dictionary.getEntryIterator();
    assertNull(tf.next());
    // Close the reader and the analyzer as well as the directory so nothing is left open.
    ir.close();
    analyzer.close();
    dir.close();
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) HighFrequencyDictionary(org.apache.lucene.search.spell.HighFrequencyDictionary) BytesRefIterator(org.apache.lucene.util.BytesRefIterator) HighFrequencyDictionary(org.apache.lucene.search.spell.HighFrequencyDictionary) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 15 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project jackrabbit-oak by apache.

the class SuggestHelper method updateSuggester.

/**
 * Rebuilds the suggester from the {@code FieldNames.SUGGEST} field of the given reader.
 * The build is skipped when no document carries that field. A {@link RuntimeException}
 * raised along the way is logged at debug level and not rethrown.
 *
 * @param directory passed through to {@code getLookup}
 * @param analyzer  passed through to {@code getLookup}
 * @param reader    index reader whose suggest field feeds the dictionary
 * @throws IOException propagated from the reader or from the suggester build
 */
public static void updateSuggester(Directory directory, Analyzer analyzer, IndexReader reader) throws IOException {
    File scratchDir = null;
    try {
        // The analyzing infix suggester is given a file and resolves its storage through
        // getDirectory() on that path. During a build it additionally calls getDirectory()
        // for a temporary location (original path + ".tmp"). To keep that temporary data
        // contained, we hand it a placeholder child that does not exist inside a fresh temp
        // directory; the placeholder marks where our internal suggestion OakDirectory is
        // returned, and deleting the temp directory afterwards removes anything the
        // suggester created in the interim.
        scratchDir = Files.createTempDir();
        final File placeholder = new File(scratchDir, "non-existing-sub-child");
        final boolean hasSuggestData = reader.getDocCount(FieldNames.SUGGEST) > 0;
        if (hasSuggestData) {
            final Dictionary suggestTerms = new LuceneDictionary(reader, FieldNames.SUGGEST);
            getLookup(directory, analyzer, placeholder).build(suggestTerms);
        }
    } catch (RuntimeException e) {
        log.debug("could not update the suggester", e);
    } finally {
        // Always attempt to remove the temp directory, and report when that fails.
        if (scratchDir != null) {
            final boolean removed = FileUtils.deleteQuietly(scratchDir);
            if (!removed) {
                log.error("Cleanup failed for temp dir {}", scratchDir.getAbsolutePath());
            }
        }
    }
}
Also used : LuceneDictionary(org.apache.lucene.search.spell.LuceneDictionary) Dictionary(org.apache.lucene.search.spell.Dictionary) LuceneDictionary(org.apache.lucene.search.spell.LuceneDictionary) File(java.io.File)

Aggregations

Dictionary (org.apache.lucene.search.spell.Dictionary)21 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 IndexReader (org.apache.lucene.index.IndexReader)20 Directory (org.apache.lucene.store.Directory)20 Analyzer (org.apache.lucene.analysis.Analyzer)19 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)19 Test (org.junit.Test)19 BytesRef (org.apache.lucene.util.BytesRef)16 Document (org.apache.lucene.document.Document)15 IndexableField (org.apache.lucene.index.IndexableField)13 LongValuesSource (org.apache.lucene.search.LongValuesSource)8 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)4 List (java.util.List)4 Map (java.util.Map)4 HashSet (java.util.HashSet)3 Random (java.util.Random)3 Term (org.apache.lucene.index.Term)3 File (java.io.File)1