Search in sources:

Example 16 with Dictionary

Use of org.apache.lucene.search.spell.Dictionary in the lucene-solr project by Apache.

From the class DocumentValueSourceDictionaryTest, method testValueSourceBasic:

/**
 * Indexes a batch of generated documents and walks every entry returned by a
 * {@code DocumentValueSourceDictionary} built over the three weight fields.
 *
 * <p>For each entry the term, the weight (expected to equal the sum of the document's three
 * weight fields) and the payload are compared with the document the entry came from. Once the
 * iterator is exhausted, no generated document may remain unvisited.
 *
 * @throws IOException if writing or reading the index fails
 */
@Test
public void testValueSourceBasic() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        // Each term is removed as it is seen, so a null here means the iterator produced a term
        // that was never generated or produced the same term twice.
        Document doc = docs.remove(term);
        assertTrue("iterator returned an unexpected or duplicate term: " + term, doc != null);

        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();

        // JUnit convention: expected value first, actual value second.
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2 + w3, inputIterator.weight());

        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            // A document without a payload field is expected to yield an empty payload.
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
    }
    assertTrue("documents never returned by the iterator: " + docs.keySet(), docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 17 with Dictionary

Use of org.apache.lucene.search.spell.Dictionary in the lucene-solr project by Apache.

From the class DocumentDictionaryTest, method testWithContexts:

/**
 * Indexes generated documents and verifies the entries of a {@code DocumentDictionary}
 * configured with term, weight, payload and context fields.
 *
 * <p>Every entry's term, weight (0 when the document has no weight field), payload and number
 * of contexts are compared with the originating document. The terms reported as invalid by
 * {@code generateIndexDocuments} must still be present after iteration, i.e. the iterator must
 * have skipped them, and nothing else may be left over.
 *
 * @throws IOException if writing or reading the index fails
 */
@Test
public void testWithContexts() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        // A null result means the term was never generated or was already consumed.
        Document doc = docs.remove(term);
        assertNotNull("iterator returned an unexpected or duplicate term: " + term, doc);

        // JUnit convention: expected value first, actual value second.
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);

        IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
        long expectedWeight = (weightField != null) ? weightField.numericValue().longValue() : 0;
        assertEquals(expectedWeight, inputIterator.weight());

        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            // A document without a payload field is expected to yield an empty payload.
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }

        // Only the number of distinct contexts is compared, not their contents.
        Set<BytesRef> oriCtxs = new HashSet<>();
        Set<BytesRef> contextSet = inputIterator.contexts();
        for (IndexableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) {
            oriCtxs.add(ctxf.binaryValue());
        }
        assertEquals(oriCtxs.size(), contextSet.size());
    }

    // Invalid documents must not have been consumed by the loop above.
    for (String invalidTerm : invalidDocTerms) {
        assertNotNull("invalid term was returned by the iterator: " + invalidTerm, docs.remove(invalidTerm));
    }
    assertTrue("documents never returned by the iterator: " + docs.keySet(), docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 18 with Dictionary

Use of org.apache.lucene.search.spell.Dictionary in the lucene-solr project by Apache.

From the class DocumentValueSourceDictionaryTest, method testLongValuesSourceWithDeletions:

/**
 * Verifies that a {@code DocumentValueSourceDictionary} only returns entries for documents
 * that survive deletion.
 *
 * <p>A random subset of the generated documents (never all of them) is deleted by term after
 * indexing. The remaining documents must match the reader's live document count, and every
 * entry from the iterator is checked for term, weight (sum of the first two weight fields) and
 * payload against its source document.
 *
 * @throws IOException if writing or reading the index fails
 */
@Test
public void testLongValuesSourceWithDeletions() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    Random rand = random();
    List<String> termsToDel = new ArrayList<>();
    for (Document doc : docs.values()) {
        // The size bound keeps at least one document alive so the index is never emptied.
        if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) {
            termsToDel.add(doc.get(FIELD_NAME));
        }
        writer.addDocument(doc);
    }
    writer.commit();

    for (String termToDel : termsToDel) {
        writer.deleteDocuments(new Term(FIELD_NAME, termToDel));
    }
    writer.commit();
    writer.close();

    // Mirror the deletions in the expectation map.
    for (String termToDel : termsToDel) {
        assertTrue("term marked for deletion was not generated: " + termToDel, docs.remove(termToDel) != null);
    }

    IndexReader ir = DirectoryReader.open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
    // JUnit convention: expected value first, actual value second.
    assertEquals(docs.size(), ir.numDocs());

    LongValuesSource sumValues = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValues, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        // A null result means the term was deleted, never generated, or already consumed.
        Document doc = docs.remove(term);
        assertTrue("iterator returned a deleted, unexpected or duplicate term: " + term, doc != null);

        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();

        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2, inputIterator.weight());

        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            // A document without a payload field is expected to yield an empty payload.
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
    }
    assertTrue("documents never returned by the iterator: " + docs.keySet(), docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 19 with Dictionary

Use of org.apache.lucene.search.spell.Dictionary in the lucene-solr project by Apache.

From the class DocumentValueSourceDictionaryTest, method testLongValuesSourceBasic:

/**
 * Checks the entries of a {@code DocumentValueSourceDictionary} whose weights come from a
 * {@code LongValuesSource} built over the three weight fields.
 *
 * <p>Each entry's term, weight (expected to equal the sum of the document's three weight
 * fields) and payload are compared with the source document, and every generated document
 * must be returned by the iterator exactly once.
 *
 * @throws IOException if writing or reading the index fails
 */
@Test
public void testLongValuesSourceBasic() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource sumValueSource = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValueSource, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        // Terms are removed as they are seen; null means an unknown or repeated term.
        Document doc = docs.remove(term);
        assertTrue("iterator returned an unexpected or duplicate term: " + term, doc != null);

        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();

        // JUnit convention: expected value first, actual value second.
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2 + w3, inputIterator.weight());

        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            // A document without a payload field is expected to yield an empty payload.
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
    }
    assertTrue("documents never returned by the iterator: " + docs.keySet(), docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 20 with Dictionary

Use of org.apache.lucene.search.spell.Dictionary in the lucene-solr project by Apache.

From the class DocumentDictionaryTest, method testWithoutPayload:

/**
 * Verifies a {@code DocumentDictionary} that is configured with a term field and a weight
 * field only.
 *
 * <p>Each entry's term and weight (0 when the document has no weight field) are compared with
 * the source document, and the payload must be {@code null}. The terms reported as invalid by
 * {@code generateIndexDocuments} must still be present after iteration, and nothing else may
 * be left over.
 *
 * @throws IOException if writing or reading the index fails
 */
@Test
public void testWithoutPayload() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();

    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        // A null result means the term was never generated or was already consumed.
        Document doc = docs.remove(term);
        assertNotNull("iterator returned an unexpected or duplicate term: " + term, doc);

        // JUnit convention: expected value first, actual value second.
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);

        IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
        long expectedWeight = (weightField != null) ? weightField.numericValue().longValue() : 0;
        assertEquals(expectedWeight, inputIterator.weight());

        // No payload field was passed to the dictionary, so no payload is expected.
        assertNull(inputIterator.payload());
    }

    // Invalid documents must not have been consumed by the loop above.
    for (String invalidTerm : invalidDocTerms) {
        assertNotNull("invalid term was returned by the iterator: " + invalidTerm, docs.remove(invalidTerm));
    }
    assertTrue("documents never returned by the iterator: " + docs.keySet(), docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

Dictionary (org.apache.lucene.search.spell.Dictionary): 21, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 20, IndexReader (org.apache.lucene.index.IndexReader): 20, Directory (org.apache.lucene.store.Directory): 20, Analyzer (org.apache.lucene.analysis.Analyzer): 19, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 19, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 19, Test (org.junit.Test): 19, BytesRef (org.apache.lucene.util.BytesRef): 16, Document (org.apache.lucene.document.Document): 15, IndexableField (org.apache.lucene.index.IndexableField): 13, LongValuesSource (org.apache.lucene.search.LongValuesSource): 8, ArrayList (java.util.ArrayList): 6, HashMap (java.util.HashMap): 4, List (java.util.List): 4, Map (java.util.Map): 4, HashSet (java.util.HashSet): 3, Random (java.util.Random): 3, Term (org.apache.lucene.index.Term): 3, File (java.io.File): 1