Examples with Dictionary - org.apache.lucene.search.spell.Dictionary

Example 6 with Dictionary

Usage of org.apache.lucene.search.spell.Dictionary in the Apache lucene-solr project.

From class DocumentValueSourceDictionaryTest, method testValueSourceWithDeletions.

/**
 * Checks that a {@code DocumentValueSourceDictionary} only iterates over live documents.
 *
 * <p>Indexes the documents produced by {@code generateIndexDocuments}, deletes a random subset
 * of them by their {@code FIELD_NAME} term, and then asserts that the dictionary's iterator
 * yields exactly the remaining documents, each with a weight equal to the sum of its two weight
 * fields and a payload equal to its payload field (or an empty payload when it has none).
 */
@Test
public void testValueSourceWithDeletions() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    Random rand = random();
    List<String> termsToDel = new ArrayList<>();
    for (Document doc : docs.values()) {
        // The size bound keeps at least one document out of the deletion list.
        if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) {
            termsToDel.add(doc.get(FIELD_NAME));
        }
        writer.addDocument(doc);
    }
    writer.commit();
    for (String termToDel : termsToDel) {
        writer.deleteDocuments(new Term(FIELD_NAME, termToDel));
    }
    writer.commit();
    writer.close();
    // Mirror the deletions in the expected map so it holds only the surviving documents.
    for (String termToDel : termsToDel) {
        assertTrue("No generated document for deleted term " + termToDel, docs.remove(termToDel) != null);
    }
    IndexReader ir = DirectoryReader.open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
    assertEquals(docs.size(), ir.numDocs());
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        Document doc = docs.remove(term);
        // Fail with a readable message (not an NPE) if a deleted, duplicate or unknown term shows up.
        assertTrue("Dictionary returned an unexpected term: " + term, doc != null);
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2, inputIterator.weight());
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
    }
    // Every surviving document must have been returned by the iterator.
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}

Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 7 with Dictionary

Usage of org.apache.lucene.search.spell.Dictionary in the Apache lucene-solr project.

From class DocumentValueSourceDictionaryTest, method testLongValuesSourceWithoutPayload.

/**
 * Checks a {@code DocumentValueSourceDictionary} that is built without a payload field.
 *
 * <p>Indexes the documents produced by {@code generateIndexDocuments} and asserts that the
 * dictionary's iterator yields every one of them, each with a weight equal to the sum of its
 * three weight fields and a {@code null} payload.
 */
@Test
public void testLongValuesSourceWithoutPayload() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource sumValues = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValues);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        Document doc = docs.remove(term);
        // Fail with a readable message (not an NPE) if a duplicate or unknown term shows up.
        assertTrue("Dictionary returned an unexpected term: " + term, doc != null);
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2 + w3, inputIterator.weight());
        assertNull(inputIterator.payload());
    }
    // Every indexed document must have been returned by the iterator.
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}

Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 8 with Dictionary

Usage of org.apache.lucene.search.spell.Dictionary in the Apache lucene-solr project.

From class DocumentValueSourceDictionaryTest, method testLongValuesSourceEmptyReader.

/**
 * Checks a {@code DocumentValueSourceDictionary} over an index that contains no documents.
 *
 * <p>Asserts that the iterator's first {@code next()} call returns {@code null}, and that
 * afterwards its weight is 0 and its payload is {@code null}.
 */
@Test
public void testLongValuesSourceEmptyReader() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    // Commit without adding any documents, so the reader below is opened on an empty index.
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    assertNull(inputIterator.next());
    assertEquals(0, inputIterator.weight());
    assertNull(inputIterator.payload());
    IOUtils.close(ir, analyzer, dir);
}

Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 9 with Dictionary

Usage of org.apache.lucene.search.spell.Dictionary in the Apache lucene-solr project.

From class DocumentValueSourceDictionaryTest, method testValueSourceWithContext.

/**
 * Checks a {@code DocumentValueSourceDictionary} built with both a payload and a contexts field.
 *
 * <p>Indexes the documents produced by {@code generateIndexDocuments} and asserts that the
 * dictionary's iterator yields every one of them, each with a weight equal to the sum of its
 * three weight fields, a payload equal to its payload field (or an empty payload when it has
 * none), and a context set equal to the binary values of its {@code CONTEXTS_FIELD_NAME} fields.
 */
@Test
public void testValueSourceWithContext() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        Document doc = docs.remove(term);
        // Fail with a readable message (not an NPE) if a duplicate or unknown term shows up.
        assertTrue("Dictionary returned an unexpected term: " + term, doc != null);
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2 + w3, inputIterator.weight());
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
        Set<BytesRef> originalCtxs = new HashSet<>();
        for (IndexableField ctxf : doc.getFields(CONTEXTS_FIELD_NAME)) {
            originalCtxs.add(ctxf.binaryValue());
        }
        assertEquals(originalCtxs, inputIterator.contexts());
    }
    // Every indexed document must have been returned by the iterator.
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}

Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with Dictionary

Usage of org.apache.lucene.search.spell.Dictionary in the Apache lucene-solr project.

From class DocumentValueSourceDictionaryTest, method testLongValuesSourceWithContext.

/**
 * Checks a {@code DocumentValueSourceDictionary} that is given a {@code LongValuesSource}
 * weight together with a payload field and a contexts field.
 *
 * <p>Indexes the documents produced by {@code generateIndexDocuments} and asserts that the
 * dictionary's iterator yields every one of them, each with a weight equal to the sum of its
 * three weight fields, a payload equal to its payload field (or an empty payload when it has
 * none), and a context set equal to the binary values of its {@code CONTEXTS_FIELD_NAME} fields.
 */
@Test
public void testLongValuesSourceWithContext() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource sumValues = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValues, PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        String term = f.utf8ToString();
        Document doc = docs.remove(term);
        // Fail with a readable message (not an NPE) if a duplicate or unknown term shows up.
        assertTrue("Dictionary returned an unexpected term: " + term, doc != null);
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
        assertEquals(w1 + w2 + w3, inputIterator.weight());
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null) {
            assertEquals(0, inputIterator.payload().length);
        } else {
            assertEquals(payloadField.binaryValue(), inputIterator.payload());
        }
        Set<BytesRef> originalCtxs = new HashSet<>();
        for (IndexableField ctxf : doc.getFields(CONTEXTS_FIELD_NAME)) {
            originalCtxs.add(ctxf.binaryValue());
        }
        assertEquals(originalCtxs, inputIterator.contexts());
    }
    // Every indexed document must have been returned by the iterator.
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}

Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Dictionary (org.apache.lucene.search.spell.Dictionary): 21; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 20; IndexReader (org.apache.lucene.index.IndexReader): 20; Directory (org.apache.lucene.store.Directory): 20; Analyzer (org.apache.lucene.analysis.Analyzer): 19; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 19; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 19; Test (org.junit.Test): 19; BytesRef (org.apache.lucene.util.BytesRef): 16; Document (org.apache.lucene.document.Document): 15; IndexableField (org.apache.lucene.index.IndexableField): 13; LongValuesSource (org.apache.lucene.search.LongValuesSource): 8; ArrayList (java.util.ArrayList): 6; HashMap (java.util.HashMap): 4; List (java.util.List): 4; Map (java.util.Map): 4; HashSet (java.util.HashSet): 3; Random (java.util.Random): 3; Term (org.apache.lucene.index.Term): 3; File (java.io.File): 1