Search in sources :

Example 1 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentDictionaryTest method testMultiValuedField.

@Test
public void testMultiValuedField() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    Iterator<Suggestion> suggestionsIter = suggestions.iterator();
    while ((f = inputIterator.next()) != null) {
        Suggestion nextSuggestion = suggestionsIter.next();
        assertTrue(f.equals(nextSuggestion.term));
        long weight = nextSuggestion.weight;
        assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
        assertEquals(inputIterator.payload(), nextSuggestion.payload);
        assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
    }
    assertFalse(suggestionsIter.hasNext());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 2 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentDictionaryTest method testWithOptionalPayload.

@Test
public void testWithOptionalPayload() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    // Create a document that is missing the payload field
    Document doc = new Document();
    Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
    doc.add(field);
    // do not store the payload or the contexts
    Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
    doc.add(weight);
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    // Even though the payload field is missing, the dictionary iterator should not skip the document
    // because the payload field is optional.
    Dictionary dictionaryOptionalPayload = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
    BytesRef f = inputIterator.next();
    assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
    IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
    assertEquals(inputIterator.weight(), weightField.numericValue().longValue());
    IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
    assertNull(payloadField);
    assertTrue(inputIterator.payload().length == 0);
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) IndexableField(org.apache.lucene.index.IndexableField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 3 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testWithValueSource.

@Test
public void testWithValueSource() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), 10);
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 4 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testWithLongValuesSource.

@Test
public void testWithLongValuesSource() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), 10);
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 5 with Dictionary

use of org.apache.lucene.search.spell.Dictionary in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testValueSourceWithoutPayload.

@Test
public void testValueSourceWithoutPayload() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), (w1 + w2 + w3));
        assertNull(inputIterator.payload());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

Dictionary (org.apache.lucene.search.spell.Dictionary)21 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 IndexReader (org.apache.lucene.index.IndexReader)20 Directory (org.apache.lucene.store.Directory)20 Analyzer (org.apache.lucene.analysis.Analyzer)19 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)19 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)19 Test (org.junit.Test)19 BytesRef (org.apache.lucene.util.BytesRef)16 Document (org.apache.lucene.document.Document)15 IndexableField (org.apache.lucene.index.IndexableField)13 LongValuesSource (org.apache.lucene.search.LongValuesSource)8 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)4 List (java.util.List)4 Map (java.util.Map)4 HashSet (java.util.HashSet)3 Random (java.util.Random)3 Term (org.apache.lucene.index.Term)3 File (java.io.File)1