Search in sources :

Example 71 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testWithLongValuesSource.

@Test
public void testWithLongValuesSource() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), 10);
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 72 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testValueSourceWithDeletions.

@Test
public void testValueSourceWithDeletions() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    Random rand = random();
    List<String> termsToDel = new ArrayList<>();
    for (Document doc : docs.values()) {
        if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1) {
            termsToDel.add(doc.get(FIELD_NAME));
        }
        writer.addDocument(doc);
    }
    writer.commit();
    Term[] delTerms = new Term[termsToDel.size()];
    for (int i = 0; i < termsToDel.size(); i++) {
        delTerms[i] = new Term(FIELD_NAME, termsToDel.get(i));
    }
    for (Term delTerm : delTerms) {
        writer.deleteDocuments(delTerm);
    }
    writer.commit();
    writer.close();
    for (String termToDel : termsToDel) {
        assertTrue(null != docs.remove(termToDel));
    }
    IndexReader ir = DirectoryReader.open(dir);
    assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0);
    assertEquals(ir.numDocs(), docs.size());
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), w2 + w1);
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 73 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testValueSourceWithContext.

@Test
public void testValueSourceWithContext() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource s = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, s, PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), (w1 + w2 + w3));
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
        Set<BytesRef> originalCtxs = new HashSet<>();
        for (IndexableField ctxf : doc.getFields(CONTEXTS_FIELD_NAME)) {
            originalCtxs.add(ctxf.binaryValue());
        }
        assertEquals(originalCtxs, inputIterator.contexts());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 74 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DocumentValueSourceDictionaryTest method testLongValuesSourceWithContext.

@Test
public void testLongValuesSourceWithContext() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
    for (Document doc : docs.values()) {
        writer.addDocument(doc);
    }
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    LongValuesSource sumValues = sum(WEIGHT_FIELD_NAME_1, WEIGHT_FIELD_NAME_2, WEIGHT_FIELD_NAME_3);
    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, sumValues, PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
        long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
        long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        assertEquals(inputIterator.weight(), (w1 + w2 + w3));
        IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
        if (payloadField == null)
            assertTrue(inputIterator.payload().length == 0);
        else
            assertEquals(inputIterator.payload(), payloadField.binaryValue());
        Set<BytesRef> originalCtxs = new HashSet<>();
        for (IndexableField ctxf : doc.getFields(CONTEXTS_FIELD_NAME)) {
            originalCtxs.add(ctxf.binaryValue());
        }
        assertEquals(originalCtxs, inputIterator.contexts());
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) LongValuesSource(org.apache.lucene.search.LongValuesSource) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 75 with IndexableField

use of org.apache.lucene.index.IndexableField in project lucene-solr by apache.

the class DocumentDictionaryTest method testWithDeletions.

@Test
public void testWithDeletions() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
    Map<String, Document> docs = res.getValue();
    List<String> invalidDocTerms = res.getKey();
    Random rand = random();
    List<String> termsToDel = new ArrayList<>();
    for (Document doc : docs.values()) {
        IndexableField f = doc.getField(FIELD_NAME);
        if (rand.nextBoolean() && f != null && !invalidDocTerms.contains(f.stringValue())) {
            termsToDel.add(doc.get(FIELD_NAME));
        }
        writer.addDocument(doc);
    }
    writer.commit();
    Term[] delTerms = new Term[termsToDel.size()];
    for (int i = 0; i < termsToDel.size(); i++) {
        delTerms[i] = new Term(FIELD_NAME, termsToDel.get(i));
    }
    for (Term delTerm : delTerms) {
        writer.deleteDocuments(delTerm);
    }
    writer.commit();
    writer.close();
    for (String termToDel : termsToDel) {
        assertTrue(null != docs.remove(termToDel));
    }
    IndexReader ir = DirectoryReader.open(dir);
    assertEquals(ir.numDocs(), docs.size());
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    while ((f = inputIterator.next()) != null) {
        Document doc = docs.remove(f.utf8ToString());
        assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
        IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
        assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
        assertNull(inputIterator.payload());
    }
    for (String invalidTerm : invalidDocTerms) {
        assertNotNull(docs.remove(invalidTerm));
    }
    assertTrue(docs.isEmpty());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) IndexReader(org.apache.lucene.index.IndexReader) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

IndexableField (org.apache.lucene.index.IndexableField)276 Document (org.apache.lucene.document.Document)90 CompressedXContent (org.elasticsearch.common.compress.CompressedXContent)75 Matchers.containsString (org.hamcrest.Matchers.containsString)57 BytesRef (org.apache.lucene.util.BytesRef)53 ArrayList (java.util.ArrayList)50 Field (org.apache.lucene.document.Field)34 Test (org.junit.Test)28 IOException (java.io.IOException)27 HashMap (java.util.HashMap)24 IndexReader (org.apache.lucene.index.IndexReader)24 Directory (org.apache.lucene.store.Directory)23 Map (java.util.Map)22 TopDocs (org.apache.lucene.search.TopDocs)22 Term (org.apache.lucene.index.Term)21 HashSet (java.util.HashSet)20 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)20 DocumentMapper (org.elasticsearch.index.mapper.DocumentMapper)20 Query (org.apache.lucene.search.Query)19 Analyzer (org.apache.lucene.analysis.Analyzer)18