Search in sources :

Example 76 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testMultipleSnippetSizes.

/**
 * Highlights two fields holding the same text in a single {@code highlightFields} call, passing
 * a different passage limit for each field ({@code 1} for "title", {@code 2} for "body"), and
 * checks that the "title" snippet contains one sentence while the "body" snippet contains two.
 */
public void testMultipleSnippetSizes() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Field title = new Field("title", "", UHTestHelper.randomFieldType(random()));
    Document doc = new Document();
    doc.add(body);
    doc.add(title);
    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    iw.addDocument(doc);
    // try-with-resources: the reader is released even when one of the assertions below fails.
    try (IndexReader ir = iw.getReader()) {
        iw.close();
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
        // Match "test" in either field.
        BooleanQuery query = new BooleanQuery.Builder()
            .add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD)
            .build();
        // The last array is positional with the field array: title -> 1, body -> 2.
        Map<String, String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query, new int[] { 0 }, new int[] { 1, 2 });
        String titleHighlight = snippets.get("title")[0];
        String bodyHighlight = snippets.get("body")[0];
        assertEquals("This is a <b>test</b>. ", titleHighlight);
        assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Field(org.apache.lucene.document.Field) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 77 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCustomEmptyHighlights.

/**
 * Make sure we can customize how an empty highlight is returned: with
 * {@code setMaxNoHighlightPassages(0)} a document that does not match the
 * query yields a {@code null} snippet instead of a default summary.
 */
public void testCustomEmptyHighlights() throws Exception {
    indexAnalyzer.setPositionIncrementGap(10);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", fieldType);
    doc.add(body);
    iw.addDocument(doc);
    // try-with-resources: the reader is released even when one of the assertions below fails.
    try (IndexReader ir = iw.getReader()) {
        iw.close();
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
        // don't want any default summary
        highlighter.setMaxNoHighlightPassages(0);
        // "highlighting" does not occur in the body text, so there is nothing to highlight.
        Query query = new TermQuery(new Term("body", "highlighting"));
        int[] docIDs = new int[] { 0 };
        String[] snippets = highlighter.highlightFields(new String[] { "body" }, query, docIDs, new int[] { 2 }).get("body");
        // One slot per requested document, and that slot is empty.
        assertEquals(1, snippets.length);
        assertNull(snippets[0]);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Field(org.apache.lucene.document.Field) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 78 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testRandom.

// TODO make a similar test for BT, w/ varied IDs:
/**
 * Randomized test of versioned primary-key lookup.
 *
 * <p>Indexes at least 1000 documents with unique random IDs (optionally sharing a random prefix)
 * and either monotonically increasing or fully random versions, randomly updating or deleting
 * earlier IDs along the way. The expected live state is tracked in a map and then checked
 * against {@code PerThreadVersionPKLookup}: unknown or deleted IDs must return {@code -1}, a
 * lookup with the exact indexed version must succeed and report that version, and a lookup with
 * {@code version + 1} must return {@code -1}.
 */
public void testRandom() throws Exception {
    // Resources are opened directory -> writer -> reader and closed in reverse order by
    // try-with-resources, so nothing stays open when an assertion fails part-way through.
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
        int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            int numDocs = atLeast(1000);
            // Expected live state: id -> most recently indexed version; deleted ids are removed.
            Map<String, Long> idValues = new HashMap<>();
            int docUpto = 0;
            if (VERBOSE) {
                System.out.println("TEST: numDocs=" + numDocs);
            }
            IDSource ids = getRandomIDs();
            String idPrefix;
            if (random().nextBoolean()) {
                idPrefix = "";
            } else {
                idPrefix = TestUtil.randomSimpleString(random());
                if (VERBOSE) {
                    System.out.println("TEST: use id prefix: " + idPrefix);
                }
            }
            boolean useMonotonicVersion = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
            }
            // Every id ever added, including ones deleted later; used to pick update/delete targets.
            List<String> idsList = new ArrayList<>();
            long version = 0;
            while (docUpto < numDocs) {
                String idValue = idPrefix + ids.next();
                if (idValues.containsKey(idValue)) {
                    // Skip ids that are currently live so each add introduces a new key.
                    continue;
                }
                if (useMonotonicVersion) {
                    version += TestUtil.nextInt(random(), 1, 10);
                } else {
                    // The mask clears the top two bits, keeping the version non-negative and below 2^62.
                    version = random().nextLong() & 0x3fffffffffffffffL;
                }
                idValues.put(idValue, version);
                if (VERBOSE) {
                    System.out.println("  " + idValue + " -> " + version);
                }
                Document doc = new Document();
                doc.add(makeIDField(idValue, version));
                w.addDocument(doc);
                idsList.add(idValue);
                // idsList is never empty here (an id was just added), so only the random draw decides.
                if (random().nextInt(7) == 5) {
                    // Randomly delete or update a previous ID
                    idValue = idsList.get(random().nextInt(idsList.size()));
                    if (random().nextBoolean()) {
                        if (useMonotonicVersion) {
                            version += TestUtil.nextInt(random(), 1, 10);
                        } else {
                            version = random().nextLong() & 0x3fffffffffffffffL;
                        }
                        doc = new Document();
                        doc.add(makeIDField(idValue, version));
                        if (VERBOSE) {
                            System.out.println("  update " + idValue + " -> " + version);
                        }
                        w.updateDocument(new Term("id", idValue), doc);
                        idValues.put(idValue, version);
                    } else {
                        if (VERBOSE) {
                            System.out.println("  delete " + idValue);
                        }
                        w.deleteDocuments(new Term("id", idValue));
                        idValues.remove(idValue);
                    }
                }
                docUpto++;
            }
            try (IndexReader r = w.getReader()) {
                PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
                List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
                int iters = numDocs * 5;
                for (int iter = 0; iter < iters; iter++) {
                    // Probe a known live id, a fresh id from the source (no prefix applied),
                    // or a random string behind the prefix.
                    String idValue;
                    if (random().nextBoolean()) {
                        idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
                    } else if (random().nextBoolean()) {
                        idValue = ids.next();
                    } else {
                        idValue = idPrefix + TestUtil.randomSimpleString(random());
                    }
                    BytesRef idValueBytes = new BytesRef(idValue);
                    // null when the probed id is not live (never indexed, or deleted).
                    Long expectedVersion = idValues.get(idValue);
                    if (VERBOSE) {
                        System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
                    }
                    if (expectedVersion == null) {
                        assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
                    } else {
                        if (random().nextBoolean()) {
                            if (VERBOSE) {
                                System.out.println("  lookup exact version (should be found)");
                            }
                            assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
                            assertEquals(expectedVersion.longValue(), lookup.getVersion());
                        } else {
                            if (VERBOSE) {
                                System.out.println("  lookup version+1 (should not be found)");
                            }
                            assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
                        }
                    }
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 79 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testInvalidPayload.

/**
 * Adding (and committing) a document whose "id" field carries the raw payload bytes "foo" must
 * fail with an {@link IllegalArgumentException}.
 */
public void testInvalidPayload() throws Exception {
    // try-with-resources closes the writer and then the directory even if expectThrows fails.
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            Document doc = new Document();
            // NOTE(review): presumably "foo" is rejected because it is not a valid encoded version - confirm.
            doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
            // The commit is inside the lambda, so the exception may come from either call.
            expectThrows(IllegalArgumentException.class, () -> {
                w.addDocument(doc);
                w.commit();
            });
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 80 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testMoreThanOnceInSingleDoc.

/**
 * Adding (and committing) a document that contains the same ID field twice must fail with an
 * {@link IllegalArgumentException}.
 */
public void testMoreThanOnceInSingleDoc() throws IOException {
    // try-with-resources closes the writer and then the directory even if expectThrows fails.
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            Document doc = new Document();
            // Same id and version added twice to one document.
            doc.add(makeIDField("id", 17));
            doc.add(makeIDField("id", 17));
            // The commit is inside the lambda, so the exception may come from either call.
            expectThrows(IllegalArgumentException.class, () -> {
                w.addDocument(doc);
                w.commit();
            });
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)779 Document (org.apache.lucene.document.Document)679 Directory (org.apache.lucene.store.Directory)588 IndexReader (org.apache.lucene.index.IndexReader)510 Term (org.apache.lucene.index.Term)325 IndexSearcher (org.apache.lucene.search.IndexSearcher)294 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)220 BytesRef (org.apache.lucene.util.BytesRef)142 Field (org.apache.lucene.document.Field)141 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)136 TopDocs (org.apache.lucene.search.TopDocs)134 TermQuery (org.apache.lucene.search.TermQuery)121 DirectoryReader (org.apache.lucene.index.DirectoryReader)120 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)110 ArrayList (java.util.ArrayList)95 StringField (org.apache.lucene.document.StringField)93 Analyzer (org.apache.lucene.analysis.Analyzer)88 BooleanQuery (org.apache.lucene.search.BooleanQuery)88 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)77 Test (org.junit.Test)75