Search in sources :

Example 36 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

In class TestPerFieldPostingsFormat2, method doTestMixedPostings:

/**
 * Indexes 100 documents with the supplied codec and then closes the writer
 * and the directory. Every document carries an "id" field (random integer
 * below 50) and a "date" field (random integer below 100), both indexed as
 * non-stored text with term vectors, offsets and positions enabled.
 *
 * @param codec the codec installed on the writer configuration
 * @throws Exception if indexing or closing the index fails
 */
private void doTestMixedPostings(Codec codec) throws Exception {
    // Non-stored text type with full term vectors; the vectors are turned on
    // for the checkindex cross-check.
    final FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);

    // One document instance is reused; only the field values change per pass.
    final Field id = new Field("id", "", vectorType);
    final Field date = new Field("date", "", vectorType);
    final Document reusedDoc = new Document();
    reusedDoc.add(id);
    reusedDoc.add(date);

    final Directory directory = newDirectory();
    final IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setCodec(codec);
    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    for (int remaining = 100; remaining > 0; remaining--) {
        id.setStringValue(String.valueOf(random().nextInt(50)));
        date.setStringValue(String.valueOf(random().nextInt(100)));
        indexWriter.addDocument(reusedDoc);
    }
    indexWriter.close();

    // checkindex (per the original note; presumably triggered when the test
    // directory is closed -- confirm against the test framework)
    directory.close();
}
Also used : StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IntPoint(org.apache.lucene.document.IntPoint) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 37 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

In class TestBinaryDocument, method testBinaryFieldInIndex:

/**
 * Verifies that a stored binary field and a stored string field, both built
 * from {@code binaryValStored}, can be written to an index and read back
 * with their original content.
 *
 * @throws Exception if indexing or reading the document fails
 */
public void testBinaryFieldInIndex() throws Exception {
    FieldType ft = new FieldType();
    ft.setStored(true);
    StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes(StandardCharsets.UTF_8));
    Field stringFldStored = new Field("stringStored", binaryValStored, ft);
    Document doc = new Document();
    doc.add(binaryFldStored);
    doc.add(stringFldStored);
    // test for field count
    assertEquals(2, doc.getFields().size());
    // add the doc to a fresh index
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    writer.addDocument(doc);
    // open a reader and fetch the document
    IndexReader reader = writer.getReader();
    Document docFromReader = reader.document(0);
    assertNotNull(docFromReader);
    // fetch the binary stored field and compare its content with the original one
    BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
    assertNotNull(bytes);
    String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8);
    // assertEquals reports both values on mismatch, unlike assertTrue(a.equals(b))
    assertEquals(binaryValStored, binaryFldStoredTest);
    // fetch the string field and compare its content with the original one;
    // a missing field now fails the assertion instead of raising an NPE
    String stringFldStoredTest = docFromReader.get("stringStored");
    assertEquals(binaryValStored, stringFldStoredTest);
    writer.close();
    reader.close();
    dir.close();
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 38 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

In class TestPhrasePrefixQuery, method testPhrasePrefix:

/**
 * Emulates a phrase-prefix search with {@link MultiPhraseQuery}: the second
 * phrase position accepts every indexed "body" term that starts with "pi".
 * "blueberry" followed by such a term is expected to match two documents,
 * while "strawberry" followed by such a term is expected to match none.
 *
 * @throws IOException if indexing or searching fails
 */
public void testPhrasePrefix() throws IOException {
    Directory indexStore = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
    String[] bodies = {
        "blueberry pie",
        "blueberry strudel",
        "blueberry pizza",
        "blueberry chewing gum",
        "piccadilly circus"
    };
    // Build every document before indexing any of them, so helper calls
    // happen in the same order as when each document was written out by hand.
    Document[] docs = new Document[bodies.length];
    for (int i = 0; i < bodies.length; i++) {
        docs[i] = new Document();
        docs[i].add(newTextField("body", bodies[i], Field.Store.YES));
    }
    for (Document doc : docs) {
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    MultiPhraseQuery.Builder query1builder = new MultiPhraseQuery.Builder();
    MultiPhraseQuery.Builder query2builder = new MultiPhraseQuery.Builder();
    query1builder.add(new Term("body", "blueberry"));
    query2builder.add(new Term("body", "strawberry"));
    LinkedList<Term> termsWithPrefix = new LinkedList<>();
    // this TermsEnum gives "piccadilly", "pie" and "pizza".
    String prefix = "pi";
    TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
    // Only read terms when the seek actually positioned the enum: after
    // SeekStatus.END the enum is unpositioned and term() must not be called.
    if (te.seekCeil(new BytesRef(prefix)) != TermsEnum.SeekStatus.END) {
        do {
            String s = te.term().utf8ToString();
            if (!s.startsWith(prefix)) {
                // terms are visited in sorted order, so no later term can match
                break;
            }
            termsWithPrefix.add(new Term("body", s));
        } while (te.next() != null);
    }
    query1builder.add(termsWithPrefix.toArray(new Term[0]));
    query2builder.add(termsWithPrefix.toArray(new Term[0]));
    ScoreDoc[] result;
    result = searcher.search(query1builder.build(), 1000).scoreDocs;
    assertEquals(2, result.length);
    result = searcher.search(query2builder.build(), 1000).scoreDocs;
    assertEquals(0, result.length);
    reader.close();
    indexStore.close();
}
Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) LinkedList(java.util.LinkedList) TermsEnum(org.apache.lucene.index.TermsEnum) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 39 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

In class TestPhraseQuery, method testPhraseQueryInConjunctionScorer:

/**
 * Exercises phrase queries on their own and as MUST clauses of a boolean
 * query. Two indexes are built one after the other in the same directory;
 * the second is opened with {@code OpenMode.CREATE}.
 *
 * @throws Exception if indexing or searching fails
 */
public void testPhraseQueryInConjunctionScorer() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);

    // First index: both documents contain "marketing info" in "source",
    // only the second one also has "foobar" in "contents".
    Document d = new Document();
    d.add(newTextField("source", "marketing info", Field.Store.YES));
    indexWriter.addDocument(d);
    d = new Document();
    d.add(newTextField("contents", "foobar", Field.Store.YES));
    d.add(newTextField("source", "marketing info", Field.Store.YES));
    indexWriter.addDocument(d);
    IndexReader indexReader = indexWriter.getReader();
    indexWriter.close();
    IndexSearcher indexSearcher = newSearcher(indexReader);

    // The phrase alone matches both documents.
    PhraseQuery phrase = new PhraseQuery("source", "marketing", "info");
    assertEquals(2, indexSearcher.search(phrase, 1000).scoreDocs.length);
    QueryUtils.check(random(), phrase, indexSearcher);

    // Term AND phrase: only the document holding "foobar" qualifies.
    TermQuery term = new TermQuery(new Term("contents", "foobar"));
    BooleanQuery.Builder conjunction = new BooleanQuery.Builder()
        .add(term, BooleanClause.Occur.MUST)
        .add(phrase, BooleanClause.Occur.MUST);
    assertEquals(1, indexSearcher.search(conjunction.build(), 1000).scoreDocs.length);
    QueryUtils.check(random(), term, indexSearcher);
    indexReader.close();

    // Second index, created over the first one in the same directory.
    indexWriter = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
    String[] contents = {"map entry woo", "woo map entry", "map foobarword entry woo"};
    for (String text : contents) {
        d = new Document();
        d.add(newTextField("contents", text, Field.Store.YES));
        indexWriter.addDocument(d);
    }
    indexReader = indexWriter.getReader();
    indexWriter.close();
    indexSearcher = newSearcher(indexReader);

    // "woo" occurs in all three documents; "map entry" is adjacent in two.
    term = new TermQuery(new Term("contents", "woo"));
    phrase = new PhraseQuery("contents", "map", "entry");
    assertEquals(3, indexSearcher.search(term, 1000).scoreDocs.length);
    assertEquals(2, indexSearcher.search(phrase, 1000).scoreDocs.length);

    // The conjunction gives the same count whichever clause is added first.
    conjunction = new BooleanQuery.Builder()
        .add(term, BooleanClause.Occur.MUST)
        .add(phrase, BooleanClause.Occur.MUST);
    assertEquals(2, indexSearcher.search(conjunction.build(), 1000).scoreDocs.length);

    conjunction = new BooleanQuery.Builder()
        .add(phrase, BooleanClause.Occur.MUST)
        .add(term, BooleanClause.Occur.MUST);
    assertEquals(2, indexSearcher.search(conjunction.build(), 1000).scoreDocs.length);
    QueryUtils.check(random(), conjunction.build(), indexSearcher);

    indexReader.close();
    dir.close();
}
Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 40 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

In class TestPhraseQuery, method testSlopScoring:

/**
 * Checks that a sloppy phrase query ranks documents higher the closer the
 * phrase terms are to each other. Three documents place "firstname" and
 * "lastname" zero, one and two words apart; with an unbounded slop all
 * three must match, in that order, with the expected scores.
 *
 * @throws IOException if indexing or searching fails
 */
public void testSlopScoring() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy())
            .setSimilarity(new BM25Similarity()));

    // Index order defines the doc ids asserted below.
    String[] texts = {
        "foo firstname lastname foo",
        "foo firstname zzz lastname foo",
        "foo firstname zzz yyy lastname foo"
    };
    for (String text : texts) {
        Document d = new Document();
        d.add(newTextField("field", text, Field.Store.YES));
        indexWriter.addDocument(d);
    }
    IndexReader indexReader = indexWriter.getReader();
    indexWriter.close();

    IndexSearcher indexSearcher = newSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());
    PhraseQuery sloppy = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
    ScoreDoc[] hits = indexSearcher.search(sloppy, 1000).scoreDocs;
    assertEquals(3, hits.length);

    // Matches whose terms appear closer to each other must score higher:
    // the hit at rank i is expected to be doc i with the score listed here.
    double[] expectedScores = {1.0, 0.63, 0.47};
    for (int rank = 0; rank < expectedScores.length; rank++) {
        assertEquals(expectedScores[rank], hits[rank].score, 0.01);
        assertEquals(rank, hits[rank].doc);
    }
    QueryUtils.check(random(), sloppy, indexSearcher);

    indexReader.close();
    dir.close();
}
Also used : ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BM25Similarity(org.apache.lucene.search.similarities.BM25Similarity) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)775 Document (org.apache.lucene.document.Document)675 Directory (org.apache.lucene.store.Directory)584 IndexReader (org.apache.lucene.index.IndexReader)508 Term (org.apache.lucene.index.Term)324 IndexSearcher (org.apache.lucene.search.IndexSearcher)294 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)220 BytesRef (org.apache.lucene.util.BytesRef)142 Field (org.apache.lucene.document.Field)140 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)136 TopDocs (org.apache.lucene.search.TopDocs)134 TermQuery (org.apache.lucene.search.TermQuery)121 DirectoryReader (org.apache.lucene.index.DirectoryReader)119 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)110 ArrayList (java.util.ArrayList)91 StringField (org.apache.lucene.document.StringField)89 Analyzer (org.apache.lucene.analysis.Analyzer)88 BooleanQuery (org.apache.lucene.search.BooleanQuery)88 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)76 Query (org.apache.lucene.search.Query)73