Search in sources :

Example 11 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class MoreLikeThisQueryTests method testSimple.

public void testSimple() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    indexWriter.commit();
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("text", "lucene", Field.Store.YES));
    indexWriter.addDocument(document);
    document = new Document();
    document.add(new TextField("_id", "2", Field.Store.YES));
    document.add(new TextField("text", "lucene release", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    MoreLikeThisQuery mltQuery = new MoreLikeThisQuery("lucene", new String[] { "text" }, Lucene.STANDARD_ANALYZER);
    mltQuery.setLikeText("lucene");
    mltQuery.setMinTermFrequency(1);
    mltQuery.setMinDocFreq(1);
    long count = searcher.count(mltQuery);
    assertThat(count, equalTo(2L));
    reader.close();
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) MoreLikeThisQuery(org.elasticsearch.common.lucene.search.MoreLikeThisQuery) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 12 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class LuceneTests method testCleanIndex.

public void testCleanIndex() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    doc = new Document();
    doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    try (DirectoryReader open = DirectoryReader.open(writer)) {
        assertEquals(3, open.numDocs());
        assertEquals(1, open.numDeletedDocs());
        assertEquals(4, open.maxDoc());
    }
    writer.close();
    if (random().nextBoolean()) {
        for (String file : dir.listAll()) {
            if (file.startsWith("_1")) {
                // delete a random file
                dir.deleteFile(file);
                break;
            }
        }
    }
    Lucene.cleanLuceneIndex(dir);
    if (dir.listAll().length > 0) {
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                assertEquals(file, "write.lock");
            }
        }
    }
    dir.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 13 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class LuceneTests method testNumDocs.

public void testNumDocs() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(3, Lucene.getNumDocs(segmentCommitInfos));
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2, Lucene.getNumDocs(segmentCommitInfos));
    int numDocsToIndex = randomIntBetween(10, 50);
    List<Term> deleteTerms = new ArrayList<>();
    for (int i = 0; i < numDocsToIndex; i++) {
        doc = new Document();
        doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        deleteTerms.add(new Term("id", "extra_" + i));
        writer.addDocument(doc);
    }
    int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
    Collections.shuffle(deleteTerms, random());
    for (int i = 0; i < numDocsToDelete; i++) {
        Term remove = deleteTerms.remove(0);
        writer.deleteDocuments(remove);
    }
    writer.commit();
    segmentCommitInfos = Lucene.readSegmentInfos(dir);
    assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos));
    writer.close();
    dir.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 14 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllNoBoost.

public void testMultipleTokensAllNoBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllWithBoost.

public void testMultipleTokensAllWithBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 2.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)427 IndexWriter (org.apache.lucene.index.IndexWriter)291 Document (org.apache.lucene.document.Document)277 Directory (org.apache.lucene.store.Directory)267 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)162 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)139 IndexReader (org.apache.lucene.index.IndexReader)133 Term (org.apache.lucene.index.Term)102 IndexSearcher (org.apache.lucene.search.IndexSearcher)86 DirectoryReader (org.apache.lucene.index.DirectoryReader)85 RAMDirectory (org.apache.lucene.store.RAMDirectory)79 TextField (org.apache.lucene.document.TextField)77 Field (org.apache.lucene.document.Field)71 BytesRef (org.apache.lucene.util.BytesRef)68 IOException (java.io.IOException)58 Analyzer (org.apache.lucene.analysis.Analyzer)57 Test (org.junit.Test)52 StringField (org.apache.lucene.document.StringField)47 TermQuery (org.apache.lucene.search.TermQuery)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)38