Search in sources :

Example 11 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class LuceneTests method testCleanIndex.

/**
 * Builds a small index spanning several commits, optionally removes one of its files, and
 * then checks that {@code Lucene.cleanLuceneIndex} leaves nothing behind in the directory
 * except {@code write.lock} and files whose names start with {@code extra}.
 *
 * @throws IOException if any index or directory operation fails
 */
public void testCleanIndex() throws IOException {
    // try-with-resources so the directory and writer are released even when an assertion fails
    try (MockDirectoryWrapper dir = newMockDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        iwc.setMaxBufferedDocs(2);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // first commit: one document
            Document doc = new Document();
            doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();

            // second commit: two more documents
            doc = new Document();
            doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            doc = new Document();
            doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();

            // third commit: one more document plus a delete of document "2"
            doc = new Document();
            doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
            writer.addDocument(doc);
            writer.deleteDocuments(new Term("id", "2"));
            writer.commit();

            try (DirectoryReader open = DirectoryReader.open(writer)) {
                assertEquals(3, open.numDocs());
                assertEquals(1, open.numDeletedDocs());
                assertEquals(4, open.maxDoc());
            }
        }

        if (random().nextBoolean()) {
            for (String file : dir.listAll()) {
                if (file.startsWith("_1")) {
                    // remove the first listed file whose name starts with "_1" so the cleanup
                    // is also exercised against an index with a missing file
                    dir.deleteFile(file);
                    break;
                }
            }
        }

        Lucene.cleanLuceneIndex(dir);

        // iterating an empty listing is a no-op, so no separate length check is needed
        for (String file : dir.listAll()) {
            if (file.startsWith("extra") == false) {
                // expected value first so a failure reports the unexpected file as "actual"
                assertEquals("write.lock", file);
            }
        }
    }
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 12 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class LuceneTests method testNumDocs.

/**
 * Checks the document count that {@code Lucene.getNumDocs} reports for the segment infos
 * read from the directory as documents are added, deleted and committed.
 *
 * @throws IOException if any index or directory operation fails
 */
public void testNumDocs() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());

    // one committed document
    Field.Store store;
    if (random().nextBoolean()) {
        store = Field.Store.YES;
    } else {
        store = Field.Store.NO;
    }
    Document first = new Document();
    first.add(new TextField("id", "1", store));
    writer.addDocument(first);
    writer.commit();
    assertEquals(1, Lucene.getNumDocs(Lucene.readSegmentInfos(dir)));

    // two more documents, not yet committed
    if (random().nextBoolean()) {
        store = Field.Store.YES;
    } else {
        store = Field.Store.NO;
    }
    Document second = new Document();
    second.add(new TextField("id", "2", store));
    writer.addDocument(second);

    if (random().nextBoolean()) {
        store = Field.Store.YES;
    } else {
        store = Field.Store.NO;
    }
    Document third = new Document();
    third.add(new TextField("id", "3", store));
    writer.addDocument(third);

    // the count is expected to stay at 1 until the next commit
    assertEquals(1, Lucene.getNumDocs(Lucene.readSegmentInfos(dir)));
    writer.commit();
    assertEquals(3, Lucene.getNumDocs(Lucene.readSegmentInfos(dir)));

    // a committed delete is expected to lower the count
    writer.deleteDocuments(new Term("id", "2"));
    writer.commit();
    assertEquals(2, Lucene.getNumDocs(Lucene.readSegmentInfos(dir)));

    // index a random batch of extra documents, remembering a delete term for each one
    int extraCount = randomIntBetween(10, 50);
    List<Term> remainingTerms = new ArrayList<>();
    int index = 0;
    while (index < extraCount) {
        String id = "extra_" + index;
        if (random().nextBoolean()) {
            store = Field.Store.YES;
        } else {
            store = Field.Store.NO;
        }
        Document extra = new Document();
        extra.add(new TextField("id", id, store));
        remainingTerms.add(new Term("id", "extra_" + index));
        writer.addDocument(extra);
        index++;
    }

    // delete a random subset; whatever stays in the list is still live afterwards
    int deleteCount = randomIntBetween(0, extraCount);
    Collections.shuffle(remainingTerms, random());
    List<Term> toDelete = remainingTerms.subList(0, deleteCount);
    for (Term term : toDelete) {
        writer.deleteDocuments(term);
    }
    toDelete.clear();
    writer.commit();

    int expectedLive = 2 + remainingTerms.size();
    assertEquals(expectedLive, Lucene.getNumDocs(Lucene.readSegmentInfos(dir)));

    writer.close();
    dir.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) SegmentInfos(org.apache.lucene.index.SegmentInfos) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 13 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllNoBoost.

/**
 * Indexes two documents whose {@code _all} field holds the same two values (in opposite
 * order, both with boost 1.0) and checks that an {@code AllTermQuery} on each token matches
 * both documents, returned in doc-id order.
 *
 * @throws Exception if indexing or searching fails
 */
public void testMultipleTokensAllNoBoost() throws Exception {
    // try-with-resources: the original never closed the reader or the directory
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        FieldType allFt = getAllFieldType();

        Document doc = new Document();
        doc.add(new Field("_id", "1", StoredField.TYPE));
        doc.add(new AllField("_all", "something moo", 1.0f, allFt));
        doc.add(new AllField("_all", "else koo", 1.0f, allFt));
        indexWriter.addDocument(doc);

        doc = new Document();
        doc.add(new Field("_id", "2", StoredField.TYPE));
        doc.add(new AllField("_all", "else koo", 1.0f, allFt));
        doc.add(new AllField("_all", "something moo", 1.0f, allFt));
        indexWriter.addDocument(doc);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // every token is expected to hit both documents, doc 0 before doc 1
            for (String token : new String[] { "else", "koo", "something", "moo" }) {
                TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", token)), 10);
                assertThat(docs.totalHits, equalTo(2));
                assertThat(docs.scoreDocs[0].doc, equalTo(0));
                assertThat(docs.scoreDocs[1].doc, equalTo(1));
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 14 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllWithBoost.

/**
 * Indexes two documents into {@code _all}, where the second document's {@code "else koo"}
 * value carries boost 2.0, and checks the order in which an {@code AllTermQuery} returns
 * them: the second document first for the boosted tokens, doc-id order for the others.
 *
 * @throws Exception if indexing or searching fails
 */
public void testMultipleTokensAllWithBoost() throws Exception {
    // try-with-resources: the original never closed the reader or the directory
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        FieldType allFt = getAllFieldType();

        Document doc = new Document();
        doc.add(new Field("_id", "1", StoredField.TYPE));
        doc.add(new AllField("_all", "something moo", 1.0f, allFt));
        doc.add(new AllField("_all", "else koo", 1.0f, allFt));
        indexWriter.addDocument(doc);

        doc = new Document();
        doc.add(new Field("_id", "2", StoredField.TYPE));
        doc.add(new AllField("_all", "else koo", 2.0f, allFt));
        doc.add(new AllField("_all", "something moo", 1.0f, allFt));
        indexWriter.addDocument(doc);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // tokens from the value boosted in the second document: doc 1 is expected first
            for (String token : new String[] { "else", "koo" }) {
                TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", token)), 10);
                assertThat(docs.totalHits, equalTo(2));
                assertThat(docs.scoreDocs[0].doc, equalTo(1));
                assertThat(docs.scoreDocs[1].doc, equalTo(0));
            }

            // tokens with boost 1.0 in both documents: doc-id order is expected
            for (String token : new String[] { "something", "moo" }) {
                TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", token)), 10);
                assertThat(docs.totalHits, equalTo(2));
                assertThat(docs.scoreDocs[0].doc, equalTo(0));
                assertThat(docs.scoreDocs[1].doc, equalTo(1));
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class SimpleAllTests method testNoTokens.

/**
 * Indexes a single document whose {@code _all} field value is the empty string and checks
 * that a {@code MatchAllDocsQuery} still returns that document.
 *
 * @throws Exception if indexing or searching fails
 */
public void testNoTokens() throws Exception {
    // try-with-resources: the original never closed the writer, the reader or the directory
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER))) {
        FieldType allFt = getAllFieldType();

        Document doc = new Document();
        doc.add(new Field("_id", "1", StoredField.TYPE));
        doc.add(new AllField("_all", "", 2.0f, allFt));
        indexWriter.addDocument(doc);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
            assertThat(docs.totalHits, equalTo(1));
            assertThat(docs.scoreDocs[0].doc, equalTo(0));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Aggregations

IndexWriter (org.apache.lucene.index.IndexWriter)529 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)306 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)293 IndexReader (org.apache.lucene.index.IndexReader)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)136 DirectoryReader (org.apache.lucene.index.DirectoryReader)127 Term (org.apache.lucene.index.Term)125 IndexSearcher (org.apache.lucene.search.IndexSearcher)110 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)107 TextField (org.apache.lucene.document.TextField)104 RAMDirectory (org.apache.lucene.store.RAMDirectory)88 IOException (java.io.IOException)86 Field (org.apache.lucene.document.Field)86 TermQuery (org.apache.lucene.search.TermQuery)56 StringField (org.apache.lucene.document.StringField)52 BytesRef (org.apache.lucene.util.BytesRef)52 FieldType (org.apache.lucene.document.FieldType)50 Test (org.junit.Test)49 Query (org.apache.lucene.search.Query)45