use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class MoreLikeThisQueryTests method testSimple.
public void testSimple() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
indexWriter.commit();
Document document = new Document();
document.add(new TextField("_id", "1", Field.Store.YES));
document.add(new TextField("text", "lucene", Field.Store.YES));
indexWriter.addDocument(document);
document = new Document();
document.add(new TextField("_id", "2", Field.Store.YES));
document.add(new TextField("text", "lucene release", Field.Store.YES));
indexWriter.addDocument(document);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery("lucene", new String[] { "text" }, Lucene.STANDARD_ANALYZER);
mltQuery.setLikeText("lucene");
mltQuery.setMinTermFrequency(1);
mltQuery.setMinDocFreq(1);
long count = searcher.count(mltQuery);
assertThat(count, equalTo(2L));
reader.close();
indexWriter.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class LuceneTests method testCleanIndex.
public void testCleanIndex() throws IOException {
MockDirectoryWrapper dir = newMockDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
iwc.setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setMaxBufferedDocs(2);
IndexWriter writer = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
writer.commit();
doc = new Document();
doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
doc = new Document();
doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
writer.commit();
doc = new Document();
doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
writer.deleteDocuments(new Term("id", "2"));
writer.commit();
try (DirectoryReader open = DirectoryReader.open(writer)) {
assertEquals(3, open.numDocs());
assertEquals(1, open.numDeletedDocs());
assertEquals(4, open.maxDoc());
}
writer.close();
if (random().nextBoolean()) {
for (String file : dir.listAll()) {
if (file.startsWith("_1")) {
// delete a random file
dir.deleteFile(file);
break;
}
}
}
Lucene.cleanLuceneIndex(dir);
if (dir.listAll().length > 0) {
for (String file : dir.listAll()) {
if (file.startsWith("extra") == false) {
assertEquals(file, "write.lock");
}
}
}
dir.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class LuceneTests method testNumDocs.
public void testNumDocs() throws IOException {
MockDirectoryWrapper dir = newMockDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
IndexWriter writer = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
writer.commit();
SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
doc = new Document();
doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
doc = new Document();
doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
segmentCommitInfos = Lucene.readSegmentInfos(dir);
assertEquals(1, Lucene.getNumDocs(segmentCommitInfos));
writer.commit();
segmentCommitInfos = Lucene.readSegmentInfos(dir);
assertEquals(3, Lucene.getNumDocs(segmentCommitInfos));
writer.deleteDocuments(new Term("id", "2"));
writer.commit();
segmentCommitInfos = Lucene.readSegmentInfos(dir);
assertEquals(2, Lucene.getNumDocs(segmentCommitInfos));
int numDocsToIndex = randomIntBetween(10, 50);
List<Term> deleteTerms = new ArrayList<>();
for (int i = 0; i < numDocsToIndex; i++) {
doc = new Document();
doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
deleteTerms.add(new Term("id", "extra_" + i));
writer.addDocument(doc);
}
int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
Collections.shuffle(deleteTerms, random());
for (int i = 0; i < numDocsToDelete; i++) {
Term remove = deleteTerms.remove(0);
writer.deleteDocuments(remove);
}
writer.commit();
segmentCommitInfos = Lucene.readSegmentInfos(dir);
assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos));
writer.close();
dir.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class SimpleAllTests method testMultipleTokensAllNoBoost.
public void testMultipleTokensAllNoBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
FieldType allFt = getAllFieldType();
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.
the class SimpleAllTests method testMultipleTokensAllWithBoost.
public void testMultipleTokensAllWithBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
FieldType allFt = getAllFieldType();
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
doc.add(new AllField("_all", "else koo", 1.0f, allFt));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
doc.add(new AllField("_all", "else koo", 2.0f, allFt));
doc.add(new AllField("_all", "something moo", 1.0f, allFt));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
Aggregations