Use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.
From the class MinDocQueryTests, method testRandom:
/**
 * Indexes a random number (10 to 200) of empty documents and asserts that,
 * for every {@code minDoc} from 0 up to and including the document count,
 * {@code MinDocQuery(minDoc)} counts exactly {@code docCount - minDoc} hits.
 */
public void testRandom() throws IOException {
    // Randomness-consuming calls are kept in a fixed order so a seeded run is reproducible.
    final int docCount = randomIntBetween(10, 200);
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // The same empty document instance is added docCount times.
    final Document emptyDoc = new Document();
    int remaining = docCount;
    while (remaining > 0) {
        writer.addDocument(emptyDoc);
        remaining--;
    }

    final IndexReader indexReader = writer.getReader();
    final IndexSearcher indexSearcher = newSearcher(indexReader);

    int minDoc = 0;
    while (minDoc <= docCount) {
        final int expectedHits = docCount - minDoc;
        assertEquals(expectedHits, indexSearcher.count(new MinDocQuery(minDoc)));
        minDoc++;
    }

    writer.close();
    indexReader.close();
    directory.close();
}
Use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.
From the class CustomPostingsHighlighterTests, method testCustomPostingsHighlighter:
/**
 * Indexes one document with four values for the "body" field (offsets
 * enabled), highlights the term "highlighting" with
 * {@code CustomPostingsHighlighter}, and asserts that four snippets come back
 * in the order the values were added, each with its matches wrapped in
 * {@code <b>} tags.
 */
public void testCustomPostingsHighlighter() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    writerConfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, writerConfig);

    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

    final String[] bodyValues = {
        // good position but only one match
        "This is a test. Just a test1 highlighting from postings highlighter.",
        // two matches, not the best snippet due to its length though
        "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.",
        // two matches and short, will be scored highest
        "This is highlighting the third short highlighting value.",
        // one match, same as first but at the end, will be scored lower due to its position
        "Just a test4 highlighting from postings highlighter."
    };
    final String[] expectedSnippets = {
        "Just a test1 <b>highlighting</b> from postings highlighter.",
        "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.",
        "This is <b>highlighting</b> the third short <b>highlighting</b> value.",
        "Just a test4 <b>highlighting</b> from postings highlighter."
    };

    // Every value becomes its own "body" field instance on the same document;
    // the field is created empty, attached, and only then given its value.
    Document document = new Document();
    for (String bodyValue : bodyValues) {
        Field bodyField = new Field("body", "", offsetsType);
        document.add(bodyField);
        bodyField.setStringValue(bodyValue);
    }
    writer.addDocument(document);

    IndexReader indexReader = writer.getReader();
    writer.close();

    IndexSearcher indexSearcher = newSearcher(indexReader);
    Query termQuery = new TermQuery(new Term("body", "highlighting"));
    TopDocs hits = indexSearcher.search(termQuery, 10, Sort.INDEXORDER);
    assertThat(hits.totalHits, equalTo(1));
    int hitDocId = hits.scoreDocs[0].doc;

    // The highlighter is handed the field content directly: all values joined by the paragraph separator.
    StringBuilder joinedValues = new StringBuilder();
    for (int i = 0; i < bodyValues.length; i++) {
        if (i > 0) {
            joinedValues.append(HighlightUtils.PARAGRAPH_SEPARATOR);
        }
        joinedValues.append(bodyValues[i]);
    }

    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomPostingsHighlighter postingsHighlighter =
        new CustomPostingsHighlighter(null, passageFormatter, joinedValues.toString(), false);
    Snippet[] snippets = postingsHighlighter.highlightField("body", termQuery, indexSearcher, hitDocId, 5);

    assertThat(snippets.length, equalTo(4));
    for (int i = 0; i < expectedSnippets.length; i++) {
        assertThat(snippets[i].getText(), equalTo(expectedSnippets[i]));
    }

    indexReader.close();
    directory.close();
}
Use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.
From the class PercolatorFieldMapperTests, method testCreateCandidateQuery:
/**
 * Builds a {@code MemoryIndex} with four text fields and one {@code LongPoint}
 * field, asks the percolator field type for a candidate query over it, and
 * asserts that the result is a two-clause {@code BooleanQuery}: a SHOULD
 * {@code TermInSetQuery} holding the 14 expected "field term" entries in
 * iteration order, and a SHOULD {@code TermQuery} on the extraction-result
 * field for {@code EXTRACTION_FAILED}.
 */
public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();

    // Text fields as {name, content} pairs, added in this order.
    final String[][] textFields = {
        {"field1", "the quick brown fox jumps over the lazy dog"},
        {"field2", "some more text"},
        {"_field3", "unhide me"},
        {"field4", "123"}
    };
    MemoryIndex index = new MemoryIndex(false);
    for (String[] textField : textFields) {
        index.addField(textField[0], textField[1], new WhitespaceAnalyzer());
    }
    index.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());

    IndexReader reader = index.createSearcher().getIndexReader();
    BooleanQuery candidate = (BooleanQuery) fieldType.createCandidateQuery(reader);
    assertEquals(2, candidate.clauses().size());

    // First clause: the set of extracted terms.
    assertEquals(Occur.SHOULD, candidate.clauses().get(0).getOccur());
    TermInSetQuery termSetQuery = (TermInSetQuery) candidate.clauses().get(0).getQuery();
    PrefixCodedTerms termData = termSetQuery.getTermData();
    assertThat(termData.size(), equalTo(14L));

    // Expected entries, in the order the iterator is asserted to yield them.
    final String[] expectedTerms = {
        "_field3 me",
        "_field3 unhide",
        "field1 brown",
        "field1 dog",
        "field1 fox",
        "field1 jumps",
        "field1 lazy",
        "field1 over",
        "field1 quick",
        "field1 the",
        "field2 more",
        "field2 some",
        "field2 text",
        "field4 123"
    };
    PrefixCodedTerms.TermIterator iterator = termData.iterator();
    for (String expectedTerm : expectedTerms) {
        assertTermIterator(iterator, expectedTerm, fieldType.queryTermsField.name());
    }

    // Second clause: the marker query for failed extractions.
    assertEquals(Occur.SHOULD, candidate.clauses().get(1).getOccur());
    TermQuery expectedFailureQuery = new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED));
    assertEquals(expectedFailureQuery, candidate.clauses().get(1).getQuery());
}
Use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.
From the class SimpleLuceneTests, method testBoost:
/**
 * Indexes 100 documents where document {@code i} has {@code _id = i} and a
 * "value" field containing the token "value" repeated {@code i + 1} times,
 * with both fields given an index-time boost of {@code i}. Searching for the
 * term "value" must return all 100 documents ordered from {@code _id} 99 down
 * to 0.
 *
 * The directory, writer and reader are all released via try-with-resources,
 * so nothing leaks even when an assertion fails.
 */
public void testBoost() throws Exception {
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        for (int i = 0; i < 100; i++) {
            // TODO (just setting the boost value does not seem to work...)
            // Hence the field content is also lengthened: i + 1 occurrences of "value".
            // NOTE(review): presumably the ranking below is then driven by term frequency - confirm.
            StringBuilder value = new StringBuilder().append("value");
            for (int j = 0; j < i; j++) {
                value.append(" ").append("value");
            }
            Document document = new Document();
            TextField textField = new TextField("_id", Integer.toString(i), Field.Store.YES);
            textField.setBoost(i);
            document.add(textField);
            textField = new TextField("value", value.toString(), Field.Store.YES);
            textField.setBoost(i);
            document.add(textField);
            indexWriter.addDocument(document);
        }

        // Reader opened from the writer, so the uncommitted documents are visible.
        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TermQuery query = new TermQuery(new Term("value", "value"));
            TopDocs topDocs = searcher.search(query, 100);
            // Actual value first, expected matcher second, so failure messages read correctly.
            assertThat(topDocs.totalHits, equalTo(100));
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
                // The hit at rank i must be the document with _id 99 - i.
                assertThat(doc.get("_id"), equalTo(Integer.toString(100 - i - 1)));
            }
        }
    }
}
Use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.
From the class SimpleLuceneTests, method testOrdering:
/**
 * Here, we verify that the order in which fields are added to a document is
 * what counts, and not the lexicographic order of the field names. This means
 * that heavily accessed fields that use a field selector should be added
 * first (with load and break).
 *
 * The document gets "_id" followed by "#id"; the stored-field visitor must be
 * offered the fields in that same order. The directory, writer and reader are
 * released via try-with-resources, so nothing leaks even when an assertion
 * fails.
 */
public void testOrdering() throws Exception {
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new TextField("#id", "1", Field.Store.YES));
        indexWriter.addDocument(document);

        // Reader opened from the writer, so the uncommitted document is visible.
        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            // Record every field name in the order the visitor is asked about it.
            final ArrayList<String> fieldsOrder = new ArrayList<>();
            searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {
                @Override
                public Status needsField(FieldInfo fieldInfo) throws IOException {
                    fieldsOrder.add(fieldInfo.name);
                    return Status.YES;
                }
            });

            assertThat(fieldsOrder.size(), equalTo(2));
            assertThat(fieldsOrder.get(0), equalTo("_id"));
            assertThat(fieldsOrder.get(1), equalTo("#id"));
        }
    }
}
Aggregations