use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestPerFieldPostingsFormat2 method doTestMixedPostings.
/**
 * Indexes 100 copies of a two-field document ("id" and "date"), each time with fresh random
 * integer values, through a writer configured with the given codec, then closes the writer and
 * the directory.
 *
 * @param codec codec installed on the writer configuration
 * @throws Exception if indexing or closing fails
 */
private void doTestMixedPostings(Codec codec) throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setCodec(codec);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // Term vectors (with offsets and positions) are switched on for the checkindex cross-check.
  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setStoreTermVectorPositions(true);

  // One document instance is reused; only the field values change per iteration.
  Field id = new Field("id", "", vectorType);
  Field date = new Field("date", "", vectorType);
  Document reusedDoc = new Document();
  reusedDoc.add(id);
  reusedDoc.add(date);

  int added = 0;
  while (added < 100) {
    id.setStringValue(Integer.toString(random().nextInt(50)));
    date.setStringValue(Integer.toString(random().nextInt(100)));
    writer.addDocument(reusedDoc);
    added++;
  }

  writer.close();
  // checkindex
  directory.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestBinaryDocument method testBinaryFieldInIndex.
/**
 * Stores the same value once as a binary stored field and once as a stored string field,
 * indexes the document, reads it back and checks that both fields return the original value.
 *
 * @throws Exception if indexing or reading fails
 */
public void testBinaryFieldInIndex() throws Exception {
  FieldType ft = new FieldType();
  ft.setStored(true);
  StoredField binaryFldStored =
      new StoredField("binaryStored", binaryValStored.getBytes(StandardCharsets.UTF_8));
  Field stringFldStored = new Field("stringStored", binaryValStored, ft);

  Document doc = new Document();
  doc.add(binaryFldStored);
  doc.add(stringFldStored);

  // test for field count
  assertEquals(2, doc.getFields().size());

  // add the doc to an index
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  writer.addDocument(doc);

  // open a reader and fetch the document
  IndexReader reader = writer.getReader();
  Document docFromReader = reader.document(0);
  assertNotNull(docFromReader);

  // fetch the binary stored field and compare its content with the original one
  BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
  assertNotNull(bytes);
  String binaryFldStoredTest =
      new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8);
  // assertEquals reports both values on failure, unlike assertTrue(a.equals(b))
  assertEquals(binaryValStored, binaryFldStoredTest);

  // fetch the string field and compare its content with the original one
  String stringFldStoredTest = docFromReader.get("stringStored");
  // also fails cleanly (instead of throwing NPE) if the stored value is missing
  assertEquals(binaryValStored, stringFldStoredTest);

  writer.close();
  reader.close();
  dir.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestPhrasePrefixQuery method testPhrasePrefix.
/**
 * Indexes five single-field documents and runs two multi-phrase queries against them. Each query
 * has a fixed first term ("blueberry" or "strawberry") followed by every indexed "body" term
 * that starts with the prefix "pi". The "blueberry" query is expected to match two documents and
 * the "strawberry" query none.
 *
 * @throws IOException if indexing or searching fails
 */
public void testPhrasePrefix() throws IOException {
  Directory indexStore = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
  Document doc1 = new Document();
  Document doc2 = new Document();
  Document doc3 = new Document();
  Document doc4 = new Document();
  Document doc5 = new Document();
  doc1.add(newTextField("body", "blueberry pie", Field.Store.YES));
  doc2.add(newTextField("body", "blueberry strudel", Field.Store.YES));
  doc3.add(newTextField("body", "blueberry pizza", Field.Store.YES));
  doc4.add(newTextField("body", "blueberry chewing gum", Field.Store.YES));
  doc5.add(newTextField("body", "piccadilly circus", Field.Store.YES));
  writer.addDocument(doc1);
  writer.addDocument(doc2);
  writer.addDocument(doc3);
  writer.addDocument(doc4);
  writer.addDocument(doc5);
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);

  MultiPhraseQuery.Builder query1builder = new MultiPhraseQuery.Builder();
  MultiPhraseQuery.Builder query2builder = new MultiPhraseQuery.Builder();
  query1builder.add(new Term("body", "blueberry"));
  query2builder.add(new Term("body", "strawberry"));

  LinkedList<Term> termsWithPrefix = new LinkedList<>();

  // this TermEnum gives "piccadilly", "pie" and "pizza".
  String prefix = "pi";
  TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
  // Only read the current term if the seek actually positioned the enum; after END there is
  // no current term to inspect.
  if (te.seekCeil(new BytesRef(prefix)) != TermsEnum.SeekStatus.END) {
    do {
      String s = te.term().utf8ToString();
      if (!s.startsWith(prefix)) {
        // terms are visited in order, so the first non-matching term ends the prefix range
        break;
      }
      termsWithPrefix.add(new Term("body", s));
    } while (te.next() != null);
  }

  query1builder.add(termsWithPrefix.toArray(new Term[0]));
  query2builder.add(termsWithPrefix.toArray(new Term[0]));

  ScoreDoc[] result;
  result = searcher.search(query1builder.build(), 1000).scoreDocs;
  assertEquals(2, result.length);

  result = searcher.search(query2builder.build(), 1000).scoreDocs;
  assertEquals(0, result.length);

  reader.close();
  indexStore.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestPhraseQuery method testPhraseQueryInConjunctionScorer.
/**
 * Runs a phrase query on its own and as a required clause next to a required term query, first
 * against a two-document index and then against a freshly created three-document index, checking
 * the hit count each time (including with the two required clauses in either order).
 *
 * @throws Exception if indexing or searching fails
 */
public void testPhraseQueryInConjunctionScorer() throws Exception {
  Directory directory = newDirectory();

  // ---- first index: both documents carry the "source" phrase, only one has "contents" ----
  RandomIndexWriter firstWriter = new RandomIndexWriter(random(), directory);

  Document sourceOnly = new Document();
  sourceOnly.add(newTextField("source", "marketing info", Field.Store.YES));
  firstWriter.addDocument(sourceOnly);

  Document contentsAndSource = new Document();
  contentsAndSource.add(newTextField("contents", "foobar", Field.Store.YES));
  contentsAndSource.add(newTextField("source", "marketing info", Field.Store.YES));
  firstWriter.addDocument(contentsAndSource);

  IndexReader firstReader = firstWriter.getReader();
  firstWriter.close();
  IndexSearcher firstSearcher = newSearcher(firstReader);

  PhraseQuery sourcePhrase = new PhraseQuery("source", "marketing", "info");
  assertEquals(2, firstSearcher.search(sourcePhrase, 1000).scoreDocs.length);
  QueryUtils.check(random(), sourcePhrase, firstSearcher);

  TermQuery foobarTerm = new TermQuery(new Term("contents", "foobar"));
  BooleanQuery.Builder termThenPhrase = new BooleanQuery.Builder();
  termThenPhrase.add(foobarTerm, BooleanClause.Occur.MUST);
  termThenPhrase.add(sourcePhrase, BooleanClause.Occur.MUST);
  assertEquals(1, firstSearcher.search(termThenPhrase.build(), 1000).scoreDocs.length);
  QueryUtils.check(random(), foobarTerm, firstSearcher);

  firstReader.close();

  // ---- second index: same directory, reopened with OpenMode.CREATE ----
  RandomIndexWriter secondWriter =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));

  Document adjacentThenWoo = new Document();
  adjacentThenWoo.add(newTextField("contents", "map entry woo", Field.Store.YES));
  secondWriter.addDocument(adjacentThenWoo);

  Document wooThenAdjacent = new Document();
  wooThenAdjacent.add(newTextField("contents", "woo map entry", Field.Store.YES));
  secondWriter.addDocument(wooThenAdjacent);

  Document separated = new Document();
  separated.add(newTextField("contents", "map foobarword entry woo", Field.Store.YES));
  secondWriter.addDocument(separated);

  IndexReader secondReader = secondWriter.getReader();
  secondWriter.close();
  IndexSearcher secondSearcher = newSearcher(secondReader);

  TermQuery wooTerm = new TermQuery(new Term("contents", "woo"));
  PhraseQuery mapEntryPhrase = new PhraseQuery("contents", "map", "entry");

  assertEquals(3, secondSearcher.search(wooTerm, 1000).scoreDocs.length);
  assertEquals(2, secondSearcher.search(mapEntryPhrase, 1000).scoreDocs.length);

  // required term first, required phrase second
  BooleanQuery.Builder wooThenPhrase = new BooleanQuery.Builder();
  wooThenPhrase.add(wooTerm, BooleanClause.Occur.MUST);
  wooThenPhrase.add(mapEntryPhrase, BooleanClause.Occur.MUST);
  assertEquals(2, secondSearcher.search(wooThenPhrase.build(), 1000).scoreDocs.length);

  // same clauses in the opposite order
  BooleanQuery.Builder phraseThenWoo = new BooleanQuery.Builder();
  phraseThenWoo.add(mapEntryPhrase, BooleanClause.Occur.MUST);
  phraseThenWoo.add(wooTerm, BooleanClause.Occur.MUST);
  assertEquals(2, secondSearcher.search(phraseThenWoo.build(), 1000).scoreDocs.length);
  QueryUtils.check(random(), phraseThenWoo.build(), secondSearcher);

  secondReader.close();
  directory.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestPhraseQuery method testSlopScoring.
/**
 * Indexes three documents in which "firstname" and "lastname" are separated by zero, one and two
 * extra words, then runs a phrase query with slop {@code Integer.MAX_VALUE} and checks that the
 * hits come back in that order with decreasing scores.
 *
 * @throws IOException if indexing or searching fails
 */
public void testSlopScoring() throws IOException {
  Directory directory = newDirectory();
  // NOTE(review): the writer uses BM25Similarity while the searcher below uses
  // ClassicSimilarity; the expected scores depend on this combination - confirm it is intended.
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(),
          directory,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setMergePolicy(newLogMergePolicy())
              .setSimilarity(new BM25Similarity()));

  // Ordered from closest to farthest apart; doc ids follow insertion order.
  String[] fieldValues = {
    "foo firstname lastname foo",
    "foo firstname zzz lastname foo",
    "foo firstname zzz yyy lastname foo"
  };
  for (String value : fieldValues) {
    Document document = new Document();
    document.add(newTextField("field", value, Field.Store.YES));
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());

  PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(3, hits.length);

  // Make sure that those matches where the terms appear closer to
  // each other get a higher score:
  double[] expectedScores = {1.0, 0.63, 0.47};
  for (int rank = 0; rank < expectedScores.length; rank++) {
    assertEquals(expectedScores[rank], hits[rank].score, 0.01);
    assertEquals(rank, hits[rank].doc);
  }

  QueryUtils.check(random(), query, searcher);
  reader.close();
  directory.close();
}
Aggregations