Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache: class TestSuggestField, method testExtremeDeduplication.
public void testExtremeDeduplication() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
  final int num = atLeast(5000);
  int bestWeight = Integer.MIN_VALUE;
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    int weight = TestUtil.nextInt(random(), 10, 100);
    bestWeight = Math.max(weight, bestWeight);
    document.add(new SuggestField("suggest_field", "abc", weight));
    iw.addDocument(document);
    if (rarely()) {
      iw.commit();
    }
  }
  Document document = new Document();
  document.add(new SuggestField("suggest_field", "abd", 7));
  iw.addDocument(document);
  if (random().nextBoolean()) {
    iw.forceMerge(1);
  }
  DirectoryReader reader = iw.getReader();
  Entry[] expectedEntries = new Entry[2];
  expectedEntries[0] = new Entry("abc", bestWeight);
  expectedEntries[1] = new Entry("abd", 7);
  SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
  TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
  suggestIndexSearcher.suggest(query, collector);
  TopSuggestDocs lookupDocs = collector.get();
  assertSuggestions(lookupDocs, expectedEntries);
  reader.close();
  iw.close();
}
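The test above calls iwcWithSuggestField, Entry, and assertSuggestions, which are helpers defined elsewhere in the test class. A minimal sketch of what a config helper like iwcWithSuggestField typically does is shown below: it routes the suggest field through the completion postings format so SuggestField and PrefixCompletionQuery can be used on it. The class names Lucene62Codec and Completion50PostingsFormat are assumptions tied to the Lucene 6.x line; other releases use differently numbered classes, and imports are omitted as in the snippets above.

// Sketch only: per-field codec that backs "suggestField" with the completion postings format.
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, String suggestField) {
  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  iwc.setCodec(new Lucene62Codec() {
    // assumed completion format for the Lucene 6.x line
    final PostingsFormat completions = new Completion50PostingsFormat();
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestField.equals(field) ? completions : super.getPostingsFormatForField(field);
    }
  });
  return iwc;
}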
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache: class TestSuggestField, method testSuggestOnAllDeletedDocuments.
@Test
public void testSuggestOnAllDeletedDocuments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
  int num = Math.min(1000, atLeast(10));
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, i));
    document.add(newStringField("delete", "delete", Field.Store.NO));
    iw.addDocument(document);
    if (usually()) {
      iw.commit();
    }
  }
  iw.deleteDocuments(new Term("delete", "delete"));
  DirectoryReader reader = DirectoryReader.open(iw);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
  assertThat(suggest.totalHits, equalTo(0));
  reader.close();
  iw.close();
}
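Both tests assert on the results through test helpers. Outside a test, the suggestions returned by SuggestIndexSearcher can be read directly from TopSuggestDocs; a minimal sketch, assuming the same "suggest_field" plus an open reader and analyzer as above:

// Sketch: fetch the top 5 completions for the prefix "ab" and read them back.
SuggestIndexSearcher suggestSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery prefixQuery = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
TopSuggestDocs top = suggestSearcher.suggest(prefixQuery, 5, true); // true = skip duplicate suggestions
for (TopSuggestDocs.SuggestScoreDoc hit : top.scoreLookupDocs()) {
  // key is the suggested text; score carries the suggestion weight; doc is the matching document
  System.out.println(hit.key + " (score=" + hit.score + ", doc=" + hit.doc + ")");
}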
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache: class TestUtil, method addIndexesSlowly.
public static void addIndexesSlowly(IndexWriter writer, DirectoryReader... readers) throws IOException {
  List<CodecReader> leaves = new ArrayList<>();
  for (DirectoryReader reader : readers) {
    for (LeafReaderContext context : reader.leaves()) {
      leaves.add(SlowCodecReaderWrapper.wrap(context.reader()));
    }
  }
  writer.addIndexes(leaves.toArray(new CodecReader[leaves.size()]));
}
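A possible caller of this helper, shown only as a sketch: sourceDir1, sourceDir2, and destWriter are assumed to be two already-built Directory instances and an open IndexWriter; the helper re-adds their segments through CodecReaders instead of a file-level copy.

// Hypothetical usage: merge the contents of two existing indexes into destWriter.
try (DirectoryReader r1 = DirectoryReader.open(sourceDir1);
     DirectoryReader r2 = DirectoryReader.open(sourceDir2)) {
  TestUtil.addIndexesSlowly(destWriter, r1, r2);
  destWriter.commit();
}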
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache: class TestCompressingTermVectorsFormat, method testChunkCleanup.
/**
 * writes some tiny segments with incomplete compressed blocks,
 * and ensures merge recompresses them.
 */
public void testChunkCleanup() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
  // we have to enforce certain things like maxDocsPerChunk to cause dirty chunks
  // to be created by this test
  iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
  IndexWriter iw = new IndexWriter(dir, iwConf);
  DirectoryReader ir = DirectoryReader.open(iw);
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    doc.add(new Field("text", "not very long at all", ft));
    iw.addDocument(doc);
    // force flush
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
    assertNotNull(ir2);
    ir.close();
    ir = ir2;
    // examine dirty counts:
    for (LeafReaderContext leaf : ir2.leaves()) {
      CodecReader sr = (CodecReader) leaf.reader();
      CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
      assertEquals(1, reader.getNumChunks());
      assertEquals(1, reader.getNumDirtyChunks());
    }
  }
  iw.getConfig().setMergePolicy(newLogMergePolicy());
  iw.forceMerge(1);
  DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
  assertNotNull(ir2);
  ir.close();
  ir = ir2;
  CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
  CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
  // we could get lucky, and have zero, but typically one.
  assertTrue(reader.getNumDirtyChunks() <= 1);
  ir.close();
  iw.close();
  dir.close();
}
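The test forces flushes by reopening the near-real-time reader after each added document. The general refresh idiom it relies on is sketched below, assuming current is an open DirectoryReader obtained from DirectoryReader.open(writer).

// openIfChanged returns null when the index has not changed since the reader
// was opened; otherwise it returns a new reader and the caller closes the old one.
DirectoryReader fresh = DirectoryReader.openIfChanged(current);
if (fresh != null) {
  current.close();
  current = fresh;
}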
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache: class DocSetUtil, method createDocSet.
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
  // raw reader to avoid extra wrapping overhead
  DirectoryReader reader = searcher.getRawReader();
  int maxDoc = searcher.getIndexReader().maxDoc();
  int smallSetSize = smallSetSize(maxDoc);
  String field = term.field();
  BytesRef termVal = term.bytes();
  int maxCount = 0;
  int firstReader = -1;
  List<LeafReaderContext> leaves = reader.leaves();
  // use an array: slightly higher scanning cost, but fewer memory allocations
  PostingsEnum[] postList = new PostingsEnum[leaves.size()];
  for (LeafReaderContext ctx : leaves) {
    assert leaves.get(ctx.ord) == ctx;
    LeafReader r = ctx.reader();
    Fields f = r.fields();
    Terms t = f.terms(field);
    if (t == null) continue; // field is missing
    TermsEnum te = t.iterator();
    if (te.seekExact(termVal)) {
      maxCount += te.docFreq();
      postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
      if (firstReader < 0) firstReader = ctx.ord;
    }
  }
  DocSet answer = null;
  if (maxCount == 0) {
    answer = DocSet.EMPTY;
  } else if (maxCount <= smallSetSize) {
    answer = createSmallSet(leaves, postList, maxCount, firstReader);
  } else {
    answer = createBigSet(leaves, postList, maxDoc, firstReader);
  }
  return DocSetUtil.getDocSet(answer, searcher);
}
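A sketch of how a caller might use this method, assuming searcher is an open SolrIndexSearcher; the field name and value are made up for illustration.

// Hypothetical usage: collect and iterate the documents matching a single term.
DocSet matching = DocSetUtil.createDocSet(searcher, new Term("category", "books"));
int count = matching.size();
DocIterator it = matching.iterator();
while (it.hasNext()) {
  int docId = it.nextDoc();
  // process docId ...
}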