Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
From the class TestNRTCachingDirectory, method testNRTAndCommit:
public void testNRTAndCommit() throws Exception {
  Directory dir = newDirectory();
  NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
  final LineFileDocs docs = new LineFileDocs(random());
  final int numDocs = TestUtil.nextInt(random(), 100, 400);
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }
  final List<BytesRef> ids = new ArrayList<>();
  DirectoryReader r = null;
  for (int docCount = 0; docCount < numDocs; docCount++) {
    final Document doc = docs.nextDoc();
    ids.add(new BytesRef(doc.get("docid")));
    w.addDocument(doc);
    if (random().nextInt(20) == 17) {
      if (r == null) {
        r = DirectoryReader.open(w.w);
      } else {
        final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
          r.close();
          r = r2;
        }
      }
      assertEquals(1 + docCount, r.numDocs());
      final IndexSearcher s = newSearcher(r);
      // Just make sure search can run; we can't assert
      // totHits since it could be 0
      TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
      // System.out.println("tot hits " + hits.totalHits);
    }
  }
  if (r != null) {
    r.close();
  }
  // Close should force cache to clear since all files are sync'd
  w.close();
  final String[] cachedFiles = cachedDir.listCachedFiles();
  for (String file : cachedFiles) {
    System.out.println("FAIL: cached file " + file + " remains after sync");
  }
  assertEquals(0, cachedFiles.length);
  r = DirectoryReader.open(dir);
  for (BytesRef id : ids) {
    assertEquals(1, r.docFreq(new Term("docid", id)));
  }
  r.close();
  cachedDir.close();
  docs.close();
}
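The test pins down the NRTCachingDirectory contract: near-real-time readers see uncommitted documents, and close() drains the RAM cache once everything is synced to the delegate directory. For reference, a minimal non-test sketch of the same pattern follows; it is not from lucene-solr, and the index path, analyzer choice, and cache thresholds (5 MB per merged segment, 60 MB total) are illustrative assumptions.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;

public class NRTCachingDirectoryExample {
  public static void main(String[] args) throws Exception {
    // Wrap the on-disk directory; small, newly flushed files stay in RAM
    // (merged segments up to 5 MB, at most 60 MB cached overall).
    NRTCachingDirectory cachedDir =
        new NRTCachingDirectory(FSDirectory.open(Paths.get("/tmp/nrt-index")), 5.0, 60.0);
    IndexWriter writer = new IndexWriter(cachedDir, new IndexWriterConfig(new StandardAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("body", "the quick brown fox", Store.NO));
    writer.addDocument(doc);

    // An NRT reader sees the uncommitted document without an fsync.
    DirectoryReader reader = DirectoryReader.open(writer);
    System.out.println("numDocs=" + reader.numDocs());
    reader.close();

    // close() commits and, as the test asserts, leaves no files in the cache.
    writer.close();
    cachedDir.close();
  }
}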
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
From the class TaxonomyMergeUtils, method merge:
/**
* Merges the given taxonomy and index directories and commits the changes to
* the given writers.
*/
public static void merge(Directory srcIndexDir, Directory srcTaxoDir, OrdinalMap map,
    IndexWriter destIndexWriter, DirectoryTaxonomyWriter destTaxoWriter,
    FacetsConfig srcConfig) throws IOException {
  // merge the taxonomies
  destTaxoWriter.addTaxonomy(srcTaxoDir, map);
  int[] ordinalMap = map.getMap();
  DirectoryReader reader = DirectoryReader.open(srcIndexDir);
  try {
    List<LeafReaderContext> leaves = reader.leaves();
    int numReaders = leaves.size();
    CodecReader[] wrappedLeaves = new CodecReader[numReaders];
    for (int i = 0; i < numReaders; i++) {
      wrappedLeaves[i] = SlowCodecReaderWrapper.wrap(
          new OrdinalMappingLeafReader(leaves.get(i).reader(), ordinalMap, srcConfig));
    }
    destIndexWriter.addIndexes(wrappedLeaves);
    // commit changes to taxonomy and index respectively.
    destTaxoWriter.commit();
    destIndexWriter.commit();
  } finally {
    reader.close();
  }
}
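Since merge() commits both destinations but leaves opening and closing the writers to the caller, a caller-side sketch may help. The wrapper below is an assumption-laden illustration, not lucene-solr code: the class and method names are invented, and it picks MemoryOrdinalMap and StandardAnalyzer arbitrarily.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyMergeUtils;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;

public class MergeExample {
  /** Copies srcIndexDir/srcTaxoDir into the destination index and taxonomy. */
  static void mergeInto(Directory srcIndexDir, Directory srcTaxoDir,
                        Directory destIndexDir, Directory destTaxoDir) throws IOException {
    IndexWriter destIndexWriter =
        new IndexWriter(destIndexDir, new IndexWriterConfig(new StandardAnalyzer()));
    DirectoryTaxonomyWriter destTaxoWriter = new DirectoryTaxonomyWriter(destTaxoDir);
    try {
      // MemoryOrdinalMap keeps the old-to-new ordinal mapping in RAM;
      // DiskOrdinalMap is the spill-to-disk alternative for huge taxonomies.
      TaxonomyMergeUtils.merge(srcIndexDir, srcTaxoDir,
          new DirectoryTaxonomyWriter.MemoryOrdinalMap(),
          destIndexWriter, destTaxoWriter, new FacetsConfig());
    } finally {
      IOUtils.close(destTaxoWriter, destIndexWriter);
    }
  }
}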
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
From the class DirectoryTaxonomyReader, method doOpenIfChanged:
/**
* Implements the opening of a new {@link DirectoryTaxonomyReader} instance if
* the taxonomy has changed.
*
* <p>
* <b>NOTE:</b> the returned {@link DirectoryTaxonomyReader} shares the
* ordinal and category caches with this reader. This is not expected to cause
* any issues, unless the two instances continue to live. The reader
* guarantees that the two instances cannot affect each other in terms of
* correctness of the caches, however if the size of the cache is changed
* through {@link #setCacheSize(int)}, it will affect both reader instances.
*/
@Override
protected DirectoryTaxonomyReader doOpenIfChanged() throws IOException {
  ensureOpen();
  // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
  final DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
  if (r2 == null) {
    // no changes, nothing to do
    return null;
  }
  // check if the taxonomy was recreated
  boolean success = false;
  try {
    boolean recreated = false;
    if (taxoWriter == null) {
      // not NRT, check epoch from commit data
      String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
      String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
      if (t1 == null) {
        if (t2 != null) {
          recreated = true;
        }
      } else if (!t1.equals(t2)) {
        // t1 != null and t2 must not be null b/c DirTaxoWriter always puts the commit data.
        // it's ok to use String.equals because we require the two epoch values to be the same.
        recreated = true;
      }
    } else {
      // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
      if (taxoEpoch != taxoWriter.getTaxonomyEpoch()) {
        recreated = true;
      }
    }
    final DirectoryTaxonomyReader newtr;
    if (recreated) {
      // if recreated, do not reuse anything from this instance. the information
      // will be lazily computed by the new instance when needed.
      newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
    } else {
      newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
    }
    success = true;
    return newtr;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(r2);
    }
  }
}
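doOpenIfChanged() is the protected hook behind the public TaxonomyReader.openIfChanged(...) factory, which returns null when nothing changed; application code normally goes through the static method. A minimal refresh sketch, with invented class and field names:

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;

public class TaxonomyRefresher {
  private DirectoryTaxonomyReader taxoReader; // opened elsewhere, e.g. new DirectoryTaxonomyReader(taxoDir)

  TaxonomyRefresher(DirectoryTaxonomyReader initial) {
    this.taxoReader = initial;
  }

  /** Swaps in a fresh reader if the taxonomy changed; a null result means "still current". */
  void maybeRefresh() throws IOException {
    DirectoryTaxonomyReader newReader = TaxonomyReader.openIfChanged(taxoReader);
    if (newReader != null) {
      taxoReader.close(); // safe even though caches may be shared, per the note above
      taxoReader = newReader;
    }
  }
}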
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
From the class DirectoryTaxonomyWriter, method addTaxonomy:
/**
* Takes the categories from the given taxonomy directory, and adds the
* missing ones to this taxonomy. Additionally, it fills the given
* {@link OrdinalMap} with a mapping from the original ordinal to the new
* ordinal.
*/
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
  ensureOpen();
  DirectoryReader r = DirectoryReader.open(taxoDir);
  try {
    final int size = r.numDocs();
    final OrdinalMap ordinalMap = map;
    ordinalMap.setSize(size);
    int base = 0;
    PostingsEnum docs = null;
    for (final LeafReaderContext ctx : r.leaves()) {
      final LeafReader ar = ctx.reader();
      final Terms terms = ar.terms(Consts.FULL);
      // TODO: share per-segment TermsEnum here!
      TermsEnum te = terms.iterator();
      while (te.next() != null) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
      }
      // no deletions, so we're ok
      base += ar.maxDoc();
    }
    ordinalMap.addDone();
  } finally {
    r.close();
  }
}
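Called on its own, addTaxonomy only merges the category tree; the ordinal map it fills still has to be applied to the source index's facet data. A hedged sketch of direct usage (the class and helper names are invented):

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.store.Directory;

public class AddTaxonomyExample {
  /** Merges the categories of srcTaxoDir into destTaxoWriter; returns the old-to-new ordinal mapping. */
  static int[] mergeCategories(DirectoryTaxonomyWriter destTaxoWriter, Directory srcTaxoDir)
      throws IOException {
    MemoryOrdinalMap map = new MemoryOrdinalMap();
    destTaxoWriter.addTaxonomy(srcTaxoDir, map);
    // map.getMap()[oldOrdinal] is the same category's ordinal in the destination
    // taxonomy; facet fields in the source index must be rewritten with it, which
    // is what OrdinalMappingLeafReader does in TaxonomyMergeUtils above.
    return map.getMap();
  }
}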
Use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
From the class TestDocValuesStatsCollector, method testDocsWithSortedSetValues:
public void testDocsWithSortedSetValues() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "sorted";
    int numDocs = TestUtil.nextInt(random(), 1, 100);
    BytesRef[][] docValues = new BytesRef[numDocs][];
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        int numValues = TestUtil.nextInt(random(), 1, 5);
        docValues[i] = new BytesRef[numValues];
        for (int j = 0; j < numValues; j++) {
          BytesRef val = TestUtil.randomBinaryTerm(random());
          doc.add(new SortedSetDocValuesField(field, val));
          docValues[i][j] = val;
        }
      }
      // every document gets an id so the deletion loop below can target it
      doc.add(new StringField("id", "doc" + i, Store.NO));
      indexWriter.addDocument(doc);
    }
    // 20% of cases delete some docs
    if (random().nextDouble() < 0.2) {
      for (int i = 0; i < numDocs; i++) {
        if (random().nextBoolean()) {
          indexWriter.deleteDocuments(new Term("id", "doc" + i));
          docValues[i] = null;
        }
      }
    }
    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
      TotalHitCountCollector totalHitCount = new TotalHitCountCollector();
      searcher.search(new MatchAllDocsQuery(),
          MultiCollector.wrap(totalHitCount, new DocValuesStatsCollector(stats)));
      int expCount = (int) nonNull(docValues).count();
      assertEquals(expCount, stats.count());
      int numDocsWithoutField = (int) isNull(docValues).count();
      assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
      if (stats.count() > 0) {
        assertEquals(nonNull(docValues).flatMap(Arrays::stream).min(BytesRef::compareTo).get(), stats.min());
        assertEquals(nonNull(docValues).flatMap(Arrays::stream).max(BytesRef::compareTo).get(), stats.max());
      }
    }
  }
}
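Outside the test, the same collector can report field statistics over any existing index. The sketch below is illustrative only, assuming the lucene-misc module (which hosts DocValuesStats and DocValuesStatsCollector) is on the classpath; the directory and field name are supplied by the caller.

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats;
import org.apache.lucene.search.DocValuesStatsCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;

public class StatsExample {
  /** Prints count/missing/min/max of a SORTED_SET doc-values field over the whole index. */
  static void printFieldStats(Directory dir, String field) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
      // The collector accumulates stats for every document the query matches.
      new IndexSearcher(reader).search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
      System.out.println(field + ": count=" + stats.count() + " missing=" + stats.missing());
      if (stats.count() > 0) {
        // min()/max() return BytesRef; utf8ToString() assumes the terms are UTF-8 text.
        System.out.println("  min=" + stats.min().utf8ToString() + " max=" + stats.max().utf8ToString());
      }
    }
  }
}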