Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.
Class ThreadedIndexingAndSearchingTestCase, method runTest:
public void runTest(String testName) throws Exception {
  failed.set(false);
  addCount.set(0);
  delCount.set(0);
  packCount.set(0);
  final long t0 = System.currentTimeMillis();
  Random random = new Random(random().nextLong());
  final LineFileDocs docs = new LineFileDocs(random);
  final Path tempDir = createTempDir(testName);
  // some subclasses rely on this being MDW
  dir = getDirectory(newMockFSDirectory(tempDir));
  if (dir instanceof BaseDirectoryWrapper) {
    // don't double-checkIndex, we do it ourselves.
    ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
  }
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
  conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
  if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
    ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
  }
  if (LuceneTestCase.TEST_NIGHTLY) {
    // newIndexWriterConfig makes a smallish max segment size, which
    // results in tons and tons of segments for this test
    // when run nightly:
    MergePolicy mp = conf.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
    } else if (mp instanceof LogByteSizeMergePolicy) {
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMaxMergeDocs(100000);
    }
    // when running nightly, merging can still have crazy parameters,
    // and might use many per-field codecs. turn on CFS for IW flushes
    // and ensure the CFS ratio is reasonable to keep it contained.
    conf.setUseCompoundFile(true);
    mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
  }
  conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
    @Override
    public void warm(LeafReader reader) throws IOException {
      if (VERBOSE) {
        System.out.println("TEST: now warm merged reader=" + reader);
      }
      warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
      final int maxDoc = reader.maxDoc();
      final Bits liveDocs = reader.getLiveDocs();
      int sum = 0;
      final int inc = Math.max(1, maxDoc / 50);
      for (int docID = 0; docID < maxDoc; docID += inc) {
        if (liveDocs == null || liveDocs.get(docID)) {
          final Document doc = reader.document(docID);
          sum += doc.getFields().size();
        }
      }
      IndexSearcher searcher = newSearcher(reader, false);
      sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
      if (VERBOSE) {
        System.out.println("TEST: warm visited " + sum + " fields");
      }
    }
  });
  if (VERBOSE) {
    conf.setInfoStream(new PrintStreamInfoStream(System.out) {
      @Override
      public void message(String component, String message) {
        if ("TP".equals(component)) {
          // ignore test points!
          return;
        }
        super.message(component, message);
      }
    });
  }
  writer = new IndexWriter(dir, conf);
  TestUtil.reduceOpenFiles(writer);
  final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
  doAfterWriter(es);
  final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
  final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
  final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
  final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
  final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
  final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
  final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
  if (VERBOSE) {
    System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  // Let the index build up a bit
  Thread.sleep(100);
  doSearching(es, stopTime);
  if (VERBOSE) {
    System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
  for (Thread thread : indexThreads) {
    thread.join();
  }
  if (VERBOSE) {
    System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
  }
  final IndexSearcher s = getFinalSearcher();
  if (VERBOSE) {
    System.out.println("TEST: finalSearcher=" + s);
  }
  assertFalse(failed.get());
  boolean doFail = false;
  // Verify: make sure delIDs are in fact deleted:
  for (String id : delIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
      doFail = true;
    }
  }
  // Verify: make sure delPackIDs are in fact deleted:
  for (String id : delPackIDs) {
    final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
    if (hits.totalHits != 0) {
      System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
      doFail = true;
    }
  }
  // Verify: make sure each group of sub-docs is still in docID order:
  for (SubDocs subDocs : allSubDocs) {
    TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
    if (!subDocs.deleted) {
      // We sort by relevance but the scores should be identical, so sort falls back to by docID:
      if (hits.totalHits != subDocs.subIDs.size()) {
        System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
        doFail = true;
      } else {
        int lastDocID = -1;
        int startDocID = -1;
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
          final int docID = scoreDoc.doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          } else {
            startDocID = docID;
          }
          lastDocID = docID;
          final Document doc = s.doc(docID);
          assertEquals(subDocs.packID, doc.get("packID"));
        }
        lastDocID = startDocID - 1;
        for (String subID : subDocs.subIDs) {
          hits = s.search(new TermQuery(new Term("docid", subID)), 1);
          assertEquals(1, hits.totalHits);
          final int docID = hits.scoreDocs[0].doc;
          if (lastDocID != -1) {
            assertEquals(1 + lastDocID, docID);
          }
          lastDocID = docID;
        }
      }
    } else {
      // because we can re-use packID for update:
      for (String subID : subDocs.subIDs) {
        assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
      }
    }
  }
  // Verify: make sure all not-deleted docs are in fact
  // not deleted:
  final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
  docs.close();
  for (int id = 0; id < endID; id++) {
    String stringID = "" + id;
    if (!delIDs.contains(stringID)) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
      if (hits.totalHits != 1) {
        System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
        doFail = true;
      }
    }
  }
  assertFalse(doFail);
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
  releaseSearcher(s);
  writer.commit();
  assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
  doClose();
  try {
    writer.commit();
  } finally {
    writer.close();
  }
  // Cannot shut down the executor until after the writer is closed: the
  // writer's merged-segment warmer runs searches, and that IndexSearcher
  // may be using this es!
  if (es != null) {
    es.shutdown();
    es.awaitTermination(1, TimeUnit.SECONDS);
  }
  TestUtil.checkIndex(dir);
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
  }
}
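In runTest, NamedThreadFactory appears where the optional search executor is created: half the time (random().nextBoolean()) the test searches on the caller thread, otherwise on a cached pool whose threads are named after the test. A minimal, self-contained sketch of that naming pattern follows; the "searchTest" prefix and the trivial task are illustrative assumptions, not taken from the test above:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.util.NamedThreadFactory;

public class NamedPoolSketch {
  public static void main(String[] args) throws InterruptedException {
    // Threads created by this pool get names derived from the "searchTest"
    // prefix (e.g. "searchTest-1-thread-1") instead of anonymous pool names.
    ExecutorService es = Executors.newCachedThreadPool(new NamedThreadFactory("searchTest"));
    es.execute(() -> System.out.println("running on " + Thread.currentThread().getName()));
    es.shutdown();
    es.awaitTermination(10, TimeUnit.SECONDS);
  }
}

Named threads matter in a test like this because a hung nightly run is usually diagnosed from a thread dump, where a generic "pool-7-thread-3" says nothing but a test-derived name points straight at the culprit.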
Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.
Class TestSpellChecker, method testConcurrentAccess:
/*
 * Tests that the internally shared IndexSearcher is correctly closed
 * when the spell checker is concurrently accessed and closed.
 */
public void testConcurrentAccess() throws IOException, InterruptedException {
  assertEquals(1, searchers.size());
  final IndexReader r = DirectoryReader.open(userindex);
  spellChecker.clearIndex();
  assertEquals(2, searchers.size());
  addwords(r, spellChecker, "field1");
  assertEquals(3, searchers.size());
  int num_field1 = this.numdoc();
  addwords(r, spellChecker, "field2");
  assertEquals(4, searchers.size());
  int num_field2 = this.numdoc();
  assertEquals(num_field2, num_field1 + 1);
  int numThreads = 5 + random().nextInt(5);
  ExecutorService executor = Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("testConcurrentAccess"));
  SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
  for (int i = 0; i < numThreads; i++) {
    SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r);
    executor.execute(spellCheckWorker);
    workers[i] = spellCheckWorker;
  }
  int iterations = 5 + random().nextInt(5);
  for (int i = 0; i < iterations; i++) {
    Thread.sleep(100);
    // concurrently reset the spell index
    spellChecker.setSpellIndex(this.spellindex);
    // for debugging - prints the internal open searchers
    // showSearchersOpen();
  }
  spellChecker.close();
  executor.shutdown();
  // wait for 60 seconds - usually this is very fast, but coverage runs can take quite long
  executor.awaitTermination(60L, TimeUnit.SECONDS);
  for (int i = 0; i < workers.length; i++) {
    assertFalse(String.format(Locale.ROOT, "worker thread %d failed", i), workers[i].failed);
    assertTrue(String.format(Locale.ROOT, "worker thread %d is still running but should be terminated", i), workers[i].terminated);
  }
  // 4 searchers more than the number of iterations:
  // 1. at creation
  // 2. clearIndex()
  // 3. and 4. during the two addwords calls
  assertEquals(iterations + 4, searchers.size());
  assertSearchersClosed();
  r.close();
}
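The shutdown sequence used here generalizes to most concurrent tests: submit all work, call shutdown() so no new tasks are accepted, block in awaitTermination with a deadline far above the expected runtime, and only then assert on worker state. A compact sketch of that life cycle, detached from the spell checker; the pool size, task count, and counter are stand-ins, not values from the test:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.util.NamedThreadFactory;

public class ShutdownSketch {
  public static void main(String[] args) throws InterruptedException {
    ExecutorService executor =
        Executors.newFixedThreadPool(4, new NamedThreadFactory("workerDemo"));
    AtomicInteger completed = new AtomicInteger();
    for (int i = 0; i < 16; i++) {
      executor.execute(completed::incrementAndGet);
    }
    executor.shutdown(); // stop accepting tasks; already-queued work still runs
    // A generous deadline avoids spurious failures on slow CI or coverage runs:
    if (!executor.awaitTermination(60L, TimeUnit.SECONDS)) {
      throw new AssertionError("workers still running after 60s");
    }
    System.out.println("completed=" + completed.get());
  }
}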
Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.
Class ConfusionMatrixGenerator, method getConfusionMatrix:
/**
 * Get the {@link org.apache.lucene.classification.utils.ConfusionMatrixGenerator.ConfusionMatrix} of a given {@link Classifier},
 * generated on the given {@link IndexReader}, class and text fields.
 *
 * @param reader the {@link IndexReader} containing the index used for creating the {@link Classifier}
 * @param classifier the {@link Classifier} whose confusion matrix has to be generated
 * @param classFieldName the name of the Lucene field used as the classifier's output
 * @param textFieldName the name of the Lucene field used as the classifier's input
 * @param timeoutMilliseconds the maximum time, in milliseconds, to spend generating the confusion matrix
 * @param <T> the return type of the {@link ClassificationResult} returned by the given {@link Classifier}
 * @return a {@link org.apache.lucene.classification.utils.ConfusionMatrixGenerator.ConfusionMatrix}
 * @throws IOException if problems occur while reading the index or using the classifier
 */
public static <T> ConfusionMatrix getConfusionMatrix(IndexReader reader, Classifier<T> classifier, String classFieldName, String textFieldName, long timeoutMilliseconds) throws IOException {
  ExecutorService executorService = Executors.newFixedThreadPool(1, new NamedThreadFactory("confusion-matrix-gen-"));
  try {
    Map<String, Map<String, Long>> counts = new HashMap<>();
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    TopDocs topDocs = indexSearcher.search(new TermRangeQuery(classFieldName, null, null, true, true), Integer.MAX_VALUE);
    double time = 0d;
    int counter = 0;
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      if (timeoutMilliseconds > 0 && time >= timeoutMilliseconds) {
        break;
      }
      Document doc = reader.document(scoreDoc.doc);
      String[] correctAnswers = doc.getValues(classFieldName);
      if (correctAnswers != null && correctAnswers.length > 0) {
        Arrays.sort(correctAnswers);
        ClassificationResult<T> result;
        String text = doc.get(textFieldName);
        if (text != null) {
          try {
            // fail if classification takes more than 5s
            long start = System.currentTimeMillis();
            result = executorService.submit(() -> classifier.assignClass(text)).get(5, TimeUnit.SECONDS);
            long end = System.currentTimeMillis();
            time += end - start;
            if (result != null) {
              T assignedClass = result.getAssignedClass();
              if (assignedClass != null) {
                counter++;
                String classified = assignedClass instanceof BytesRef ? ((BytesRef) assignedClass).utf8ToString() : assignedClass.toString();
                String correctAnswer;
                if (Arrays.binarySearch(correctAnswers, classified) >= 0) {
                  correctAnswer = classified;
                } else {
                  correctAnswer = correctAnswers[0];
                }
                Map<String, Long> stringLongMap = counts.get(correctAnswer);
                if (stringLongMap != null) {
                  Long aLong = stringLongMap.get(classified);
                  if (aLong != null) {
                    stringLongMap.put(classified, aLong + 1);
                  } else {
                    stringLongMap.put(classified, 1L);
                  }
                } else {
                  stringLongMap = new HashMap<>();
                  stringLongMap.put(classified, 1L);
                  counts.put(correctAnswer, stringLongMap);
                }
              }
            }
          } catch (TimeoutException timeoutException) {
            // add classification timeout
            time += 5000;
          } catch (ExecutionException | InterruptedException executionException) {
            throw new RuntimeException(executionException);
          }
        }
      }
    }
    return new ConfusionMatrix(counts, time / counter, counter);
  } finally {
    executorService.shutdown();
  }
}
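The key concurrency idiom above is bounding each classifier call: the call is submitted to a single-threaded executor and Future.get is capped at five seconds, so a timeout is charged to the running total instead of aborting the whole matrix. Stripped of the confusion-matrix bookkeeping, the guard reduces to the following sketch, where slowOperation is a hypothetical stand-in for classifier.assignClass(text):

import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.lucene.util.NamedThreadFactory;

public class BoundedCallSketch {
  static String slowOperation() throws Exception {
    Thread.sleep(100); // placeholder for the real, possibly slow work
    return "ok";
  }

  public static void main(String[] args) {
    ExecutorService executorService =
        Executors.newFixedThreadPool(1, new NamedThreadFactory("bounded-call-"));
    try {
      // Give up (but don't fail hard) if the call takes more than 5 seconds:
      String result = executorService.submit(BoundedCallSketch::slowOperation)
          .get(5, TimeUnit.SECONDS);
      System.out.println("result=" + result);
    } catch (TimeoutException e) {
      System.out.println("timed out; charge 5000 ms and move on");
    } catch (ExecutionException | InterruptedException e) {
      throw new RuntimeException(e);
    } finally {
      executorService.shutdown();
    }
  }
}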
Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.
Class TestIndexSearcher, method testHugeN:
// should not throw exception
public void testHugeN() throws Exception {
  ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("TestIndexSearcher"));
  IndexSearcher[] searchers = new IndexSearcher[] { new IndexSearcher(reader), new IndexSearcher(reader, service) };
  Query[] queries = new Query[] { new MatchAllDocsQuery(), new TermQuery(new Term("field", "1")) };
  Sort[] sorts = new Sort[] { null, new Sort(new SortField("field2", SortField.Type.STRING)) };
  ScoreDoc[] afters = new ScoreDoc[] { null, new FieldDoc(0, 0f, new Object[] { new BytesRef("boo!") }) };
  for (IndexSearcher searcher : searchers) {
    for (ScoreDoc after : afters) {
      for (Query query : queries) {
        for (Sort sort : sorts) {
          searcher.search(query, Integer.MAX_VALUE);
          searcher.searchAfter(after, query, Integer.MAX_VALUE);
          if (sort != null) {
            searcher.search(query, Integer.MAX_VALUE, sort);
            searcher.search(query, Integer.MAX_VALUE, sort, true, true);
            searcher.search(query, Integer.MAX_VALUE, sort, true, false);
            searcher.search(query, Integer.MAX_VALUE, sort, false, true);
            searcher.search(query, Integer.MAX_VALUE, sort, false, false);
            searcher.searchAfter(after, query, Integer.MAX_VALUE, sort);
            searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, true, true);
            searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, true, false);
            searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, false, true);
            searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, false, false);
          }
        }
      }
    }
  }
  TestUtil.shutdownExecutorService(service);
}
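The second searcher in this test is constructed with an executor, which lets IndexSearcher distribute work across index segments instead of searching them sequentially on the calling thread. A minimal sketch of wiring a named pool into a searcher; the tiny index and its single document are illustrative assumptions, and RAMDirectory is used here because it exists in this lucene-solr era (later Lucene versions replace it with ByteBuffersDirectory):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.NamedThreadFactory;

public class ExecutorSearcherSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new TextField("field", "hello world", Field.Store.NO));
      w.addDocument(doc);
    }
    ExecutorService service =
        Executors.newFixedThreadPool(4, new NamedThreadFactory("searcherDemo"));
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      // With an executor, the searcher may fan work out across segment
      // slices rather than walking them on the calling thread:
      IndexSearcher searcher = new IndexSearcher(reader, service);
      System.out.println("hits=" + searcher.search(new MatchAllDocsQuery(), 10).totalHits);
    } finally {
      service.shutdown();
      dir.close();
    }
  }
}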
Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.
Class TestCodecLoadingDeadlock, method main:
// this method is called in a spawned process:
public static void main(final String... args) throws Exception {
  final String codecName = args[0];
  final String pfName = args[1];
  final String dvfName = args[2];
  // two times the modulo in the switch statement below
  final int numThreads = 14;
  final ExecutorService pool = Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("deadlockchecker"));
  final CyclicBarrier barrier = new CyclicBarrier(numThreads);
  IntStream.range(0, numThreads).forEach(taskNo -> pool.execute(() -> {
    try {
      barrier.await();
      switch (taskNo % 7) {
        case 0:
          Codec.getDefault();
          break;
        case 1:
          Codec.forName(codecName);
          break;
        case 2:
          PostingsFormat.forName(pfName);
          break;
        case 3:
          DocValuesFormat.forName(dvfName);
          break;
        case 4:
          Codec.availableCodecs();
          break;
        case 5:
          PostingsFormat.availablePostingsFormats();
          break;
        case 6:
          DocValuesFormat.availableDocValuesFormats();
          break;
        default:
          throw new AssertionError();
      }
    } catch (Throwable t) {
      synchronized (args) {
        System.err.println(Thread.currentThread().getName() + " failed to lookup codec service:");
        t.printStackTrace(System.err);
      }
      Runtime.getRuntime().halt(1);
    }
  }));
  pool.shutdown();
  while (!pool.awaitTermination(1, TimeUnit.MINUTES)) ;
}
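The CyclicBarrier is what makes this deadlock check effective: all 14 tasks park at await() until the last one arrives, then they are released at the same instant, so the codec-loading code paths are hit with maximal concurrency. The skeleton of that stampede pattern, with the codec lookups replaced by a placeholder print (the pool name and task body are illustrative, not from the test):

import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;

import org.apache.lucene.util.NamedThreadFactory;

public class BarrierStampedeSketch {
  public static void main(String[] args) throws Exception {
    final int numThreads = 14;
    final ExecutorService pool =
        Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("stampede"));
    final CyclicBarrier barrier = new CyclicBarrier(numThreads);
    IntStream.range(0, numThreads).forEach(taskNo -> pool.execute(() -> {
      try {
        // Every task parks here until all numThreads have arrived,
        // then they are released simultaneously:
        barrier.await();
        System.out.println(Thread.currentThread().getName() + " -> task " + taskNo);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }));
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.MINUTES);
  }
}

Sizing the pool exactly to the barrier parties is essential: with fewer threads than parties, the barrier would never trip and the test would hang by construction.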