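NamedThreadFactory is a small helper in org.apache.lucene.util that implements java.util.concurrent.ThreadFactory and gives every thread it creates a recognizable, prefix-based name (something like "demo-1-thread-1"), which makes thread dumps and test logs much easier to attribute. A minimal sketch of the pattern all five examples below build on (the "demo" prefix and pool size are arbitrary):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.NamedThreadFactory;

public class NamedThreadFactoryDemo {
    public static void main(String[] args) throws InterruptedException {
        // every pool thread is named after the given prefix
        ExecutorService pool = Executors.newFixedThreadPool(2, new NamedThreadFactory("demo"));
        pool.execute(() -> System.out.println(Thread.currentThread().getName()));
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
    }
}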

Example 1 with NamedThreadFactory

Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.

From the class ThreadedIndexingAndSearchingTestCase, method runTest:

public void runTest(String testName) throws Exception {
    failed.set(false);
    addCount.set(0);
    delCount.set(0);
    packCount.set(0);
    final long t0 = System.currentTimeMillis();
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random);
    final Path tempDir = createTempDir(testName);
    // some subclasses rely on this being a MockDirectoryWrapper (MDW)
    dir = getDirectory(newMockFSDirectory(tempDir));
    if (dir instanceof BaseDirectoryWrapper) {
        // don't double-checkIndex, we do it ourselves.
        ((BaseDirectoryWrapper) dir).setCheckIndexOnClose(false);
    }
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    final IndexWriterConfig conf = newIndexWriterConfig(analyzer).setCommitOnClose(false);
    conf.setInfoStream(new FailOnNonBulkMergesInfoStream());
    if (conf.getMergePolicy() instanceof MockRandomMergePolicy) {
        ((MockRandomMergePolicy) conf.getMergePolicy()).setDoNonBulkMerges(false);
    }
    if (LuceneTestCase.TEST_NIGHTLY) {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.getMergePolicy();
        if (mp instanceof TieredMergePolicy) {
            ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
        } else if (mp instanceof LogByteSizeMergePolicy) {
            ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
        } else if (mp instanceof LogMergePolicy) {
            ((LogMergePolicy) mp).setMaxMergeDocs(100000);
        }
        // when running nightly, merging can still have crazy parameters, 
        // and might use many per-field codecs. turn on CFS for IW flushes
        // and ensure CFS ratio is reasonable to keep it contained.
        conf.setUseCompoundFile(true);
        mp.setNoCFSRatio(Math.max(0.25d, mp.getNoCFSRatio()));
    }
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {

        @Override
        public void warm(LeafReader reader) throws IOException {
            if (VERBOSE) {
                System.out.println("TEST: now warm merged reader=" + reader);
            }
            warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
            final int maxDoc = reader.maxDoc();
            final Bits liveDocs = reader.getLiveDocs();
            int sum = 0;
            final int inc = Math.max(1, maxDoc / 50);
            for (int docID = 0; docID < maxDoc; docID += inc) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    final Document doc = reader.document(docID);
                    sum += doc.getFields().size();
                }
            }
            IndexSearcher searcher = newSearcher(reader, false);
            sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
            if (VERBOSE) {
                System.out.println("TEST: warm visited " + sum + " fields");
            }
        }
    });
    if (VERBOSE) {
        conf.setInfoStream(new PrintStreamInfoStream(System.out) {

            @Override
            public void message(String component, String message) {
                if ("TP".equals(component)) {
                    // ignore test points!
                    return;
                }
                super.message(component, message);
            }
        });
    }
    writer = new IndexWriter(dir, conf);
    TestUtil.reduceOpenFiles(writer);
    final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    doAfterWriter(es);
    final int NUM_INDEX_THREADS = TestUtil.nextInt(random(), 2, 4);
    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
    if (VERBOSE) {
        System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    // Let index build up a bit
    Thread.sleep(100);
    doSearching(es, stopTime);
    if (VERBOSE) {
        System.out.println("TEST: all searching done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
    for (Thread thread : indexThreads) {
        thread.join();
    }
    if (VERBOSE) {
        System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis() - t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }
    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
        System.out.println("TEST: finalSearcher=" + s);
    }
    assertFalse(failed.get());
    boolean doFail = false;
    // Verify: make sure delIDs are in fact deleted:
    for (String id : delIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
            doFail = true;
        }
    }
    // Verify: make sure delPackIDs are in fact deleted:
    for (String id : delPackIDs) {
        final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
        if (hits.totalHits != 0) {
            System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
            doFail = true;
        }
    }
    // Verify: make sure each group of sub-docs are still in docID order:
    for (SubDocs subDocs : allSubDocs) {
        TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
        if (!subDocs.deleted) {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.totalHits != subDocs.subIDs.size()) {
                System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
                doFail = true;
            } else {
                int lastDocID = -1;
                int startDocID = -1;
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    final int docID = scoreDoc.doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    } else {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    final Document doc = s.doc(docID);
                    assertEquals(subDocs.packID, doc.get("packID"));
                }
                lastDocID = startDocID - 1;
                for (String subID : subDocs.subIDs) {
                    hits = s.search(new TermQuery(new Term("docid", subID)), 1);
                    assertEquals(1, hits.totalHits);
                    final int docID = hits.scoreDocs[0].doc;
                    if (lastDocID != -1) {
                        assertEquals(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        } else {
            // because we can re-use packID for update:
            for (String subID : subDocs.subIDs) {
                assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
            }
        }
    }
    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();
    for (int id = 0; id < endID; id++) {
        String stringID = "" + id;
        if (!delIDs.contains(stringID)) {
            final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.totalHits != 1) {
                System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits + "; delIDs=" + delIDs);
                doFail = true;
            }
        }
    }
    assertFalse(doFail);
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
    releaseSearcher(s);
    writer.commit();
    assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
    doClose();
    try {
        writer.commit();
    } finally {
        writer.close();
    }
    // Cannot shut down the executor until after the writer is closed, because the
    // writer's merged-segment warmer runs searches through an IndexSearcher (IS),
    // and that IS may be using this es!
    if (es != null) {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }
    TestUtil.checkIndex(dir);
    dir.close();
    if (VERBOSE) {
        System.out.println("TEST: done [" + (System.currentTimeMillis() - t0) + " ms]");
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), Document (org.apache.lucene.document.Document), ScoreDoc (org.apache.lucene.search.ScoreDoc), TopDocs (org.apache.lucene.search.TopDocs), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper), LineFileDocs (org.apache.lucene.util.LineFileDocs), Path (java.nio.file.Path), TermQuery (org.apache.lucene.search.TermQuery), NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), FailOnNonBulkMergesInfoStream (org.apache.lucene.util.FailOnNonBulkMergesInfoStream), IOException (java.io.IOException), ExecutorService (java.util.concurrent.ExecutorService), Bits (org.apache.lucene.util.Bits), PrintStreamInfoStream (org.apache.lucene.util.PrintStreamInfoStream)
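Example 1 stops its executor with shutdown() followed by a one-second awaitTermination(). In code that must not leak threads, a slightly more defensive variant of the same pattern is common; this is a generic JDK sketch, not taken from the Lucene sources:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

final class Pools {
    // drain the pool politely, then interrupt anything still running
    static void shutdownAndAwait(ExecutorService es, long timeout, TimeUnit unit) throws InterruptedException {
        es.shutdown(); // stop accepting new tasks
        if (!es.awaitTermination(timeout, unit)) {
            es.shutdownNow(); // cancel stragglers via interruption
            es.awaitTermination(timeout, unit);
        }
    }
}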

Example 2 with NamedThreadFactory

Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.

From the class TestSpellChecker, method testConcurrentAccess:

/*
   * Tests that the internally shared IndexSearcher is correctly closed
   * when the SpellChecker is concurrently accessed and closed.
   */
public void testConcurrentAccess() throws IOException, InterruptedException {
    assertEquals(1, searchers.size());
    final IndexReader r = DirectoryReader.open(userindex);
    spellChecker.clearIndex();
    assertEquals(2, searchers.size());
    addwords(r, spellChecker, "field1");
    assertEquals(3, searchers.size());
    int num_field1 = this.numdoc();
    addwords(r, spellChecker, "field2");
    assertEquals(4, searchers.size());
    int num_field2 = this.numdoc();
    assertEquals(num_field2, num_field1 + 1);
    int numThreads = 5 + random().nextInt(5);
    ExecutorService executor = Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("testConcurrentAccess"));
    SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
    for (int i = 0; i < numThreads; i++) {
        SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r);
        executor.execute(spellCheckWorker);
        workers[i] = spellCheckWorker;
    }
    int iterations = 5 + random().nextInt(5);
    for (int i = 0; i < iterations; i++) {
        Thread.sleep(100);
        // concurrently reset the spell index
        spellChecker.setSpellIndex(this.spellindex);
        // for debug - prints the internal open searchers
        // showSearchersOpen();
    }
    spellChecker.close();
    executor.shutdown();
    // wait for 60 seconds - usually this is very fast but coverage runs could take quite long
    executor.awaitTermination(60L, TimeUnit.SECONDS);
    for (int i = 0; i < workers.length; i++) {
        assertFalse(String.format(Locale.ROOT, "worker thread %d failed", i), workers[i].failed);
        assertTrue(String.format(Locale.ROOT, "worker thread %d is still running but should be terminated", i), workers[i].terminated);
    }
    // 4 searchers more than iterations:
    // 1. at creation
    // 2. clearIndex()
    // 3. and 4. during the two addwords calls
    assertEquals(iterations + 4, searchers.size());
    assertSearchersClosed();
    r.close();
}
Also used: NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), IndexReader (org.apache.lucene.index.IndexReader), ExecutorService (java.util.concurrent.ExecutorService)
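SpellCheckWorker itself is not part of the snippet above. Judging from how it is used (constructed with an IndexReader, executed on the pool, then checked via its failed and terminated flags), a plausible shape is the following. This is a hypothetical reconstruction for illustration, not the actual lucene-solr inner class; the suggestSimilar arguments and the field names are assumptions:

// hypothetical reconstruction of the test's inner worker class
class SpellCheckWorker implements Runnable {
    private final IndexReader reader; // assumption: retained for lookups while querying
    volatile boolean failed;
    volatile boolean terminated;

    SpellCheckWorker(IndexReader reader) {
        this.reader = reader;
    }

    @Override
    public void run() {
        try {
            // hammer the shared spellChecker until the test closes it under us
            while (true) {
                try {
                    spellChecker.suggestSimilar("treeker", 3);
                } catch (AlreadyClosedException e) {
                    return; // expected once spellChecker.close() is called
                }
            }
        } catch (Throwable t) {
            failed = true;
        } finally {
            terminated = true;
        }
    }
}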

Example 3 with NamedThreadFactory

Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.

From the class ConfusionMatrixGenerator, method getConfusionMatrix:

/**
   * get the {@link org.apache.lucene.classification.utils.ConfusionMatrixGenerator.ConfusionMatrix} of a given {@link Classifier},
   * generated on the given {@link IndexReader}, class and text fields.
   *
   * @param reader              the {@link IndexReader} containing the index used for creating the {@link Classifier}
   * @param classifier          the {@link Classifier} whose confusion matrix has to be generated
   * @param classFieldName      the name of the Lucene field used as the classifier's output
   * @param textFieldName       the name of the Lucene field used as the classifier's input
   * @param timeoutMilliseconds the timeout, in milliseconds, after which confusion matrix generation stops
   * @param <T>                 the return type of the {@link ClassificationResult} returned by the given {@link Classifier}
   * @return a {@link org.apache.lucene.classification.utils.ConfusionMatrixGenerator.ConfusionMatrix}
   * @throws IOException if problems occur while reading the index or using the classifier
   */
public static <T> ConfusionMatrix getConfusionMatrix(IndexReader reader, Classifier<T> classifier, String classFieldName, String textFieldName, long timeoutMilliseconds) throws IOException {
    ExecutorService executorService = Executors.newFixedThreadPool(1, new NamedThreadFactory("confusion-matrix-gen-"));
    try {
        Map<String, Map<String, Long>> counts = new HashMap<>();
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        TopDocs topDocs = indexSearcher.search(new TermRangeQuery(classFieldName, null, null, true, true), Integer.MAX_VALUE);
        double time = 0d;
        int counter = 0;
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            if (timeoutMilliseconds > 0 && time >= timeoutMilliseconds) {
                break;
            }
            Document doc = reader.document(scoreDoc.doc);
            String[] correctAnswers = doc.getValues(classFieldName);
            if (correctAnswers != null && correctAnswers.length > 0) {
                Arrays.sort(correctAnswers);
                ClassificationResult<T> result;
                String text = doc.get(textFieldName);
                if (text != null) {
                    try {
                        // fail if classification takes more than 5s
                        long start = System.currentTimeMillis();
                        result = executorService.submit(() -> classifier.assignClass(text)).get(5, TimeUnit.SECONDS);
                        long end = System.currentTimeMillis();
                        time += end - start;
                        if (result != null) {
                            T assignedClass = result.getAssignedClass();
                            if (assignedClass != null) {
                                counter++;
                                String classified = assignedClass instanceof BytesRef ? ((BytesRef) assignedClass).utf8ToString() : assignedClass.toString();
                                String correctAnswer;
                                if (Arrays.binarySearch(correctAnswers, classified) >= 0) {
                                    correctAnswer = classified;
                                } else {
                                    correctAnswer = correctAnswers[0];
                                }
                                Map<String, Long> stringLongMap = counts.get(correctAnswer);
                                if (stringLongMap != null) {
                                    Long aLong = stringLongMap.get(classified);
                                    if (aLong != null) {
                                        stringLongMap.put(classified, aLong + 1);
                                    } else {
                                        stringLongMap.put(classified, 1L);
                                    }
                                } else {
                                    stringLongMap = new HashMap<>();
                                    stringLongMap.put(classified, 1L);
                                    counts.put(correctAnswer, stringLongMap);
                                }
                            }
                        }
                    } catch (TimeoutException timeoutException) {
                        // a timed-out classification counts as the full 5s budget
                        time += 5000;
                    } catch (ExecutionException | InterruptedException executionException) {
                        throw new RuntimeException(executionException);
                    }
                }
            }
        }
        return new ConfusionMatrix(counts, time / counter, counter);
    } finally {
        executorService.shutdown();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), HashMap (java.util.HashMap), Document (org.apache.lucene.document.Document), ScoreDoc (org.apache.lucene.search.ScoreDoc), TopDocs (org.apache.lucene.search.TopDocs), ExecutionException (java.util.concurrent.ExecutionException), BytesRef (org.apache.lucene.util.BytesRef), TimeoutException (java.util.concurrent.TimeoutException), NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), TermRangeQuery (org.apache.lucene.search.TermRangeQuery), ExecutorService (java.util.concurrent.ExecutorService), Map (java.util.Map)
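The nested-map bookkeeping in the middle of getConfusionMatrix (the stringLongMap if/else ladder) predates the Java 8 map API. On Java 8+ the same counting collapses to two calls; an equivalent sketch, not the actual lucene-solr code:

import java.util.HashMap;
import java.util.Map;

final class Counts {
    // create the per-answer row on demand, then bump the per-class counter
    static void increment(Map<String, Map<String, Long>> counts, String correctAnswer, String classified) {
        counts.computeIfAbsent(correctAnswer, k -> new HashMap<>())
              .merge(classified, 1L, Long::sum);
    }
}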

Example 4 with NamedThreadFactory

Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.

From the class TestIndexSearcher, method testHugeN:

// should not throw exception
public void testHugeN() throws Exception {
    ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("TestIndexSearcher"));
    IndexSearcher[] searchers = new IndexSearcher[] { new IndexSearcher(reader), new IndexSearcher(reader, service) };
    Query[] queries = new Query[] { new MatchAllDocsQuery(), new TermQuery(new Term("field", "1")) };
    Sort[] sorts = new Sort[] { null, new Sort(new SortField("field2", SortField.Type.STRING)) };
    ScoreDoc[] afters = new ScoreDoc[] { null, new FieldDoc(0, 0f, new Object[] { new BytesRef("boo!") }) };
    for (IndexSearcher searcher : searchers) {
        for (ScoreDoc after : afters) {
            for (Query query : queries) {
                for (Sort sort : sorts) {
                    searcher.search(query, Integer.MAX_VALUE);
                    searcher.searchAfter(after, query, Integer.MAX_VALUE);
                    if (sort != null) {
                        searcher.search(query, Integer.MAX_VALUE, sort);
                        searcher.search(query, Integer.MAX_VALUE, sort, true, true);
                        searcher.search(query, Integer.MAX_VALUE, sort, true, false);
                        searcher.search(query, Integer.MAX_VALUE, sort, false, true);
                        searcher.search(query, Integer.MAX_VALUE, sort, false, false);
                        searcher.searchAfter(after, query, Integer.MAX_VALUE, sort);
                        searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, true, true);
                        searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, true, false);
                        searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, false, true);
                        searcher.searchAfter(after, query, Integer.MAX_VALUE, sort, false, false);
                    }
                }
            }
        }
    }
    TestUtil.shutdownExecutorService(service);
}
Also used: NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), Term (org.apache.lucene.index.Term), ExecutorService (java.util.concurrent.ExecutorService), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), BytesRef (org.apache.lucene.util.BytesRef)
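The reason a top-Integer.MAX_VALUE search must not throw is that IndexSearcher caps the requested hit count at the index size rather than allocating a priority queue of the requested length. Application code that computes n dynamically sometimes applies the same cap explicitly; a minimal sketch under that assumption:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

final class SafeSearch {
    // never request more hits than the index can possibly return
    static TopDocs searchClamped(IndexSearcher searcher, Query query, int requested) throws IOException {
        int n = Math.min(requested, Math.max(1, searcher.getIndexReader().maxDoc()));
        return searcher.search(query, n);
    }
}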

Example 5 with NamedThreadFactory

Use of org.apache.lucene.util.NamedThreadFactory in project lucene-solr by apache.

From the class TestCodecLoadingDeadlock, method main:

// this method is called in a spawned process:
public static void main(final String... args) throws Exception {
    final String codecName = args[0];
    final String pfName = args[1];
    final String dvfName = args[2];
    // two times the number of cases in the switch statement below (7 cases, 14 threads)
    final int numThreads = 14;
    final ExecutorService pool = Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("deadlockchecker"));
    final CyclicBarrier barrier = new CyclicBarrier(numThreads);
    IntStream.range(0, numThreads).forEach(taskNo -> pool.execute(() -> {
        try {
            barrier.await();
            switch(taskNo % 7) {
                case 0:
                    Codec.getDefault();
                    break;
                case 1:
                    Codec.forName(codecName);
                    break;
                case 2:
                    PostingsFormat.forName(pfName);
                    break;
                case 3:
                    DocValuesFormat.forName(dvfName);
                    break;
                case 4:
                    Codec.availableCodecs();
                    break;
                case 5:
                    PostingsFormat.availablePostingsFormats();
                    break;
                case 6:
                    DocValuesFormat.availableDocValuesFormats();
                    break;
                default:
                    throw new AssertionError();
            }
        } catch (Throwable t) {
            synchronized (args) {
                System.err.println(Thread.currentThread().getName() + " failed to lookup codec service:");
                t.printStackTrace(System.err);
            }
            Runtime.getRuntime().halt(1);
        }
    }));
    pool.shutdown();
    // block until every task has finished, re-awaiting in one-minute slices
    while (!pool.awaitTermination(1, TimeUnit.MINUTES)) ;
}
Also used: NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory), ExecutorService (java.util.concurrent.ExecutorService), CyclicBarrier (java.util.concurrent.CyclicBarrier)
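The CyclicBarrier is the standard trick for making a race as likely as possible: every task blocks on await() until all fourteen threads are scheduled, so the codec lookups begin nearly simultaneously. Stripped of the codec specifics, the pattern looks like this (a generic sketch; the pool size and thread-name prefix are arbitrary):

import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;
import org.apache.lucene.util.NamedThreadFactory;

public class BarrierStart {
    public static void main(String[] args) throws InterruptedException {
        final int numThreads = 8;
        final ExecutorService pool = Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("racer"));
        final CyclicBarrier barrier = new CyclicBarrier(numThreads);
        IntStream.range(0, numThreads).forEach(taskNo -> pool.execute(() -> {
            try {
                barrier.await(); // all threads released together to maximize contention
                // ... the operation under test goes here ...
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }));
        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.MINUTES);
    }
}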

Aggregations

ExecutorService (java.util.concurrent.ExecutorService): 7 usages
NamedThreadFactory (org.apache.lucene.util.NamedThreadFactory): 7 usages
Document (org.apache.lucene.document.Document): 4 usages
IndexReader (org.apache.lucene.index.IndexReader): 3 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 2 usages
Term (org.apache.lucene.index.Term): 2 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 2 usages
ScoreDoc (org.apache.lucene.search.ScoreDoc): 2 usages
TopDocs (org.apache.lucene.search.TopDocs): 2 usages
Directory (org.apache.lucene.store.Directory): 2 usages
BytesRef (org.apache.lucene.util.BytesRef): 2 usages
IOException (java.io.IOException): 1 usage
Path (java.nio.file.Path): 1 usage
HashMap (java.util.HashMap): 1 usage
Map (java.util.Map): 1 usage
CountDownLatch (java.util.concurrent.CountDownLatch): 1 usage
CyclicBarrier (java.util.concurrent.CyclicBarrier): 1 usage
ExecutionException (java.util.concurrent.ExecutionException): 1 usage
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 1 usage