Search in sources :

Example 11 with Statistics

use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.

the class IndexDatabase method getDocument.

/**
 * Look up the indexed document that corresponds to a file under the source root.
 *
 * @param file File object of a file under source root
 * @return the Document whose stored path equals the file's path relative to
 * the source root, or {@code null} when the path resolution hits a forbidden
 * symlink, no index reader is available, the search yields no hits, or the
 * top hit is not an exact path match
 * @throws IOException on I/O error
 * @throws ParseException on problem with building Query
 */
public static Document getDocument(File file) throws IOException, ParseException {
    RuntimeEnvironment environment = RuntimeEnvironment.getInstance();
    String relativePath;
    try {
        relativePath = environment.getPathRelativeToSourceRoot(file);
    } catch (ForbiddenSymlinkException e) {
        LOGGER.log(Level.FINER, e.getMessage());
        return null;
    }

    // Backslashes double as the Lucene escape character, so normalize
    // Windows-style delimiters to forward slashes before querying.
    final String lookupPath = relativePath.replace('\\', '/');

    try (IndexReader reader = getIndexReader(lookupPath)) {
        if (reader == null) {
            // Without an index there cannot be a document.
            return null;
        }

        Query pathQuery = new QueryBuilder().setPath(lookupPath).build();
        IndexSearcher searcher = new IndexSearcher(reader);

        Statistics timer = new Statistics();
        TopDocs topDocs = searcher.search(pathQuery, 1);
        boolean noHits = topDocs.totalHits.value == 0;
        timer.report(LOGGER, Level.FINEST, "search via getDocument done", "search.latency",
                new String[] { "category", "getdocument", "outcome", noHits ? "empty" : "success" });

        if (noHits) {
            // Nothing matched, so there is nothing to return.
            return null;
        }

        Document candidate = searcher.doc(topDocs.scoreDocs[0].doc);
        // The top hit is accepted only when its stored path matches exactly.
        return lookupPath.equals(candidate.get(QueryBuilder.PATH)) ? candidate : null;
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) Query(org.apache.lucene.search.Query) IndexReader(org.apache.lucene.index.IndexReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Document(org.apache.lucene.document.Document) Statistics(org.opengrok.indexer.util.Statistics)

Example 12 with Statistics

use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.

the class HistoryGuru method invalidateRepositories.

/**
 * Go through the list of specified repositories and determine if they
 * are valid. Those that make it through will form the new HistoryGuru
 * internal map. This means this method should be used only if dealing
 * with whole collection of repositories.
 * <br>
 * The caller is expected to reflect the new list via {@code getRepositories()}.
 * <br>
 * The processing is done via thread pool since the operation
 * is expensive (see {@code RepositoryFactory.getRepository()}).
 * <br>
 * If the calling thread is interrupted while waiting for the checks to
 * finish, the interruption is logged, the thread's interrupt status is
 * restored, and the internal map is rebuilt from whatever repositories
 * have been collected up to that point.
 *
 * @param repos collection of repositories to invalidate.
 * If null or empty, the internal map of repositories will be cleared.
 * @param cmdType command timeout type
 */
public void invalidateRepositories(Collection<? extends RepositoryInfo> repos, CommandTimeoutType cmdType) {
    if (repos == null || repos.isEmpty()) {
        clear();
        return;
    }
    // Worker threads insert concurrently, hence the synchronized wrapper.
    Map<String, Repository> newrepos = Collections.synchronizedMap(new HashMap<>(repos.size()));
    Statistics elapsed = new Statistics();
    LOGGER.log(Level.FINE, "invalidating {0} repositories", repos.size());
    /*
     * getRepository() below does various checks of the repository
     * which involves executing commands and I/O so make the checks
     * run in parallel to speed up the process.
     */
    final CountDownLatch latch = new CountDownLatch(repos.size());
    int parallelismLevel;
    // Both indexer and web app startup should be as quick as possible.
    if (cmdType == CommandTimeoutType.INDEXER || cmdType == CommandTimeoutType.WEBAPP_START) {
        parallelismLevel = env.getIndexingParallelism();
    } else {
        parallelismLevel = env.getRepositoryInvalidationParallelism();
    }
    final ExecutorService executor = Executors.newFixedThreadPool(parallelismLevel, runnable -> {
        Thread thread = Executors.defaultThreadFactory().newThread(runnable);
        thread.setName("invalidate-repos-" + thread.getId());
        return thread;
    });
    for (RepositoryInfo rinfo : repos) {
        executor.submit(() -> {
            try {
                Repository r = RepositoryFactory.getRepository(rinfo, cmdType);
                if (r == null) {
                    LOGGER.log(Level.WARNING, "Failed to instantiate internal repository data for {0} in {1}", new Object[] { rinfo.getType(), rinfo.getDirectoryName() });
                } else {
                    newrepos.put(r.getDirectoryName(), r);
                }
            } catch (Exception ex) {
                // We want to catch any exception since we are in thread.
                LOGGER.log(Level.WARNING, "Could not create " + rinfo.getType() + " for '" + rinfo.getDirectoryName() + "'", ex);
            } finally {
                // Count down on every path so the waiting thread is always released.
                latch.countDown();
            }
        });
    }
    // Wait until all repositories are validated.
    try {
        latch.await();
    } catch (InterruptedException ex) {
        LOGGER.log(Level.SEVERE, "latch exception", ex);
        // Catching InterruptedException clears the interrupt flag; restore it
        // so that callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
    }
    executor.shutdown();
    clear();
    newrepos.forEach((_key, repo) -> putRepository(repo));
    elapsed.report(LOGGER, String.format("Done invalidating %d repositories", newrepos.size()), "history.repositories.invalidate");
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) CountDownLatch(java.util.concurrent.CountDownLatch) Statistics(org.opengrok.indexer.util.Statistics) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 13 with Statistics

use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.

the class SearchEngine method searchIndex.

/**
 * Run the current query against the given searcher and gather the matching documents.
 * <br>
 * The first pass collects up to {@code hitsPerPage * cachePages} hits and records
 * the total hit count. When paging is off and there is at least one hit, the query
 * is executed again with a collector sized to the total hit count. The documents
 * of the resulting hits are appended to {@code docs}.
 *
 * @param searcher searcher to run the query with
 * @param paging whether the results are paged
 * @throws IOException if the search or a document fetch fails
 */
private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException {
    collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE);

    Statistics timer = new Statistics();
    searcher.search(query, collector);
    totalHits = collector.getTotalHits();

    String outcome;
    if (totalHits > 0) {
        outcome = "success";
    } else {
        outcome = "empty";
    }
    timer.report(LOGGER, Level.FINEST, "search via SearchEngine done", "search.latency",
            new String[] { "category", "engine", "outcome", outcome });

    if (totalHits > 0 && !paging) {
        // Without paging, search again with a collector sized to the total hit count.
        collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
        searcher.search(query, collector);
    }

    hits = collector.topDocs().scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        docs.add(searcher.doc(hits[i].doc));
    }
}
Also used : Document(org.apache.lucene.document.Document) Statistics(org.opengrok.indexer.util.Statistics) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 14 with Statistics

use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.

the class Indexer method prepareIndexer.

/**
 * First phase of indexing: optionally add projects, scan for repositories,
 * generate the history cache and list frequent tokens.
 * <br>
 * The steps run in that order, each controlled by its own argument.
 * Nothing is done unless Universal Ctags validates successfully.
 *
 * @param env runtime environment
 * @param searchPaths list of paths in which to search for repositories;
 * the scan is skipped when empty
 * @param addProjects if true, add projects from the entries of the source root
 * @param createDict if true, create dictionary
 * @param createHistoryCache create history cache flag
 * @param subFiles list of directories, handed to {@code IndexDatabase.listFrequentTokens()}
 * @param repositories list of repositories to generate history cache for;
 * when null or empty the cache is generated for all repositories
 * @throws IndexerException if Universal Ctags validation fails
 * @throws IOException I/O exception
 */
public void prepareIndexer(RuntimeEnvironment env, Set<String> searchPaths, boolean addProjects, boolean createDict, boolean createHistoryCache, List<String> subFiles, List<String> repositories) throws IndexerException, IOException {
    if (!env.validateUniversalCtags()) {
        throw new IndexerException("Didn't find Universal Ctags");
    }

    // Projects are added before the repository scan below; per the original
    // note, some project properties might be needed for that.
    if (addProjects) {
        File[] sourceRootEntries = env.getSourceRootFile().listFiles();
        Map<String, Project> knownProjects = env.getProjects();
        addProjects(sourceRootEntries, knownProjects);
    }

    if (!searchPaths.isEmpty()) {
        LOGGER.log(Level.INFO, "Scanning for repositories in {0}...", searchPaths);
        Statistics scanTimer = new Statistics();
        String[] pathArray = searchPaths.toArray(new String[0]);
        env.setRepositories(pathArray);
        String summary = String.format("Done scanning for repositories, found %d repositories", env.getRepositories().size());
        scanTimer.report(LOGGER, summary, "indexer.repository.scan");
    }

    if (createHistoryCache) {
        // Even if history is disabled globally, it can be enabled for some repositories.
        boolean allRepositories = repositories == null || repositories.isEmpty();
        if (allRepositories) {
            LOGGER.log(Level.INFO, "Generating history cache for all repositories ...");
            HistoryGuru.getInstance().createCache();
        } else {
            LOGGER.log(Level.INFO, "Generating history cache for repositories: {0}", String.join(",", repositories));
            HistoryGuru.getInstance().createCache(repositories);
        }
        LOGGER.info("Done generating history cache");
    }

    if (createDict) {
        IndexDatabase.listFrequentTokens(subFiles);
    }
}
Also used : Project(org.opengrok.indexer.configuration.Project) File(java.io.File) Statistics(org.opengrok.indexer.util.Statistics)

Example 15 with Statistics

use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.

the class DirectoryExtraReader method search.

/**
 * Search for supplemental file information in the specified {@code path}.
 * @param searcher a defined instance
 * @param path a defined path to qualify the search
 * @return a list of results, limited to {@code DIR_LIMIT_NUM} values
 * @throws IllegalArgumentException if {@code searcher} or {@code path} is null
 * @throws IOException if an error occurs searching the index, or if the
 * directory path query cannot be built (the underlying {@code ParseException}
 * is attached as the cause)
 */
public List<NullableNumLinesLOC> search(IndexSearcher searcher, String path) throws IOException {
    if (searcher == null) {
        throw new IllegalArgumentException("`searcher' is null");
    }
    if (path == null) {
        throw new IllegalArgumentException("`path' is null");
    }
    QueryBuilder qbuild = new QueryBuilder();
    qbuild.setDirPath(path);
    Query query;
    try {
        query = qbuild.build();
    } catch (ParseException e) {
        final String PARSE_ERROR = "An error occurred while parsing dirpath query";
        LOGGER.log(Level.WARNING, PARSE_ERROR, e);
        // Keep the original exception as the cause so the parse failure
        // details are not lost to callers handling the IOException.
        throw new IOException(PARSE_ERROR, e);
    }
    Statistics stat = new Statistics();
    TopDocs hits = searcher.search(query, DIR_LIMIT_NUM);
    stat.report(LOGGER, Level.FINEST, "search via DirectoryExtraReader done", "search.latency", new String[] { "category", "extra", "outcome", hits.scoreDocs.length > 0 ? "success" : "empty" });
    return processHits(searcher, hits);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) Query(org.apache.lucene.search.Query) ParseException(org.apache.lucene.queryparser.classic.ParseException) IOException(java.io.IOException) Statistics(org.opengrok.indexer.util.Statistics)

Aggregations

Statistics (org.opengrok.indexer.util.Statistics)16 IOException (java.io.IOException)8 ForbiddenSymlinkException (org.opengrok.indexer.util.ForbiddenSymlinkException)5 File (java.io.File)4 InvocationTargetException (java.lang.reflect.InvocationTargetException)4 ArrayList (java.util.ArrayList)4 CountDownLatch (java.util.concurrent.CountDownLatch)3 Project (org.opengrok.indexer.configuration.Project)3 RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment)3 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ExecutorService (java.util.concurrent.ExecutorService)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 Document (org.apache.lucene.document.Document)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 Query (org.apache.lucene.search.Query)2 TopDocs (org.apache.lucene.search.TopDocs)2 AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer)2