Search in sources:

Example 6 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class SearchHelper method prepareExec.

/**
 * Create the searcher to use w.r.t. currently set parameters and the given
 * projects. Does not produce any {@link #redirect} link. It also does
 * nothing if {@link #redirect} or {@link #errorMsg} have a
 * non-{@code null} value.
 * <p>
 * Parameters which should be populated/set at this time:
 * <ul>
 * <li>{@link #builder}</li> <li>{@link #dataRoot}</li>
 * <li>{@link #order} (falls back to relevance if unset, i.e. {@code null})</li>
 * </ul>
 * Populates/sets:
 * <ul>
 * <li>{@link #query}</li> <li>{@link #searcher}</li> <li>{@link #sort}</li>
 * <li>{@link #projects}</li> <li>{@link #errorMsg} if an error occurs</li>
 * </ul>
 *
 * @param projects project names. If empty, a no-project setup
 * is assumed (i.e. DATA_ROOT/index will be used instead of possible
 * multiple DATA_ROOT/$project/index). If the set contains projects
 * not known in the configuration or projects not yet indexed,
 * an error will be returned in {@link #errorMsg}. A {@code null} value
 * is reported via {@link #errorMsg} as well.
 * @return this instance
 */
public SearchHelper prepareExec(SortedSet<String> projects) {
    if (redirect != null || errorMsg != null) {
        return this;
    }
    settingsHelper = null;
    try {
        indexDir = new File(dataRoot, IndexDatabase.INDEX_DIR);
        // the Query created by the QueryBuilder
        query = builder.build();
        if (projects == null) {
            errorMsg = "No project selected!";
            return this;
        }
        this.projects = projects;
        if (projects.isEmpty()) {
            // no project setup
            FSDirectory dir = FSDirectory.open(indexDir.toPath());
            reader = DirectoryReader.open(dir);
            searcher = new IndexSearcher(reader);
            closeOnDestroy = true;
        } else {
            // Check list of project names first to make sure all of them
            // are valid and indexed.
            closeOnDestroy = false;
            Set<String> invalidProjects = projects.stream()
                    .filter(proj -> Project.getByName(proj) == null)
                    .collect(Collectors.toSet());
            if (!invalidProjects.isEmpty()) {
                errorMsg = "Project list contains invalid projects: " + String.join(", ", invalidProjects);
                return this;
            }
            // Every name resolves to a project at this point, so getByName() cannot return null here.
            Set<String> notIndexedProjects = projects.stream()
                    .map(Project::getByName)
                    .filter(proj -> !proj.isIndexed())
                    .map(Project::getName)
                    .collect(Collectors.toSet());
            if (!notIndexedProjects.isEmpty()) {
                errorMsg = "Some of the projects to be searched are not indexed yet: "
                        + String.join(", ", notIndexedProjects);
                return this;
            }
            // We use MultiReader even for single project. This should
            // not matter given that MultiReader is just a cheap wrapper
            // around set of IndexReader objects.
            reader = RuntimeEnvironment.getInstance().getMultiReader(projects, searcherList);
            if (reader == null) {
                // The project set is known to be non-empty in this branch,
                // so the project list is always part of the message.
                errorMsg = "Failed to initialize search. Check the index"
                        + " for projects: " + String.join(", ", projects);
                return this;
            }
            searcher = new IndexSearcher(reader);
        }
        // Most probably they are not reused. SearcherLifetimeManager might help here.
        if (order == null) {
            // switch on a null enum reference would throw NullPointerException;
            // honour the documented fallback to relevance instead.
            sort = Sort.RELEVANCE;
        } else {
            switch (order) {
                case LASTMODIFIED:
                    sort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true));
                    break;
                case BY_PATH:
                    sort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING));
                    break;
                default:
                    sort = Sort.RELEVANCE;
                    break;
            }
        }
        checker = new DirectSpellChecker();
    } catch (ParseException e) {
        errorMsg = PARSE_ERROR_MSG + e.getMessage();
    } catch (FileNotFoundException e) {
        errorMsg = "Index database not found. Check the index";
        if (!projects.isEmpty()) {
            errorMsg += " for projects: " + String.join(", ", projects);
        }
        errorMsg += "; " + e.getMessage();
    } catch (IOException e) {
        // getMessage() may be null; errorMsg must end up non-null so that
        // the failure is visible to callers checking errorMsg.
        String detail = e.getMessage();
        errorMsg = (detail != null) ? detail : e.toString();
    }
    return this;
}
Also used : SuperIndexSearcher(org.opengrok.indexer.configuration.SuperIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) HistoryContext(org.opengrok.indexer.search.context.HistoryContext) SortedSet(java.util.SortedSet) ScoreDoc(org.apache.lucene.search.ScoreDoc) Context(org.opengrok.indexer.search.context.Context) SettingsHelper(org.opengrok.indexer.search.SettingsHelper) MatchesUtils(org.apache.lucene.search.MatchesUtils) IndexableField(org.apache.lucene.index.IndexableField) Summarizer(org.opengrok.indexer.search.Summarizer) Term(org.apache.lucene.index.Term) Project(org.opengrok.indexer.configuration.Project) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) Document(org.apache.lucene.document.Document) Map(java.util.Map) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SortField(org.apache.lucene.search.SortField) Path(java.nio.file.Path) SuggestWord(org.apache.lucene.search.spell.SuggestWord) MatchesIterator(org.apache.lucene.search.MatchesIterator) Definitions(org.opengrok.indexer.analysis.Definitions) Sort(org.apache.lucene.search.Sort) DirectoryReader(org.apache.lucene.index.DirectoryReader) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker) Set(java.util.Set) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) IndexDatabase(org.opengrok.indexer.index.IndexDatabase) FileNotFoundException(java.io.FileNotFoundException) List(java.util.List) SuperIndexSearcher(org.opengrok.indexer.configuration.SuperIndexSearcher) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) IndexedSymlink(org.opengrok.indexer.index.IndexedSymlink) LoggerFactory(org.opengrok.indexer.logger.LoggerFactory) Pattern(java.util.regex.Pattern) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) IndexReader(org.apache.lucene.index.IndexReader) 
IndexSearcher(org.apache.lucene.search.IndexSearcher) ReaderUtil(org.apache.lucene.index.ReaderUtil) ParseException(org.apache.lucene.queryparser.classic.ParseException) SuggestMode(org.apache.lucene.search.spell.SuggestMode) Weight(org.apache.lucene.search.Weight) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) CompatibleAnalyser(org.opengrok.indexer.analysis.CompatibleAnalyser) FSDirectory(org.apache.lucene.store.FSDirectory) TopDocs(org.apache.lucene.search.TopDocs) AnalyzerGuru(org.opengrok.indexer.analysis.AnalyzerGuru) IOUtils(org.opengrok.indexer.util.IOUtils) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) IOException(java.io.IOException) File(java.io.File) ScoreMode(org.apache.lucene.search.ScoreMode) TermQuery(org.apache.lucene.search.TermQuery) Paths(java.nio.file.Paths) Matches(org.apache.lucene.search.Matches) FileNotFoundException(java.io.FileNotFoundException) FSDirectory(org.apache.lucene.store.FSDirectory) SortField(org.apache.lucene.search.SortField) IOException(java.io.IOException) Project(org.opengrok.indexer.configuration.Project) Sort(org.apache.lucene.search.Sort) ParseException(org.apache.lucene.queryparser.classic.ParseException) File(java.io.File) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker)

Example 7 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class SearchHelper method searchSingle.

/**
 * Looks up the index document that corresponds to exactly one file.
 * <p>
 * The {@link #query} field is overwritten with the path query built here.
 * @param file the file to look up in the index
 * @return {@link ScoreDoc#doc} of the matching document, or -1 if the path
 * could not be determined, nothing was found or the best hit has a different path
 * @throws IOException if an error happens when accessing the index
 * @throws ParseException if an error happens when building the Lucene query
 */
public int searchSingle(File file) throws IOException, ParseException {
    String relativePath;
    try {
        relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file);
    } catch (ForbiddenSymlinkException e) {
        LOGGER.log(Level.FINER, e.getMessage());
        return -1;
    }
    // Backslash is the Lucene escape character, so Windows style
    // delimiters are turned into forward slashes before querying.
    relativePath = relativePath.replace("\\", "/");

    // Start from the current builder settings (if any) and narrow down to the path.
    QueryBuilder pathBuilder = new QueryBuilder();
    if (builder != null) {
        pathBuilder.reset(builder);
    }
    query = pathBuilder.setPath(relativePath).build();

    TopDocs hits = searcher.search(query, 1);
    if (hits.totalHits.value != 0) {
        int candidate = hits.scoreDocs[0].doc;
        String indexedPath = searcher.doc(candidate).get(QueryBuilder.PATH);
        // The hit counts only when the stored PATH is an exact match.
        if (relativePath.equals(indexedPath)) {
            return candidate;
        }
    }
    return -1;
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Document(org.apache.lucene.document.Document)

Example 8 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class IndexDatabase method getDocument.

/**
 * Fetches the indexed document of a single file.
 * @param file File object of a file under source root
 * @return Document object for the file or {@code null} if the path cannot be
 * determined, there is no index reader, the search has no hits or the best
 * hit is stored under a different path
 * @throws IOException on I/O error
 * @throws ParseException on problem with building Query
 */
public static Document getDocument(File file) throws IOException, ParseException {
    String relativePath;
    try {
        relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file);
    } catch (ForbiddenSymlinkException e) {
        LOGGER.log(Level.FINER, e.getMessage());
        return null;
    }
    // Backslash is the Lucene escape character; use forward slashes instead of Windows delimiters.
    relativePath = relativePath.replace("\\", "/");

    try (IndexReader indexReader = getIndexReader(relativePath)) {
        if (indexReader == null) {
            // Without an index there is nothing to look up.
            return null;
        }
        Query pathQuery = new QueryBuilder().setPath(relativePath).build();
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Statistics searchStat = new Statistics();
        TopDocs hits = indexSearcher.search(pathQuery, 1);
        boolean noHits = hits.totalHits.value == 0;
        String outcome = noHits ? "empty" : "success";
        searchStat.report(LOGGER, Level.FINEST, "search via getDocument done", "search.latency",
                new String[] { "category", "getdocument", "outcome", outcome });
        if (noHits) {
            return null;
        }
        Document candidate = indexSearcher.doc(hits.scoreDocs[0].doc);
        // Hand the document out only if its stored PATH is an exact match.
        return relativePath.equals(candidate.get(QueryBuilder.PATH)) ? candidate : null;
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) Query(org.apache.lucene.search.Query) IndexReader(org.apache.lucene.index.IndexReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Document(org.apache.lucene.document.Document) Statistics(org.opengrok.indexer.util.Statistics)

Example 9 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class ContextTest method testAllLinkWithLongLines.

/**
 * Verifies that the [all...] link is produced when one very long line
 * spans the buffer boundary. Bug 383.
 * @throws org.apache.lucene.queryparser.classic.ParseException parse exception
 */
@Test
public void testAllLinkWithLongLines() throws ParseException {
    // Build a single line that is longer than Context.MAXFILEREAD.
    StringBuilder longLine = new StringBuilder("search_for_me");
    while (longLine.length() <= Context.MAXFILEREAD) {
        longLine.append(" more words");
    }

    Reader input = new StringReader(longLine.toString());
    StringWriter rendered = new StringWriter();
    QueryBuilder queryBuilder = new QueryBuilder().setFreetext("search_for_me");
    Context context = new Context(queryBuilder.build(), queryBuilder);

    boolean matched = context.getContext(input, rendered, "", "", "", null, true,
            queryBuilder.isDefSearch(), null);
    assertTrue(matched, "No match found");

    String html = rendered.toString();
    assertTrue(html.contains(">[all...]</a>"), "No [all...] link");
}
Also used : StringWriter(java.io.StringWriter) StringReader(java.io.StringReader) CharArrayReader(java.io.CharArrayReader) Reader(java.io.Reader) StringReader(java.io.StringReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Test(org.junit.jupiter.api.Test)

Example 10 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class ContextTest method searchContextTestHelper.

/**
 * Runs a free text query against the given text and checks the produced context.
 * @param searchInText text of the document that is searched
 * @param queryString the free text search query
 * @param expectWordInContext word that has to show up highlighted
 * (wrapped in {@code <b>} tags) in the context; pass {@code null} when
 * no match is expected at all
 * @throws ParseException parse exception
 */
private void searchContextTestHelper(String searchInText, String queryString, String expectWordInContext) throws ParseException {
    Reader input = new StringReader(searchInText);
    StringWriter rendered = new StringWriter();
    QueryBuilder queryBuilder = new QueryBuilder().setFreetext(queryString);
    Context context = new Context(queryBuilder.build(), queryBuilder);

    boolean matched = context.getContext(input, rendered, "", "", "", null, true,
            queryBuilder.isDefSearch(), null);

    if (expectWordInContext == null) {
        assertFalse(matched, "Match found");
        return;
    }

    assertTrue(matched, "No match found");
    String highlighted = "<b>" + expectWordInContext + "</b>";
    assertTrue(rendered.toString().contains(highlighted),
            "Searched word '" + expectWordInContext + "' not in context");
}
Also used : StringWriter(java.io.StringWriter) StringReader(java.io.StringReader) CharArrayReader(java.io.CharArrayReader) Reader(java.io.Reader) StringReader(java.io.StringReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder)

Aggregations

QueryBuilder (org.opengrok.indexer.search.QueryBuilder)12 StringReader (java.io.StringReader)7 StringWriter (java.io.StringWriter)7 Test (org.junit.jupiter.api.Test)7 CharArrayReader (java.io.CharArrayReader)4 Reader (java.io.Reader)4 Document (org.apache.lucene.document.Document)3 TopDocs (org.apache.lucene.search.TopDocs)3 RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment)3 ForbiddenSymlinkException (org.opengrok.indexer.util.ForbiddenSymlinkException)3 ArrayList (java.util.ArrayList)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 Query (org.apache.lucene.search.Query)2 Definitions (org.opengrok.indexer.analysis.Definitions)2 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1