Search in sources :

Example 1 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class SearchHelper method prepareExec.

/**
 * Create the searcher to use w.r.t. currently set parameters and the given
 * projects. Does not produce any {@link #redirect} link. It also does
 * nothing if {@link #redirect} or {@link #errorMsg} have a
 * non-{@code null} value.
 * <p>
 * Parameters which should be populated/set at this time:
 * <ul>
 * <li>{@link #builder}</li> <li>{@link #dataRoot}</li>
 * <li>{@link #order} (falls back to relevance if unset, i.e. {@code null})</li>
 * </ul>
 * Populates/sets:
 * <ul>
 * <li>{@link #query}</li> <li>{@link #searcher}</li> <li>{@link #sort}</li>
 * <li>{@link #projects}</li> <li>{@link #errorMsg} if an error occurs</li>
 * </ul>
 * On success the index reader and the spell checker used by this helper
 * are (re)created as well.
 *
 * @param projects project names. If empty, a no-project setup
 * is assumed (i.e. DATA_ROOT/index will be used instead of possible
 * multiple DATA_ROOT/$project/index). If the set contains projects
 * not known in the configuration or projects not yet indexed,
 * an error will be returned in {@link #errorMsg}. A {@code null} value
 * is reported through {@link #errorMsg} as well.
 * @return this instance
 */
public SearchHelper prepareExec(SortedSet<String> projects) {
    if (redirect != null || errorMsg != null) {
        return this;
    }
    settingsHelper = null;
    try {
        indexDir = new File(dataRoot, IndexDatabase.INDEX_DIR);
        // the Query created by the QueryBuilder
        query = builder.build();
        if (projects == null) {
            errorMsg = "No project selected!";
            return this;
        }
        this.projects = projects;
        if (projects.isEmpty()) {
            // no project setup
            FSDirectory dir = FSDirectory.open(indexDir.toPath());
            try {
                reader = DirectoryReader.open(dir);
            } catch (IOException | RuntimeException e) {
                // The reader was never created, so nothing else will ever
                // close the directory; release it before propagating.
                try {
                    dir.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            searcher = new IndexSearcher(reader);
            closeOnDestroy = true;
        } else {
            // Check list of project names first to make sure all of them
            // are valid and indexed.
            closeOnDestroy = false;
            // Lists (rather than hash sets) keep the sorted order of the
            // input so that the error messages are deterministic.
            List<String> invalidProjects = projects.stream()
                    .filter(name -> Project.getByName(name) == null)
                    .collect(Collectors.toList());
            if (!invalidProjects.isEmpty()) {
                errorMsg = "Project list contains invalid projects: " + String.join(", ", invalidProjects);
                return this;
            }
            List<String> notIndexedProjects = projects.stream()
                    .map(Project::getByName)
                    .filter(project -> !project.isIndexed())
                    .map(Project::getName)
                    .collect(Collectors.toList());
            if (!notIndexedProjects.isEmpty()) {
                errorMsg = "Some of the projects to be searched are not indexed yet: "
                        + String.join(", ", notIndexedProjects);
                return this;
            }
            // We use MultiReader even for single project. This should
            // not matter given that MultiReader is just a cheap wrapper
            // around set of IndexReader objects.
            reader = RuntimeEnvironment.getInstance().getMultiReader(projects, searcherList);
            if (reader == null) {
                // The project set is known to be non-empty in this branch.
                errorMsg = "Failed to initialize search. Check the index"
                        + " for projects: " + String.join(", ", projects);
                return this;
            }
            searcher = new IndexSearcher(reader);
        }
        // Most probably they are not reused. SearcherLifetimeManager might help here.
        if (order == null) {
            // switch on a null enum reference would throw
            // NullPointerException; honour the documented fallback instead.
            sort = Sort.RELEVANCE;
        } else {
            switch (order) {
                case LASTMODIFIED:
                    sort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true));
                    break;
                case BY_PATH:
                    sort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING));
                    break;
                default:
                    sort = Sort.RELEVANCE;
                    break;
            }
        }
        checker = new DirectSpellChecker();
    } catch (ParseException e) {
        errorMsg = PARSE_ERROR_MSG + e.getMessage();
    } catch (FileNotFoundException e) {
        // Only reachable after the null check above, so 'projects' is set.
        errorMsg = "Index database not found. Check the index";
        if (!projects.isEmpty()) {
            errorMsg += " for projects: " + String.join(", ", projects);
        }
        errorMsg += "; " + e.getMessage();
    } catch (IOException e) {
        // An exception without a message must still be reported, otherwise
        // errorMsg stays null and the failure would go unnoticed.
        String message = e.getMessage();
        errorMsg = message != null ? message : e.toString();
    }
    return this;
}
Also used : SuperIndexSearcher(org.opengrok.indexer.configuration.SuperIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) HistoryContext(org.opengrok.indexer.search.context.HistoryContext) SortedSet(java.util.SortedSet) ScoreDoc(org.apache.lucene.search.ScoreDoc) Context(org.opengrok.indexer.search.context.Context) SettingsHelper(org.opengrok.indexer.search.SettingsHelper) MatchesUtils(org.apache.lucene.search.MatchesUtils) IndexableField(org.apache.lucene.index.IndexableField) Summarizer(org.opengrok.indexer.search.Summarizer) Term(org.apache.lucene.index.Term) Project(org.opengrok.indexer.configuration.Project) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) Document(org.apache.lucene.document.Document) Map(java.util.Map) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SortField(org.apache.lucene.search.SortField) Path(java.nio.file.Path) SuggestWord(org.apache.lucene.search.spell.SuggestWord) MatchesIterator(org.apache.lucene.search.MatchesIterator) Definitions(org.opengrok.indexer.analysis.Definitions) Sort(org.apache.lucene.search.Sort) DirectoryReader(org.apache.lucene.index.DirectoryReader) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker) Set(java.util.Set) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) IndexDatabase(org.opengrok.indexer.index.IndexDatabase) FileNotFoundException(java.io.FileNotFoundException) List(java.util.List) SuperIndexSearcher(org.opengrok.indexer.configuration.SuperIndexSearcher) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) IndexedSymlink(org.opengrok.indexer.index.IndexedSymlink) LoggerFactory(org.opengrok.indexer.logger.LoggerFactory) Pattern(java.util.regex.Pattern) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) IndexReader(org.apache.lucene.index.IndexReader) 
IndexSearcher(org.apache.lucene.search.IndexSearcher) ReaderUtil(org.apache.lucene.index.ReaderUtil) ParseException(org.apache.lucene.queryparser.classic.ParseException) SuggestMode(org.apache.lucene.search.spell.SuggestMode) Weight(org.apache.lucene.search.Weight) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) CompatibleAnalyser(org.opengrok.indexer.analysis.CompatibleAnalyser) FSDirectory(org.apache.lucene.store.FSDirectory) TopDocs(org.apache.lucene.search.TopDocs) AnalyzerGuru(org.opengrok.indexer.analysis.AnalyzerGuru) IOUtils(org.opengrok.indexer.util.IOUtils) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) IOException(java.io.IOException) File(java.io.File) ScoreMode(org.apache.lucene.search.ScoreMode) TermQuery(org.apache.lucene.search.TermQuery) Paths(java.nio.file.Paths) Matches(org.apache.lucene.search.Matches) FileNotFoundException(java.io.FileNotFoundException) FSDirectory(org.apache.lucene.store.FSDirectory) SortField(org.apache.lucene.search.SortField) IOException(java.io.IOException) Project(org.opengrok.indexer.configuration.Project) Sort(org.apache.lucene.search.Sort) ParseException(org.apache.lucene.queryparser.classic.ParseException) File(java.io.File) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker)

Example 2 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class SearchHelper method searchSingle.

/**
 * Looks up the index document that corresponds to exactly one file.
 * <p>
 * The file is converted to a path relative to the source root, a path
 * query is built for it (based on the current {@code builder} settings,
 * if any) and stored in {@code query}, and the best hit is accepted only
 * when its stored path is identical to the requested one.
 *
 * @param file the file whose definitions to find
 * @return {@link ScoreDoc#doc} or -1 if it could not be found
 * @throws IOException if an error happens when accessing the index
 * @throws ParseException if an error happens when building the Lucene query
 */
public int searchSingle(File file) throws IOException, ParseException {
    RuntimeEnvironment environment = RuntimeEnvironment.getInstance();
    String relativePath;
    try {
        relativePath = environment.getPathRelativeToSourceRoot(file);
    } catch (ForbiddenSymlinkException e) {
        LOGGER.log(Level.FINER, e.getMessage());
        return -1;
    }
    // Backslash is the Lucene escape character, so Windows style
    // delimiters are turned into forward slashes first.
    relativePath = relativePath.replace("\\", "/");

    QueryBuilder pathBuilder = new QueryBuilder();
    if (builder != null) {
        pathBuilder.reset(builder);
    }
    query = pathBuilder.setPath(relativePath).build();

    TopDocs hits = searcher.search(query, 1);
    if (hits.totalHits.value != 0) {
        int candidate = hits.scoreDocs[0].doc;
        String indexedPath = searcher.doc(candidate).get(QueryBuilder.PATH);
        // The hit counts only when the stored path is an exact match.
        if (relativePath.equals(indexedPath)) {
            return candidate;
        }
    }
    return -1;
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) ForbiddenSymlinkException(org.opengrok.indexer.util.ForbiddenSymlinkException) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Document(org.apache.lucene.document.Document)

Example 3 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class ContextTest method testLongLineNearBufferBoundary.

/**
 * Regression test for bug #383: a long (&gt;100 characters) line that
 * contains a match and is not terminated by a newline before the buffer
 * boundary must not trigger an {@code ArrayIndexOutOfBoundsException}.
 * @throws org.apache.lucene.queryparser.classic.ParseException parse exception
 */
@Test
public void testLongLineNearBufferBoundary() throws ParseException {
    // One single line filling the whole buffer, with the text to be
    // matched placed at its very end.
    final char[] tail = " this is a test ".toCharArray();
    final char[] buffer = new char[Context.MAXFILEREAD];
    Arrays.fill(buffer, 'a');
    int tailStart = Context.MAXFILEREAD - tail.length;
    System.arraycopy(tail, 0, buffer, tailStart, tail.length);

    Reader input = new CharArrayReader(buffer);
    QueryBuilder queryBuilder = new QueryBuilder().setFreetext("test");
    Context context = new Context(queryBuilder.build(), queryBuilder);
    StringWriter output = new StringWriter();

    boolean found = context.getContext(input, output, "", "", "", null, true,
            queryBuilder.isDefSearch(), null);
    assertTrue(found, "No match found");

    String html = output.toString();
    assertTrue(html.contains(" this is a <b>test</b>"), "Match not written to Writer");
    assertTrue(html.contains("href=\"#1\""), "No match on line #1");
}
Also used : CharArrayReader(java.io.CharArrayReader) StringWriter(java.io.StringWriter) CharArrayReader(java.io.CharArrayReader) Reader(java.io.Reader) StringReader(java.io.StringReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Test(org.junit.jupiter.api.Test)

Example 4 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class ContextTest method testMultiLineMatch.

/**
 * Verifies that a match spanning several lines yields well-formed markup.
 * The tags used to be nested incorrectly. Bug #15632.
 * @throws java.lang.Exception exception
 */
@Test
public void testMultiLineMatch() throws Exception {
    StringReader input = new StringReader("a\nb\nc\n");
    StringWriter xml = new StringWriter();
    // Open an XML document with a root element; the generated HTML
    // fragment goes inside it so the writer ends up holding a document
    // that can be fed to an XML parser.
    xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
            .append("<document>\n");

    // A phrase query whose tokens sit on three consecutive input lines.
    QueryBuilder queryBuilder = new QueryBuilder().setFreetext("\"a b c\"");
    Context context = new Context(queryBuilder.build(), queryBuilder);
    boolean matched = context.getContext(input, xml, "", "", "", null, true,
            queryBuilder.isDefSearch(), null);
    assertTrue(matched, "No match found");

    // Close the root element again.
    xml.append("\n</document>");

    // Parsing used to fail here with
    // SAXParseException: [Fatal Error] :3:55: The element type "b" must
    // be terminated by the matching end-tag "</b>".
    String document = xml.toString();
    assertNotNull(parseXML(document));
}
Also used : StringWriter(java.io.StringWriter) StringReader(java.io.StringReader) QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Test(org.junit.jupiter.api.Test)

Example 5 with QueryBuilder

use of org.opengrok.indexer.search.QueryBuilder in project OpenGrok by OpenGrok.

the class ContextTest method bug17582.

/**
 * Mixed-case symbol searches must produce results that contain tags.
 * Each call below hands one query, the expected line numbers and the
 * expected tags to the overloaded helper of the same name.
 * @throws java.lang.Exception exception
 */
@Test
public void bug17582() throws Exception {
    final String symbol = "Bug17582";

    // Freetext: case does not matter, so both lines are hits.
    QueryBuilder freetextOnly = new QueryBuilder().setFreetext(symbol);
    bug17582(freetextOnly, new int[] { 2, 3 }, new String[] { "type1", "type2" });

    // Definitions: hit only where the case is identical.
    QueryBuilder defsOnly = new QueryBuilder().setDefs(symbol);
    bug17582(defsOnly, new int[] { 3 }, new String[] { "type2" });

    // References: hit only where the case is identical.
    QueryBuilder refsOnly = new QueryBuilder().setRefs(symbol);
    bug17582(refsOnly, new int[] { 3 }, new String[] { "type2" });

    // Path: nothing inside the source text may match.
    QueryBuilder pathOnly = new QueryBuilder().setPath(symbol);
    bug17582(pathOnly, new int[0], new String[0]);

    // References are case sensitive, but the freetext part matches
    // regardless of case.
    QueryBuilder refsAndFreetext = new QueryBuilder().setRefs(symbol).setFreetext(symbol);
    bug17582(refsAndFreetext, new int[] { 2, 3 }, new String[] { "type1", "type2" });

    // References are case sensitive; the history part must not match
    // anything inside the source text.
    QueryBuilder refsAndHistory = new QueryBuilder().setRefs(symbol).setHist("bug17582");
    bug17582(refsAndHistory, new int[] { 3 }, new String[] { "type2" });
}
Also used : QueryBuilder(org.opengrok.indexer.search.QueryBuilder) Test(org.junit.jupiter.api.Test)

Aggregations

QueryBuilder (org.opengrok.indexer.search.QueryBuilder)12 StringReader (java.io.StringReader)7 StringWriter (java.io.StringWriter)7 Test (org.junit.jupiter.api.Test)7 CharArrayReader (java.io.CharArrayReader)4 Reader (java.io.Reader)4 Document (org.apache.lucene.document.Document)3 TopDocs (org.apache.lucene.search.TopDocs)3 RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment)3 ForbiddenSymlinkException (org.opengrok.indexer.util.ForbiddenSymlinkException)3 ArrayList (java.util.ArrayList)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 Query (org.apache.lucene.search.Query)2 Definitions (org.opengrok.indexer.analysis.Definitions)2 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1