Search in sources:

Example 1 with Scopes

use of org.opengrok.indexer.analysis.Scopes in project OpenGrok by OpenGrok.

From the class CAnalyzerFactoryTest, method testScopeAnalyzer:

/**
 * Test of writeXref method, of class CAnalyzerFactory.
 * <p>
 * Runs the C analyzer with scopes enabled over {@code c/sample.c} and then
 * verifies that the serialized {@link Scopes} stored in the document maps
 * each line to the expected function scope.
 *
 * @throws java.lang.Exception exception
 */
@Test
void testScopeAnalyzer() throws Exception {
    String path = repository.getSourceRoot() + "/c/sample.c";
    File f = new File(path);
    assertTrue(f.canRead() && f.isFile(), "c testfile " + f + " not found");
    Document doc = new Document();
    doc.add(new Field(QueryBuilder.FULLPATH, path, string_ft_nstored_nanalyzed_norms));
    StringWriter xrefOut = new StringWriter();
    analyzer.setCtags(ctags);
    analyzer.setScopesEnabled(true);
    analyzer.analyze(doc, getStreamSource(path), xrefOut);
    IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
    assertNotNull(scopesField);
    Scopes scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
    Scope globalScope = scopes.getScope(-1);
    // foo, bar, main
    assertEquals(3, scopes.size());
    for (int i = 0; i < 50; ++i) {
        if (i >= 8 && i <= 22) {
            // Lines 8-22 are expected to lie inside foo().
            assertEquals("foo", scopes.getScope(i).getName());
            assertNull(scopes.getScope(i).getNamespace());
        } else if (i >= 24 && i <= 38) {
            // Lines 24-38 are expected to lie inside bar().
            assertEquals("bar", scopes.getScope(i).getName());
            assertNull(scopes.getScope(i).getNamespace());
        } else if (i >= 41 && i <= 48) {
            // Lines 41-48 are expected to lie inside main().
            assertEquals("main", scopes.getScope(i).getName());
            assertNull(scopes.getScope(i).getNamespace());
        } else {
            // Every other line falls back to the global scope.
            // Fix: JUnit's assertEquals takes (expected, actual) in that
            // order; the original had the arguments swapped, which yields
            // misleading failure messages.
            assertEquals(globalScope, scopes.getScope(i));
            assertNull(scopes.getScope(i).getNamespace());
        }
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) IndexableField(org.apache.lucene.index.IndexableField) Field(org.apache.lucene.document.Field) StringWriter(java.io.StringWriter) Scope(org.opengrok.indexer.analysis.Scopes.Scope) Scopes(org.opengrok.indexer.analysis.Scopes) Document(org.apache.lucene.document.Document) File(java.io.File) Test(org.junit.jupiter.api.Test)

Example 2 with Scopes

use of org.opengrok.indexer.analysis.Scopes in project OpenGrok by OpenGrok.

From the class PlainAnalyzer, method analyze:

/**
 * Analyzes the document: adds the full-text field, ctags definitions (when a
 * full path and a ctags instance are available), a symbol token stream for
 * reference search, and — when xref output or scopes are requested — the
 * xref-derived scopes and line/LOC counts.
 *
 * @param doc the Lucene document to populate
 * @param src source of (re-openable) input streams for the file
 * @param xrefOut xref output writer, or {@code null} to skip xref output
 * @throws IOException on I/O error or when xref generation fails
 * @throws InterruptedException when xref generation is aborted (e.g. timeout)
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    Definitions defs = null;
    NullWriter nullWriter = null;
    doc.add(new OGKTextField(QueryBuilder.FULL, getReader(src.getStream())));
    String fullPath = doc.get(QueryBuilder.FULLPATH);
    if (fullPath != null && ctags != null) {
        defs = ctags.doCtags(fullPath);
        if (defs != null && defs.numberOfSymbols() > 0) {
            tryAddingDefs(doc, defs, src);
            // Store serialized definitions so search results can show tags
            // without re-running ctags.
            byte[] tags = defs.serialize();
            doc.add(new StoredField(QueryBuilder.TAGS, tags));
        }
    }
    /*
     * This is to explicitly use appropriate analyzer's token stream to
     * work around #1376: symbols search works like full text search.
     */
    JFlexTokenizer symbolTokenizer = symbolTokenizerFactory.get();
    OGKTextField ref = new OGKTextField(QueryBuilder.REFS, symbolTokenizer);
    symbolTokenizer.setReader(getReader(src.getStream()));
    doc.add(ref);
    if (scopesEnabled && xrefOut == null) {
        /*
         * Scopes are generated during xref generation. If xrefs are
         * turned off we still need to run writeXref() to produce scopes,
         * we use a dummy writer that will throw away any xref output.
         */
        nullWriter = new NullWriter();
        xrefOut = nullWriter;
    }
    if (xrefOut != null) {
        try (Reader in = getReader(src.getStream())) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            WriteXrefArgs args = new WriteXrefArgs(in, xrefOut);
            args.setDefs(defs);
            args.setProject(project);
            // Run xref generation on a dedicated executor with a timeout so
            // a pathological file cannot stall the indexer.
            CompletableFuture<XrefWork> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return new XrefWork(writeXref(args));
                } catch (IOException e) {
                    return new XrefWork(e);
                }
            }, env.getIndexerParallelizer().getXrefWatcherExecutor()).orTimeout(env.getXrefTimeout(), TimeUnit.SECONDS);
            // Will throw ExecutionException wrapping TimeoutException on timeout.
            XrefWork xrefWork = future.get();
            Xrefer xref = xrefWork.xrefer;
            if (xref != null) {
                Scopes scopes = xref.getScopes();
                if (scopes.size() > 0) {
                    byte[] scopesSerialized = scopes.serialize();
                    doc.add(new StoredField(QueryBuilder.SCOPES, scopesSerialized));
                }
                String path = doc.get(QueryBuilder.PATH);
                addNumLinesLOC(doc, new NumLinesLOC(path, xref.getLineNumber(), xref.getLOC()));
            } else {
                // Re-throw the exception from writeXref().
                throw new IOException(xrefWork.exception);
            }
        } catch (ExecutionException e) {
            // Fix: preserve the underlying failure (e.g. TimeoutException)
            // as the cause instead of flattening it into the message only,
            // so the full stack trace survives for diagnostics.
            // (InterruptedException has no (String, Throwable) constructor.)
            InterruptedException interruptedException =
                    new InterruptedException("failed to generate xref :" + e);
            interruptedException.initCause(e);
            throw interruptedException;
        } finally {
            if (nullWriter != null) {
                nullWriter.close();
            }
        }
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Definitions(org.opengrok.indexer.analysis.Definitions) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader) IOException(java.io.IOException) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs) NullWriter(org.opengrok.indexer.util.NullWriter) StoredField(org.apache.lucene.document.StoredField) Scopes(org.opengrok.indexer.analysis.Scopes) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with Scopes

use of org.opengrok.indexer.analysis.Scopes in project OpenGrok by OpenGrok.

From the class Results, method printPlain:

/**
 * Prints a plain-text hit: first tries the new highlighter-based context
 * presentation, and if that declines, re-analyzes the on-disk source with the
 * stored tags and scopes to render the legacy view.
 */
private static void printPlain(PrintPlainFinalArgs fargs, Document doc, int docId, String rpath) throws ClassNotFoundException, IOException {
    fargs.shelp.getSourceContext().toggleAlt();
    boolean didPresentNew = fargs.shelp.getSourceContext().getContext2(fargs.env, fargs.shelp.getSearcher(), docId, fargs.out, fargs.xrefPrefix, fargs.morePrefix, true, fargs.tabSize);
    if (didPresentNew) {
        return;
    }
    /*
     * Fall back to the old view, which re-analyzes text using
     * PlainLinetokenizer. E.g., when source code is updated (thus
     * affecting timestamps) but re-indexing is not yet complete.
     */
    IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
    Definitions tags = (tagsField == null) ? null : Definitions.deserialize(tagsField.binaryValue().bytes);
    IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
    Scopes scopes = (scopesField == null) ? new Scopes() : Scopes.deserialize(scopesField.binaryValue().bytes);
    boolean isDefSearch = fargs.shelp.getBuilder().isDefSearch();
    // SRCROOT is read with UTF-8 as a default.
    File sourceFile = new File(fargs.shelp.getSourceRoot(), rpath);
    try (FileInputStream fis = new FileInputStream(sourceFile);
        Reader r = IOUtils.createBOMStrippedReader(fis, StandardCharsets.UTF_8.name())) {
        fargs.shelp.getSourceContext().getContext(r, fargs.out, fargs.xrefPrefix, fargs.morePrefix, rpath, tags, true, isDefSearch, null, scopes);
    } catch (IOException ex) {
        String errMsg = String.format("No context for %s", sourceFile);
        if (LOGGER.isLoggable(Level.FINE)) {
            // WARNING but with FINE detail
            LOGGER.log(Level.WARNING, errMsg, ex);
        } else {
            LOGGER.log(Level.WARNING, errMsg);
        }
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) Definitions(org.opengrok.indexer.analysis.Definitions) Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream)

Example 4 with Scopes

use of org.opengrok.indexer.analysis.Scopes in project OpenGrok by OpenGrok.

From the class Context, method getContext2:

/**
 * Look for context for this instance's initialized query in a search result
 * {@link Document}, and output according to the parameters.
 * @param env required environment
 * @param searcher required search that produced the document
 * @param docId document ID for producing context
 * @param dest required target to write
 * @param urlPrefix prefix for links
 * @param morePrefix optional link to more... page
 * @param limit a value indicating if the number of matching lines should be
 * limited. N.b. unlike
 * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
 * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
 * the {@code limit} argument will not be interpreted w.r.t.
 * {@link RuntimeEnvironment#isQuickContextScan()}.
 * @param tabSize optional positive tab size that must accord with the value
 * used when indexing or else postings may be wrongly shifted until
 * re-indexing
 * @return Did it get any matching context?
 */
public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) {
    // Nothing to match against -- the query produced no terms.
    if (isEmpty()) {
        return false;
    }
    Document doc;
    try {
        doc = searcher.doc(docId);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
        return false;
    }
    // Restore the ctags definitions stored at index time, if any; a corrupt
    // field is logged and treated as "no context".
    Definitions tags = null;
    try {
        IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
        if (tagsField != null) {
            tags = Definitions.deserialize(tagsField.binaryValue().bytes);
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
        return false;
    }
    // Restore scopes; fall back to an empty Scopes when none were indexed.
    Scopes scopes;
    try {
        IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
        if (scopesField != null) {
            scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
        } else {
            scopes = new Scopes();
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
        return false;
    }
    /*
     * UnifiedHighlighter demands an analyzer "even if in some
     * circumstances it isn't used"; here it is not meant to be used.
     */
    PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE;
    AbstractAnalyzer anz = fac.getAnalyzer();
    // Build the URL pieces for rendered links (path and optional more... page).
    String path = doc.get(QueryBuilder.PATH);
    String pathE = Util.uriEncodePath(path);
    String urlPrefixE = urlPrefix == null ? "" : Util.uriEncodePath(urlPrefix);
    String moreURL = morePrefix == null ? null : Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI;
    ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
    /*
     * Lucene adds to the following value in FieldHighlighter, so avoid
     * integer overflow by not using Integer.MAX_VALUE -- Short is good
     * enough.
     */
    int linelimit = limit ? args.getContextLimit() : Short.MAX_VALUE;
    // Configure the formatter with link targets, definitions, and scopes
    // before handing it to the highlighter.
    ContextFormatter formatter = new ContextFormatter(args);
    formatter.setUrl(urlPrefixE + pathE);
    formatter.setDefs(tags);
    formatter.setScopes(scopes);
    formatter.setMoreUrl(moreURL);
    formatter.setMoreLimit(linelimit);
    OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz);
    uhi.setBreakIterator(StrictLineBreakIterator::new);
    uhi.setFormatter(formatter);
    uhi.setTabSize(tabSize);
    try {
        List<String> fieldList = qbuilder.getContextFields();
        String[] fields = fieldList.toArray(new String[0]);
        String res = uhi.highlightFieldsUnion(fields, query, docId, linelimit);
        if (res != null) {
            dest.append(res);
            return true;
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
    // Continue below.
    } catch (Throwable e) {
        // Anything other than an I/O problem is unexpected; log at SEVERE
        // and propagate to the caller.
        LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
        throw e;
    }
    return false;
}
Also used : Definitions(org.opengrok.indexer.analysis.Definitions) PlainAnalyzerFactory(org.opengrok.indexer.analysis.plain.PlainAnalyzerFactory) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer)

Example 5 with Scopes

use of org.opengrok.indexer.analysis.Scopes in project OpenGrok by OpenGrok.

From the class SearchEngine, method results:

/**
 * Get results; if no search was started before, no results are returned.
 * This method will requery if {@code end} is more than first query from search,
 * hence performance hit applies, if you want results in later pages than
 * number of cachePages. {@code end} has to be bigger than {@code start} !
 *
 * @param start start of the hit list
 * @param end end of the hit list
 * @param ret list of results from start to end or null/empty if no search
 * was started
 */
public void results(int start, int end, List<Hit> ret) {
    // return if no start search() was done
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    // TODO check if below fits for if end=old hits.length, or it should include it
    if (end > hits.length && !allCollected) {
        // do the requery, we want more than 5 pages
        collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
        try {
            searcher.search(query, collector);
        } catch (Exception e) {
            // this exception should never be hit, since search() will hit this before
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        // Cache the newly fetched documents starting from 'start'; earlier
        // ones are assumed to be in 'docs' already from the initial search.
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    // the only problem is that count of docs is usually smaller than number of results
    for (int ii = start; ii < end; ++ii) {
        // Alternate row shading for presentation.
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get(QueryBuilder.PATH);
            AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T));
            // Restore definitions and scopes stored at index time, if present.
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            Scopes scopes = null;
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                sourceContext.toggleAlt();
                try {
                    if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) {
                        // SRCROOT is read with UTF-8 as a default.
                        hasContext = sourceContext.getContext(new InputStreamReader(new FileInputStream(source + filename), StandardCharsets.UTF_8), null, null, null, filename, tags, nhits > 100, getDefinition() != null, ret, scopes);
                    } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) {
                        int l;
                        /*
                         * For backward compatibility, read the
                         * OpenGrok-produced document using the system
                         * default charset.
                         */
                        try (Reader r = RuntimeEnvironment.getInstance().isCompressXref() ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(TandemPath.join(data + Prefix.XREF_P + filename, ".gz")))))) : new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) {
                            l = r.read(content);
                        }
                        // NOTE(review): read() returns -1 at end of stream, so an
                        // empty xref would make new String(content, 0, l) throw --
                        // confirm xref files are never empty.
                        // TODO FIX below fragmenter according to either summarizer or context
                        // (to get line numbers, might be hard, since xref writers will need to be fixed too,
                        // they generate just one line of html code now :( )
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment[] fragments = sum.getFragments();
                        for (Fragment fragment : fragments) {
                            String match = fragment.toString();
                            if (match.length() > 0) {
                                if (!fragment.isEllipsis()) {
                                    Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}", new Object[] { genre, filename });
                        hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                    }
                } catch (FileNotFoundException exp) {
                    LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})", new Object[] { filename, exp.getMessage() });
                    hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                }
            }
            if (historyContext != null) {
                hasContext |= historyContext.getContext(source + filename, filename, ret);
            }
            // No context at all -- emit a bare hit so the file still shows up.
            if (!hasContext) {
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException | HistoryException e) {
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) MultiReader(org.apache.lucene.index.MultiReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) Fragment(org.opengrok.indexer.search.Summary.Fragment) GZIPInputStream(java.util.zip.GZIPInputStream) HTMLStripCharFilter(org.apache.lucene.analysis.charfilter.HTMLStripCharFilter) FileReader(java.io.FileReader) InputStreamReader(java.io.InputStreamReader) Definitions(org.opengrok.indexer.analysis.Definitions) HistoryException(org.opengrok.indexer.history.HistoryException) IOException(java.io.IOException) ParseException(org.apache.lucene.queryparser.classic.ParseException) HistoryException(org.opengrok.indexer.history.HistoryException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) FileInputStream(java.io.FileInputStream) IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) BufferedReader(java.io.BufferedReader)

Aggregations

Scopes (org.opengrok.indexer.analysis.Scopes)8 IndexableField (org.apache.lucene.index.IndexableField)7 Document (org.apache.lucene.document.Document)6 File (java.io.File)5 IOException (java.io.IOException)4 StringWriter (java.io.StringWriter)4 Field (org.apache.lucene.document.Field)4 Test (org.junit.jupiter.api.Test)4 Definitions (org.opengrok.indexer.analysis.Definitions)4 Scope (org.opengrok.indexer.analysis.Scopes.Scope)4 Reader (java.io.Reader)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer)2 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 InputStreamReader (java.io.InputStreamReader)1 ExecutionException (java.util.concurrent.ExecutionException)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 HTMLStripCharFilter (org.apache.lucene.analysis.charfilter.HTMLStripCharFilter)1