Search in sources :

Example 6 with Definitions

use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

the class PlainAnalyzer method analyze.

/**
 * Populates {@code doc} with the index fields derived from {@code src} and,
 * when requested, writes the cross reference.
 * <p>
 * The steps, in order:
 * <ol>
 * <li>add the {@link QueryBuilder#FULL} field backed by a reader on the
 * source stream;</li>
 * <li>if the document has a {@link QueryBuilder#FULLPATH} and {@code ctags}
 * is set, run ctags and, when it yields at least one symbol, add the
 * definitions and store their serialized form under
 * {@link QueryBuilder#TAGS};</li>
 * <li>add the {@link QueryBuilder#REFS} field backed by the symbol
 * tokenizer;</li>
 * <li>if {@code xrefOut} is non-null (or scopes are enabled, in which case a
 * discarding writer is substituted), run {@code writeXref} on the xref
 * watcher executor bounded by the configured xref timeout, then store the
 * non-empty scopes under {@link QueryBuilder#SCOPES} and record the line
 * count and LOC.</li>
 * </ol>
 *
 * @param doc the document to add fields to
 * @param src the source whose stream is read (several times)
 * @param xrefOut where to write the xref; may be {@code null}
 * @throws IOException if reading fails or {@code writeXref} reported an
 * {@link IOException}
 * @throws InterruptedException if waiting for the xref task is interrupted,
 * or if the task ends with an {@link ExecutionException} (e.g. on timeout);
 * in the latter case the {@link ExecutionException} is attached as the cause
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    Definitions defs = null;
    NullWriter nullWriter = null;
    doc.add(new OGKTextField(QueryBuilder.FULL, getReader(src.getStream())));
    String fullPath = doc.get(QueryBuilder.FULLPATH);
    if (fullPath != null && ctags != null) {
        defs = ctags.doCtags(fullPath);
        if (defs != null && defs.numberOfSymbols() > 0) {
            tryAddingDefs(doc, defs, src);
            byte[] tags = defs.serialize();
            doc.add(new StoredField(QueryBuilder.TAGS, tags));
        }
    }
    /*
     * This is to explicitly use appropriate analyzer's token stream to
     * work around #1376: symbols search works like full text search.
     */
    JFlexTokenizer symbolTokenizer = symbolTokenizerFactory.get();
    OGKTextField ref = new OGKTextField(QueryBuilder.REFS, symbolTokenizer);
    symbolTokenizer.setReader(getReader(src.getStream()));
    doc.add(ref);
    if (scopesEnabled && xrefOut == null) {
        /*
         * Scopes are generated during xref generation. If xrefs are
         * turned off we still need to run writeXref() to produce scopes,
         * we use a dummy writer that will throw away any xref output.
         */
        nullWriter = new NullWriter();
        xrefOut = nullWriter;
    }
    if (xrefOut != null) {
        try (Reader in = getReader(src.getStream())) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            WriteXrefArgs args = new WriteXrefArgs(in, xrefOut);
            args.setDefs(defs);
            args.setProject(project);
            // An IOException from writeXref() is carried back inside XrefWork
            // so that it can be re-thrown on the calling thread below.
            CompletableFuture<XrefWork> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return new XrefWork(writeXref(args));
                } catch (IOException e) {
                    return new XrefWork(e);
                }
            }, env.getIndexerParallelizer().getXrefWatcherExecutor()).orTimeout(env.getXrefTimeout(), TimeUnit.SECONDS);
            // Will throw ExecutionException wrapping TimeoutException on timeout.
            XrefWork xrefWork = future.get();
            Xrefer xref = xrefWork.xrefer;
            if (xref != null) {
                Scopes scopes = xref.getScopes();
                if (scopes.size() > 0) {
                    byte[] scopesSerialized = scopes.serialize();
                    doc.add(new StoredField(QueryBuilder.SCOPES, scopesSerialized));
                }
                String path = doc.get(QueryBuilder.PATH);
                addNumLinesLOC(doc, new NumLinesLOC(path, xref.getLineNumber(), xref.getLOC()));
            } else {
                // Re-throw the exception from writeXref().
                throw new IOException(xrefWork.exception);
            }
        } catch (ExecutionException e) {
            // InterruptedException has no cause-taking constructor, so attach
            // the cause explicitly instead of losing its stack trace.
            InterruptedException failure = new InterruptedException("failed to generate xref :" + e);
            failure.initCause(e);
            throw failure;
        } finally {
            if (nullWriter != null) {
                nullWriter.close();
            }
        }
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Definitions(org.opengrok.indexer.analysis.Definitions) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader) IOException(java.io.IOException) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs) NullWriter(org.opengrok.indexer.util.NullWriter) StoredField(org.apache.lucene.document.StoredField) Scopes(org.opengrok.indexer.analysis.Scopes) ExecutionException(java.util.concurrent.ExecutionException)

Example 7 with Definitions

use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

the class Results method printPlain.

/**
 * Prints the context of one plain-text search hit.
 * <p>
 * The source context's alternation flag is toggled first, then
 * {@code getContext2} is tried. Only when that presents nothing is the file
 * under the source root opened and passed to {@code getContext}, together
 * with the definitions and scopes stored in {@code doc}.
 *
 * @param fargs shared arguments (search helper, output, prefixes, tab size)
 * @param doc the hit document, consulted for stored tags and scopes
 * @param docId the document ID of the hit
 * @param rpath the path of the file relative to the source root
 * @throws ClassNotFoundException if stored tags or scopes cannot be
 * deserialized
 * @throws IOException if stored tags or scopes cannot be deserialized
 */
private static void printPlain(PrintPlainFinalArgs fargs, Document doc, int docId, String rpath) throws ClassNotFoundException, IOException {
    fargs.shelp.getSourceContext().toggleAlt();
    boolean presented = fargs.shelp.getSourceContext().getContext2(fargs.env, fargs.shelp.getSearcher(), docId, fargs.out, fargs.xrefPrefix, fargs.morePrefix, true, fargs.tabSize);
    if (presented) {
        return;
    }

    /*
     * Fall back to the old view, which re-analyzes text using
     * PlainLinetokenizer. E.g., when source code is updated (thus
     * affecting timestamps) but re-indexing is not yet complete.
     */
    IndexableField storedTags = doc.getField(QueryBuilder.TAGS);
    Definitions defs = storedTags == null
            ? null
            : Definitions.deserialize(storedTags.binaryValue().bytes);

    IndexableField storedScopes = doc.getField(QueryBuilder.SCOPES);
    Scopes scopes = storedScopes == null
            ? new Scopes()
            : Scopes.deserialize(storedScopes.binaryValue().bytes);

    boolean defSearch = fargs.shelp.getBuilder().isDefSearch();

    // SRCROOT is read with UTF-8 as a default.
    File target = new File(fargs.shelp.getSourceRoot(), rpath);
    try (FileInputStream stream = new FileInputStream(target);
            Reader reader = IOUtils.createBOMStrippedReader(stream, StandardCharsets.UTF_8.name())) {
        fargs.shelp.getSourceContext().getContext(reader, fargs.out, fargs.xrefPrefix, fargs.morePrefix, rpath, defs, true, defSearch, null, scopes);
    } catch (IOException ex) {
        String message = String.format("No context for %s", target);
        // Always a WARNING; the stack trace is included only at FINE detail.
        if (!LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.WARNING, message);
        } else {
            LOGGER.log(Level.WARNING, message, ex);
        }
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) Definitions(org.opengrok.indexer.analysis.Definitions) Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream)

Example 8 with Definitions

use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

the class Context method getContext.

/**
 * Matches this instance's line matchers against the given definitions and,
 * when a reader is supplied, against the tokens of the file content, and
 * reports what matched.
 * <p>
 * When {@code in} is {@code null} only {@code tags} are examined: each
 * matching definition is either added to {@code hits} (if {@code out} is
 * {@code null}) or written to {@code out} as an HTML link. When {@code in}
 * is not {@code null} the matching definitions are instead handed to the
 * line tokenizer, which produces the context while the content is scanned.
 * <p>
 * Closes the given <var>in</var> reader on return.
 *
 * @param in File to be matched; {@code null} to match {@code tags} only
 * @param out to write the context; may be {@code null}
 * @param urlPrefix URL prefix; {@code null} is treated as empty
 * @param morePrefix to link to more... page
 * @param path path of the file
 * @param tags format to highlight defs; may be {@code null}
 * @param limit should the number of matching lines be limited? Ignored
 * unless quick context scan is enabled in the runtime environment
 * @param isDefSearch is definition search
 * @param hits list of hits; may be {@code null}
 * @param scopes scopes object; may be {@code null}
 * @return Did it get any matching context?
 */
public boolean getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List<Hit> hits, Scopes scopes) {
    if (m == null) {
        IOUtils.close(in);
        return false;
    }
    boolean anything = false;
    TreeMap<Integer, String[]> matchingTags = null;
    String urlPrefixE = (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
    String pathE = Util.uriEncodePath(path);
    if (tags != null) {
        matchingTags = new TreeMap<>();
        try {
            for (Definitions.Tag tag : tags.getTags()) {
                for (LineMatcher lineMatcher : m) {
                    if (lineMatcher.match(tag.symbol) == LineMatcher.MATCHED) {
                        String scope = null;
                        String scopeUrl = null;
                        if (scopes != null) {
                            Scope scp = scopes.getScope(tag.line);
                            scope = scp.getName() + "()";
                            // Link target only; the anchor markup is written
                            // around it where the scope is emitted below.
                            scopeUrl = urlPrefixE + pathE + "#" + scp.getLineFrom();
                        }
                        /* desc[0] is matched symbol
                         * desc[1] is line number
                         * desc[2] is type
                         * desc[3] is matching line;
                         * desc[4] is scope
                         */
                        String[] desc = { tag.symbol, Integer.toString(tag.line), tag.type, tag.text, scope };
                        if (in == null) {
                            if (out == null) {
                                Hit hit = new Hit(path, Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"), desc[1], false, alt);
                                hits.add(hit);
                            } else {
                                out.write("<a class=\"s\" href=\"");
                                out.write(urlPrefixE);
                                out.write(pathE);
                                out.write("#");
                                out.write(desc[1]);
                                out.write("\"><span class=\"l\">");
                                out.write(desc[1]);
                                out.write("</span> ");
                                out.write(Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"));
                                out.write("</a> ");
                                if (desc[4] != null) {
                                    out.write("<span class=\"scope\"><a href=\"");
                                    out.write(scopeUrl);
                                    out.write("\">in ");
                                    out.write(desc[4]);
                                    out.write("</a></span> ");
                                }
                                out.write("<i>");
                                out.write(desc[2]);
                                out.write("</i><br/>");
                            }
                            anything = true;
                        } else {
                            matchingTags.put(tag.line, desc);
                        }
                        // The first matcher that matches decides for this tag.
                        break;
                    }
                }
            }
        } catch (Exception e) {
            if (hits != null) {
                // @todo verify why we ignore all exceptions?
                LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
            }
        }
    }
    // Just to get the matching tag send a null in
    if (in == null) {
        return anything;
    }
    PlainLineTokenizer tokens = new PlainLineTokenizer(null);
    boolean truncated = false;
    boolean lim = limit;
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    if (!env.isQuickContextScan()) {
        lim = false;
    }
    if (lim) {
        char[] buffer = new char[MAXFILEREAD];
        int charsRead;
        try {
            charsRead = in.read(buffer);
            if (charsRead == MAXFILEREAD) {
                // we probably only read parts of the file, so set the
                // truncated flag to enable the [all...] link that
                // requests all matches
                truncated = true;
                // cut the buffer at the last newline so that a partial line
                // is not scanned (don't look more than 100 characters back)
                for (int i = charsRead - 1; i > charsRead - 100; i--) {
                    if (buffer[i] == '\n') {
                        charsRead = i;
                        break;
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "An error occurred while reading data", e);
            // Honor the documented contract: the reader is closed on return.
            IOUtils.close(in);
            return anything;
        }
        // read() reports end of stream as -1, so an empty file must be
        // caught here as well as a zero-length read.
        if (charsRead <= 0) {
            IOUtils.close(in);
            return anything;
        }
        tokens.reInit(buffer, charsRead, out, urlPrefixE + pathE + "#", matchingTags, scopes);
    } else {
        tokens.reInit(in, out, urlPrefixE + pathE + "#", matchingTags, scopes);
    }
    if (hits != null) {
        tokens.setAlt(alt);
        tokens.setHitList(hits);
        tokens.setFilename(path);
    }
    int limit_max_lines = env.getContextLimit();
    try {
        String token;
        int matchState;
        int matchedLines = 0;
        while ((token = tokens.yylex()) != null && (!lim || matchedLines < limit_max_lines)) {
            for (LineMatcher lineMatcher : m) {
                matchState = lineMatcher.match(token);
                if (matchState == LineMatcher.MATCHED) {
                    // For a definition search only lines that carry a
                    // matching definition are printed.
                    if (!isDefSearch) {
                        tokens.printContext();
                    } else if (tokens.tags.containsKey(tokens.markedLine)) {
                        tokens.printContext();
                    }
                    matchedLines++;
                    break;
                } else if (matchState == LineMatcher.WAIT) {
                    tokens.holdOn();
                } else {
                    tokens.neverMind();
                }
            }
        }
        anything = matchedLines > 0;
        tokens.dumpRest();
        if (lim && (truncated || matchedLines == limit_max_lines) && out != null) {
            out.write("<a href=\"" + Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI + "\">[all...]</a>");
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
    } finally {
        IOUtils.close(in);
        if (out != null) {
            try {
                out.flush();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "Failed to flush stream: ", e);
            }
        }
    }
    return anything;
}
Also used : RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) Definitions(org.opengrok.indexer.analysis.Definitions) IOException(java.io.IOException) IOException(java.io.IOException) Hit(org.opengrok.indexer.search.Hit) Scope(org.opengrok.indexer.analysis.Scopes.Scope)

Example 9 with Definitions

use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

the class Context method getContext2.

/**
 * Produces highlighted context for this instance's initialized query from a
 * search result {@link Document} and appends it to {@code dest}.
 * @param env required environment
 * @param searcher required search that produced the document
 * @param docId document ID for producing context
 * @param dest required target to write
 * @param urlPrefix prefix for links
 * @param morePrefix optional link to more... page
 * @param limit a value indicating if the number of matching lines should be
 * limited. N.b. unlike
 * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
 * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
 * the {@code limit} argument will not be interpreted w.r.t.
 * {@link RuntimeEnvironment#isQuickContextScan()}.
 * @param tabSize optional positive tab size that must accord with the value
 * used when indexing or else postings may be wrongly shifted until
 * re-indexing
 * @return {@code true} if highlighted content was appended to {@code dest};
 * {@code false} if this instance is empty, the document or its stored
 * definitions/scopes could not be loaded, or nothing was highlighted
 */
public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) {
    if (isEmpty()) {
        return false;
    }

    Document hitDoc;
    try {
        hitDoc = searcher.doc(docId);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
        return false;
    }

    // Stored definitions are optional; a missing field leaves them null.
    Definitions defs = null;
    try {
        IndexableField storedTags = hitDoc.getField(QueryBuilder.TAGS);
        if (storedTags != null) {
            defs = Definitions.deserialize(storedTags.binaryValue().bytes);
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
        return false;
    }

    // Stored scopes are optional too, but default to an empty instance.
    Scopes docScopes;
    try {
        IndexableField storedScopes = hitDoc.getField(QueryBuilder.SCOPES);
        docScopes = storedScopes != null
                ? Scopes.deserialize(storedScopes.binaryValue().bytes)
                : new Scopes();
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
        return false;
    }

    /*
     * UnifiedHighlighter demands an analyzer "even if in some
     * circumstances it isn't used"; here it is not meant to be used.
     */
    AbstractAnalyzer analyzer = PlainAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();

    String encodedPath = Util.uriEncodePath(hitDoc.get(QueryBuilder.PATH));
    String encodedUrlPrefix = "";
    if (urlPrefix != null) {
        encodedUrlPrefix = Util.uriEncodePath(urlPrefix);
    }
    String moreLink = null;
    if (morePrefix != null) {
        moreLink = Util.uriEncodePath(morePrefix) + encodedPath + "?" + queryAsURI;
    }

    ContextArgs ctxArgs = new ContextArgs(env.getContextSurround(), env.getContextLimit());
    /*
     * Lucene adds to the following value in FieldHighlighter, so avoid
     * integer overflow by not using Integer.MAX_VALUE -- Short is good
     * enough.
     */
    int maxLines;
    if (limit) {
        maxLines = ctxArgs.getContextLimit();
    } else {
        maxLines = Short.MAX_VALUE;
    }

    ContextFormatter ctxFormatter = new ContextFormatter(ctxArgs);
    ctxFormatter.setUrl(encodedUrlPrefix + encodedPath);
    ctxFormatter.setDefs(defs);
    ctxFormatter.setScopes(docScopes);
    ctxFormatter.setMoreUrl(moreLink);
    ctxFormatter.setMoreLimit(maxLines);

    OGKUnifiedHighlighter highlighter = new OGKUnifiedHighlighter(env, searcher, analyzer);
    highlighter.setBreakIterator(StrictLineBreakIterator::new);
    highlighter.setFormatter(ctxFormatter);
    highlighter.setTabSize(tabSize);

    try {
        String[] contextFields = qbuilder.getContextFields().toArray(new String[0]);
        String highlighted = highlighter.highlightFieldsUnion(contextFields, query, docId, maxLines);
        if (highlighted != null) {
            dest.append(highlighted);
            return true;
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
        // Continue below.
    } catch (Throwable e) {
        // Anything else is logged for diagnosis and then propagated as is.
        LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
        throw e;
    }
    return false;
}
Also used : Definitions(org.opengrok.indexer.analysis.Definitions) PlainAnalyzerFactory(org.opengrok.indexer.analysis.plain.PlainAnalyzerFactory) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer)

Example 10 with Definitions

use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

the class SearchHelper method maybeRedirectToDefinition.

/**
 * Sets {@code redirect} to a direct xref link when the searched term is
 * defined exactly once in the given document.
 * <p>
 * Bug #3900: Check if this is a search for a single term, and that term is
 * a definition. If that's the case, and we only have one match, we'll
 * generate a direct link instead of a listing. This method covers the last
 * condition only: that there is only one definition of that symbol in the
 * matching document. Nothing happens if the document has no stored
 * definitions.
 *
 * @param docID ID of the single matching document
 * @param termQuery the single-term query whose term is the searched symbol
 * @throws IOException if the document cannot be read or its definitions
 * cannot be deserialized
 * @throws ClassNotFoundException if the definitions cannot be deserialized
 */
private void maybeRedirectToDefinition(int docID, TermQuery termQuery) throws IOException, ClassNotFoundException {
    Document hitDoc = searcher.doc(docID);
    IndexableField storedTags = hitDoc.getField(QueryBuilder.TAGS);
    if (storedTags == null) {
        return;
    }

    Definitions defs = Definitions.deserialize(storedTags.binaryValue().bytes);
    String symbol = termQuery.getTerm().text();
    if (defs.occurrences(symbol) != 1) {
        return;
    }

    // The encoded symbol serves both as the fragment parameter value and as
    // the URL fragment itself.
    String anchor = Util.uriEncode(symbol);
    redirect = contextPath + Prefix.XREF_P
            + Util.uriEncodePath(hitDoc.get(QueryBuilder.PATH))
            + '?' + QueryParameters.FRAGMENT_IDENTIFIER_PARAM_EQ + anchor
            + '#' + anchor;
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) Definitions(org.opengrok.indexer.analysis.Definitions) Document(org.apache.lucene.document.Document)

Aggregations

Definitions (org.opengrok.indexer.analysis.Definitions)14 IOException (java.io.IOException)5 Document (org.apache.lucene.document.Document)5 File (java.io.File)4 Reader (java.io.Reader)4 IndexableField (org.apache.lucene.index.IndexableField)4 Test (org.junit.jupiter.api.Test)4 Scopes (org.opengrok.indexer.analysis.Scopes)4 StringWriter (java.io.StringWriter)3 Hit (org.opengrok.indexer.search.Hit)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 StringReader (java.io.StringReader)2 ArrayList (java.util.ArrayList)2 Field (org.apache.lucene.document.Field)2 AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer)2 ExpandTabsReader (org.opengrok.indexer.analysis.ExpandTabsReader)2 RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileNotFoundException (java.io.FileNotFoundException)1