Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class HaskellXrefTest, method basicTest:
@Test
public void basicTest() throws IOException {
String s = "putStrLn \"Hello, world!\"";
Writer w = new StringWriter();
HaskellAnalyzerFactory fac = new HaskellAnalyzerFactory();
AbstractAnalyzer analyzer = fac.getAnalyzer();
WriteXrefArgs xargs = new WriteXrefArgs(new StringReader(s), w);
Xrefer xref = analyzer.writeXref(xargs);
assertLinesEqual("Haskell basicTest", "<a class=\"l\" name=\"1\" href=\"#1\">1</a>" + "<a href=\"/source/s?defs=putStrLn\" class=\"intelliWindow-symbol\"" + " data-definition-place=\"undefined-in-file\">putStrLn</a>" + " <span class=\"s\">"Hello, world!"</span>\n", w.toString());
assertEquals(1, xref.getLOC(), "Haskell LOC");
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class GZIPAnalyzer, method analyze:
/**
 * Analyzes a gzip-compressed file: wraps the source in a decompressing
 * stream, lets {@code AnalyzerGuru} pick an analyzer for the uncompressed
 * content (based on the path with the {@code .gz} suffix stripped), and
 * delegates analysis to it. Also updates this analyzer's genre and the
 * document's file-type field accordingly.
 *
 * @param doc the Lucene document being populated
 * @param src stream source for the compressed file
 * @param xrefOut where to write the xref, or {@code null}
 * @throws IOException if reading the stream fails
 * @throws InterruptedException if the delegated analysis is interrupted
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    AbstractAnalyzer fa;
    // Wrap the raw source so readers see the decompressed bytes.
    StreamSource gzSrc = wrap(src);
    String path = doc.get(QueryBuilder.PATH);
    // Only proceed for paths that actually end in ".gz" (case-insensitive).
    if (path != null && path.toLowerCase(Locale.ROOT).endsWith(".gz")) {
        // Strip the 3-character ".gz" suffix to get the inner file name.
        String newname = path.substring(0, path.length() - 3);
        // System.err.println("GZIPPED OF = " + newname);
        // Sniff the decompressed content to choose an analyzer; the probe
        // stream is closed immediately after detection.
        try (InputStream gzis = gzSrc.getStream()) {
            fa = AnalyzerGuru.getAnalyzer(gzis, newname);
        }
        if (fa == null) {
            // No analyzer matched: treat the content as opaque data.
            this.g = Genre.DATA;
            LOGGER.log(Level.WARNING, "Did not analyze {0}, detected as data.", newname);
            // TODO we could probably wrap tar analyzer here, need to do research on reader coming from gzis ...
        } else {
            // simple file gziped case captured here
            // Plain or xrefable inner content makes the gzip wrapper xrefable
            // too; everything else stays data.
            if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
                this.g = Genre.XREFABLE;
            } else {
                this.g = Genre.DATA;
            }
            // Delegate the real analysis to the inner-content analyzer.
            fa.analyze(doc, gzSrc, xrefOut);
            // The delegate may have stored its own type field (T); replace it
            // with this wrapper's genre so searches reflect the gzip view.
            if (doc.get(QueryBuilder.T) != null) {
                doc.removeField(QueryBuilder.T);
                if (g == Genre.XREFABLE) {
                    doc.add(new Field(QueryBuilder.T, g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
                }
            }
        }
    }
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class Context, method getContext2:
/**
 * Look for context for this instance's initialized query in a search result
 * {@link Document}, and output according to the parameters.
 * @param env required environment
 * @param searcher required search that produced the document
 * @param docId document ID for producing context
 * @param dest required target to write
 * @param urlPrefix prefix for links
 * @param morePrefix optional link to more... page
 * @param limit a value indicating if the number of matching lines should be
 * limited. N.b. unlike
 * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
 * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
 * the {@code limit} argument will not be interpreted w.r.t.
 * {@link RuntimeEnvironment#isQuickContextScan()}.
 * @param tabSize optional positive tab size that must accord with the value
 * used when indexing or else postings may be wrongly shifted until
 * re-indexing
 * @return Did it get any matching context?
 */
public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) {
    // Nothing to do for an empty query.
    if (isEmpty()) {
        return false;
    }
    Document doc;
    try {
        doc = searcher.doc(docId);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
        return false;
    }
    // Deserialize stored ctags definitions, if the document has any; these
    // drive symbol decoration in the formatted context.
    Definitions tags = null;
    try {
        IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
        if (tagsField != null) {
            tags = Definitions.deserialize(tagsField.binaryValue().bytes);
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
        return false;
    }
    // Deserialize stored scope information, defaulting to an empty Scopes
    // when the document has none.
    Scopes scopes;
    try {
        IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
        if (scopesField != null) {
            scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
        } else {
            scopes = new Scopes();
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
        return false;
    }
    /*
     * UnifiedHighlighter demands an analyzer "even if in some
     * circumstances it isn't used"; here it is not meant to be used.
     */
    PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE;
    AbstractAnalyzer anz = fac.getAnalyzer();
    // Pre-encode the path and link prefixes for embedding in generated URLs.
    String path = doc.get(QueryBuilder.PATH);
    String pathE = Util.uriEncodePath(path);
    String urlPrefixE = urlPrefix == null ? "" : Util.uriEncodePath(urlPrefix);
    String moreURL = morePrefix == null ? null : Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI;
    ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
    /*
     * Lucene adds to the following value in FieldHighlighter, so avoid
     * integer overflow by not using Integer.MAX_VALUE -- Short is good
     * enough.
     */
    int linelimit = limit ? args.getContextLimit() : Short.MAX_VALUE;
    // Configure the formatter that renders matched passages as OpenGrok
    // xref-style HTML with definition and scope decoration.
    ContextFormatter formatter = new ContextFormatter(args);
    formatter.setUrl(urlPrefixE + pathE);
    formatter.setDefs(tags);
    formatter.setScopes(scopes);
    formatter.setMoreUrl(moreURL);
    formatter.setMoreLimit(linelimit);
    // Highlight with line-oriented breaks and the tab size used at indexing.
    OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz);
    uhi.setBreakIterator(StrictLineBreakIterator::new);
    uhi.setFormatter(formatter);
    uhi.setTabSize(tabSize);
    try {
        List<String> fieldList = qbuilder.getContextFields();
        String[] fields = fieldList.toArray(new String[0]);
        // A non-null result means at least one passage matched.
        String res = uhi.highlightFieldsUnion(fields, query, docId, linelimit);
        if (res != null) {
            dest.append(res);
            return true;
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
        // Continue below.
    } catch (Throwable e) {
        // Unexpected errors are logged at SEVERE and re-thrown.
        LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
        throw e;
    }
    return false;
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class IndexDatabase, method addFile:
/**
 * Add a file to the Lucene index (and generate a xref file).
 *
 * @param file The file to add
 * @param path The path to the file (from source root)
 * @param ctags a defined instance to use (only if its binary is not null)
 * @throws java.io.IOException if an error occurs
 * @throws InterruptedException if a timeout occurs
 */
private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    AbstractAnalyzer fa = getAnalyzerFor(file, path);
    // Notify listeners that indexing of this file is starting.
    for (IndexChangedListener listener : listeners) {
        listener.fileAdd(path, fa.getClass().getSimpleName());
    }
    // Propagate per-project tab size (0 = unset) and the optional ctags
    // timeout before handing ctags to the analyzer.
    ctags.setTabSize(project != null ? project.getTabSize() : 0);
    if (env.getCtagsTimeout() != 0) {
        ctags.setTimeout(env.getCtagsTimeout());
    }
    fa.setCtags(ctags);
    fa.setCountsAggregator(countsAggregator);
    fa.setProject(Project.getProject(path));
    fa.setScopesEnabled(env.isScopesEnabled());
    fa.setFoldingEnabled(env.isFoldingEnabled());
    Document doc = new Document();
    CountingWriter xrefOut = null;
    try {
        String xrefAbs = null;
        File transientXref = null;
        if (env.isGenerateHtml()) {
            // Write the xref to a transient "pending" file first; it is
            // renamed into place only after successful analysis.
            xrefAbs = getXrefPath(path);
            transientXref = new File(TandemPath.join(xrefAbs, PendingFileCompleter.PENDING_EXTENSION));
            xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
        }
        // Run the analyzer: fills the Lucene document and (optionally)
        // streams the xref to xrefOut.
        analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
        // Avoid producing empty xref files.
        if (xrefOut != null && xrefOut.getCount() > 0) {
            // Schedule the pending file to be renamed to its final name.
            PendingFileRenaming ren = new PendingFileRenaming(xrefAbs, transientXref.getAbsolutePath());
            completer.add(ren);
        } else if (xrefOut != null) {
            LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
            completer.add(new PendingFileDeletion(transientXref.toString()));
        }
    } catch (InterruptedException e) {
        LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}", new Object[] { path, e.getMessage() });
        cleanupResources(doc);
        // Re-throw so the caller can abort the indexing run.
        throw e;
    } catch (Exception e) {
        // Analysis failures skip the file rather than failing the run.
        LOGGER.log(Level.INFO, "Skipped file ''{0}'' because the analyzer didn''t " + "understand it.", path);
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "Exception from analyzer " + fa.getClass().getName(), e);
        }
        cleanupResources(doc);
        return;
    } finally {
        // Detach shared state from the (possibly cached) analyzer and close
        // the xref writer regardless of the outcome.
        fa.setCtags(null);
        fa.setCountsAggregator(null);
        if (xrefOut != null) {
            xrefOut.close();
        }
    }
    try {
        writer.addDocument(doc);
    } catch (Throwable t) {
        cleanupResources(doc);
        throw t;
    }
    setDirty();
    // Notify listeners that the file was indexed successfully.
    for (IndexChangedListener listener : listeners) {
        listener.fileAdded(path, fa.getClass().getSimpleName());
    }
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class IndexDatabase, method checkSettings:
/**
 * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
 * or return a value to indicate mismatch.
 * @param file the source file object
 * @param path the source file path
 * @return {@code false} if a mismatch is detected
 */
private boolean checkSettings(File file, String path) throws IOException {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    // potential xref writer
    boolean outIsXrefWriter = false;
    // Compare the tab size recorded in the index settings against the
    // project's current requirement (0 = no explicit setting).
    int reqTabSize = project != null && project.hasTabSizeSetting() ? project.getTabSize() : 0;
    Integer actTabSize = settings.getTabSize();
    if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
        LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
        return false;
    }
    int n = 0;
    // Walk the postings for the current uid term; normally a single
    // document per file is expected, and the loop breaks after checking it.
    postsIter = uidIter.postings(postsIter);
    while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ++n;
        // Read a limited-fields version of the document.
        Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
        if (doc == null) {
            LOGGER.log(Level.FINER, "No Document: {0}", path);
            continue;
        }
        long reqGuruVersion = AnalyzerGuru.getVersionNo();
        Long actGuruVersion = settings.getAnalyzerGuruVersion();
        /*
         * For an older OpenGrok index that does not yet have a defined,
         * stored analyzerGuruVersion, break so that no extra work is done.
         * After a re-index, the guru version check will be active.
         */
        if (actGuruVersion == null) {
            break;
        }
        AbstractAnalyzer fa = null;
        String fileTypeName;
        if (actGuruVersion.equals(reqGuruVersion)) {
            // Same guru version: trust the stored TYPE field and just look
            // up its analyzer factory.
            fileTypeName = doc.get(QueryBuilder.TYPE);
            if (fileTypeName == null) {
                // (Should not get here, but break just in case.)
                LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
                break;
            }
            AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
            if (fac != null) {
                fa = fac.getAnalyzer();
            }
        } else {
            /*
             * If the stored guru version does not match, re-verify the
             * selection of analyzer or return a value to indicate the
             * analyzer is now mis-matched.
             */
            LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);
            fa = getAnalyzerFor(file, path);
            fileTypeName = fa.getFileTypeName();
            String oldTypeName = doc.get(QueryBuilder.TYPE);
            if (!fileTypeName.equals(oldTypeName)) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}", new Object[] { oldTypeName, fileTypeName, path });
                }
                return false;
            }
        }
        // Verify Analyzer version, or return a value to indicate mismatch.
        long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
        Long actVersion = settings.getAnalyzerVersion(fileTypeName);
        if (actVersion == null || !actVersion.equals(reqVersion)) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "{0} version mismatch: {1}", new Object[] { fileTypeName, path });
            }
            return false;
        }
        // NOTE(review): this flags ANY resolved analyzer as an xref writer;
        // confirm whether a finer genre/capability check is intended here.
        if (fa != null) {
            outIsXrefWriter = true;
        }
        // The versions checks have passed.
        break;
    }
    if (n < 1) {
        LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
        return false;
    }
    // If the economy mode is on, this should be treated as a match.
    if (!env.isGenerateHtml()) {
        // Economy mode: remove any stale xref file left from a prior run.
        if (xrefExistsFor(path)) {
            LOGGER.log(Level.FINEST, "Extraneous {0} , removing its xref file", path);
            removeXrefFile(path);
        }
        return true;
    }
    // With HTML generation on, an xref-writing analyzer must have produced
    // an xref file for this to count as a match.
    return (!outIsXrefWriter || xrefExistsFor(path));
}
Aggregations