Search in sources:

Example 1 with JFlexTokenizer

Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.

The class MandocAnalyzer, method analyze.

/**
 * Adds the full-text field to {@code doc} and, when an xref writer is given,
 * writes the cross-reference and records the line/LOC counts.
 *
 * @param doc the document that receives the generated fields
 * @param src the source whose stream is read (once for the field, once more for the xref)
 * @param xrefOut target for the xref output; when {@code null} no xref is written
 * @throws IOException propagated from reading the source or writing the xref
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Explicitly use the analyzer's own token stream as a workaround for
    // #1376 (symbols search works like full text search).
    JFlexTokenizer tokenizer = symbolTokenizerFactory.get();
    tokenizer.setReader(getReader(src.getStream()));
    doc.add(new OGKTextField(QueryBuilder.FULL, tokenizer));

    // Nothing more to do when no xref output was requested.
    if (xrefOut == null) {
        return;
    }

    try (Reader xrefInput = getReader(src.getStream())) {
        WriteXrefArgs xrefArgs = new WriteXrefArgs(xrefInput, xrefOut);
        xrefArgs.setProject(project);
        Xrefer xrefer = writeXref(xrefArgs);
        NumLinesLOC counts = new NumLinesLOC(doc.get(QueryBuilder.PATH),
                xrefer.getLineNumber(), xrefer.getLOC());
        addNumLinesLOC(doc, counts);
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs)

Example 2 with JFlexTokenizer

Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.

The class TroffAnalyzer, method analyze.

/**
 * Populates {@code doc} with the full-text field and, if {@code xrefOut} is
 * non-null, produces the xref and stores the resulting line and LOC counts.
 *
 * @param doc the document being populated
 * @param src provider of the input stream; a fresh stream is requested for each pass
 * @param xrefOut writer receiving the xref, or {@code null} to skip xref generation
 * @throws IOException propagated from reading the source or writing the xref
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Use the analyzer-specific token stream explicitly; this works around
    // #1376 (symbols search works like full text search).
    JFlexTokenizer fullTokenizer = symbolTokenizerFactory.get();
    fullTokenizer.setReader(getReader(src.getStream()));
    OGKTextField fullField = new OGKTextField(QueryBuilder.FULL, fullTokenizer);
    doc.add(fullField);

    boolean xrefRequested = xrefOut != null;
    if (!xrefRequested) {
        return;
    }

    try (Reader source = getReader(src.getStream())) {
        WriteXrefArgs writeArgs = new WriteXrefArgs(source, xrefOut);
        writeArgs.setProject(project);
        Xrefer result = writeXref(writeArgs);
        String docPath = doc.get(QueryBuilder.PATH);
        NumLinesLOC lineStats = new NumLinesLOC(docPath, result.getLineNumber(), result.getLOC());
        addNumLinesLOC(doc, lineStats);
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs)

Example 3 with JFlexTokenizer

Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.

The class PlainAnalyzer, method analyze.

/**
 * Populates {@code doc} with the full-text field, ctags definitions (when
 * available), the symbol references field and, if an xref is written, the
 * scopes and line/LOC counts produced by the xref pass.
 *
 * @param doc the document that receives the generated fields
 * @param src the source; a fresh stream is requested for each pass over the content
 * @param xrefOut target for the xref output; may be {@code null}, in which case
 *                the xref pass still runs against a discarding writer when scopes
 *                are enabled
 * @throws IOException if reading fails, or wrapping the {@link IOException}
 *                     raised by {@code writeXref()} on the executor
 * @throws InterruptedException if waiting for the xref work is interrupted, or
 *                              if it ends with an {@link ExecutionException}
 *                              (e.g. on timeout); in the latter case the
 *                              {@link ExecutionException} is attached as the cause
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    Definitions defs = null;
    NullWriter nullWriter = null;

    doc.add(new OGKTextField(QueryBuilder.FULL, getReader(src.getStream())));

    String fullPath = doc.get(QueryBuilder.FULLPATH);
    if (fullPath != null && ctags != null) {
        defs = ctags.doCtags(fullPath);
        if (defs != null && defs.numberOfSymbols() > 0) {
            tryAddingDefs(doc, defs, src);
            byte[] tags = defs.serialize();
            doc.add(new StoredField(QueryBuilder.TAGS, tags));
        }
    }

    /*
     * This is to explicitly use appropriate analyzer's token stream to
     * work around #1376: symbols search works like full text search.
     */
    JFlexTokenizer symbolTokenizer = symbolTokenizerFactory.get();
    OGKTextField ref = new OGKTextField(QueryBuilder.REFS, symbolTokenizer);
    symbolTokenizer.setReader(getReader(src.getStream()));
    doc.add(ref);

    if (scopesEnabled && xrefOut == null) {
        /*
         * Scopes are generated during xref generation. If xrefs are
         * turned off we still need to run writeXref() to produce scopes,
         * we use a dummy writer that will throw away any xref output.
         */
        nullWriter = new NullWriter();
        xrefOut = nullWriter;
    }

    if (xrefOut != null) {
        try (Reader in = getReader(src.getStream())) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            WriteXrefArgs args = new WriteXrefArgs(in, xrefOut);
            args.setDefs(defs);
            args.setProject(project);

            // Run the xref generation on the watcher executor so that it can
            // be bounded by the configured timeout. An IOException is carried
            // back inside XrefWork instead of being thrown from the lambda.
            CompletableFuture<XrefWork> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return new XrefWork(writeXref(args));
                } catch (IOException e) {
                    return new XrefWork(e);
                }
            }, env.getIndexerParallelizer().getXrefWatcherExecutor()).orTimeout(env.getXrefTimeout(), TimeUnit.SECONDS);

            // Will throw ExecutionException wrapping TimeoutException on timeout.
            XrefWork xrefWork = future.get();
            Xrefer xref = xrefWork.xrefer;
            if (xref != null) {
                Scopes scopes = xref.getScopes();
                if (scopes.size() > 0) {
                    byte[] scopesSerialized = scopes.serialize();
                    doc.add(new StoredField(QueryBuilder.SCOPES, scopesSerialized));
                }
                String path = doc.get(QueryBuilder.PATH);
                addNumLinesLOC(doc, new NumLinesLOC(path, xref.getLineNumber(), xref.getLOC()));
            } else {
                // Re-throw the exception from writeXref().
                throw new IOException(xrefWork.exception);
            }
        } catch (ExecutionException e) {
            // InterruptedException has no (message, cause) constructor, so the
            // cause is attached explicitly to keep the original stack trace
            // (e.g. the wrapped TimeoutException) available to callers.
            InterruptedException failure = new InterruptedException("failed to generate xref :" + e);
            failure.initCause(e);
            throw failure;
        } finally {
            if (nullWriter != null) {
                nullWriter.close();
            }
        }
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Definitions(org.opengrok.indexer.analysis.Definitions) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader) IOException(java.io.IOException) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs) NullWriter(org.opengrok.indexer.util.NullWriter) StoredField(org.apache.lucene.document.StoredField) Scopes(org.opengrok.indexer.analysis.Scopes) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with JFlexTokenizer

Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.

The class UuencodeAnalyzer, method analyze.

/**
 * Adds the full-text field to {@code doc} and writes the xref when a writer
 * is supplied.
 *
 * @param doc the document that receives the full-text field
 * @param src the source; its stream is requested once for the field and once for the xref
 * @param xrefOut destination of the xref output; {@code null} disables xref writing
 * @throws IOException propagated from reading the source or writing the xref
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Explicitly use the analyzer's own token stream as a workaround for
    // #1376 (symbols search works like full text search).
    JFlexTokenizer tokenizer = symbolTokenizerFactory.get();
    OGKTextField fullField = new OGKTextField(QueryBuilder.FULL, tokenizer);
    tokenizer.setReader(getReader(src.getStream()));
    doc.add(fullField);

    // Without a writer there is no xref to produce.
    if (xrefOut == null) {
        return;
    }

    try (Reader xrefInput = getReader(src.getStream())) {
        WriteXrefArgs xrefArgs = new WriteXrefArgs(xrefInput, xrefOut);
        xrefArgs.setProject(project);
        writeXref(xrefArgs);
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) Reader(java.io.Reader) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs)

Example 5 with JFlexTokenizer

Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.

The class PerlSymbolTokenizerTest, method testOffsetAttribute.

/**
 * Helper method for {@link #testOffsetAttribute()}: tokenizes
 * {@code inputText} with a single matcher implementation and checks every
 * produced term, together with its start and end offsets, against
 * {@code expectedTokens}.
 *
 * @param klass matcher implementation, instantiated reflectively through its
 *              {@code Reader} constructor
 * @param inputText the text to tokenize
 * @param expectedTokens the terms expected, in order of appearance
 * @throws Exception if the matcher cannot be instantiated or tokenizing fails
 */
private void testOffsetAttribute(Class<? extends JFlexSymbolMatcher> klass, String inputText, String[] expectedTokens) throws Exception {
    Reader input = new StringReader(inputText);
    JFlexSymbolMatcher symbolMatcher = klass.getConstructor(Reader.class).newInstance(input);
    JFlexTokenizer tokenizer = new JFlexTokenizer(symbolMatcher);
    CharTermAttribute termAttr = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttr = tokenizer.addAttribute(OffsetAttribute.class);

    int seen;
    for (seen = 0; tokenizer.incrementToken(); seen++) {
        assertTrue(seen < expectedTokens.length, "too many tokens");
        // The token counter is 0-based so that it indexes String[] directly.
        String wanted = expectedTokens[seen];
        assertEquals(wanted, termAttr.toString(), "term" + seen);

        // Offsets are compared against the first occurrence of the term in the input.
        int wantedStart = inputText.indexOf(wanted);
        assertEquals(wantedStart, offsetAttr.startOffset(), "start" + seen);
        int wantedEnd = inputText.indexOf(wanted) + wanted.length();
        assertEquals(wantedEnd, offsetAttr.endOffset(), "end" + seen);
    }
    assertEquals(expectedTokens.length, seen, "wrong number of tokens");
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) JFlexSymbolMatcher(org.opengrok.indexer.analysis.JFlexSymbolMatcher) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Reader(java.io.Reader) StringReader(java.io.StringReader)

Aggregations

JFlexTokenizer (org.opengrok.indexer.analysis.JFlexTokenizer) 9, Reader (java.io.Reader) 5, CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute) 5, IOException (java.io.IOException) 4, OGKTextField (org.opengrok.indexer.analysis.OGKTextField) 4, WriteXrefArgs (org.opengrok.indexer.analysis.WriteXrefArgs) 4, LinkedList (java.util.LinkedList) 3, NumLinesLOC (org.opengrok.indexer.analysis.NumLinesLOC) 3, Xrefer (org.opengrok.indexer.analysis.Xrefer) 3, OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute) 2, ByteArrayInputStream (java.io.ByteArrayInputStream) 1, InputStreamReader (java.io.InputStreamReader) 1, StringReader (java.io.StringReader) 1, ArrayList (java.util.ArrayList) 1, ExecutionException (java.util.concurrent.ExecutionException) 1, StoredField (org.apache.lucene.document.StoredField) 1, Definitions (org.opengrok.indexer.analysis.Definitions) 1, ExpandTabsReader (org.opengrok.indexer.analysis.ExpandTabsReader) 1, JFlexSymbolMatcher (org.opengrok.indexer.analysis.JFlexSymbolMatcher) 1, Scopes (org.opengrok.indexer.analysis.Scopes) 1