Example 11 with Definitions

Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

From the class DefinitionsTokenStream, the method createTokens:

private void createTokens(Definitions defs, LineBreaker brk) {
    for (Definitions.Tag tag : defs.getTags()) {
        // Shift from ctags's convention.
        int lineno = tag.line - 1;
        if (lineno >= 0 && lineno < brk.count() && tag.symbol != null && tag.text != null) {
            int lineoff = brk.getOffset(lineno);
            if (tag.lineStart >= 0) {
                PendingToken tok = new PendingToken(tag.symbol, lineoff + tag.lineStart, lineoff + tag.lineEnd);
                events.add(tok);
            }
        }
    }
    // Emit tokens in ascending offset order for the token stream.
    events.sort(PendingTokenOffsetsComparator.INSTANCE);
}
Also used : PendingToken(org.opengrok.indexer.analysis.PendingToken) Definitions(org.opengrok.indexer.analysis.Definitions)
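The offset arithmetic is the essence of createTokens(): ctags reports 1-based line numbers, while LineBreaker yields the absolute character offset of each 0-based line, so a tag's in-line column span becomes an absolute span by adding the line offset. Below is a minimal, hypothetical sketch of that conversion (not OpenGrok code), using a plain lineStarts array in place of OpenGrok's LineBreaker:

// Hypothetical sketch: convert a ctags tag's 1-based line number plus an
// in-line column span to absolute character offsets.
public class OffsetSketch {

    /** lineStarts[i] is the offset of the first character of 0-based line i. */
    static int[] toAbsoluteOffsets(int ctagsLine, int lineStart, int lineEnd, int[] lineStarts) {
        // Shift from ctags's 1-based convention, as createTokens() does.
        int lineno = ctagsLine - 1;
        if (lineno < 0 || lineno >= lineStarts.length || lineStart < 0) {
            return null; // same guard as createTokens()
        }
        int lineoff = lineStarts[lineno];
        return new int[] {lineoff + lineStart, lineoff + lineEnd};
    }

    public static void main(String[] args) {
        // For "ab\ncd\n": line 0 starts at offset 0, line 1 at offset 3.
        int[] span = toAbsoluteOffsets(2, 0, 2, new int[] {0, 3});
        System.out.println(span[0] + "-" + span[1]); // prints 3-5
    }
}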

Example 12 with Definitions

Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

From the class SearchEngine, the method results:

/**
 * Get results. If no search was started beforehand, no results are returned.
 * This method re-queries if {@code end} exceeds the hits cached by the first
 * query, so a performance hit applies when requesting pages beyond the
 * number of cachePages. {@code end} has to be bigger than {@code start}!
 *
 * @param start start index into the hit list
 * @param end end index into the hit list
 * @param ret receives the results from start to end; cleared (left empty)
 * if no search was started
 */
public void results(int start, int end, List<Hit> ret) {
    // Return early if search() was never run or the requested range is invalid.
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    // TODO check if below fits for if end=old hits.length, or it should include it
    if (end > hits.length && !allCollected) {
        // do the requery, we want more than 5 pages
        collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
        try {
            searcher.search(query, collector);
        } catch (Exception e) {
            // This exception should never occur here, since search() would have hit it first.
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    // Note: the count of docs is usually smaller than the number of results.
    for (int ii = start; ii < end; ++ii) {
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get(QueryBuilder.PATH);
            AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T));
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            Scopes scopes = null;
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                sourceContext.toggleAlt();
                try {
                    if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) {
                        // SRCROOT is read with UTF-8 as a default.
                        hasContext = sourceContext.getContext(new InputStreamReader(new FileInputStream(source + filename), StandardCharsets.UTF_8), null, null, null, filename, tags, nhits > 100, getDefinition() != null, ret, scopes);
                    } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) {
                        int l;
                        /**
                         * For backward compatibility, read the
                         * OpenGrok-produced document using the system
                         * default charset.
                         */
                        try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
                                ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(
                                        new GZIPInputStream(new FileInputStream(
                                                TandemPath.join(data + Prefix.XREF_P + filename, ".gz"))))))
                                : new HTMLStripCharFilter(new BufferedReader(
                                        new FileReader(data + Prefix.XREF_P + filename)))) {
                            l = r.read(content);
                        }
                        // TODO FIX below fragmenter according to either summarizer or context
                        // (to get line numbers, might be hard, since xref writers will need to be fixed too,
                        // they generate just one line of html code now :( )
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment[] fragments = sum.getFragments();
                        for (Fragment fragment : fragments) {
                            String match = fragment.toString();
                            if (match.length() > 0) {
                                if (!fragment.isEllipsis()) {
                                    Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}", new Object[] { genre, filename });
                        hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                    }
                } catch (FileNotFoundException exp) {
                    LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})", new Object[] { filename, exp.getMessage() });
                    hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                }
            }
            if (historyContext != null) {
                hasContext |= historyContext.getContext(source + filename, filename, ret);
            }
            if (!hasContext) {
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException | HistoryException e) {
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) MultiReader(org.apache.lucene.index.MultiReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) Fragment(org.opengrok.indexer.search.Summary.Fragment) GZIPInputStream(java.util.zip.GZIPInputStream) HTMLStripCharFilter(org.apache.lucene.analysis.charfilter.HTMLStripCharFilter) Definitions(org.opengrok.indexer.analysis.Definitions) HistoryException(org.opengrok.indexer.history.HistoryException) IOException(java.io.IOException) ParseException(org.apache.lucene.queryparser.classic.ParseException) FileInputStream(java.io.FileInputStream) IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer)
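A hedged sketch of driving this API: search() must run first to populate the internal hit list, after which results() pages through it, re-querying internally when a page lies beyond the cached hits. The setter setDefinition mirrors the getDefinition() call seen in the method above, but the overall setup and the Hit.getFilename() accessor are assumptions for illustration (requires java.util.ArrayList and java.util.List):

// Hypothetical helper showing the search()/results() pairing; assumes the
// engine's query fields and environment are configured elsewhere.
static void printAllResults(SearchEngine engine) {
    int total = engine.search(); // must run before results(), or ret stays empty
    List<Hit> page = new ArrayList<>();
    int pageSize = 20;
    for (int start = 0; start < total; start += pageSize) {
        int end = Math.min(start + pageSize, total);
        // results() clears "page" itself and re-queries when end exceeds
        // the hits cached by the first query (the performance hit noted above).
        engine.results(start, end, page);
        for (Hit hit : page) {
            System.out.println(hit.getFilename()); // assumed accessor
        }
    }
}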

Example 13 with Definitions

Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

From the class DefinitionsTokenStreamTest, the method testDefinitionsVsContent:

// DefinitionsTokenStream should not be used in try-with-resources
@SuppressWarnings("java:S2095")
private void testDefinitionsVsContent(boolean expandTabs, String sourceResource, String tagsResource, int expectedCount, boolean doSupplement, Map<Integer, SimpleEntry<String, String>> overrides) throws IOException {
    StreamSource src = getSourceFromResource(sourceResource);
    // Deserialize the ctags.
    int tabSize = expandTabs ? 8 : 0;
    String suppResource = doSupplement ? sourceResource : null;
    Definitions defs = StreamUtils.readTagsFromResource(tagsResource, suppResource, tabSize);
    // Read the whole input.
    StringBuilder bld = new StringBuilder();
    String source;
    try (Reader rdr = ExpandTabsReader.wrap(IOUtils.createBOMStrippedReader(src.getStream(), StandardCharsets.UTF_8.name()), tabSize)) {
        int c;
        while ((c = rdr.read()) != -1) {
            bld.append((char) c);
        }
        source = bld.toString();
    }
    // Deserialize the token stream.
    DefinitionsTokenStream tokstream = new DefinitionsTokenStream();
    tokstream.initialize(defs, src, in -> ExpandTabsReader.wrap(in, tabSize));
    // Iterate through stream.
    CharTermAttribute term = tokstream.getAttribute(CharTermAttribute.class);
    assertNotNull(term, "CharTermAttribute");
    OffsetAttribute offs = tokstream.getAttribute(OffsetAttribute.class);
    assertNotNull(offs, "OffsetAttribute");
    int count = 0;
    while (tokstream.incrementToken()) {
        ++count;
        String termValue = term.toString();
        String cutValue = source.substring(offs.startOffset(), offs.endOffset());
        // If an override exists, test it specially.
        if (overrides != null && overrides.containsKey(count)) {
            SimpleEntry<String, String> overkv = overrides.get(count);
            assertEquals(overkv.getKey(), cutValue, "cut term override" + count);
            assertEquals(overkv.getValue(), termValue, "cut term w.r.t. term override" + count);
            continue;
        }
        boolean cutContainsTerm = cutValue.endsWith(termValue);
        assertTrue(cutContainsTerm, "cut term" + count + " at " + (offs.startOffset()) + "-" + (offs.endOffset()) + "[" + cutValue + "] vs [" + termValue + "]");
    }
    assertEquals(expectedCount, count, "token count");
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) StreamSource(org.opengrok.indexer.analysis.StreamSource) Definitions(org.opengrok.indexer.analysis.Definitions) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Reader(java.io.Reader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader)
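The overrides map lets individual tokens be asserted specially when the cut source text legitimately differs from the indexed term. Keys are 1-based token ordinals (count is incremented before the lookup); each value pairs the expected cut text (key) with the expected term (value). A sketch of a call follows; the resource paths, expected count, and override entry are made up for illustration (requires java.util.Map, java.util.TreeMap, and java.util.AbstractMap.SimpleEntry):

// Hypothetical invocation of testDefinitionsVsContent().
Map<Integer, SimpleEntry<String, String>> overrides = new TreeMap<>();
// Token #4: the source cut is the qualified name, the term is the bare name.
overrides.put(4, new SimpleEntry<>("TSample.GetId", "GetId"));
testDefinitionsVsContent(true, "analysis/pascal/Sample.pas",
        "analysis/pascal/sampletags", 35, false, overrides);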

Example 14 with Definitions

Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.

From the class PascalAnalyzerFactoryTest, the method testAnalyzer:

/**
 * Test of the analyzer produced by PascalAnalyzerFactory: analyze a sample
 * Pascal file (which also writes the xref) and verify the ctags definitions.
 *
 * @throws java.lang.Exception exception
 */
@Test
void testAnalyzer() throws Exception {
    String path = repository.getSourceRoot() + "/pascal/Sample.pas";
    File f = new File(path);
    assertTrue(f.canRead() && f.isFile(), "pascal testfile " + f + " not found");
    Document doc = new Document();
    doc.add(new Field(QueryBuilder.FULLPATH, path, string_ft_nstored_nanalyzed_norms));
    StringWriter xrefOut = new StringWriter();
    analyzer.setCtags(ctags);
    analyzer.setScopesEnabled(true);
    analyzer.analyze(doc, getStreamSource(path), xrefOut);
    Definitions definitions = Definitions.deserialize(doc.getField(QueryBuilder.TAGS).binaryValue().bytes);
    assertNotNull(definitions);
    String[] type = new String[1];
    assertTrue(definitions.hasDefinitionAt("Sample", 22, type));
    assertThat(type[0], is("unit"));
    assertTrue(definitions.hasDefinitionAt("TSample", 28, type));
    assertThat(type[0], is("class"));
    assertTrue(definitions.hasDefinitionAt("Id", 40, type));
    assertThat(type[0], is("property"));
    assertTrue(definitions.hasDefinitionAt("Description", 41, type));
    assertThat(type[0], is("property"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetId", 48, type));
    assertThat(type[0], is("function"));
    assertTrue(definitions.hasDefinitionAt("TSample.SetId", 53, type));
    assertThat(type[0], is("procedure"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetClassName", 58, type));
    assertThat(type[0], is("function"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetUser", 63, type));
    assertThat(type[0], is("function"));
}
Also used : Field(org.apache.lucene.document.Field) StringWriter(java.io.StringWriter) Definitions(org.opengrok.indexer.analysis.Definitions) Document(org.apache.lucene.document.Document) File(java.io.File) Test(org.junit.jupiter.api.Test)
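Beyond point lookups with hasDefinitionAt(), which returns whether the symbol is defined at that line and writes the ctags type into type[0], a deserialized Definitions can be walked wholesale, as Example 11's createTokens() does. A minimal sketch using only the Tag fields that appear in Example 11; "defs" stands for the object deserialized in the test above:

// Sketch: enumerate all ctags-derived symbols from a Definitions instance.
for (Definitions.Tag tag : defs.getTags()) {
    // tag.line is 1-based, per the ctags convention noted in Example 11.
    System.out.println(tag.line + ": " + tag.symbol);
}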

Aggregations

Classes used together with Definitions across these examples, with usage counts:

Definitions (org.opengrok.indexer.analysis.Definitions): 14
IOException (java.io.IOException): 5
Document (org.apache.lucene.document.Document): 5
File (java.io.File): 4
Reader (java.io.Reader): 4
IndexableField (org.apache.lucene.index.IndexableField): 4
Test (org.junit.jupiter.api.Test): 4
Scopes (org.opengrok.indexer.analysis.Scopes): 4
StringWriter (java.io.StringWriter): 3
Hit (org.opengrok.indexer.search.Hit): 3
BufferedReader (java.io.BufferedReader): 2
FileInputStream (java.io.FileInputStream): 2
StringReader (java.io.StringReader): 2
ArrayList (java.util.ArrayList): 2
Field (org.apache.lucene.document.Field): 2
AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer): 2
ExpandTabsReader (org.opengrok.indexer.analysis.ExpandTabsReader): 2
RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment): 2
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
FileNotFoundException (java.io.FileNotFoundException): 1