Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.
The class DefinitionsTokenStream, method createTokens.
private void createTokens(Definitions defs, LineBreaker brk) {
    for (Definitions.Tag tag : defs.getTags()) {
        // Shift from ctags's 1-based line numbering to 0-based.
        int lineno = tag.line - 1;
        if (lineno >= 0 && lineno < brk.count() && tag.symbol != null && tag.text != null) {
            int lineoff = brk.getOffset(lineno);
            if (tag.lineStart >= 0) {
                PendingToken tok = new PendingToken(tag.symbol, lineoff + tag.lineStart,
                        lineoff + tag.lineEnd);
                events.add(tok);
            }
        }
    }
    events.sort(PendingTokenOffsetsComparator.INSTANCE);
}
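The key step above is the offset arithmetic: tag.lineStart and tag.lineEnd are column positions within the tag's line, and brk.getOffset(lineno) supplies the absolute character offset at which that line begins. A standalone sketch of the same computation over a plain two-line buffer (illustrative values only, not the LineBreaker API):

    // Illustration only: how a (line, column) pair maps to absolute offsets.
    String content = "first line\nint counter;\n";
    int lineOffset = content.indexOf('\n') + 1;  // start of line 1; what getOffset(1) yields for this buffer
    int colStart = 4;                            // column where "counter" begins on line 1
    int colEnd = 11;                             // column just past "counter"
    String symbol = content.substring(lineOffset + colStart, lineOffset + colEnd);
    // symbol.equals("counter") == true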
Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.
The class SearchEngine, method results.
/**
 * Get results; if no search was started beforehand, no results are returned.
 * This method re-runs the query if {@code end} goes beyond the hits cached
 * by the initial search, so a performance hit applies when you want results
 * on pages later than the number of cachePages. {@code end} has to be
 * bigger than {@code start}!
 *
 * @param start start of the hit list
 * @param end end of the hit list
 * @param ret list that is filled with the results from start to end, or
 * emptied if no search was started
 */
public void results(int start, int end, List<Hit> ret) {
    // Return if no search() was started beforehand.
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    // TODO: check whether the condition below fits when end == old hits.length, or whether it should include it
    if (end > hits.length && !allCollected) {
        // Do the requery; we want more than 5 pages.
        collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
        try {
            searcher.search(query, collector);
        } catch (Exception e) {
            // This exception should never be hit here, since search() would have hit it first.
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    // The only problem is that the count of docs is usually smaller than the number of results.
    for (int ii = start; ii < end; ++ii) {
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get(QueryBuilder.PATH);
            AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T));
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            Scopes scopes = null;
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                sourceContext.toggleAlt();
                try {
                    if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) {
                        // SRCROOT is read with UTF-8 as a default.
                        hasContext = sourceContext.getContext(
                                new InputStreamReader(new FileInputStream(source + filename),
                                        StandardCharsets.UTF_8),
                                null, null, null, filename, tags, nhits > 100,
                                getDefinition() != null, ret, scopes);
                    } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) {
                        int l;
                        /*
                         * For backward compatibility, read the OpenGrok-produced
                         * document using the system default charset.
                         */
                        try (Reader r = RuntimeEnvironment.getInstance().isCompressXref()
                                ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(
                                        new GZIPInputStream(new FileInputStream(
                                                TandemPath.join(data + Prefix.XREF_P + filename, ".gz"))))))
                                : new HTMLStripCharFilter(new BufferedReader(
                                        new FileReader(data + Prefix.XREF_P + filename)))) {
                            l = r.read(content);
                        }
                        // TODO: fix the fragmenter below according to either summarizer or context
                        // (to get line numbers, which might be hard, since the xref writers would
                        // need to be fixed too; they generate just one line of HTML code now).
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment[] fragments = sum.getFragments();
                        for (Fragment fragment : fragments) {
                            String match = fragment.toString();
                            if (match.length() > 0) {
                                if (!fragment.isEllipsis()) {
                                    Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}",
                                new Object[] {genre, filename});
                        hasContext |= sourceContext.getContext(null, null, null, null, filename,
                                tags, false, false, ret, scopes);
                    }
                } catch (FileNotFoundException exp) {
                    LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})",
                            new Object[] {filename, exp.getMessage()});
                    hasContext |= sourceContext.getContext(null, null, null, null, filename,
                            tags, false, false, ret, scopes);
                }
            }
            if (historyContext != null) {
                hasContext |= historyContext.getContext(source + filename, filename, ret);
            }
            if (!hasContext) {
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException | HistoryException e) {
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
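The TAGS lookup inside the loop is how ctags output is recovered from an indexed Lucene document. A minimal sketch isolating just that step, assuming doc is a Document fetched as above and that the caller handles the IOException and ClassNotFoundException that deserialize can throw:

    // Sketch: recover the Definitions stored on one search hit.
    IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
    if (tagsField != null) {
        Definitions tags = Definitions.deserialize(tagsField.binaryValue().bytes);
        for (Definitions.Tag tag : tags.getTags()) {
            System.out.println(tag.symbol + " defined at line " + tag.line); // ctags 1-based line
        }
    }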
Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.
The class DefinitionsTokenStreamTest, method testDefinitionsVsContent.
// DefinitionsTokenStream should not be used in try-with-resources.
@SuppressWarnings("java:S2095")
private void testDefinitionsVsContent(boolean expandTabs, String sourceResource,
        String tagsResource, int expectedCount, boolean doSupplement,
        Map<Integer, SimpleEntry<String, String>> overrides) throws IOException {
    StreamSource src = getSourceFromResource(sourceResource);
    // Deserialize the ctags.
    int tabSize = expandTabs ? 8 : 0;
    String suppResource = doSupplement ? sourceResource : null;
    Definitions defs = StreamUtils.readTagsFromResource(tagsResource, suppResource, tabSize);
    // Read the whole input.
    StringBuilder bld = new StringBuilder();
    String source;
    try (Reader rdr = ExpandTabsReader.wrap(
            IOUtils.createBOMStrippedReader(src.getStream(), StandardCharsets.UTF_8.name()),
            tabSize)) {
        int c;
        while ((c = rdr.read()) != -1) {
            bld.append((char) c);
        }
        source = bld.toString();
    }
    // Deserialize the token stream.
    DefinitionsTokenStream tokstream = new DefinitionsTokenStream();
    tokstream.initialize(defs, src, in -> ExpandTabsReader.wrap(in, tabSize));
    // Iterate through the stream.
    CharTermAttribute term = tokstream.getAttribute(CharTermAttribute.class);
    assertNotNull(term, "CharTermAttribute");
    OffsetAttribute offs = tokstream.getAttribute(OffsetAttribute.class);
    assertNotNull(offs, "OffsetAttribute");
    int count = 0;
    while (tokstream.incrementToken()) {
        ++count;
        String termValue = term.toString();
        String cutValue = source.substring(offs.startOffset(), offs.endOffset());
        // If an override exists, test it specially.
        if (overrides != null && overrides.containsKey(count)) {
            SimpleEntry<String, String> overkv = overrides.get(count);
            assertEquals(overkv.getKey(), cutValue, "cut term override" + count);
            assertEquals(overkv.getValue(), termValue, "cut term w.r.t. term override" + count);
            continue;
        }
        boolean cutContainsTerm = cutValue.endsWith(termValue);
        assertTrue(cutContainsTerm, "cut term" + count + " at " + offs.startOffset() + "-"
                + offs.endOffset() + " [" + cutValue + "] vs [" + termValue + "]");
    }
    assertEquals(expectedCount, count, "token count");
}
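A caller of this helper might look like the following; the resource paths, expected token count, and override entry are hypothetical, chosen only to show the parameter shapes:

    // Hypothetical invocation of the helper above (paths and counts are made up).
    Map<Integer, SimpleEntry<String, String>> overrides = new TreeMap<>();
    // Token #3: expect both the cut source text and the term value to be "counter".
    overrides.put(3, new SimpleEntry<>("counter", "counter"));
    testDefinitionsVsContent(true, "analysis/sample.c", "analysis/sampletags_c",
            12, false, overrides);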
Use of org.opengrok.indexer.analysis.Definitions in project OpenGrok by OpenGrok.
The class PascalAnalyzerFactoryTest, method testAnalyzer.
/**
 * Test of the writeXref method, of class PascalAnalyzerFactory.
 *
 * @throws java.lang.Exception on any failure
 */
@Test
void testAnalyzer() throws Exception {
    String path = repository.getSourceRoot() + "/pascal/Sample.pas";
    File f = new File(path);
    assertTrue(f.canRead() && f.isFile(), "pascal testfile " + f + " not found");
    Document doc = new Document();
    doc.add(new Field(QueryBuilder.FULLPATH, path, string_ft_nstored_nanalyzed_norms));
    StringWriter xrefOut = new StringWriter();
    analyzer.setCtags(ctags);
    analyzer.setScopesEnabled(true);
    analyzer.analyze(doc, getStreamSource(path), xrefOut);

    Definitions definitions = Definitions.deserialize(
            doc.getField(QueryBuilder.TAGS).binaryValue().bytes);
    assertNotNull(definitions);

    String[] type = new String[1];
    assertTrue(definitions.hasDefinitionAt("Sample", 22, type));
    assertThat(type[0], is("unit"));
    assertTrue(definitions.hasDefinitionAt("TSample", 28, type));
    assertThat(type[0], is("class"));
    assertTrue(definitions.hasDefinitionAt("Id", 40, type));
    assertThat(type[0], is("property"));
    assertTrue(definitions.hasDefinitionAt("Description", 41, type));
    assertThat(type[0], is("property"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetId", 48, type));
    assertThat(type[0], is("function"));
    assertTrue(definitions.hasDefinitionAt("TSample.SetId", 53, type));
    assertThat(type[0], is("procedure"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetClassName", 58, type));
    assertThat(type[0], is("function"));
    assertTrue(definitions.hasDefinitionAt("TSample.GetUser", 63, type));
    assertThat(type[0], is("function"));
}
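The String[1] argument to hasDefinitionAt is an out-parameter that receives the tag's type on a successful lookup. A minimal sketch of the same idiom outside an assertion, with the symbol and line taken from the test above:

    // Sketch: query a deserialized Definitions for a symbol at a known line.
    String[] type = new String[1];  // out-parameter filled with the tag type
    if (definitions.hasDefinitionAt("TSample", 28, type)) {
        System.out.println("TSample at line 28 is a " + type[0]);  // prints "class" per the test
    }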