Usage of org.opengrok.indexer.analysis.Definitions in the OpenGrok project:
the analyze method of the PlainAnalyzer class.
/**
 * Analyzes a plain-text source document: indexes its full text, extracts
 * ctags definitions, feeds symbols into the REFS field, and (when enabled)
 * generates an xref, collecting scopes and line counts as by-products.
 *
 * @param doc the Lucene document being built for this file
 * @param src supplier of (re-openable) input streams over the file content
 * @param xrefOut target writer for xref output, or {@code null} to skip xrefs
 * @throws IOException on I/O failure or when xref generation itself failed
 * @throws InterruptedException when xref generation did not complete
 *         (e.g. timed out) — see the ExecutionException handler below
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    Definitions defs = null;
    NullWriter nullWriter = null;
    // Full-text field over the raw content.
    doc.add(new OGKTextField(QueryBuilder.FULL, getReader(src.getStream())));
    // Run ctags only when the on-disk path is known and a ctags instance exists.
    String fullPath = doc.get(QueryBuilder.FULLPATH);
    if (fullPath != null && ctags != null) {
        defs = ctags.doCtags(fullPath);
        if (defs != null && defs.numberOfSymbols() > 0) {
            tryAddingDefs(doc, defs, src);
            // Persist the serialized definitions so consumers can
            // deserialize them later from the TAGS stored field.
            byte[] tags = defs.serialize();
            doc.add(new StoredField(QueryBuilder.TAGS, tags));
        }
    }
    /*
     * This is to explicitly use appropriate analyzer's token stream to
     * work around #1376: symbols search works like full text search.
     */
    JFlexTokenizer symbolTokenizer = symbolTokenizerFactory.get();
    OGKTextField ref = new OGKTextField(QueryBuilder.REFS, symbolTokenizer);
    symbolTokenizer.setReader(getReader(src.getStream()));
    doc.add(ref);
    if (scopesEnabled && xrefOut == null) {
        /*
         * Scopes are generated during xref generation. If xrefs are
         * turned off we still need to run writeXref() to produce scopes,
         * we use a dummy writer that will throw away any xref output.
         */
        nullWriter = new NullWriter();
        xrefOut = nullWriter;
    }
    if (xrefOut != null) {
        try (Reader in = getReader(src.getStream())) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            WriteXrefArgs args = new WriteXrefArgs(in, xrefOut);
            args.setDefs(defs);
            args.setProject(project);
            // Run xref generation on a dedicated executor with a timeout so a
            // hung tokenizer cannot stall the indexer indefinitely.
            CompletableFuture<XrefWork> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return new XrefWork(writeXref(args));
                } catch (IOException e) {
                    // Carried inside XrefWork and re-thrown as IOException below.
                    return new XrefWork(e);
                }
            }, env.getIndexerParallelizer().getXrefWatcherExecutor()).orTimeout(env.getXrefTimeout(), TimeUnit.SECONDS);
            // Will throw ExecutionException wrapping TimeoutException on timeout.
            XrefWork xrefWork = future.get();
            Xrefer xref = xrefWork.xrefer;
            if (xref != null) {
                // Scopes are a by-product of xref generation; store only when non-empty.
                Scopes scopes = xref.getScopes();
                if (scopes.size() > 0) {
                    byte[] scopesSerialized = scopes.serialize();
                    doc.add(new StoredField(QueryBuilder.SCOPES, scopesSerialized));
                }
                String path = doc.get(QueryBuilder.PATH);
                addNumLinesLOC(doc, new NumLinesLOC(path, xref.getLineNumber(), xref.getLOC()));
            } else {
                // Re-throw the exception from writeXref().
                throw new IOException(xrefWork.exception);
            }
        } catch (ExecutionException e) {
            // NOTE(review): the cause is flattened into the message string here, so
            // the original stack trace is lost (InterruptedException has no cause
            // constructor); consider preserving it via initCause().
            throw new InterruptedException("failed to generate xref :" + e);
        } finally {
            if (nullWriter != null) {
                nullWriter.close();
            }
        }
    }
}
Usage of org.opengrok.indexer.analysis.Definitions in the OpenGrok project:
the printPlain method of the Results class.
/**
 * Presents plain-text search context for one result document, preferring the
 * new-style (UnifiedHighlighter-based) presentation and falling back to the
 * legacy re-analyzing view when the new one produces nothing.
 *
 * @param fargs bundle of shared presentation arguments (search helper, output, prefixes)
 * @param doc the matched Lucene document
 * @param docId the document's ID in the searcher
 * @param rpath path of the source file relative to the source root
 * @throws ClassNotFoundException on failure to deserialize stored tags/scopes
 * @throws IOException on I/O failure
 */
private static void printPlain(PrintPlainFinalArgs fargs, Document doc, int docId, String rpath) throws ClassNotFoundException, IOException {
    fargs.shelp.getSourceContext().toggleAlt();

    if (fargs.shelp.getSourceContext().getContext2(fargs.env, fargs.shelp.getSearcher(), docId, fargs.out, fargs.xrefPrefix, fargs.morePrefix, true, fargs.tabSize)) {
        // The new-style presentation succeeded; nothing more to do.
        return;
    }

    /*
     * Fall back to the old view, which re-analyzes text using
     * PlainLinetokenizer. E.g., when source code is updated (thus
     * affecting timestamps) but re-indexing is not yet complete.
     */
    IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
    Definitions defs = (tagsField == null) ? null : Definitions.deserialize(tagsField.binaryValue().bytes);

    IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
    Scopes scopes = (scopesField == null) ? new Scopes() : Scopes.deserialize(scopesField.binaryValue().bytes);

    boolean defSearch = fargs.shelp.getBuilder().isDefSearch();
    // SRCROOT is read with UTF-8 as a default.
    File sourceFile = new File(fargs.shelp.getSourceRoot(), rpath);
    try (FileInputStream fis = new FileInputStream(sourceFile);
            Reader r = IOUtils.createBOMStrippedReader(fis, StandardCharsets.UTF_8.name())) {
        fargs.shelp.getSourceContext().getContext(r, fargs.out, fargs.xrefPrefix, fargs.morePrefix, rpath, defs, true, defSearch, null, scopes);
    } catch (IOException ex) {
        String errMsg = String.format("No context for %s", sourceFile);
        if (LOGGER.isLoggable(Level.FINE)) {
            // Emit at WARNING, but attach the stack trace only when FINE is on.
            LOGGER.log(Level.WARNING, errMsg, ex);
        } else {
            LOGGER.log(Level.WARNING, errMsg);
        }
    }
}
Usage of org.opengrok.indexer.analysis.Definitions in the OpenGrok project:
the getContext method of the Context class.
/**
 * Writes out context (matching lines with surrounding text and matching
 * definition tags) for this instance's query against the given input.
 * Closes the given <var>in</var> reader on return.
 *
 * @param in File to be matched
 * @param out to write the context
 * @param urlPrefix URL prefix
 * @param morePrefix to link to more... page
 * @param path path of the file
 * @param tags format to highlight defs.
 * @param limit should the number of matching lines be limited?
 * @param isDefSearch is definition search
 * @param hits list of hits
 * @param scopes scopes object
 * @return Did it get any matching context?
 */
public boolean getContext(Reader in, Writer out, String urlPrefix, String morePrefix, String path, Definitions tags, boolean limit, boolean isDefSearch, List<Hit> hits, Scopes scopes) {
    if (m == null) {
        IOUtils.close(in);
        return false;
    }
    boolean anything = false;
    TreeMap<Integer, String[]> matchingTags = null;
    String urlPrefixE = (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
    String pathE = Util.uriEncodePath(path);
    if (tags != null) {
        matchingTags = new TreeMap<>();
        try {
            for (Definitions.Tag tag : tags.getTags()) {
                for (LineMatcher lineMatcher : m) {
                    if (lineMatcher.match(tag.symbol) == LineMatcher.MATCHED) {
                        String scope = null;
                        String scopeUrl = null;
                        if (scopes != null) {
                            Scope scp = scopes.getScope(tag.line);
                            scope = scp.getName() + "()";
                            scopeUrl = "<a href=\"" + urlPrefixE + pathE + "#" + scp.getLineFrom() + "\">" + scope + "</a>";
                        }
                        /* desc[0] is matched symbol
                         * desc[1] is line number
                         * desc[2] is type
                         * desc[3] is matching line;
                         * desc[4] is scope
                         */
                        String[] desc = { tag.symbol, Integer.toString(tag.line), tag.type, tag.text, scope };
                        if (in == null) {
                            // No input to render context from: either record a Hit
                            // object or write the definition line directly.
                            if (out == null) {
                                Hit hit = new Hit(path, Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"), desc[1], false, alt);
                                hits.add(hit);
                            } else {
                                out.write("<a class=\"s\" href=\"");
                                out.write(urlPrefixE);
                                out.write(pathE);
                                out.write("#");
                                out.write(desc[1]);
                                out.write("\"><span class=\"l\">");
                                out.write(desc[1]);
                                out.write("</span> ");
                                out.write(Util.htmlize(desc[3]).replace(desc[0], "<b>" + desc[0] + "</b>"));
                                out.write("</a> ");
                                if (desc[4] != null) {
                                    // FIX: this previously emitted "<a href\"" (missing '='),
                                    // producing a malformed href attribute for the scope link.
                                    out.write("<span class=\"scope\"><a href=\"");
                                    out.write(scopeUrl);
                                    out.write("\">in ");
                                    out.write(desc[4]);
                                    out.write("</a></span> ");
                                }
                                out.write("<i>");
                                out.write(desc[2]);
                                out.write("</i><br/>");
                            }
                            anything = true;
                        } else {
                            matchingTags.put(tag.line, desc);
                        }
                        break;
                    }
                }
            }
        } catch (Exception e) {
            if (hits != null) {
                // @todo verify why we ignore all exceptions?
                LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
            }
        }
    }
    // A null input reader means the caller only wanted the matching tags.
    if (in == null) {
        return anything;
    }
    PlainLineTokenizer tokens = new PlainLineTokenizer(null);
    boolean truncated = false;
    boolean lim = limit;
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    if (!env.isQuickContextScan()) {
        lim = false;
    }
    if (lim) {
        char[] buffer = new char[MAXFILEREAD];
        int charsRead;
        try {
            charsRead = in.read(buffer);
            if (charsRead == MAXFILEREAD) {
                // we probably only read parts of the file, so set the
                // truncated flag to enable the [all...] link that
                // requests all matches
                truncated = true;
                // Trim the buffer back to the last newline (searching at most
                // the final 100 characters) so we do not tokenize a cut line.
                for (int i = charsRead - 1; i > charsRead - 100; i--) {
                    if (buffer[i] == '\n') {
                        charsRead = i;
                        break;
                    }
                }
            }
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "An error occurred while reading data", e);
            return anything;
        }
        if (charsRead == 0) {
            return anything;
        }
        tokens.reInit(buffer, charsRead, out, urlPrefixE + pathE + "#", matchingTags, scopes);
    } else {
        tokens.reInit(in, out, urlPrefixE + pathE + "#", matchingTags, scopes);
    }
    if (hits != null) {
        tokens.setAlt(alt);
        tokens.setHitList(hits);
        tokens.setFilename(path);
    }
    int limit_max_lines = env.getContextLimit();
    try {
        String token;
        int matchState;
        int matchedLines = 0;
        while ((token = tokens.yylex()) != null && (!lim || matchedLines < limit_max_lines)) {
            for (LineMatcher lineMatcher : m) {
                matchState = lineMatcher.match(token);
                if (matchState == LineMatcher.MATCHED) {
                    // For definition search only print lines that carry a tag.
                    if (!isDefSearch) {
                        tokens.printContext();
                    } else if (tokens.tags.containsKey(tokens.markedLine)) {
                        tokens.printContext();
                    }
                    matchedLines++;
                    break;
                } else if (matchState == LineMatcher.WAIT) {
                    tokens.holdOn();
                } else {
                    tokens.neverMind();
                }
            }
        }
        anything = matchedLines > 0;
        tokens.dumpRest();
        if (lim && (truncated || matchedLines == limit_max_lines) && out != null) {
            out.write("<a href=\"" + Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI + "\">[all...]</a>");
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "Could not get context for " + path, e);
    } finally {
        IOUtils.close(in);
        if (out != null) {
            try {
                out.flush();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "Failed to flush stream: ", e);
            }
        }
    }
    return anything;
}
Usage of org.opengrok.indexer.analysis.Definitions in the OpenGrok project:
the getContext2 method of the Context class.
/**
 * Look for context for this instance's initialized query in a search result
 * {@link Document}, and output according to the parameters.
 * @param env required environment
 * @param searcher required searcher that produced the document
 * @param docId document ID for producing context
 * @param dest required target to write
 * @param urlPrefix prefix for links
 * @param morePrefix optional link to more... page
 * @param limit a value indicating if the number of matching lines should be
 * limited. N.b. unlike
 * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
 * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
 * the {@code limit} argument will not be interpreted w.r.t.
 * {@link RuntimeEnvironment#isQuickContextScan()}.
 * @param tabSize optional positive tab size that must accord with the value
 * used when indexing or else postings may be wrongly shifted until
 * re-indexing
 * @return Did it get any matching context?
 */
public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) {
    if (isEmpty()) {
        return false;
    }

    final Document document;
    try {
        document = searcher.doc(docId);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
        return false;
    }

    // Deserialize the stored ctags definitions, if present.
    Definitions defs = null;
    try {
        IndexableField storedTags = document.getField(QueryBuilder.TAGS);
        if (storedTags != null) {
            defs = Definitions.deserialize(storedTags.binaryValue().bytes);
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
        return false;
    }

    // Deserialize the stored scopes, defaulting to an empty Scopes object.
    Scopes scopes;
    try {
        IndexableField storedScopes = document.getField(QueryBuilder.SCOPES);
        scopes = (storedScopes != null) ? Scopes.deserialize(storedScopes.binaryValue().bytes) : new Scopes();
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
        return false;
    }

    /*
     * UnifiedHighlighter demands an analyzer "even if in some
     * circumstances it isn't used"; here it is not meant to be used.
     */
    AbstractAnalyzer dummyAnalyzer = PlainAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();

    String path = document.get(QueryBuilder.PATH);
    String encodedPath = Util.uriEncodePath(path);
    String encodedUrlPrefix = (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
    String moreUrl = (morePrefix == null) ? null : Util.uriEncodePath(morePrefix) + encodedPath + "?" + queryAsURI;

    ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
    /*
     * Lucene adds to the following value in FieldHighlighter, so avoid
     * integer overflow by not using Integer.MAX_VALUE -- Short is good
     * enough.
     */
    int lineLimit = limit ? args.getContextLimit() : Short.MAX_VALUE;

    ContextFormatter formatter = new ContextFormatter(args);
    formatter.setUrl(encodedUrlPrefix + encodedPath);
    formatter.setDefs(defs);
    formatter.setScopes(scopes);
    formatter.setMoreUrl(moreUrl);
    formatter.setMoreLimit(lineLimit);

    OGKUnifiedHighlighter highlighter = new OGKUnifiedHighlighter(env, searcher, dummyAnalyzer);
    highlighter.setBreakIterator(StrictLineBreakIterator::new);
    highlighter.setFormatter(formatter);
    highlighter.setTabSize(tabSize);

    try {
        String[] fields = qbuilder.getContextFields().toArray(new String[0]);
        String rendered = highlighter.highlightFieldsUnion(fields, query, docId, lineLimit);
        if (rendered != null) {
            dest.append(rendered);
            return true;
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
        // Continue below.
    } catch (Throwable e) {
        LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
        throw e;
    }
    return false;
}
Usage of org.opengrok.indexer.analysis.Definitions in the OpenGrok project:
the maybeRedirectToDefinition method of the SearchHelper class.
/**
 * Bug #3900: when the search is for a single definition term, the result is
 * exactly one document, and that document defines the symbol exactly once,
 * set {@code redirect} to a direct xref link instead of showing a listing.
 *
 * @param docID ID of the single matching document
 * @param termQuery the single-term definition query
 * @throws IOException on searcher failure
 * @throws ClassNotFoundException on failure to deserialize stored tags
 */
private void maybeRedirectToDefinition(int docID, TermQuery termQuery) throws IOException, ClassNotFoundException {
    Document document = searcher.doc(docID);
    IndexableField storedTags = document.getField(QueryBuilder.TAGS);
    if (storedTags == null) {
        // No definitions were indexed for this document; keep the listing.
        return;
    }
    Definitions defs = Definitions.deserialize(storedTags.binaryValue().bytes);
    String symbol = termQuery.getTerm().text();
    if (defs.occurrences(symbol) == 1) {
        // Exactly one definition site: link straight to its anchor.
        String anchor = Util.uriEncode(symbol);
        redirect = contextPath + Prefix.XREF_P + Util.uriEncodePath(document.get(QueryBuilder.PATH)) + '?' + QueryParameters.FRAGMENT_IDENTIFIER_PARAM_EQ + anchor + '#' + anchor;
    }
}
Aggregations