Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class HaskellXrefTest, method basicTest:
@Test
public void basicTest() throws IOException {
String s = "putStrLn \"Hello, world!\"";
Writer w = new StringWriter();
HaskellAnalyzerFactory fac = new HaskellAnalyzerFactory();
AbstractAnalyzer analyzer = fac.getAnalyzer();
WriteXrefArgs xargs = new WriteXrefArgs(new StringReader(s), w);
Xrefer xref = analyzer.writeXref(xargs);
assertLinesEqual("Haskell basicTest", "<a class=\"l\" name=\"1\" href=\"#1\">1</a>" + "<a href=\"/source/s?defs=putStrLn\" class=\"intelliWindow-symbol\"" + " data-definition-place=\"undefined-in-file\">putStrLn</a>" + " <span class=\"s\">"Hello, world!"</span>\n", w.toString());
assertEquals(1, xref.getLOC(), "Haskell LOC");
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class GZIPAnalyzer, method analyze:
/**
 * Analyzes a gzip-compressed file: wraps the source in a decompressing
 * stream, lets {@code AnalyzerGuru} pick an analyzer for the uncompressed
 * content (based on the path with the {@code .gz} suffix stripped), and
 * delegates analysis to it. Also updates this analyzer's genre and the
 * document's file-type field accordingly.
 *
 * @param doc the Lucene document being populated
 * @param src stream source for the compressed file
 * @param xrefOut where to write the xref, or {@code null}
 * @throws IOException if reading the stream fails
 * @throws InterruptedException if the delegated analysis is interrupted
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    AbstractAnalyzer fa;
    // Wrap the raw source so readers see the decompressed bytes.
    StreamSource gzSrc = wrap(src);
    String path = doc.get(QueryBuilder.PATH);
    // Only proceed for paths that actually end in ".gz" (case-insensitive).
    if (path != null && path.toLowerCase(Locale.ROOT).endsWith(".gz")) {
        // Strip the 3-character ".gz" suffix to get the inner file name.
        String newname = path.substring(0, path.length() - 3);
        // System.err.println("GZIPPED OF = " + newname);
        // Sniff the decompressed content to choose an analyzer; the probe
        // stream is closed immediately after detection.
        try (InputStream gzis = gzSrc.getStream()) {
            fa = AnalyzerGuru.getAnalyzer(gzis, newname);
        }
        if (fa == null) {
            // No analyzer matched: treat the content as opaque data.
            this.g = Genre.DATA;
            LOGGER.log(Level.WARNING, "Did not analyze {0}, detected as data.", newname);
            // TODO we could probably wrap tar analyzer here, need to do research on reader coming from gzis ...
        } else {
            // simple file gziped case captured here
            // Plain or xrefable inner content makes the gzip wrapper xrefable
            // too; everything else stays data.
            if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
                this.g = Genre.XREFABLE;
            } else {
                this.g = Genre.DATA;
            }
            // Delegate the real analysis to the inner-content analyzer.
            fa.analyze(doc, gzSrc, xrefOut);
            // The delegate may have stored its own type field (T); replace it
            // with this wrapper's genre so searches reflect the gzip view.
            if (doc.get(QueryBuilder.T) != null) {
                doc.removeField(QueryBuilder.T);
                if (g == Genre.XREFABLE) {
                    doc.add(new Field(QueryBuilder.T, g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
                }
            }
        }
    }
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class Context, method getContext2:
/**
 * Look for context for this instance's initialized query in a search result
 * {@link Document}, and output according to the parameters.
 * @param env required environment
 * @param searcher required search that produced the document
 * @param docId document ID for producing context
 * @param dest required target to write
 * @param urlPrefix prefix for links
 * @param morePrefix optional link to more... page
 * @param limit a value indicating if the number of matching lines should be
 * limited. N.b. unlike
 * {@link #getContext(java.io.Reader, java.io.Writer, java.lang.String, java.lang.String, java.lang.String,
 * org.opengrok.indexer.analysis.Definitions, boolean, boolean, java.util.List, org.opengrok.indexer.analysis.Scopes)},
 * the {@code limit} argument will not be interpreted w.r.t.
 * {@link RuntimeEnvironment#isQuickContextScan()}.
 * @param tabSize optional positive tab size that must accord with the value
 * used when indexing or else postings may be wrongly shifted until
 * re-indexing
 * @return Did it get any matching context?
 */
public boolean getContext2(RuntimeEnvironment env, IndexSearcher searcher, int docId, Appendable dest, String urlPrefix, String morePrefix, boolean limit, int tabSize) {
    // Nothing to do for an empty query.
    if (isEmpty()) {
        return false;
    }
    Document doc;
    try {
        doc = searcher.doc(docId);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e);
        return false;
    }
    // Deserialize stored ctags definitions, if the document has any; these
    // drive symbol decoration in the formatted context.
    Definitions tags = null;
    try {
        IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
        if (tagsField != null) {
            tags = Definitions.deserialize(tagsField.binaryValue().bytes);
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Definitions.deserialize(...)", e);
        return false;
    }
    // Deserialize stored scope information, defaulting to an empty Scopes
    // when the document has none.
    Scopes scopes;
    try {
        IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
        if (scopesField != null) {
            scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
        } else {
            scopes = new Scopes();
        }
    } catch (ClassNotFoundException | IOException e) {
        LOGGER.log(Level.WARNING, "ERROR Scopes.deserialize(...)", e);
        return false;
    }
    /*
     * UnifiedHighlighter demands an analyzer "even if in some
     * circumstances it isn't used"; here it is not meant to be used.
     */
    PlainAnalyzerFactory fac = PlainAnalyzerFactory.DEFAULT_INSTANCE;
    AbstractAnalyzer anz = fac.getAnalyzer();
    // Pre-encode the path and link prefixes for embedding in generated URLs.
    String path = doc.get(QueryBuilder.PATH);
    String pathE = Util.uriEncodePath(path);
    String urlPrefixE = urlPrefix == null ? "" : Util.uriEncodePath(urlPrefix);
    String moreURL = morePrefix == null ? null : Util.uriEncodePath(morePrefix) + pathE + "?" + queryAsURI;
    ContextArgs args = new ContextArgs(env.getContextSurround(), env.getContextLimit());
    /*
     * Lucene adds to the following value in FieldHighlighter, so avoid
     * integer overflow by not using Integer.MAX_VALUE -- Short is good
     * enough.
     */
    int linelimit = limit ? args.getContextLimit() : Short.MAX_VALUE;
    // Configure the formatter that renders matched passages as OpenGrok
    // xref-style HTML with definition and scope decoration.
    ContextFormatter formatter = new ContextFormatter(args);
    formatter.setUrl(urlPrefixE + pathE);
    formatter.setDefs(tags);
    formatter.setScopes(scopes);
    formatter.setMoreUrl(moreURL);
    formatter.setMoreLimit(linelimit);
    // Highlight with line-oriented breaks and the tab size used at indexing.
    OGKUnifiedHighlighter uhi = new OGKUnifiedHighlighter(env, searcher, anz);
    uhi.setBreakIterator(StrictLineBreakIterator::new);
    uhi.setFormatter(formatter);
    uhi.setTabSize(tabSize);
    try {
        List<String> fieldList = qbuilder.getContextFields();
        String[] fields = fieldList.toArray(new String[0]);
        // A non-null result means at least one passage matched.
        String res = uhi.highlightFieldsUnion(fields, query, docId, linelimit);
        if (res != null) {
            dest.append(res);
            return true;
        }
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "ERROR highlightFieldsUnion(...)", e);
        // Continue below.
    } catch (Throwable e) {
        // Unexpected errors are logged at SEVERE and re-thrown.
        LOGGER.log(Level.SEVERE, "ERROR highlightFieldsUnion(...)", e);
        throw e;
    }
    return false;
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class IndexDatabase, method addFile:
/**
 * Add a file to the Lucene index (and generate a xref file).
 *
 * @param file The file to add
 * @param path The path to the file (from source root)
 * @param ctags a defined instance to use (only if its binary is not null)
 * @throws java.io.IOException if an error occurs
 * @throws InterruptedException if a timeout occurs
 */
private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    AbstractAnalyzer fa = getAnalyzerFor(file, path);
    // Notify listeners that indexing of this file is starting.
    for (IndexChangedListener listener : listeners) {
        listener.fileAdd(path, fa.getClass().getSimpleName());
    }
    // Propagate per-project tab size (0 = unset) and the optional ctags
    // timeout before handing ctags to the analyzer.
    ctags.setTabSize(project != null ? project.getTabSize() : 0);
    if (env.getCtagsTimeout() != 0) {
        ctags.setTimeout(env.getCtagsTimeout());
    }
    fa.setCtags(ctags);
    fa.setCountsAggregator(countsAggregator);
    fa.setProject(Project.getProject(path));
    fa.setScopesEnabled(env.isScopesEnabled());
    fa.setFoldingEnabled(env.isFoldingEnabled());
    Document doc = new Document();
    CountingWriter xrefOut = null;
    try {
        String xrefAbs = null;
        File transientXref = null;
        if (env.isGenerateHtml()) {
            // Write the xref to a transient "pending" file first; it is
            // renamed into place only after successful analysis.
            xrefAbs = getXrefPath(path);
            transientXref = new File(TandemPath.join(xrefAbs, PendingFileCompleter.PENDING_EXTENSION));
            xrefOut = newXrefWriter(path, transientXref, env.isCompressXref());
        }
        // Run the analyzer: fills the Lucene document and (optionally)
        // streams the xref to xrefOut.
        analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
        // Avoid producing empty xref files.
        if (xrefOut != null && xrefOut.getCount() > 0) {
            // Schedule the pending file to be renamed to its final name.
            PendingFileRenaming ren = new PendingFileRenaming(xrefAbs, transientXref.getAbsolutePath());
            completer.add(ren);
        } else if (xrefOut != null) {
            LOGGER.log(Level.FINER, "xref for {0} would be empty, will remove", path);
            completer.add(new PendingFileDeletion(transientXref.toString()));
        }
    } catch (InterruptedException e) {
        LOGGER.log(Level.WARNING, "File ''{0}'' interrupted--{1}", new Object[] { path, e.getMessage() });
        cleanupResources(doc);
        // Re-throw so the caller can abort the indexing run.
        throw e;
    } catch (Exception e) {
        // Analysis failures skip the file rather than failing the run.
        LOGGER.log(Level.INFO, "Skipped file ''{0}'' because the analyzer didn''t " + "understand it.", path);
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.log(Level.FINE, "Exception from analyzer " + fa.getClass().getName(), e);
        }
        cleanupResources(doc);
        return;
    } finally {
        // Detach shared state from the (possibly cached) analyzer and close
        // the xref writer regardless of the outcome.
        fa.setCtags(null);
        fa.setCountsAggregator(null);
        if (xrefOut != null) {
            xrefOut.close();
        }
    }
    try {
        writer.addDocument(doc);
    } catch (Throwable t) {
        cleanupResources(doc);
        throw t;
    }
    setDirty();
    // Notify listeners that the file was indexed successfully.
    for (IndexChangedListener listener : listeners) {
        listener.fileAdded(path, fa.getClass().getSimpleName());
    }
}
Usage example of org.opengrok.indexer.analysis.AbstractAnalyzer in the OpenGrok project.
From the class IndexDatabase, method checkSettings:
/**
 * Verify TABSIZE, and evaluate AnalyzerGuru version together with ZVER --
 * or return a value to indicate mismatch.
 * @param file the source file object
 * @param path the source file path
 * @return {@code false} if a mismatch is detected
 */
private boolean checkSettings(File file, String path) throws IOException {
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    // potential xref writer
    boolean outIsXrefWriter = false;
    // Compare the tab size recorded in the index settings against the
    // project's current requirement (0 = no explicit setting).
    int reqTabSize = project != null && project.hasTabSizeSetting() ? project.getTabSize() : 0;
    Integer actTabSize = settings.getTabSize();
    if (actTabSize != null && !actTabSize.equals(reqTabSize)) {
        LOGGER.log(Level.FINE, "Tabsize mismatch: {0}", path);
        return false;
    }
    int n = 0;
    // Walk the postings for the current uid term; normally a single
    // document per file is expected, and the loop breaks after checking it.
    postsIter = uidIter.postings(postsIter);
    while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        ++n;
        // Read a limited-fields version of the document.
        Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
        if (doc == null) {
            LOGGER.log(Level.FINER, "No Document: {0}", path);
            continue;
        }
        long reqGuruVersion = AnalyzerGuru.getVersionNo();
        Long actGuruVersion = settings.getAnalyzerGuruVersion();
        /*
         * For an older OpenGrok index that does not yet have a defined,
         * stored analyzerGuruVersion, break so that no extra work is done.
         * After a re-index, the guru version check will be active.
         */
        if (actGuruVersion == null) {
            break;
        }
        AbstractAnalyzer fa = null;
        String fileTypeName;
        if (actGuruVersion.equals(reqGuruVersion)) {
            // Same guru version: trust the stored TYPE field and just look
            // up its analyzer factory.
            fileTypeName = doc.get(QueryBuilder.TYPE);
            if (fileTypeName == null) {
                // (Should not get here, but break just in case.)
                LOGGER.log(Level.FINEST, "Missing TYPE field: {0}", path);
                break;
            }
            AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName);
            if (fac != null) {
                fa = fac.getAnalyzer();
            }
        } else {
            /*
             * If the stored guru version does not match, re-verify the
             * selection of analyzer or return a value to indicate the
             * analyzer is now mis-matched.
             */
            LOGGER.log(Level.FINER, "Guru version mismatch: {0}", path);
            fa = getAnalyzerFor(file, path);
            fileTypeName = fa.getFileTypeName();
            String oldTypeName = doc.get(QueryBuilder.TYPE);
            if (!fileTypeName.equals(oldTypeName)) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.log(Level.FINE, "Changed {0} to {1}: {2}", new Object[] { oldTypeName, fileTypeName, path });
                }
                return false;
            }
        }
        // Verify Analyzer version, or return a value to indicate mismatch.
        long reqVersion = AnalyzerGuru.getAnalyzerVersionNo(fileTypeName);
        Long actVersion = settings.getAnalyzerVersion(fileTypeName);
        if (actVersion == null || !actVersion.equals(reqVersion)) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.log(Level.FINE, "{0} version mismatch: {1}", new Object[] { fileTypeName, path });
            }
            return false;
        }
        // NOTE(review): this flags ANY resolved analyzer as an xref writer;
        // confirm whether a finer genre/capability check is intended here.
        if (fa != null) {
            outIsXrefWriter = true;
        }
        // The versions checks have passed.
        break;
    }
    if (n < 1) {
        LOGGER.log(Level.FINER, "Missing index Documents: {0}", path);
        return false;
    }
    // If the economy mode is on, this should be treated as a match.
    if (!env.isGenerateHtml()) {
        // Economy mode: remove any stale xref file left from a prior run.
        if (xrefExistsFor(path)) {
            LOGGER.log(Level.FINEST, "Extraneous {0} , removing its xref file", path);
            removeXrefFile(path);
        }
        return true;
    }
    // With HTML generation on, an xref-writing analyzer must have produced
    // an xref file for this to count as a match.
    return (!outIsXrefWriter || xrefExistsFor(path));
}
Aggregations