Use of org.opengrok.indexer.analysis.NumLinesLOC in the project OpenGrok by OpenGrok: class TroffAnalyzer, method analyze.
/**
 * Analyzes the given source: feeds a symbol token stream into the full-text
 * field of {@code doc}, and (when requested) writes the xref, recording the
 * resulting #lines/LOC counts on the document.
 *
 * @param doc the Lucene document being populated
 * @param src source of the file content (opened twice: once for tokens, once for xref)
 * @param xrefOut destination for the xref output, or {@code null} to skip xref generation
 * @throws IOException if reading the source or writing the xref fails
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Explicitly use this analyzer's own token stream so symbol search does not
    // degrade into plain full-text search (workaround for issue #1376).
    JFlexTokenizer tokenizer = symbolTokenizerFactory.get();
    tokenizer.setReader(getReader(src.getStream()));
    doc.add(new OGKTextField(QueryBuilder.FULL, tokenizer));

    if (xrefOut == null) {
        // No xref requested -- tokenization alone is sufficient.
        return;
    }

    try (Reader in = getReader(src.getStream())) {
        WriteXrefArgs xrefArgs = new WriteXrefArgs(in, xrefOut);
        xrefArgs.setProject(project);
        Xrefer xrefer = writeXref(xrefArgs);
        // Persist line/LOC counts gathered during xref generation.
        String filePath = doc.get(QueryBuilder.PATH);
        addNumLinesLOC(doc, new NumLinesLOC(filePath, xrefer.getLineNumber(), xrefer.getLOC()));
    }
}
Use of org.opengrok.indexer.analysis.NumLinesLOC in the project OpenGrok by OpenGrok: class IndexDatabase, method removeFile.
/**
 * Remove a stale file (the file identified by {@code uidIter.term()}) from the
 * index database and optionally from the history cache, and queue the removal
 * of its xref file. Listeners are notified both before and after removal.
 * <p>
 * Precondition: {@code uidIter} is positioned on the uid term of the file to
 * remove.
 *
 * @param removeHistory if false, do not remove history cache for this file
 * @throws java.io.IOException if an error occurs
 */
private void removeFile(boolean removeHistory) throws IOException {
// Decode the source-root-relative path from the uid term.
String path = Util.uid2url(uidIter.term().utf8ToString());
// Notify listeners that removal is starting.
for (IndexChangedListener listener : listeners) {
listener.fileRemove(path);
}
// Determine if a reversal of counts is necessary, and execute if so.
// In deltas mode, the file's previously stored #lines/LOC must be subtracted
// from the aggregate before its document is deleted.
if (isCountingDeltas) {
postsIter = uidIter.postings(postsIter);
while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
// Read a limited-fields version of the document.
Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
if (doc != null) {
NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
// Only revert when both counts were actually stored for this file.
if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
// Register negated counts so the aggregator subtracts them.
NumLinesLOC counts = new NumLinesLOC(path, -nullableCounts.getNumLines(), -nullableCounts.getLOC());
countsAggregator.register(counts);
}
// Stop after the first live document -- presumably at most one live
// document exists per uid. TODO confirm.
break;
}
}
}
// Delete every document carrying this uid, then queue the xref removal.
writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
removeXrefFile(path);
if (removeHistory) {
removeHistoryFile(path);
}
// Mark the database as modified so post-index steps (e.g. optimize) run.
setDirty();
// Notify listeners that removal has completed.
for (IndexChangedListener listener : listeners) {
listener.fileRemoved(path);
}
}
Use of org.opengrok.indexer.analysis.NumLinesLOC in the project OpenGrok by OpenGrok: class IndexDatabase, method update.
/**
 * Update the content of this index database: (re)index the configured
 * directories, remove stale entries, and store aggregated #lines/LOC counts.
 * Not reentrant -- a second concurrent call fails with an IOException.
 *
 * @throws IOException if an error occurs, or if an update is already running
 */
public void update() throws IOException {
// Guard against concurrent updates of the same database.
synchronized (lock) {
if (running) {
throw new IOException("Indexer already running!");
}
running = true;
interrupted = false;
}
RuntimeEnvironment env = RuntimeEnvironment.getInstance();

// Reset per-run state so nothing leaks from a previous (possibly failed) run.
reader = null;
writer = null;
settings = null;
uidIter = null;
postsIter = null;
indexedSymlinks.clear();

// Latches an IOException from finishWriting() so that cleanup in the outer
// finally still runs before the exception is rethrown at the end.
IOException finishingException = null;
try {
Analyzer analyzer = AnalyzerGuru.getAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
iwc.setRAMBufferSizeMB(env.getRamBufferSize());
writer = new IndexWriter(indexDirectory, iwc);
// to make sure index exists on the disk
writer.commit();
completer = new PendingFileCompleter();

// Default to the whole source root (or this project's path) when no
// explicit directories were requested.
if (directories.isEmpty()) {
if (project == null) {
directories.add("");
} else {
directories.add(project.getPath());
}
}

for (String dir : directories) {
File sourceRoot;
if ("".equals(dir)) {
sourceRoot = env.getSourceRootFile();
} else {
sourceRoot = new File(env.getSourceRootFile(), dir);
}

dir = Util.fixPathIfWindows(dir);
// uid prefix shared by all documents under this directory.
String startuid = Util.path2uid(dir, "");
// open existing index
reader = DirectoryReader.open(indexDirectory);
countsAggregator = new NumLinesLOCAggregator();
settings = readAnalysisSettings();
if (settings == null) {
settings = new IndexAnalysisSettings3();
}
Terms terms = null;
if (reader.numDocs() > 0) {
terms = MultiTerms.getTerms(reader, QueryBuilder.U);

// Decide the counting mode: if directory counts are already stored,
// subsequent counting can work in deltas; otherwise try to seed the
// aggregator from per-file counts found in the existing index.
NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
if (countsAccessor.hasStored(reader)) {
isWithDirectoryCounts = true;
isCountingDeltas = true;
} else {
boolean foundCounts = countsAccessor.register(countsAggregator, reader);
isWithDirectoryCounts = false;
isCountingDeltas = foundCounts;
if (!isCountingDeltas) {
LOGGER.info("Forcing reindexing to fully compute directory counts");
}
}
} else {
// Empty index -- counts will be computed from scratch.
isWithDirectoryCounts = false;
isCountingDeltas = false;
}

try {
if (terms != null) {
uidIter = terms.iterator();

// init uid -- position the iterator at (or after) the first uid
// belonging to this directory.
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
if (stat == TermsEnum.SeekStatus.END) {
uidIter = null;
LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", startuid);
}
}

// The actual indexing happens in indexParallel().

IndexDownArgs args = new IndexDownArgs();
Statistics elapsed = new Statistics();
LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
indexDown(sourceRoot, dir, args);
elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), "indexer.db.directory.traversal");

showFileCount(dir, args);

args.cur_count = 0;
elapsed = new Statistics();
LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
indexParallel(dir, args);
elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index");

// Any uids left under this directory's prefix were not visited by the
// traversal above, i.e. their files were removed and have higher
// ordering than any present files; purge them from the index.
while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
removeFile(true);

BytesRef next = uidIter.next();
if (next == null) {
uidIter = null;
}
}

/*
 * As a signifier that #Lines/LOC are comprehensively
 * stored so that later calculation is in deltas mode, we
 * need at least one D-document saved. For a repo with only
 * non-code files, however, no true #Lines/LOC will have
 * been saved. Subsequent re-indexing will do more work
 * than necessary (until a source code file is placed). We
 * can record zeroes for a fake file under the root to get
 * a D-document even for this special repo situation.
 *
 * Metrics are aggregated for directories up to the root,
 * so it suffices to put the fake directly under the root.
 */
if (!isWithDirectoryCounts) {
final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
}
NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
countsAccessor.store(writer, reader, countsAggregator, isWithDirectoryCounts && isCountingDeltas);

markProjectIndexed(project);
} finally {
// Close the per-directory reader even if indexing failed.
reader.close();
}
}

// Catch and latch the exception deliberately: the writer must still be
// closed (in the finally below) before the exception is rethrown.
try {
finishWriting();
} catch (IOException e) {
finishingException = e;
}
} catch (RuntimeException ex) {
LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
throw ex;
} finally {
completer = null;
try {
if (writer != null) {
writer.close();
}
} catch (IOException e) {
// Prefer the earlier finishWriting() exception if there was one.
if (finishingException == null) {
finishingException = e;
}
LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
} finally {
writer = null;
synchronized (lock) {
running = false;
}
}
}

// Rethrow the latched exception now that cleanup is complete.
if (finishingException != null) {
throw finishingException;
}

// Post-indexing housekeeping: only when work was done and not interrupted.
if (!isInterrupted() && isDirty()) {
if (env.isOptimizeDatabase()) {
optimize();
}
env.setIndexTimestamp();
}
}
Use of org.opengrok.indexer.analysis.NumLinesLOC in the project OpenGrok by OpenGrok: class NumLinesLOCAccessor, method processFileCounts.
/**
 * Registers #lines/LOC counts from the matched documents into the given
 * aggregator. Documents lacking either count are skipped.
 *
 * @param countsAggregator the aggregator receiving the per-file counts
 * @param searcher the searcher used to load full documents from the hits
 * @param hits the search results to process
 * @return {@code true} if at least one document carried both counts
 * @throws IOException if document retrieval fails
 */
private boolean processFileCounts(NumLinesLOCAggregator countsAggregator, IndexSearcher searcher, TopDocs hits) throws IOException {
    boolean sawDefinedCounts = false;
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        NullableNumLinesLOC nullable = NumLinesLOCUtil.read(searcher.doc(scoreDoc.doc));
        // Skip entries where either count was never stored.
        if (nullable.getNumLines() == null || nullable.getLOC() == null) {
            continue;
        }
        countsAggregator.register(new NumLinesLOC(nullable.getPath(), nullable.getNumLines(), nullable.getLOC()));
        sawDefinedCounts = true;
    }
    return sawDefinedCounts;
}
Aggregations