Use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.
The class Indexer, method main.
/**
* Program entry point.
*
* @param argv argument vector
*/
@SuppressWarnings("PMD.UseStringBufferForStringAppends")
public static void main(String[] argv) {
// this won't count JVM creation though
Statistics stats = new Statistics();
boolean update = true;
Executor.registerErrorHandler();
List<String> subFiles = RuntimeEnvironment.getInstance().getSubFiles();
ArrayList<String> subFilesList = new ArrayList<>();
boolean createDict = false;
try {
argv = parseOptions(argv);
if (webappURI != null && !HostUtil.isReachable(webappURI, WEBAPP_CONNECT_TIMEOUT)) {
System.err.println(webappURI + " is not reachable.");
System.exit(1);
}
/*
* Attend to disabledRepositories here in case exitWithHelp() needs
* to report on repositories.
*/
disabledRepositories.addAll(cfg.getDisabledRepositories());
cfg.setDisabledRepositories(disabledRepositories);
for (String repoName : disabledRepositories) {
LOGGER.log(Level.FINEST, "Disabled {0}", repoName);
}
if (help) {
exitWithHelp();
}
checkConfiguration();
if (awaitProfiler) {
pauseToAwaitProfiler();
}
env = RuntimeEnvironment.getInstance();
env.setIndexer(true);
// Complete the configuration of repository types.
List<Class<? extends Repository>> repositoryClasses = RepositoryFactory.getRepositoryClasses();
for (Class<? extends Repository> clazz : repositoryClasses) {
// Set external repository binaries from System properties.
try {
Field f = clazz.getDeclaredField("CMD_PROPERTY_KEY");
Object key = f.get(null);
if (key != null) {
cfg.setRepoCmd(clazz.getCanonicalName(), System.getProperty(key.toString()));
}
} catch (Exception e) {
// don't care
}
}
// Logging starts here.
if (verbose) {
String fn = LoggerUtil.getFileHandlerPattern();
if (fn != null) {
System.out.println("Logging filehandler pattern: " + fn);
}
}
// automatically allow symlinks that are directly in source root
File sourceRootFile = new File(cfg.getSourceRoot());
File[] projectDirs = sourceRootFile.listFiles();
if (projectDirs != null) {
for (File projectDir : projectDirs) {
if (!projectDir.getCanonicalPath().equals(projectDir.getAbsolutePath())) {
allowedSymlinks.add(projectDir.getAbsolutePath());
}
}
}
allowedSymlinks.addAll(cfg.getAllowedSymlinks());
cfg.setAllowedSymlinks(allowedSymlinks);
canonicalRoots.addAll(cfg.getCanonicalRoots());
cfg.setCanonicalRoots(canonicalRoots);
// This will be used to perform more fine-grained checking in invalidateRepositories().
for (String arg : argv) {
String path = Paths.get(cfg.getSourceRoot(), arg).toString();
subFilesList.add(path);
}
// Fill in each project's name from its map key if the name is not set (the two are the same).
for (Entry<String, Project> entry : cfg.getProjects().entrySet()) {
if (entry.getValue().getName() == null) {
entry.getValue().setName(entry.getKey());
}
}
// If requested, check the index and exit, signaling failure via the return code.
if (checkIndex) {
if (cfg.getDataRoot() == null || cfg.getDataRoot().isEmpty()) {
System.err.println("Need data root in configuration for index check (use -R)");
System.exit(1);
}
if (!IndexCheck.check(cfg, subFilesList)) {
System.err.printf("Index check failed%n");
System.err.print("You might want to remove " + (!subFilesList.isEmpty() ? "data for projects " + String.join(",", subFilesList) : "all data") + " under the data root and reindex\n");
System.exit(1);
}
System.exit(0);
}
// Set updated configuration in RuntimeEnvironment.
env.setConfiguration(cfg, subFilesList, CommandTimeoutType.INDEXER);
// Let repository types add items to ignoredNames.
// This changes env, so it must be called after the setConfiguration()
// call above.
RepositoryFactory.initializeIgnoredNames(env);
if (bareConfig) {
getInstance().sendToConfigHost(env, webappURI);
writeConfigToFile(env, configFilename);
System.exit(0);
}
/*
* Add paths to directories under source root. If projects
* are enabled the path should correspond to a project because
* project path is necessary to correctly set index directory
* (otherwise the index files will end up in index data root
* directory and not per project data root directory).
* For the check we need to have 'env' already set.
*/
for (String path : subFilesList) {
String srcPath = env.getSourceRootPath();
if (srcPath == null) {
System.err.println("Error getting source root from environment. Exiting.");
System.exit(1);
}
path = path.substring(srcPath.length());
if (env.hasProjects()) {
// The paths need to correspond to a project.
Project project;
if ((project = Project.getProject(path)) != null) {
subFiles.add(path);
List<RepositoryInfo> repoList = env.getProjectRepositoriesMap().get(project);
if (repoList != null) {
repositories.addAll(repoList.stream().map(RepositoryInfo::getDirectoryNameRelative).collect(Collectors.toSet()));
}
} else {
System.err.println("The path " + path + " does not correspond to a project");
}
} else {
subFiles.add(path);
}
}
if (!subFilesList.isEmpty() && subFiles.isEmpty()) {
System.err.println("None of the paths were added, exiting");
System.exit(1);
}
Metrics.updateSubFiles(subFiles);
// Tell the webapp to enable projects so that the per-project messages
// emitted during indexing do not cause validation error.
if (addProjects && webappURI != null) {
try {
IndexerUtil.enableProjects(webappURI);
} catch (Exception e) {
LOGGER.log(Level.SEVERE, String.format("Couldn't notify the webapp on %s.", webappURI), e);
System.err.printf("Couldn't notify the webapp on %s: %s.%n", webappURI, e.getLocalizedMessage());
}
}
LOGGER.log(Level.INFO, "Indexer version {0} ({1}) running on Java {2}", new Object[] { Info.getVersion(), Info.getRevision(), System.getProperty("java.version") });
// Create history cache first.
if (searchRepositories) {
if (searchPaths.isEmpty()) {
String[] dirs = env.getSourceRootFile().list((f, name) -> f.isDirectory() && env.getPathAccepter().accept(f));
if (dirs != null) {
searchPaths.addAll(Arrays.asList(dirs));
}
}
searchPaths = searchPaths.stream().map(t -> Paths.get(env.getSourceRootPath(), t).toString()).collect(Collectors.toSet());
}
getInstance().prepareIndexer(env, searchPaths, addProjects, createDict, runIndex, subFiles, new ArrayList<>(repositories));
// prepareIndexer() populated the list of projects so now default projects can be set.
env.setDefaultProjectsFromNames(defaultProjects);
// And now index it all.
if (runIndex || (optimizedChanged && env.isOptimizeDatabase())) {
IndexChangedListener progress = new DefaultIndexChangedListener();
getInstance().doIndexerExecution(update, subFiles, progress);
}
writeConfigToFile(env, configFilename);
// Refresh the affected searcher managers after a partial reindex,
// or send the new configuration to the web application in the case of full reindex.
if (webappURI != null) {
if (!subFiles.isEmpty()) {
getInstance().refreshSearcherManagers(env, subFiles, webappURI);
} else {
getInstance().sendToConfigHost(env, webappURI);
}
}
env.getIndexerParallelizer().bounce();
} catch (ParseException e) {
System.err.println("** " + e.getMessage());
System.exit(1);
} catch (IndexerException ex) {
LOGGER.log(Level.SEVERE, "Exception running indexer", ex);
System.err.println("Exception: " + ex.getLocalizedMessage());
System.err.println(optParser.getUsage());
System.exit(1);
} catch (Throwable e) {
LOGGER.log(Level.SEVERE, "Unexpected Exception", e);
System.err.println("Exception: " + e.getLocalizedMessage());
System.exit(1);
} finally {
stats.report(LOGGER, "Indexer finished", "indexer.total");
}
}
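The method above shows the canonical Statistics pattern: construct the object when a phase begins (the constructor records the start time) and call report() in a finally block when it ends. A minimal self-contained sketch of that pattern follows; the class name, logger, workload, and meter name are illustrative assumptions, not taken from OpenGrok.

import java.util.logging.Logger;

import org.opengrok.indexer.util.Statistics;

public class StatisticsPatternSketch {

    private static final Logger LOGGER = Logger.getLogger(StatisticsPatternSketch.class.getName());

    public static void main(String[] args) {
        Statistics stats = new Statistics(); // the clock starts at construction
        try {
            doWork(); // stand-in for the real indexing phases
        } finally {
            // Logs the elapsed time and, when metrics are configured,
            // records it under the given meter name.
            stats.report(LOGGER, "Work finished", "indexer.example.total");
        }
    }

    private static void doWork() {
        // hypothetical workload
    }
}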
Use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.
The class Indexer, method doIndexerExecution.
/**
* This is the second phase of the indexer, which generates the Lucene
* index by passing source code files through ctags, generating xrefs,
* and storing data from the source files in the index (along with
* history, if any).
*
* @param update if set to true, the index database is updated; otherwise it is optimized
* @param subFiles index just some subdirectories
* @param progress object to receive notifications as indexer progress is made
* @throws IOException if an I/O error occurs
*/
public void doIndexerExecution(final boolean update, List<String> subFiles, IndexChangedListener progress) throws IOException {
Statistics elapsed = new Statistics();
LOGGER.info("Starting indexing");
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
IndexerParallelizer parallelizer = env.getIndexerParallelizer();
final CountDownLatch latch;
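// A single latch covers all variants below: it is counted down once per
// submitted task, or created at zero when there is nothing to do, so the
// await() near the end of this method returns once everything finished.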
if (subFiles == null || subFiles.isEmpty()) {
if (update) {
latch = IndexDatabase.updateAll(progress);
} else if (env.isOptimizeDatabase()) {
latch = IndexDatabase.optimizeAll();
} else {
latch = new CountDownLatch(0);
}
} else {
List<IndexDatabase> dbs = new ArrayList<>();
for (String path : subFiles) {
Project project = Project.getProject(path);
if (project == null && env.hasProjects()) {
LOGGER.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
} else {
IndexDatabase db;
if (project == null) {
db = new IndexDatabase();
} else {
db = new IndexDatabase(project);
}
int idx = dbs.indexOf(db);
if (idx != -1) {
db = dbs.get(idx);
}
if (db.addDirectory(path)) {
if (idx == -1) {
dbs.add(db);
}
} else {
LOGGER.log(Level.WARNING, "Directory does not exist \"{0}\"", path);
}
}
}
latch = new CountDownLatch(dbs.size());
for (final IndexDatabase db : dbs) {
final boolean optimize = env.isOptimizeDatabase();
db.addIndexChangedListener(progress);
parallelizer.getFixedExecutor().submit(() -> {
try {
if (update) {
db.update();
} else if (optimize) {
db.optimize();
}
} catch (Throwable e) {
LOGGER.log(Level.SEVERE, "An error occurred while " + (update ? "updating" : "optimizing") + " index", e);
} finally {
latch.countDown();
}
});
}
}
// Wait forever for the executors to finish.
try {
LOGGER.info("Waiting for the executors to finish");
latch.await();
} catch (InterruptedException exp) {
LOGGER.log(Level.WARNING, "Received interrupt while waiting" + " for executor to finish", exp);
}
elapsed.report(LOGGER, "Done indexing data of all repositories", "indexer.repository.indexing");
CtagsUtil.deleteTempFiles();
}
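The per-database fan-out in doIndexerExecution() is a standard CountDownLatch idiom: size the latch to the number of submitted tasks, count down in a finally block so failures are still counted, and await() once. A minimal sketch of the idiom, assuming a plain fixed thread pool in place of IndexerParallelizer:

import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class LatchFanOutSketch {

    public static void main(String[] args) throws InterruptedException {
        List<String> tasks = List.of("db1", "db2", "db3"); // stand-ins for IndexDatabase instances
        ExecutorService executor = Executors.newFixedThreadPool(2);
        CountDownLatch latch = new CountDownLatch(tasks.size());
        for (String task : tasks) {
            executor.submit(() -> {
                try {
                    System.out.println("processing " + task);
                } finally {
                    latch.countDown(); // always count down, even if the task failed
                }
            });
        }
        latch.await(); // block until every task has counted down
        executor.shutdown();
    }
}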
Use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.
The class IndexDatabase, method optimize.
/**
* Optimize the index database.
* @throws IOException I/O exception
*/
public void optimize() throws IOException {
synchronized (lock) {
if (running) {
LOGGER.warning("Optimize terminated... Someone else is updating / optimizing it!");
return;
}
running = true;
}
IndexWriter wrt = null;
IOException writerException = null;
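// Track the first IOException separately: a failure while closing the
// writer below must not mask an earlier failure from forceMerge().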
try {
Statistics elapsed = new Statistics();
String projectDetail = this.project != null ? " for project " + project.getName() : "";
LOGGER.log(Level.INFO, "Optimizing the index{0}", projectDetail);
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
wrt = new IndexWriter(indexDirectory, conf);
// this is deprecated and not needed anymore
wrt.forceMerge(1);
elapsed.report(LOGGER, String.format("Done optimizing index%s", projectDetail), "indexer.db.optimize");
synchronized (lock) {
if (dirtyFile.exists() && !dirtyFile.delete()) {
LOGGER.log(Level.FINE, "Failed to remove \"dirty-file\": {0}", dirtyFile.getAbsolutePath());
}
dirty = false;
}
} catch (IOException e) {
writerException = e;
LOGGER.log(Level.SEVERE, "ERROR: optimizing index", e);
} finally {
if (wrt != null) {
try {
wrt.close();
} catch (IOException e) {
if (writerException == null) {
writerException = e;
}
LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
}
}
synchronized (lock) {
running = false;
}
}
if (writerException != null) {
throw writerException;
}
}
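Both optimize() above and update() further below rely on the same exception-preservation idiom: remember the first IOException, still run cleanup, and rethrow only after the resource is closed, so a close() failure never masks the original error. A compact sketch of the idiom with a hypothetical Resource type:

import java.io.IOException;

public class FirstExceptionWinsSketch {

    interface Resource {
        void work() throws IOException;
        void close() throws IOException;
    }

    static void run(Resource resource) throws IOException {
        IOException first = null;
        try {
            resource.work();
        } catch (IOException e) {
            first = e; // remember the original failure
        } finally {
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e; // promote the close() failure only if nothing failed earlier
                }
            }
        }
        if (first != null) {
            throw first; // rethrow after cleanup has run
        }
    }
}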
Use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.
The class IndexDatabase, method indexParallel.
/**
* Executes the second, parallel stage of indexing.
* @param dir the parent directory (when appended to SOURCE_ROOT)
* @param args contains a list of files to index, found during the earlier
* stage
*/
private void indexParallel(String dir, IndexDownArgs args) {
int worksCount = args.works.size();
if (worksCount < 1) {
return;
}
AtomicInteger successCounter = new AtomicInteger();
AtomicInteger currentCounter = new AtomicInteger();
AtomicInteger alreadyClosedCounter = new AtomicInteger();
IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer();
ObjectPool<Ctags> ctagsPool = parallelizer.getCtagsPool();
Map<Boolean, List<IndexFileWork>> bySuccess = null;
try (Progress progress = new Progress(LOGGER, dir, worksCount)) {
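// Classify every work item as success (true) or failure (false) while
// indexing it; running the parallel stream inside a task submitted to the
// dedicated ForkJoinPool keeps it off the JVM-wide common pool.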
bySuccess = parallelizer.getForkJoinPool().submit(() -> args.works.parallelStream().collect(Collectors.groupingByConcurrent((x) -> {
int tries = 0;
Ctags pctags = null;
boolean ret;
Statistics stats = new Statistics();
while (true) {
try {
if (alreadyClosedCounter.get() > 0) {
ret = false;
} else {
pctags = ctagsPool.get();
addFile(x.file, x.path, pctags);
successCounter.incrementAndGet();
ret = true;
}
} catch (AlreadyClosedException e) {
alreadyClosedCounter.incrementAndGet();
String errmsg = String.format("ERROR addFile(): %s", x.file);
LOGGER.log(Level.SEVERE, errmsg, e);
x.exception = e;
ret = false;
} catch (InterruptedException e) {
// Allow one retry if interrupted
if (++tries <= 1) {
continue;
}
LOGGER.log(Level.WARNING, "No retry: {0}", x.file);
x.exception = e;
ret = false;
} catch (RuntimeException | IOException e) {
String errmsg = String.format("ERROR addFile(): %s", x.file);
LOGGER.log(Level.WARNING, errmsg, e);
x.exception = e;
ret = false;
} finally {
if (pctags != null) {
pctags.reset();
ctagsPool.release(pctags);
}
}
progress.increment();
stats.report(LOGGER, Level.FINEST, String.format("file ''%s'' %s", x.file, ret ? "indexed" : "failed indexing"));
return ret;
}
}))).get();
} catch (InterruptedException | ExecutionException e) {
int successCount = successCounter.intValue();
double successPct = 100.0 * successCount / worksCount;
String exmsg = String.format("%d successes (%.1f%%) after aborting parallel-indexing", successCount, successPct);
LOGGER.log(Level.SEVERE, exmsg, e);
}
args.cur_count = currentCounter.intValue();
// Start with failureCount=worksCount, and then subtract successes.
int failureCount = worksCount;
if (bySuccess != null) {
List<IndexFileWork> successes = bySuccess.getOrDefault(Boolean.TRUE, null);
if (successes != null) {
failureCount -= successes.size();
}
}
if (failureCount > 0) {
double pctFailed = 100.0 * failureCount / worksCount;
String exmsg = String.format("%d failures (%.1f%%) while parallel-indexing", failureCount, pctFailed);
LOGGER.log(Level.WARNING, exmsg);
}
/*
* Encountering an AlreadyClosedException is severe enough to abort the
* run, since it will fail anyway later upon trying to commit().
*/
int numAlreadyClosed = alreadyClosedCounter.get();
if (numAlreadyClosed > 0) {
throw new AlreadyClosedException(String.format("count=%d", numAlreadyClosed));
}
}
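The core trick in indexParallel() is partitioning work items by outcome while processing them: a parallel stream collected with Collectors.groupingByConcurrent, submitted as a task to a dedicated ForkJoinPool. A minimal sketch of that combination; the even/odd classifier is an illustrative stand-in for the real per-file indexing that returns true on success:

import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.Collectors;

public class ParallelPartitionSketch {

    public static void main(String[] args) throws InterruptedException, ExecutionException {
        List<Integer> works = List.of(1, 2, 3, 4, 5, 6); // stand-ins for IndexFileWork items
        ForkJoinPool pool = new ForkJoinPool(4);
        // Submitting the parallel stream as a task to a dedicated pool keeps
        // it off the JVM-wide common pool; the classifier plays the role of
        // the per-file try/catch that returns true on success.
        Map<Boolean, List<Integer>> byOutcome = pool.submit(() ->
                works.parallelStream()
                        .collect(Collectors.groupingByConcurrent(n -> n % 2 == 0))
        ).get();
        System.out.println("successes: " + byOutcome.getOrDefault(Boolean.TRUE, List.of()));
        System.out.println("failures: " + byOutcome.getOrDefault(Boolean.FALSE, List.of()));
        pool.shutdown();
    }
}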
Use of org.opengrok.indexer.util.Statistics in project OpenGrok by OpenGrok.
The class IndexDatabase, method update.
/**
* Update the content of this index database.
*
* @throws IOException if an error occurs
*/
public void update() throws IOException {
synchronized (lock) {
if (running) {
throw new IOException("Indexer already running!");
}
running = true;
interrupted = false;
}
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
reader = null;
writer = null;
settings = null;
uidIter = null;
postsIter = null;
indexedSymlinks.clear();
IOException finishingException = null;
try {
Analyzer analyzer = AnalyzerGuru.getAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
iwc.setRAMBufferSizeMB(env.getRamBufferSize());
writer = new IndexWriter(indexDirectory, iwc);
// to make sure index exists on the disk
writer.commit();
completer = new PendingFileCompleter();
if (directories.isEmpty()) {
if (project == null) {
directories.add("");
} else {
directories.add(project.getPath());
}
}
for (String dir : directories) {
File sourceRoot;
if ("".equals(dir)) {
sourceRoot = env.getSourceRootFile();
} else {
sourceRoot = new File(env.getSourceRootFile(), dir);
}
dir = Util.fixPathIfWindows(dir);
String startuid = Util.path2uid(dir, "");
// open existing index
reader = DirectoryReader.open(indexDirectory);
countsAggregator = new NumLinesLOCAggregator();
settings = readAnalysisSettings();
if (settings == null) {
settings = new IndexAnalysisSettings3();
}
Terms terms = null;
if (reader.numDocs() > 0) {
terms = MultiTerms.getTerms(reader, QueryBuilder.U);
NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
if (countsAccessor.hasStored(reader)) {
isWithDirectoryCounts = true;
isCountingDeltas = true;
} else {
boolean foundCounts = countsAccessor.register(countsAggregator, reader);
isWithDirectoryCounts = false;
isCountingDeltas = foundCounts;
if (!isCountingDeltas) {
LOGGER.info("Forcing reindexing to fully compute directory counts");
}
}
} else {
isWithDirectoryCounts = false;
isCountingDeltas = false;
}
try {
if (terms != null) {
uidIter = terms.iterator();
// init uid
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
if (stat == TermsEnum.SeekStatus.END) {
uidIter = null;
LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", startuid);
}
}
// The actual indexing happens in indexParallel().
IndexDownArgs args = new IndexDownArgs();
Statistics elapsed = new Statistics();
LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
indexDown(sourceRoot, dir, args);
elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), "indexer.db.directory.traversal");
showFileCount(dir, args);
args.cur_count = 0;
elapsed = new Statistics();
LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
indexParallel(dir, args);
elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index");
// Remove data for the trailing uid terms that were not traversed; they
// correspond to files that have been removed and have higher ordering
// than any present files.
while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
removeFile(true);
BytesRef next = uidIter.next();
if (next == null) {
uidIter = null;
}
}
/*
* As a signifier that #Lines/LOC are comprehensively
* stored so that later calculation is in deltas mode, we
* need at least one D-document saved. For a repo with only
* non-code files, however, no true #Lines/LOC will have
* been saved. Subsequent re-indexing will do more work
* than necessary (until a source code file is placed). We
* can record zeroes for a fake file under the root to get
* a D-document even for this special repo situation.
*
* Metrics are aggregated for directories up to the root,
* so it suffices to put the fake directly under the root.
*/
if (!isWithDirectoryCounts) {
final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
}
NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
countsAccessor.store(writer, reader, countsAggregator, isWithDirectoryCounts && isCountingDeltas);
markProjectIndexed(project);
} finally {
reader.close();
}
}
// Catching the exception here instead of rethrowing is deliberate: the
// writer still needs to be closed in the finally block below, and the
// stored exception is rethrown after cleanup.
try {
finishWriting();
} catch (IOException e) {
finishingException = e;
}
} catch (RuntimeException ex) {
LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
throw ex;
} finally {
completer = null;
try {
if (writer != null) {
writer.close();
}
} catch (IOException e) {
if (finishingException == null) {
finishingException = e;
}
LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
} finally {
writer = null;
synchronized (lock) {
running = false;
}
}
}
if (finishingException != null) {
throw finishingException;
}
if (!isInterrupted() && isDirty()) {
if (env.isOptimizeDatabase()) {
optimize();
}
env.setIndexTimestamp();
}
}
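The uid bookkeeping above hinges on Lucene's TermsEnum.seekCeil(): position the iterator at the first term at or after the directory's uid prefix, then walk forward while the prefix still matches. A stripped-down sketch of that prefix walk; the index path is an assumption and the literal "u" field name mirrors QueryBuilder.U:

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class UidPrefixScanSketch {

    // Walk all terms of the "u" field that share the given prefix.
    public static void scan(String indexPath, String prefix) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)))) {
            Terms terms = MultiTerms.getTerms(reader, "u");
            if (terms == null) {
                return; // field not present in this index
            }
            TermsEnum it = terms.iterator();
            if (it.seekCeil(new BytesRef(prefix)) == TermsEnum.SeekStatus.END) {
                return; // nothing at or after the prefix
            }
            // Continue while the current term still starts with the prefix.
            for (BytesRef term = it.term(); term != null && term.utf8ToString().startsWith(prefix); term = it.next()) {
                System.out.println(term.utf8ToString());
            }
        }
    }
}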