use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.
the class LuceneService method updateIndex.
/**
* Updates a repository index incrementally from the last indexed commits.
*
* @param model
* @param repository
* @return IndexResult
*/
private IndexResult updateIndex(RepositoryModel model, Repository repository) {
IndexResult result = new IndexResult();
try {
FileBasedConfig config = getConfig(repository);
config.load();
// build a quick lookup of annotated tags
Map<String, List<String>> tags = new HashMap<String, List<String>>();
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
if (!tag.isAnnotatedTag()) {
// skip non-annotated tags
continue;
}
if (!tags.containsKey(tag.getObjectId().getName())) {
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
}
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
}
// detect branch deletion
// first assume all branches are deleted and then remove each
// existing branch from deletedBranches during indexing
Set<String> deletedBranches = new TreeSet<String>();
for (String alias : config.getNames(CONF_ALIAS)) {
String branch = config.getString(CONF_ALIAS, null, alias);
deletedBranches.add(branch);
}
// get the local branches
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
// sort them by most recently updated
Collections.sort(branches, new Comparator<RefModel>() {
@Override
public int compare(RefModel ref1, RefModel ref2) {
return ref2.getDate().compareTo(ref1.getDate());
}
});
// reorder default branch to first position
RefModel defaultBranch = null;
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
for (RefModel branch : branches) {
if (branch.getObjectId().equals(defaultBranchId)) {
defaultBranch = branch;
break;
}
}
branches.remove(defaultBranch);
branches.add(0, defaultBranch);
// walk through each branches
for (RefModel branch : branches) {
String branchName = branch.getName();
boolean indexBranch = false;
if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) {
// indexing "default" branch
indexBranch = true;
} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
// ignore internal meta branches
indexBranch = false;
} else {
// normal explicit branch check
indexBranch = model.indexedBranches.contains(branch.getName());
}
// if this branch is not specifically indexed then skip
if (!indexBranch) {
continue;
}
// remove this branch from the deletedBranches set
deletedBranches.remove(branchName);
// determine last commit
String keyName = getBranchKey(branchName);
String lastCommit = config.getString(CONF_BRANCH, null, keyName);
List<RevCommit> revs;
if (StringUtils.isEmpty(lastCommit)) {
// new branch/unindexed branch, get all commits on branch
revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
} else {
// pre-existing branch, get changes since last commit
revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
}
if (revs.size() > 0) {
result.branchCount += 1;
}
// reverse the list of commits so we start with the first commit
Collections.reverse(revs);
for (RevCommit commit : revs) {
// index a commit
result.add(index(model.name, repository, branchName, commit));
}
// update the config
config.setString(CONF_ALIAS, null, keyName, branchName);
config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
config.save();
}
// unless a branch really was deleted and no longer exists
if (deletedBranches.size() > 0) {
for (String branch : deletedBranches) {
IndexWriter writer = getIndexWriter(model.name);
writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
writer.commit();
}
}
result.success = true;
} catch (Throwable t) {
logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
}
return result;
}
use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.
the class LuceneService method index.
/**
* Incrementally index an object for the repository.
*
* @param repositoryName
* @param doc
* @return true, if successful
*/
private boolean index(String repositoryName, Document doc) {
try {
IndexWriter writer = getIndexWriter(repositoryName);
writer.addDocument(doc);
writer.commit();
resetIndexSearcher(repositoryName);
return true;
} catch (Exception e) {
logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
}
return false;
}
use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.
the class LuceneService method index.
/**
* Incrementally update the index with the specified commit for the
* repository.
*
* @param repositoryName
* @param repository
* @param branch
* the fully qualified branch name (e.g. refs/heads/master)
* @param commit
* @return true, if successful
*/
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
IndexResult result = new IndexResult();
try {
String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
IndexWriter writer = getIndexWriter(repositoryName);
for (PathChangeModel path : changedPaths) {
if (path.isSubmodule()) {
continue;
}
// delete the indexed blob
deleteBlob(repositoryName, branch, path.name);
// re-index the blob
if (!ChangeType.DELETE.equals(path.changeType)) {
result.blobCount++;
Document doc = new Document();
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
// determine extension to compare to the extension
// blacklist
String ext = null;
String name = path.name.toLowerCase();
if (name.indexOf('.') > -1) {
ext = name.substring(name.lastIndexOf('.') + 1);
}
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
// read the blob content
String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
if (str != null) {
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
writer.addDocument(doc);
}
}
}
}
writer.commit();
// get any annotated commit tags
List<String> commitTags = new ArrayList<String>();
for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
commitTags.add(ref.displayName);
}
}
// create and write the Lucene document
Document doc = createDocument(commit, commitTags);
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
result.commitCount++;
result.success = index(repositoryName, doc);
} catch (Exception e) {
logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
}
return result;
}
use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.
the class LuceneService method reindex.
/**
* This completely indexes the repository and will destroy any existing
* index.
*
* @param repositoryName
* @param repository
* @return IndexResult
*/
public IndexResult reindex(RepositoryModel model, Repository repository) {
IndexResult result = new IndexResult();
if (!deleteIndex(model.name)) {
return result;
}
try {
String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
FileBasedConfig config = getConfig(repository);
Set<String> indexedCommits = new TreeSet<String>();
IndexWriter writer = getIndexWriter(model.name);
// build a quick lookup of tags
Map<String, List<String>> tags = new HashMap<String, List<String>>();
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
if (!tag.isAnnotatedTag()) {
// skip non-annotated tags
continue;
}
if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
}
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
}
ObjectReader reader = repository.newObjectReader();
// get the local branches
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
// sort them by most recently updated
Collections.sort(branches, new Comparator<RefModel>() {
@Override
public int compare(RefModel ref1, RefModel ref2) {
return ref2.getDate().compareTo(ref1.getDate());
}
});
// reorder default branch to first position
RefModel defaultBranch = null;
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
for (RefModel branch : branches) {
if (branch.getObjectId().equals(defaultBranchId)) {
defaultBranch = branch;
break;
}
}
branches.remove(defaultBranch);
branches.add(0, defaultBranch);
// walk through each branch
for (RefModel branch : branches) {
boolean indexBranch = false;
if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) {
// indexing "default" branch
indexBranch = true;
} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
// skip internal meta branches
indexBranch = false;
} else {
// normal explicit branch check
indexBranch = model.indexedBranches.contains(branch.getName());
}
// if this branch is not specifically indexed then skip
if (!indexBranch) {
continue;
}
String branchName = branch.getName();
RevWalk revWalk = new RevWalk(reader);
RevCommit tip = revWalk.parseCommit(branch.getObjectId());
String tipId = tip.getId().getName();
String keyName = getBranchKey(branchName);
config.setString(CONF_ALIAS, null, keyName, branchName);
config.setString(CONF_BRANCH, null, keyName, tipId);
// index the blob contents of the tree
TreeWalk treeWalk = new TreeWalk(repository);
treeWalk.addTree(tip.getTree());
treeWalk.setRecursive(true);
Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
while (treeWalk.next()) {
// ensure path is not in a submodule
if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
}
}
ByteArrayOutputStream os = new ByteArrayOutputStream();
byte[] tmp = new byte[32767];
RevWalk commitWalk = new RevWalk(reader);
commitWalk.markStart(tip);
RevCommit commit;
while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
TreeWalk diffWalk = new TreeWalk(reader);
int parentCount = commit.getParentCount();
switch(parentCount) {
case 0:
diffWalk.addTree(new EmptyTreeIterator());
break;
case 1:
diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
break;
default:
// skip merge commits
continue;
}
diffWalk.addTree(getTree(commitWalk, commit));
diffWalk.setFilter(ANY_DIFF);
diffWalk.setRecursive(true);
while ((paths.size() > 0) && diffWalk.next()) {
String path = diffWalk.getPathString();
if (!paths.containsKey(path)) {
continue;
}
// remove path from set
ObjectId blobId = paths.remove(path);
result.blobCount++;
// index the blob metadata
String blobAuthor = getAuthor(commit);
String blobCommitter = getCommitter(commit);
String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
Document doc = new Document();
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
// determine extension to compare to the extension
// blacklist
String ext = null;
String name = path.toLowerCase();
if (name.indexOf('.') > -1) {
ext = name.substring(name.lastIndexOf('.') + 1);
}
// index the blob content
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
InputStream in = ldr.openStream();
int n;
while ((n = in.read(tmp)) > 0) {
os.write(tmp, 0, n);
}
in.close();
byte[] content = os.toByteArray();
String str = StringUtils.decodeString(content, encodings);
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
os.reset();
}
// add the blob to the index
writer.addDocument(doc);
}
}
os.close();
// index the tip commit object
if (indexedCommits.add(tipId)) {
Document doc = createDocument(tip, tags.get(tipId));
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
writer.addDocument(doc);
result.commitCount += 1;
result.branchCount += 1;
}
// traverse the log and index the previous commit objects
RevWalk historyWalk = new RevWalk(reader);
historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
RevCommit rev;
while ((rev = historyWalk.next()) != null) {
String hash = rev.getId().getName();
if (indexedCommits.add(hash)) {
Document doc = createDocument(rev, tags.get(hash));
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
writer.addDocument(doc);
result.commitCount += 1;
}
}
}
// finished
reader.close();
// commit all changes and reset the searcher
config.save();
writer.commit();
resetIndexSearcher(model.name);
result.success();
} catch (Exception e) {
logger.error("Exception while reindexing " + model.name, e);
}
return result;
}
use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.
the class LuceneService method getIndexSearcher.
/**
* Gets an index searcher for the repository.
*
* @param repository
* @return
* @throws IOException
*/
private IndexSearcher getIndexSearcher(String repository) throws IOException {
IndexSearcher searcher = searchers.get(repository);
if (searcher == null) {
IndexWriter writer = getIndexWriter(repository);
searcher = new IndexSearcher(DirectoryReader.open(writer, true));
searchers.put(repository, searcher);
}
return searcher;
}
Aggregations