Search in sources :

Example 86 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.

the class LuceneService method updateIndex.

/**
	 * Updates a repository index incrementally from the last indexed commits.
	 *
	 * @param model
	 * @param repository
	 * @return IndexResult
	 */
private IndexResult updateIndex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    try {
        FileBasedConfig config = getConfig(repository);
        config.load();
        // build a quick lookup of annotated tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getObjectId().getName())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }
        // detect branch deletion
        // first assume all branches are deleted and then remove each
        // existing branch from deletedBranches during indexing
        Set<String> deletedBranches = new TreeSet<String>();
        for (String alias : config.getNames(CONF_ALIAS)) {
            String branch = config.getString(CONF_ALIAS, null, alias);
            deletedBranches.add(branch);
        }
        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {

            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });
        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);
        // walk through each branches
        for (RefModel branch : branches) {
            String branchName = branch.getName();
            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
                // ignore internal meta branches
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }
            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }
            // remove this branch from the deletedBranches set
            deletedBranches.remove(branchName);
            // determine last commit
            String keyName = getBranchKey(branchName);
            String lastCommit = config.getString(CONF_BRANCH, null, keyName);
            List<RevCommit> revs;
            if (StringUtils.isEmpty(lastCommit)) {
                // new branch/unindexed branch, get all commits on branch
                revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
            } else {
                // pre-existing branch, get changes since last commit
                revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
            }
            if (revs.size() > 0) {
                result.branchCount += 1;
            }
            // reverse the list of commits so we start with the first commit
            Collections.reverse(revs);
            for (RevCommit commit : revs) {
                // index a commit
                result.add(index(model.name, repository, branchName, commit));
            }
            // update the config
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
            config.save();
        }
        // unless a branch really was deleted and no longer exists
        if (deletedBranches.size() > 0) {
            for (String branch : deletedBranches) {
                IndexWriter writer = getIndexWriter(model.name);
                writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
                writer.commit();
            }
        }
        result.success = true;
    } catch (Throwable t) {
        logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
    }
    return result;
}
Also used : RefModel(com.gitblit.models.RefModel) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ObjectId(org.eclipse.jgit.lib.ObjectId) Term(org.apache.lucene.index.Term) IndexWriter(org.apache.lucene.index.IndexWriter) TreeSet(java.util.TreeSet) List(java.util.List) ArrayList(java.util.ArrayList) FileBasedConfig(org.eclipse.jgit.storage.file.FileBasedConfig) RevCommit(org.eclipse.jgit.revwalk.RevCommit)

Example 87 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.

the class LuceneService method index.

/**
	 * Incrementally index an object for the repository.
	 *
	 * @param repositoryName
	 * @param doc
	 * @return true, if successful
	 */
private boolean index(String repositoryName, Document doc) {
    try {
        IndexWriter writer = getIndexWriter(repositoryName);
        writer.addDocument(doc);
        writer.commit();
        resetIndexSearcher(repositoryName);
        return true;
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
    }
    return false;
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) ParseException(java.text.ParseException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) IOException(java.io.IOException)

Example 88 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.

the class LuceneService method index.

/**
	 * Incrementally update the index with the specified commit for the
	 * repository.
	 *
	 * @param repositoryName
	 * @param repository
	 * @param branch
	 *            the fully qualified branch name (e.g. refs/heads/master)
	 * @param commit
	 * @return true, if successful
	 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);
            // re-index the blob
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
                // determine extension to compare to the extension
                // blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }
                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        writer.addDocument(doc);
                    }
                }
            }
        }
        writer.commit();
        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }
        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
    }
    return result;
}
Also used : StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) RefModel(com.gitblit.models.RefModel) PathChangeModel(com.gitblit.models.PathModel.PathChangeModel) IndexWriter(org.apache.lucene.index.IndexWriter) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ParseException(java.text.ParseException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) IOException(java.io.IOException)

Example 89 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.

the class LuceneService method reindex.

/**
	 * This completely indexes the repository and will destroy any existing
	 * index.
	 *
	 * @param repositoryName
	 * @param repository
	 * @return IndexResult
	 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }
        ObjectReader reader = repository.newObjectReader();
        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {

            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });
        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);
        // walk through each branch
        for (RefModel branch : branches) {
            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
                // skip internal meta branches
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }
            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }
            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();
            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);
            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);
            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];
            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);
            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch(parentCount) {
                    case 0:
                        diffWalk.addTree(new EmptyTreeIterator());
                        break;
                    case 1:
                        diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                        break;
                    default:
                        // skip merge commits
                        continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }
                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;
                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }
                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        os.reset();
                    }
                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }
            os.close();
            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }
            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }
        // finished
        reader.close();
        // commit all changes and reset the searcher
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}
Also used : RefModel(com.gitblit.models.RefModel) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) TreeSet(java.util.TreeSet) List(java.util.List) ArrayList(java.util.ArrayList) ObjectReader(org.eclipse.jgit.lib.ObjectReader) FileBasedConfig(org.eclipse.jgit.storage.file.FileBasedConfig) TreeWalk(org.eclipse.jgit.treewalk.TreeWalk) RevCommit(org.eclipse.jgit.revwalk.RevCommit) ObjectId(org.eclipse.jgit.lib.ObjectId) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RevWalk(org.eclipse.jgit.revwalk.RevWalk) TreeMap(java.util.TreeMap) ParseException(java.text.ParseException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) EmptyTreeIterator(org.eclipse.jgit.treewalk.EmptyTreeIterator) ObjectLoader(org.eclipse.jgit.lib.ObjectLoader)

Example 90 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project gitblit by gitblit.

the class LuceneService method getIndexSearcher.

/**
	 * Gets an index searcher for the repository.
	 *
	 * @param repository
	 * @return
	 * @throws IOException
	 */
private IndexSearcher getIndexSearcher(String repository) throws IOException {
    IndexSearcher searcher = searchers.get(repository);
    if (searcher == null) {
        IndexWriter writer = getIndexWriter(repository);
        searcher = new IndexSearcher(DirectoryReader.open(writer, true));
        searchers.put(repository, searcher);
    }
    return searcher;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexWriter(org.apache.lucene.index.IndexWriter)

Aggregations

IndexWriter (org.apache.lucene.index.IndexWriter)529 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)306 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)293 IndexReader (org.apache.lucene.index.IndexReader)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)136 DirectoryReader (org.apache.lucene.index.DirectoryReader)127 Term (org.apache.lucene.index.Term)125 IndexSearcher (org.apache.lucene.search.IndexSearcher)110 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)107 TextField (org.apache.lucene.document.TextField)104 RAMDirectory (org.apache.lucene.store.RAMDirectory)88 IOException (java.io.IOException)86 Field (org.apache.lucene.document.Field)86 TermQuery (org.apache.lucene.search.TermQuery)56 StringField (org.apache.lucene.document.StringField)52 BytesRef (org.apache.lucene.util.BytesRef)52 FieldType (org.apache.lucene.document.FieldType)50 Test (org.junit.Test)49 Query (org.apache.lucene.search.Query)45