
Example 1 with RequestBuffer

Use of com.palantir.stash.codesearch.elasticsearch.RequestBuffer in the project stash-codesearch-plugin by palantir.

From the class SearchUpdateJobImpl, method doUpdate:

@Override
public void doUpdate(Client client, GitScm gitScm, GlobalSettings globalSettings) {
    if (!globalSettings.getIndexingEnabled()) {
        return;
    }
    GitCommandBuilderFactory builderFactory = gitScm.getCommandBuilderFactory();
    // List of bulk requests to execute sequentially at the end of the method
    RequestBuffer requestBuffer = new RequestBuffer(client);
    // Unique identifier for ref
    String refDesc = toString();
    // Hash of latest indexed commit
    String prevHash = getLatestIndexedHash(client);
    // Hash of latest commit on ref
    String newHash = getLatestHash(builderFactory);
    if (newHash == null) {
        log.error("Aborting since hash is invalid");
        return;
    }
    // Diff for files & process changes
    Set<SimpleEntry<String, String>> filesToAdd = new LinkedHashSet<SimpleEntry<String, String>>();
    try {
        // Get diff --raw -z tokens
        // Entries in `diff --raw -z` output are NUL-delimited, so split on \u0000
        String[] diffToks = builderFactory.builder(repository)
            .command("diff")
            .argument("--raw")
            .argument("--abbrev=40")
            .argument("-z")
            .argument(prevHash)
            .argument(newHash)
            .build(new StringOutputHandler(plf))
            .call()
            .split("\u0000");
        // Process each diff --raw -z entry
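        // Each metadata entry has the form
        //   :<old mode> <new mode> <old sha1> <new sha1> <status>
        // and is followed by one NUL-separated path token (copies and renames
        // carry two path tokens: source, then destination)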
        for (int curTok = 0; curTok < diffToks.length; ++curTok) {
            String[] statusToks = diffToks[curTok].split(" ");
            if (statusToks.length < 5) {
                break;
            }
            String status = statusToks[4];
            String oldBlob = statusToks[2];
            String newBlob = statusToks[3];
            // TODO: so many warnings!  Generics, CAN I HAZ THEM?
            // File added
            if (status.startsWith("A")) {
                String path = diffToks[++curTok];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
            // File copied
            } else if (status.startsWith("C")) {
                String toPath = diffToks[curTok += 2];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            // File deleted
            } else if (status.startsWith("D")) {
                String path = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
            // File modified
            } else if (status.startsWith("M") || status.startsWith("T")) {
                String path = diffToks[++curTok];
                if (!oldBlob.equals(newBlob)) {
                    requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
                    filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
                }
            // File renamed
            } else if (status.startsWith("R")) {
                String fromPath = diffToks[++curTok];
                String toPath = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, fromPath));
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            // Unknown change
            } else if (status.startsWith("X")) {
                throw new RuntimeException("Status letter 'X' is a git bug.");
            }
        }
    } catch (Exception e) {
        log.error("Caught error while diffing between {} and {}, aborting update", prevHash, newHash, e);
        return;
    }
    log.debug("{} update: adding {} files", refDesc, filesToAdd.size());
    // For blob/path pairs whose documents already exist in the index, we can
    // simply add the ref to the refs array instead of re-indexing the contents.
    if (!filesToAdd.isEmpty()) {
        try {
            BulkRequestBuilder bulkFileRefUpdate = client.prepareBulk();
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                String blob = bppair.getKey(), path = bppair.getValue();
                bulkFileRefUpdate.add(buildAddFileToRef(client, blob, path));
            }
            BulkItemResponse[] responses = bulkFileRefUpdate.get().getItems();
            if (responses.length != filesToAddCopy.size()) {
                throw new IndexOutOfBoundsException("Bulk resp. array must have the same length as original request array");
            }
            // Process all update responses
            int count = 0;
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                if (!responses[count].isFailed()) {
                    // Update was successful, no need to index file
                    filesToAdd.remove(bppair);
                }
                ++count;
            }
        } catch (Exception e) {
            log.warn("file-ref update failed, performing upserts for all changes", e);
        }
    }
    log.debug("{} update: {} files to upsert", refDesc, filesToAdd.size());
    // Process all changes w/o corresponding documents
    if (!filesToAdd.isEmpty()) {
        try {
            // Get filesizes and prune all files that exceed the filesize limit
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            CatFileInputHandler catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                catFileInput.addObject(bppair.getKey());
            }
            // batch-check output is one "<sha1> <type> <size>" line per object
            String[] catFileMetadata = builderFactory.builder(repository)
                .command("cat-file")
                .argument("--batch-check")
                .inputHandler(catFileInput)
                .build(new StringOutputHandler(plf))
                .call()
                .split("\n");
            if (filesToAdd.size() != catFileMetadata.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch-check returned wrong number of lines");
            }
            CatFileOutputHandler catFileOutput = new CatFileOutputHandler(plf);
            int count = 0;
            int maxFileSize = globalSettings.getMaxFileSize();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                int fs;
                try {
                    fs = Integer.parseInt(catFileMetadata[count].split("\\s")[2]);
                } catch (Exception e) {
                    fs = Integer.MAX_VALUE;
                }
                if (fs > maxFileSize) {
                    filesToAdd.remove(bppair);
                } else {
                    catFileOutput.addFile(fs);
                }
                ++count;
            }
            // Generate new cat-file input and retrieve file contents
            catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                catFileInput.addObject(bppair.getKey());
            }
            String[] fileContents = builderFactory.builder(repository)
                .command("cat-file")
                .argument("--batch=")
                .inputHandler(catFileInput)
                .build(catFileOutput)
                .call();
            if (filesToAdd.size() != fileContents.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch= returned wrong number of files");
            }
            count = 0;
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                String blob = bppair.getKey(), path = bppair.getValue();
                String fileContent = fileContents[count];
                if (fileContent != null) {
                    requestBuffer.add(buildAddFileToRef(client, blob, path).setUpsert(jsonBuilder()
                        .startObject()
                            .field("project", repository.getProject().getKey())
                            .field("repository", repository.getSlug())
                            .field("blob", blob)
                            .field("path", path)
                            .field("extension", FilenameUtils.getExtension(path).toLowerCase())
                            .field("contents", fileContent)
                            .field("charcount", fileContent.length())
                            .field("linecount", countLines(fileContent))
                            .startArray("refs").value(ref).endArray()
                        .endObject()));
                }
                ++count;
            }
        } catch (Exception e) {
            log.error("Caught error during new file indexing, aborting update", e);
            return;
        }
    }
    // Clear memory
    filesToAdd = null;
    // Get deleted commits
    String[] deletedCommits;
    try {
        deletedCommits = builderFactory.builder(repository)
            .command("rev-list")
            .argument(prevHash)
            .argument("^" + newHash)
            .build(new StringOutputHandler(plf))
            .call()
            .split("\n+");
    } catch (Exception e) {
        log.error("Caught error while scanning for deleted commits, aborting update", e);
        return;
    }
    // Remove deleted commits from ES index
    int commitsDeleted = 0;
    for (String hash : deletedCommits) {
        if (hash.length() != 40) {
            continue;
        }
        requestBuffer.add(buildDeleteCommitFromRef(client, hash));
        ++commitsDeleted;
    }
    // Get new commits
    String[] newCommits;
    try {
        newCommits = builderFactory.builder(repository)
            .command("log")
            .argument("--format=%H%x02%ct%x02%an%x02%ae%x02%s%x02%b%x03")
            .argument(newHash)
            .argument("^" + prevHash)
            .build(new StringOutputHandler(plf))
            .call()
            // Records are terminated by ETX (0x03); split on that byte
            .split("\u0003");
    } catch (Exception e) {
        log.error("Caught error while scanning for new commits, aborting update", e);
        return;
    }
    // Add new commits to ES index
    int commitsAdded = 0;
    for (String line : newCommits) {
        try {
            // Parse each commit "line" (not really lines, since they're delimited by ETX, \u0003)
            if (line.length() <= 40) {
                continue;
            }
            if (line.charAt(0) == '\n') {
                line = line.substring(1);
            }
            // Fields within a record are separated by STX (0x02); a limit of 6 keeps the body token intact
            String[] commitToks = line.split("\u0002", 6);
            String hash = commitToks[0];
            long timestamp = Long.parseLong(commitToks[1]) * 1000;
            String authorName = commitToks[2];
            String authorEmail = commitToks[3];
            String subject = commitToks[4];
            // bodies are optional, so this might not be present
            String body = commitToks.length < 6 ? "" : commitToks[5];
            if (hash.length() != 40) {
                continue;
            }
            // Add commit to request
            requestBuffer.add(buildAddCommitToRef(client, hash).setUpsert(jsonBuilder()
                .startObject()
                    .field("project", repository.getProject().getKey())
                    .field("repository", repository.getSlug())
                    .field("hash", hash)
                    .field("commitdate", new Date(timestamp))
                    .field("authorname", authorName)
                    .field("authoremail", authorEmail)
                    .field("subject", subject)
                    .field("body", body)
                    .startArray("refs").value(ref).endArray()
                .endObject()));
            ++commitsAdded;
        } catch (Exception e) {
            log.warn("Caught error while constructing request object, skipping update", e);
            continue;
        }
    }
    log.debug("{} update: adding {} commits, deleting {} commits", refDesc, commitsAdded, commitsDeleted);
    // Write remaining requests and wait for completion
    requestBuffer.flush();
    // Update latest indexed note
    addLatestIndexedNote(client, newHash);
}
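
RequestBuffer itself is not shown on this page. Going only by the calls above (a constructor taking a Client, an add() accepting the request builders produced by the build* helpers, and a final flush()), a minimal sketch of the buffering pattern might look like the following. The batch threshold, the overloads, and the class internals are illustrative assumptions, not the plugin's actual implementation.

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.delete.DeleteRequestBuilder;
import org.elasticsearch.action.update.UpdateRequestBuilder;
import org.elasticsearch.client.Client;

// Hypothetical sketch: queue requests, execute them in bulk batches
public class RequestBufferSketch {

    private static final int BULK_SIZE = 1000; // assumed batch threshold

    private final Client client;
    private BulkRequestBuilder bulk;
    private int pending = 0;

    public RequestBufferSketch(Client client) {
        this.client = client;
        this.bulk = client.prepareBulk();
    }

    // Queue an update request, flushing when the batch is full
    public void add(UpdateRequestBuilder request) {
        bulk.add(request);
        flushIfFull();
    }

    // Queue a delete request, flushing when the batch is full
    public void add(DeleteRequestBuilder request) {
        bulk.add(request);
        flushIfFull();
    }

    private void flushIfFull() {
        if (++pending >= BULK_SIZE) {
            flush();
        }
    }

    // Execute any queued requests and wait for the responses
    public void flush() {
        if (pending > 0) {
            bulk.get();
            bulk = client.prepareBulk();
            pending = 0;
        }
    }
}

Whatever the real internals, the effect in doUpdate is that file and commit changes accumulated during the diff walk are written in a small number of bulk requests, with the final flush() ensuring nothing is left queued.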
Also used: LinkedHashSet (java.util.LinkedHashSet), RequestBuffer (com.palantir.stash.codesearch.elasticsearch.RequestBuffer), SimpleEntry (java.util.AbstractMap.SimpleEntry), BulkItemResponse (org.elasticsearch.action.bulk.BulkItemResponse), Date (java.util.Date), GitCommandBuilderFactory (com.atlassian.stash.scm.git.GitCommandBuilderFactory), BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder)

Aggregations

GitCommandBuilderFactory (com.atlassian.stash.scm.git.GitCommandBuilderFactory): 1
RequestBuffer (com.palantir.stash.codesearch.elasticsearch.RequestBuffer): 1
SimpleEntry (java.util.AbstractMap.SimpleEntry): 1
Date (java.util.Date): 1
LinkedHashSet (java.util.LinkedHashSet): 1
BulkItemResponse (org.elasticsearch.action.bulk.BulkItemResponse): 1
BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder): 1
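
As a side note on the --format string used for new commits: each record carries hash, committer timestamp, author name, author email, subject, and body, separated by STX (0x02) and terminated by ETX (0x03). The following self-contained snippet illustrates the field split with made-up sample values.

import java.util.Date;

public class CommitRecordDemo {
    public static void main(String[] args) {
        // Fabricated sample record in the %H%x02%ct%x02%an%x02%ae%x02%s%x02%b layout
        String record = "0123456789abcdef0123456789abcdef01234567"
                + "\u0002" + "1400000000"
                + "\u0002" + "Jane Doe"
                + "\u0002" + "jane@example.com"
                + "\u0002" + "Fix ref indexing"
                + "\u0002" + "Optional body text";
        // A limit of 6 mirrors the parsing above: anything after the fifth
        // separator stays in the body token
        String[] toks = record.split("\u0002", 6);
        System.out.println("hash       = " + toks[0]);
        System.out.println("commitdate = " + new Date(Long.parseLong(toks[1]) * 1000));
        System.out.println("author     = " + toks[2] + " <" + toks[3] + ">");
        System.out.println("subject    = " + toks[4]);
        System.out.println("body       = " + (toks.length < 6 ? "" : toks[5]));
    }
}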