Example 16 with BulkRequestBuilder

use of org.elasticsearch.action.bulk.BulkRequestBuilder in project stash-codesearch-plugin by palantir.

the class SearchUpdateJobImpl method doUpdate.

@Override
public void doUpdate(Client client, GitScm gitScm, GlobalSettings globalSettings) {
    if (!globalSettings.getIndexingEnabled()) {
        return;
    }
    GitCommandBuilderFactory builderFactory = gitScm.getCommandBuilderFactory();
    // List of bulk requests to execute sequentially at the end of the method
    RequestBuffer requestBuffer = new RequestBuffer(client);
    // Unique identifier for ref
    String refDesc = toString();
    // Hash of latest indexed commit
    String prevHash = getLatestIndexedHash(client);
    // Hash of latest commit on ref
    String newHash = getLatestHash(builderFactory);
    if (newHash == null) {
        log.error("Aborting since hash is invalid");
        return;
    }
    // Diff for files & process changes
    Set<SimpleEntry<String, String>> filesToAdd = new LinkedHashSet<SimpleEntry<String, String>>();
    try {
        // Get diff --raw -z tokens
        String[] diffToks = builderFactory.builder(repository).command("diff").argument("--raw").argument("--abbrev=40").argument("-z").argument(prevHash).argument(newHash).build(new StringOutputHandler(plf)).call().split("\u0000");
        // Process each diff --raw -z entry
        for (int curTok = 0; curTok < diffToks.length; ++curTok) {
            String[] statusToks = diffToks[curTok].split(" ");
            if (statusToks.length < 5) {
                break;
            }
            String status = statusToks[4];
            String oldBlob = statusToks[2];
            String newBlob = statusToks[3];
            // TODO: so many warnings!  Generics, CAN I HAZ THEM?
            // File added
            if (status.startsWith("A")) {
                String path = diffToks[++curTok];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
            // File copied
            } else if (status.startsWith("C")) {
                String toPath = diffToks[curTok += 2];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            // File deleted
            } else if (status.startsWith("D")) {
                String path = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
            // File modified
            } else if (status.startsWith("M") || status.startsWith("T")) {
                String path = diffToks[++curTok];
                if (!oldBlob.equals(newBlob)) {
                    requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
                    filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
                }
            // File renamed
            } else if (status.startsWith("R")) {
                String fromPath = diffToks[++curTok];
                String toPath = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, fromPath));
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            // Unknown change
            } else if (status.startsWith("X")) {
                throw new RuntimeException("Status letter 'X' is a git bug.");
            }
        }
    } catch (Exception e) {
        log.error("Caught error while diffing between {} and {}, aborting update", prevHash, newHash, e);
        return;
    }
    log.debug("{} update: adding {} files", refDesc, filesToAdd.size());
    // For files that already exist in the index, simply add the ref to the refs array.
    if (!filesToAdd.isEmpty()) {
        try {
            BulkRequestBuilder bulkFileRefUpdate = client.prepareBulk();
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                String blob = bppair.getKey(), path = bppair.getValue();
                bulkFileRefUpdate.add(buildAddFileToRef(client, blob, path));
            }
            BulkItemResponse[] responses = bulkFileRefUpdate.get().getItems();
            if (responses.length != filesToAddCopy.size()) {
                throw new IndexOutOfBoundsException("Bulk resp. array must have the same length as original request array");
            }
            // Process all update responses
            int count = 0;
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                if (!responses[count].isFailed()) {
                    // Update was successful, no need to index file
                    filesToAdd.remove(bppair);
                }
                ++count;
            }
        } catch (Exception e) {
            log.warn("file-ref update failed, performing upserts for all changes", e);
        }
    }
    log.debug("{} update: {} files to upsert", refDesc, filesToAdd.size());
    // Process all changes w/o corresponding documents
    if (!filesToAdd.isEmpty()) {
        try {
            // Get filesizes and prune all files that exceed the filesize limit
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            CatFileInputHandler catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                catFileInput.addObject(bppair.getKey());
            }
            String[] catFileMetadata = builderFactory.builder(repository).command("cat-file").argument("--batch-check").inputHandler(catFileInput).build(new StringOutputHandler(plf)).call().split("\n");
            if (filesToAdd.size() != catFileMetadata.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch-check returned wrong number of lines");
            }
            CatFileOutputHandler catFileOutput = new CatFileOutputHandler(plf);
            int count = 0;
            int maxFileSize = globalSettings.getMaxFileSize();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                int fs;
                try {
                    fs = Integer.parseInt(catFileMetadata[count].split("\\s")[2]);
                } catch (Exception e) {
                    fs = Integer.MAX_VALUE;
                }
                if (fs > maxFileSize) {
                    filesToAdd.remove(bppair);
                } else {
                    catFileOutput.addFile(fs);
                }
                ++count;
            }
            // Generate new cat-file input and retrieve file contents
            catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                catFileInput.addObject(bppair.getKey());
            }
            String[] fileContents = builderFactory.builder(repository).command("cat-file").argument("--batch=").inputHandler(catFileInput).build(catFileOutput).call();
            if (filesToAdd.size() != fileContents.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch= returned wrong number of files");
            }
            count = 0;
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                String blob = bppair.getKey(), path = bppair.getValue();
                String fileContent = fileContents[count];
                if (fileContent != null) {
                    requestBuffer.add(buildAddFileToRef(client, blob, path).setUpsert(jsonBuilder().startObject().field("project", repository.getProject().getKey()).field("repository", repository.getSlug()).field("blob", blob).field("path", path).field("extension", FilenameUtils.getExtension(path).toLowerCase()).field("contents", fileContent).field("charcount", fileContent.length()).field("linecount", countLines(fileContent)).startArray("refs").value(ref).endArray().endObject()));
                }
                ++count;
            }
        } catch (Exception e) {
            log.error("Caught error during new file indexing, aborting update", e);
            return;
        }
    }
    // Clear memory
    filesToAdd = null;
    // Get deleted commits
    String[] deletedCommits;
    try {
        deletedCommits = builderFactory.builder(repository).command("rev-list").argument(prevHash).argument("^" + newHash).build(new StringOutputHandler(plf)).call().split("\n+");
    } catch (Exception e) {
        log.error("Caught error while scanning for deleted commits, aborting update", e);
        return;
    }
    // Remove deleted commits from ES index
    int commitsDeleted = 0;
    for (String hash : deletedCommits) {
        if (hash.length() != 40) {
            continue;
        }
        requestBuffer.add(buildDeleteCommitFromRef(client, hash));
        ++commitsDeleted;
    }
    // Get new commits
    String[] newCommits;
    try {
        newCommits = builderFactory.builder(repository).command("log").argument("--format=%H%x02%ct%x02%an%x02%ae%x02%s%x02%b%x03").argument(newHash).argument("^" + prevHash).build(new StringOutputHandler(plf)).call().split("\u0003");
    } catch (Exception e) {
        log.error("Caught error while scanning for new commits, aborting update", e);
        return;
    }
    // Add new commits to ES index
    int commitsAdded = 0;
    for (String line : newCommits) {
        try {
            // Parse each commit "line" (not really lines, since they're delimited by \u0003)
            if (line.length() <= 40) {
                continue;
            }
            if (line.charAt(0) == '\n') {
                line = line.substring(1);
            }
            String[] commitToks = line.split("\u0002", 6);
            String hash = commitToks[0];
            long timestamp = Long.parseLong(commitToks[1]) * 1000;
            String authorName = commitToks[2];
            String authorEmail = commitToks[3];
            String subject = commitToks[4];
            // bodies are optional, so this might not be present
            String body = commitToks.length < 6 ? "" : commitToks[5];
            if (hash.length() != 40) {
                continue;
            }
            // Add commit to request
            requestBuffer.add(buildAddCommitToRef(client, hash).setUpsert(jsonBuilder().startObject().field("project", repository.getProject().getKey()).field("repository", repository.getSlug()).field("hash", hash).field("commitdate", new Date(timestamp)).field("authorname", authorName).field("authoremail", authorEmail).field("subject", subject).field("body", body).startArray("refs").value(ref).endArray().endObject()));
            ++commitsAdded;
        } catch (Exception e) {
            log.warn("Caught error while constructing request object, skipping update", e);
            continue;
        }
    }
    log.debug("{} update: adding {} commits, deleting {} commits", refDesc, commitsAdded, commitsDeleted);
    // Write remaining requests and wait for completion
    requestBuffer.flush();
    // Update latest indexed note
    addLatestIndexedNote(client, newHash);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RequestBuffer(com.palantir.stash.codesearch.elasticsearch.RequestBuffer) SimpleEntry(java.util.AbstractMap.SimpleEntry) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) Date(java.util.Date) GitCommandBuilderFactory(com.atlassian.stash.scm.git.GitCommandBuilderFactory) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder)
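
This example depends on getItems() returning bulk item responses in the same order the requests were added, which is what makes the length check and the index-based matching against filesToAddCopy valid. Below is a minimal standalone sketch of that update-with-upsert flow, assuming a connected Client; the index and type names ("files", "file") and the "ref" field are hypothetical placeholders, not taken from the plugin.

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import java.io.IOException;
import java.util.List;

import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.client.Client;

public class BulkUpsertSketch {
    static void addRefToDocs(Client client, List<String> docIds, String ref) throws IOException {
        BulkRequestBuilder bulk = client.prepareBulk();
        for (String id : docIds) {
            // Update the document if it exists; otherwise create it from the upsert body.
            bulk.add(client.prepareUpdate("files", "file", id)
                    .setDoc(jsonBuilder().startObject().field("ref", ref).endObject())
                    .setUpsert(jsonBuilder().startObject().field("ref", ref).endObject()));
        }
        // getItems() preserves insertion order: item i corresponds to docIds.get(i).
        BulkItemResponse[] items = bulk.get().getItems();
        for (int i = 0; i < items.length; i++) {
            if (items[i].isFailed()) {
                System.err.println("Update failed for " + docIds.get(i) + ": " + items[i].getFailureMessage());
            }
        }
    }
}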

Example 17 with BulkRequestBuilder

use of org.elasticsearch.action.bulk.BulkRequestBuilder in project titan by thinkaurelius.

the class ElasticSearchIndex method restore.

public void restore(Map<String, Map<String, List<IndexEntry>>> documents, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
    BulkRequestBuilder bulk = client.prepareBulk();
    int requests = 0;
    try {
        for (Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) {
            String store = stores.getKey();
            for (Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) {
                String docID = entry.getKey();
                List<IndexEntry> content = entry.getValue();
                if (content == null || content.size() == 0) {
                    // delete
                    if (log.isTraceEnabled())
                        log.trace("Deleting entire document {}", docID);
                    bulk.add(new DeleteRequest(indexName, store, docID));
                    requests++;
                } else {
                    // Add
                    if (log.isTraceEnabled())
                        log.trace("Adding entire document {}", docID);
                    bulk.add(new IndexRequest(indexName, store, docID).source(getNewDocument(content, informations.get(store), IndexMutation.determineTTL(content))));
                    requests++;
                }
            }
        }
        if (requests > 0)
            bulk.execute().actionGet();
    } catch (Exception e) {
        throw convert(e);
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder) IndexRequest(org.elasticsearch.action.index.IndexRequest) DeleteIndexRequest(org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) FileNotFoundException(java.io.FileNotFoundException) TitanException(com.thinkaurelius.titan.core.TitanException) IndexMissingException(org.elasticsearch.indices.IndexMissingException) IOException(java.io.IOException)
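
A key point in restore() is that delete and index operations can share a single bulk round trip, and that the bulk is only executed when it is non-empty (an empty bulk request fails validation). A minimal sketch of that shape, assuming a connected Client; the index/type names and document IDs are hypothetical.

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;

public class MixedBulkSketch {
    static void replaceDocument(Client client, String staleId, String newId, String json) {
        BulkRequestBuilder bulk = client.prepareBulk();
        // A delete and an index can travel in the same bulk request.
        bulk.add(new DeleteRequest("myindex", "mytype", staleId));
        bulk.add(new IndexRequest("myindex", "mytype", newId).source(json));
        // Mirror the requests > 0 guard above: an empty bulk would be rejected.
        if (bulk.numberOfActions() > 0) {
            bulk.execute().actionGet();
        }
    }
}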

Example 18 with BulkRequestBuilder

use of org.elasticsearch.action.bulk.BulkRequestBuilder in project metacat by Netflix.

the class ElasticSearchUtilImpl method hardDeleteDoc.

/**
     * Permanently delete index documents.
     *
     * @param type index type
     * @param ids  entity ids
     */
private void hardDeleteDoc(final String type, final List<String> ids) {
    try {
        RETRY_ES_PUBLISH.call(() -> {
            final BulkRequestBuilder bulkRequest = client.prepareBulk();
            ids.forEach(id -> bulkRequest.add(client.prepareDelete(esIndex, type, id)));
            final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
            log.info("Deleting metadata of type {} with count {}", type, ids.size());
            if (bulkResponse.hasFailures()) {
                for (BulkItemResponse item : bulkResponse.getItems()) {
                    if (item.isFailed()) {
                        log.error("Failed deleting metadata of type {} with id {}. Message: {}", type, item.getId(), item.getFailureMessage());
                        registry.counter(registry.createId(Metrics.CounterElasticSearchDelete.name()).withTags(Metrics.statusFailureMap)).increment();
                        log("ElasticSearchUtil.bulkDelete.item", type, item.getId(), null, item.getFailureMessage(), null, true);
                    }
                }
            }
            return null;
        });
    } catch (Exception e) {
        log.error(String.format("Failed deleting metadata of type %s with ids %s", type, ids), e);
        registry.counter(registry.createId(Metrics.CounterElasticSearchBulkDelete.name()).withTags(Metrics.statusFailureMap)).increment();
        log("ElasticSearchUtil.bulkDelete", type, ids.toString(), null, e.getMessage(), e, true);
    }
}
Also used : BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder) FailedNodeException(org.elasticsearch.action.FailedNodeException) NodeClosedException(org.elasticsearch.node.NodeClosedException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) TransportException(org.elasticsearch.transport.TransportException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) EsRejectedExecutionException(org.elasticsearch.common.util.concurrent.EsRejectedExecutionException)
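
hardDeleteDoc() sends every ID in one bulk request; for very large ID lists it can help to cap the bulk size. A minimal sketch of that variation, an assumption-level refinement rather than what metacat itself does, assuming a connected Client and Guava on the classpath; the batch size of 1000 is an arbitrary illustrative choice.

import java.util.List;

import com.google.common.collect.Lists;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;

public class ChunkedDeleteSketch {
    static void hardDelete(Client client, String index, String type, List<String> ids) {
        // Split large deletes so each bulk request stays a bounded size.
        for (List<String> chunk : Lists.partition(ids, 1000)) {
            BulkRequestBuilder bulk = client.prepareBulk();
            chunk.forEach(id -> bulk.add(client.prepareDelete(index, type, id)));
            BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // buildFailureMessage() concatenates the reason for every failed item.
                System.err.println(response.buildFailureMessage());
            }
        }
    }
}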

Example 19 with BulkRequestBuilder

use of org.elasticsearch.action.bulk.BulkRequestBuilder in project graylog2-server by Graylog2.

the class Messages method bulkIndex.

public boolean bulkIndex(final List<Map.Entry<IndexSet, Message>> messageList) {
    if (messageList.isEmpty()) {
        return true;
    }
    final BulkRequestBuilder requestBuilder = c.prepareBulk().setConsistencyLevel(WriteConsistencyLevel.ONE);
    for (Map.Entry<IndexSet, Message> entry : messageList) {
        requestBuilder.add(buildIndexRequest(entry.getKey().getWriteIndexAlias(), entry.getValue().toElasticSearchObject(invalidTimestampMeter), entry.getValue().getId()));
    }
    final BulkResponse response = runBulkRequest(requestBuilder.request());
    LOG.debug("Index: Bulk indexed {} messages, took {} ms, failures: {}", response.getItems().length, response.getTookInMillis(), response.hasFailures());
    if (response.hasFailures()) {
        propagateFailure(response.getItems(), messageList, response.buildFailureMessage());
    }
    return !response.hasFailures();
}
Also used : ResultMessage(org.graylog2.indexer.results.ResultMessage) Message(org.graylog2.plugin.Message) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) IndexSet(org.graylog2.indexer.IndexSet)
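
bulkIndex() hands the built request to a helper via requestBuilder.request(); the builder is a fluent wrapper around a BulkRequest, which can also be executed directly on the Client. A minimal sketch of what such a helper might look like, an assumption since runBulkRequest() itself is not shown here.

import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;

public class RunBulkSketch {
    static BulkResponse runBulkRequest(Client client, BulkRequest request) {
        // client.bulk() returns an ActionFuture; actionGet() blocks until the response arrives.
        return client.bulk(request).actionGet();
    }
}

Note that hasFailures() on the returned response is true if any single item failed, even when the rest succeeded, which is why bulkIndex() inspects it before deciding the return value.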

Example 20 with BulkRequestBuilder

use of org.elasticsearch.action.bulk.BulkRequestBuilder in project beam by apache.

the class ElasticSearchIOTestUtils method insertTestDocuments.

/** Inserts the given number of test documents into Elasticsearch. */
static void insertTestDocuments(String index, String type, long numDocs, Client client) throws Exception {
    final BulkRequestBuilder bulkRequestBuilder = client.prepareBulk().setRefresh(true);
    List<String> data = ElasticSearchIOTestUtils.createDocuments(numDocs, ElasticSearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
    for (String document : data) {
        bulkRequestBuilder.add(client.prepareIndex(index, type, null).setSource(document));
    }
    final BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
    if (bulkResponse.hasFailures()) {
        throw new IOException(String.format("Cannot insert test documents in index %s : %s", index, bulkResponse.buildFailureMessage()));
    }
}
Also used : BulkResponse(org.elasticsearch.action.bulk.BulkResponse) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder) IOException(java.io.IOException)
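
setRefresh(true) forces an index refresh once the bulk completes, which is what lets test code query the inserted documents immediately afterwards. A minimal sketch of that effect, assuming a connected Client; the method and the index/type parameters are hypothetical.

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.client.Client;

public class RefreshSketch {
    static long indexAndCount(Client client, String index, String type, String json) {
        BulkRequestBuilder bulk = client.prepareBulk().setRefresh(true);
        // A null ID lets Elasticsearch auto-generate one, as in the test utility above.
        bulk.add(client.prepareIndex(index, type, null).setSource(json));
        bulk.execute().actionGet();
        // Because of the refresh, the new document is already visible to this search.
        return client.prepareSearch(index).setTypes(type).setSize(0)
                .execute().actionGet().getHits().getTotalHits();
    }
}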

Aggregations

BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder) 45
BulkResponse (org.elasticsearch.action.bulk.BulkResponse) 29
BulkItemResponse (org.elasticsearch.action.bulk.BulkItemResponse) 13
SearchResponse (org.elasticsearch.action.search.SearchResponse) 8
SearchHit (org.elasticsearch.search.SearchHit) 8
IOException (java.io.IOException) 7
IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder) 7
EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException) 6
HashMap (java.util.HashMap) 5
IllegalBehaviorStateException (org.dbflute.exception.IllegalBehaviorStateException) 4
IndexRequest (org.elasticsearch.action.index.IndexRequest) 4
SearchRequestBuilder (org.elasticsearch.action.search.SearchRequestBuilder) 4
UpdateRequestBuilder (org.elasticsearch.action.update.UpdateRequestBuilder) 4
SearchHits (org.elasticsearch.search.SearchHits) 4
ArrayList (java.util.ArrayList) 3
HashSet (java.util.HashSet) 3
List (java.util.List) 3
Map (java.util.Map) 3
ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException) 3
FailedNodeException (org.elasticsearch.action.FailedNodeException) 3