Example 96 with ParameterizedMessage

Use of org.apache.logging.log4j.message.ParameterizedMessage in project elasticsearch by elastic.

From the class SnapshotShardsService, method processIndexShardSnapshots.

/**
     * Checks if any new shards should be snapshotted on this node
     *
     * @param event cluster state changed event
     */
private void processIndexShardSnapshots(ClusterChangedEvent event) {
    SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
    Map<Snapshot, SnapshotShards> survivors = new HashMap<>();
    // First, remove snapshots that are no longer there
    for (Map.Entry<Snapshot, SnapshotShards> entry : shardSnapshots.entrySet()) {
        final Snapshot snapshot = entry.getKey();
        if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) {
            survivors.put(entry.getKey(), entry.getValue());
        } else {
            // abort any shard snapshots that are still running for snapshots that were
            // removed from the cluster state update being processed here
            for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().shards.values()) {
                if (snapshotStatus.stage() == Stage.INIT || snapshotStatus.stage() == Stage.STARTED) {
                    snapshotStatus.abort();
                }
            }
        }
    }
    // For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
    // snapshots in the future
    Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> newSnapshots = new HashMap<>();
    // Now go through all snapshots and update existing or create missing
    final String localNodeId = event.state().nodes().getLocalNodeId();
    final DiscoveryNode masterNode = event.state().nodes().getMasterNode();
    final Map<Snapshot, Map<String, IndexId>> snapshotIndices = new HashMap<>();
    if (snapshotsInProgress != null) {
        for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
            snapshotIndices.put(entry.snapshot(), entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())));
            if (entry.state() == State.STARTED) {
                Map<ShardId, IndexShardSnapshotStatus> startedShards = new HashMap<>();
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                    // Add all new shards to start processing on
                    if (localNodeId.equals(shard.value.nodeId())) {
                        if (shard.value.state() == State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.key))) {
                            logger.trace("[{}] - Adding shard to the queue", shard.key);
                            startedShards.put(shard.key, new IndexShardSnapshotStatus());
                        }
                    }
                }
                if (!startedShards.isEmpty()) {
                    newSnapshots.put(entry.snapshot(), startedShards);
                    if (snapshotShards != null) {
                        // We already saw this snapshot but we need to add more started shards
                        Map<ShardId, IndexShardSnapshotStatus> shards = new HashMap<>();
                        // Put all shards that were already running on this node
                        shards.putAll(snapshotShards.shards);
                        // Put all newly started shards
                        shards.putAll(startedShards);
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(shards)));
                    } else {
                        // Brand new snapshot that we haven't seen before
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(startedShards)));
                    }
                }
            } else if (entry.state() == State.ABORTED) {
                // Abort all running shards for this snapshot
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                if (snapshotShards != null) {
                    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                        IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.key);
                        if (snapshotStatus != null) {
                            switch(snapshotStatus.stage()) {
                                case INIT:
                                case STARTED:
                                    snapshotStatus.abort();
                                    break;
                                case FINALIZE:
                                    logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshot(), shard.key);
                                    break;
                                case DONE:
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                                    break;
                                case FAILURE:
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.FAILED, snapshotStatus.failure()), masterNode);
                                    break;
                                default:
                                    throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
                            }
                        }
                    }
                }
            }
        }
    }
    // Update the list of snapshots that we saw and tried to start
    // If startup of these shards fails later, we don't want to try starting these shards again
    shutdownLock.lock();
    try {
        shardSnapshots = unmodifiableMap(survivors);
        if (shardSnapshots.isEmpty()) {
            // Notify all waiting threads that no snapshots are running anymore
            shutdownCondition.signalAll();
        }
    } finally {
        shutdownLock.unlock();
    }
    // We have new shards to start
    if (newSnapshots.isEmpty() == false) {
        Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
        for (final Map.Entry<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> entry : newSnapshots.entrySet()) {
            Map<String, IndexId> indicesMap = snapshotIndices.get(entry.getKey());
            assert indicesMap != null;
            for (final Map.Entry<ShardId, IndexShardSnapshotStatus> shardEntry : entry.getValue().entrySet()) {
                final ShardId shardId = shardEntry.getKey();
                try {
                    final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id());
                    final IndexId indexId = indicesMap.get(shardId.getIndexName());
                    assert indexId != null;
                    executor.execute(new AbstractRunnable() {

                        @Override
                        public void doRun() {
                            snapshot(indexShard, entry.getKey(), indexId, shardEntry.getValue());
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                        }

                        @Override
                        public void onFailure(Exception e) {
                            logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] [{}] failed to create snapshot", shardId, entry.getKey()), e);
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                        }
                    });
                } catch (Exception e) {
                    updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                }
            }
        }
    }
}
Also used : IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) ShardId(org.elasticsearch.index.shard.ShardId) Executor(java.util.concurrent.Executor) ClusterStateTaskExecutor(org.elasticsearch.cluster.ClusterStateTaskExecutor) Supplier(org.apache.logging.log4j.util.Supplier) IndexId(org.elasticsearch.repositories.IndexId) IndexShard(org.elasticsearch.index.shard.IndexShard) IndexShardSnapshotFailedException(org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException) SnapshotFailedEngineException(org.elasticsearch.index.engine.SnapshotFailedEngineException) IOException(java.io.IOException) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) Map(java.util.Map) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap)
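
The warn call inside onFailure above is the pattern this page collects: the ParameterizedMessage is constructed inside a Log4j 2 Supplier lambda, so the message text (and the formatting of its arguments) is only produced if WARN logging is actually enabled. A minimal, self-contained sketch of the idiom; the class and argument names are illustrative, not taken from Elasticsearch:

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;

// Illustrative sketch, not Elasticsearch code.
public class LazyLoggingSketch {
    private static final Logger logger = LogManager.getLogger(LazyLoggingSketch.class);

    void reportFailure(String shardId, String snapshot, Exception e) {
        // The (Supplier<?>) cast disambiguates between the warn(Supplier<?>, Throwable)
        // and warn(MessageSupplier, Throwable) overloads, both of which this lambda
        // could satisfy. The lambda defers building the message until Log4j has
        // confirmed that WARN is enabled.
        logger.warn((Supplier<?>) () -> new ParameterizedMessage(
                "[{}] [{}] failed to create snapshot", shardId, snapshot), e);
    }
}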

Example 97 with ParameterizedMessage

Use of org.apache.logging.log4j.message.ParameterizedMessage in project elasticsearch by elastic.

From the class StatsIT, method assertShardExecutionState.

private void assertShardExecutionState(SearchResponse response, int expectedFailures) throws Exception {
    ShardSearchFailure[] failures = response.getShardFailures();
    if (failures.length != expectedFailures) {
        for (ShardSearchFailure failure : failures) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("Shard Failure: {}", failure), failure.getCause());
        }
        fail("Unexpected shard failures!");
    }
    assertThat("Not all shards are initialized", response.getSuccessfulShards(), equalTo(response.getTotalShards()));
}
Also used : ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ShardSearchFailure(org.elasticsearch.action.search.ShardSearchFailure)
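
For reference, the ParameterizedMessage built above replaces each {} placeholder positionally with the argument's toString() value, while the cause is passed to the logger separately as the event's throwable. A tiny stand-alone illustration of the formatting; the failure text is made up:

import org.apache.logging.log4j.message.ParameterizedMessage;

// Illustrative sketch, not Elasticsearch code.
public class MessageFormattingSketch {
    public static void main(String[] args) {
        // Each {} placeholder is replaced positionally with the argument's toString() value.
        ParameterizedMessage msg =
                new ParameterizedMessage("Shard Failure: {}", "shard [test][0]: query failed");
        System.out.println(msg.getFormattedMessage());
        // prints: Shard Failure: shard [test][0]: query failed
    }
}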

Example 98 with ParameterizedMessage

Use of org.apache.logging.log4j.message.ParameterizedMessage in project elasticsearch by elastic.

From the class DiscoveryWithServiceDisruptionsIT, method testAckedIndexing.

/**
     * Tests that we do not lose documents whose indexing requests were successful, under a randomly selected disruption scheme.
     * We also collect & report the type of indexing failures that occur.
     * <p>
     * This test is a superset of tests run in the Jepsen test suite, with the exception of versioned updates
     */
@TestLogging("_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE,discovery:TRACE," + "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," + "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE")
public void testAckedIndexing() throws Exception {
    final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
    final String timeout = seconds + "s";
    final List<String> nodes = startCluster(rarely() ? 5 : 3);
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2)).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2)).put(IndexSettings.INDEX_SEQ_NO_CHECKPOINT_SYNC_INTERVAL.getKey(), randomBoolean() ? "5s" : "200ms")));
    ensureGreen();
    ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
    logger.info("disruption scheme [{}] added", disruptionScheme);
    // id -> node sent.
    final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>();
    final AtomicBoolean stop = new AtomicBoolean(false);
    List<Thread> indexers = new ArrayList<>(nodes.size());
    List<Semaphore> semaphores = new ArrayList<>(nodes.size());
    final AtomicInteger idGenerator = new AtomicInteger(0);
    final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>();
    final List<Exception> exceptedExceptions = Collections.synchronizedList(new ArrayList<Exception>());
    logger.info("starting indexers");
    try {
        for (final String node : nodes) {
            final Semaphore semaphore = new Semaphore(0);
            semaphores.add(semaphore);
            final Client client = client(node);
            final String name = "indexer_" + indexers.size();
            final int numPrimaries = getNumShards("test").numPrimaries;
            Thread thread = new Thread(() -> {
                while (!stop.get()) {
                    String id = null;
                    try {
                        if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
                            continue;
                        }
                        logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
                        try {
                            id = Integer.toString(idGenerator.incrementAndGet());
                            int shard = Math.floorMod(Murmur3HashFunction.hash(id), numPrimaries);
                            logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
                            IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}", XContentType.JSON).setTimeout(timeout).get(timeout);
                            assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
                            ackedDocs.put(id, node);
                            logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
                        } catch (ElasticsearchException e) {
                            exceptedExceptions.add(e);
                            final String docId = id;
                            logger.trace((Supplier<?>) () -> new ParameterizedMessage("[{}] failed id [{}] through node [{}]", name, docId, node), e);
                        } finally {
                            countDownLatchRef.get().countDown();
                            logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
                        }
                    } catch (InterruptedException e) {
                    // fine - semaphore interrupt
                    } catch (AssertionError | Exception e) {
                        logger.info((Supplier<?>) () -> new ParameterizedMessage("unexpected exception in background thread of [{}]", node), e);
                    }
                }
            });
            thread.setName(name);
            thread.start();
            indexers.add(thread);
        }
        int docsPerIndexer = randomInt(3);
        logger.info("indexing {} docs per indexer before partition", docsPerIndexer);
        countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
        for (Semaphore semaphore : semaphores) {
            semaphore.release(docsPerIndexer);
        }
        assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
        for (int iter = 1 + randomInt(2); iter > 0; iter--) {
            logger.info("starting disruptions & indexing (iteration [{}])", iter);
            disruptionScheme.startDisrupting();
            docsPerIndexer = 1 + randomInt(5);
            logger.info("indexing {} docs per indexer during partition", docsPerIndexer);
            countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
            Collections.shuffle(semaphores, random());
            for (Semaphore semaphore : semaphores) {
                assertThat(semaphore.availablePermits(), equalTo(0));
                semaphore.release(docsPerIndexer);
            }
            logger.info("waiting for indexing requests to complete");
            assertTrue(countDownLatchRef.get().await(docsPerIndexer * seconds * 1000 + 2000, TimeUnit.MILLISECONDS));
            logger.info("stopping disruption");
            disruptionScheme.stopDisrupting();
            for (String node : internalCluster().getNodeNames()) {
                ensureStableCluster(nodes.size(), TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()), true, node);
            }
            ensureGreen("test");
            logger.info("validating successful docs");
            assertBusy(() -> {
                for (String node : nodes) {
                    try {
                        logger.debug("validating through node [{}] ([{}] acked docs)", node, ackedDocs.size());
                        for (String id : ackedDocs.keySet()) {
                            assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found", client(node).prepareGet("test", "type", id).setPreference("_local").get().isExists());
                        }
                    } catch (AssertionError | NoShardAvailableActionException e) {
                        throw new AssertionError(e.getMessage() + " (checked via node [" + node + "])", e);
                    }
                }
            }, 30, TimeUnit.SECONDS);
            logger.info("done validating (iteration [{}])", iter);
        }
    } finally {
        if (exceptedExceptions.size() > 0) {
            StringBuilder sb = new StringBuilder();
            for (Exception e : exceptedExceptions) {
                sb.append("\n").append(e.getMessage());
            }
            logger.debug("Indexing exceptions during disruption: {}", sb);
        }
        logger.info("shutting down indexers");
        stop.set(true);
        for (Thread indexer : indexers) {
            indexer.interrupt();
            indexer.join(60000);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) Semaphore(java.util.concurrent.Semaphore) ElasticsearchException(org.elasticsearch.ElasticsearchException) Supplier(org.apache.logging.log4j.util.Supplier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Client(org.elasticsearch.client.Client) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ElasticsearchException(org.elasticsearch.ElasticsearchException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IndexResponse(org.elasticsearch.action.index.IndexResponse) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) TestLogging(org.elasticsearch.test.junit.annotations.TestLogging)
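
One detail worth noting in the indexer thread above is the `final String docId = id;` copy before the trace call: `id` is reassigned inside the loop, so it is not effectively final and cannot be captured by the message lambda directly. A condensed sketch of that capture pattern, with illustrative names:

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;

// Illustrative sketch, not Elasticsearch code.
public class CaptureSketch {
    private static final Logger logger = LogManager.getLogger(CaptureSketch.class);

    void indexLoop() {
        String id = null; // reassigned below, so not effectively final
        for (int i = 0; i < 3; i++) {
            id = Integer.toString(i);
            try {
                throw new RuntimeException("simulated indexing failure");
            } catch (RuntimeException e) {
                // A lambda can only capture (effectively) final locals, so the
                // current value of id is copied into a final variable first.
                final String docId = id;
                logger.trace((Supplier<?>) () ->
                        new ParameterizedMessage("failed id [{}]", docId), e);
            }
        }
    }
}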

Example 99 with ParameterizedMessage

Use of org.apache.logging.log4j.message.ParameterizedMessage in project elasticsearch by elastic.

From the class TranslogTests, method testConcurrentWriteViewsAndSnapshot.

/**
     * Tests that concurrent readers and writers maintain view and snapshot semantics
     */
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
    final Thread[] writers = new Thread[randomIntBetween(1, 10)];
    final Thread[] readers = new Thread[randomIntBetween(1, 10)];
    final int flushEveryOps = randomIntBetween(5, 100);
    // used to notify the main thread that enough operations have been written for it to simulate a flush
    final AtomicReference<CountDownLatch> writtenOpsLatch = new AtomicReference<>(new CountDownLatch(0));
    final AtomicLong idGenerator = new AtomicLong();
    final CyclicBarrier barrier = new CyclicBarrier(writers.length + readers.length + 1);
    // a map of all written ops and their returned location.
    final Map<Translog.Operation, Translog.Location> writtenOps = ConcurrentCollections.newConcurrentMap();
    // a signal for all threads to stop
    final AtomicBoolean run = new AtomicBoolean(true);
    // any errors on threads
    final List<Exception> errors = new CopyOnWriteArrayList<>();
    logger.debug("using [{}] readers. [{}] writers. flushing every ~[{}] ops.", readers.length, writers.length, flushEveryOps);
    for (int i = 0; i < writers.length; i++) {
        final String threadName = "writer_" + i;
        final int threadId = i;
        writers[i] = new Thread(new AbstractRunnable() {

            @Override
            public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
                barrier.await();
                int counter = 0;
                while (run.get()) {
                    long id = idGenerator.incrementAndGet();
                    final Translog.Operation op;
                    final Translog.Operation.Type type = Translog.Operation.Type.values()[((int) (id % Translog.Operation.Type.values().length))];
                    switch(type) {
                        case CREATE:
                        case INDEX:
                            op = new Translog.Index("type", "" + id, new byte[] { (byte) id });
                            break;
                        case DELETE:
                            op = new Translog.Delete(newUid("" + id));
                            break;
                        case NO_OP:
                            op = new Translog.NoOp(id, id, Long.toString(id));
                            break;
                        default:
                            throw new AssertionError("unsupported operation type [" + type + "]");
                    }
                    Translog.Location location = translog.add(op);
                    Translog.Location existing = writtenOps.put(op, location);
                    if (existing != null) {
                        fail("duplicate op [" + op + "], old entry at " + location);
                    }
                    if (id % writers.length == threadId) {
                        translog.ensureSynced(location);
                    }
                    writtenOpsLatch.get().countDown();
                    counter++;
                }
                logger.debug("--> [{}] done. wrote [{}] ops.", threadName, counter);
            }

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> writer [{}] had an error", threadName), e);
                errors.add(e);
            }
        }, threadName);
        writers[i].start();
    }
    for (int i = 0; i < readers.length; i++) {
        final String threadId = "reader_" + i;
        readers[i] = new Thread(new AbstractRunnable() {

            Translog.View view = null;

            Set<Translog.Operation> writtenOpsAtView;

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> reader [{}] had an error", threadId), e);
                errors.add(e);
                try {
                    closeView();
                } catch (IOException inner) {
                    inner.addSuppressed(e);
                    logger.error("unexpected error while closing view, after failure", inner);
                }
            }

            void closeView() throws IOException {
                if (view != null) {
                    view.close();
                }
            }

            void newView() throws IOException {
                closeView();
                view = translog.newView();
                // captures the currently written ops so we know what to expect from the view
                writtenOpsAtView = new HashSet<>(writtenOps.keySet());
                logger.debug("--> [{}] opened view from [{}]", threadId, view.minTranslogGeneration());
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                int iter = 0;
                while (run.get()) {
                    if (iter++ % 10 == 0) {
                        newView();
                    }
                    // captures all ops that are written since the view was created (with a small caveat, see below)
                    // these are what we expect the snapshot to return (and potentially some more).
                    Set<Translog.Operation> expectedOps = new HashSet<>(writtenOps.keySet());
                    expectedOps.removeAll(writtenOpsAtView);
                    Translog.Snapshot snapshot = view.snapshot();
                    Translog.Operation op;
                    while ((op = snapshot.next()) != null) {
                        expectedOps.remove(op);
                    }
                    if (expectedOps.isEmpty() == false) {
                        StringBuilder missed = new StringBuilder("missed ").append(expectedOps.size()).append(" operations");
                        boolean failed = false;
                        for (Translog.Operation expectedOp : expectedOps) {
                            final Translog.Location loc = writtenOps.get(expectedOp);
                            if (loc.generation < view.minTranslogGeneration()) {
                                // writtenOps is only updated after an op is added to the translog, so ops
                                // written just before the view was taken may still be missing from
                                // writtenOpsAtView, meaning we would erroneously expect them; skip those
                                continue;
                            }
                            failed = true;
                            missed.append("\n --> [").append(expectedOp).append("] written at ").append(loc);
                        }
                        if (failed) {
                            fail(missed.toString());
                        }
                    }
                    // slow down things a bit and spread out testing..
                    writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS);
                }
                closeView();
                logger.debug("--> [{}] done. tested [{}] snapshots", threadId, iter);
            }
        }, threadId);
        readers[i].start();
    }
    barrier.await();
    try {
        for (int iterations = scaledRandomIntBetween(10, 200); iterations > 0 && errors.isEmpty(); iterations--) {
            writtenOpsLatch.set(new CountDownLatch(flushEveryOps));
            while (writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS) == false) {
                if (errors.size() > 0) {
                    break;
                }
            }
            translog.commit();
        }
    } finally {
        run.set(false);
        logger.debug("--> waiting for threads to stop");
        for (Thread thread : writers) {
            thread.join();
        }
        for (Thread thread : readers) {
            thread.join();
        }
        if (errors.size() > 0) {
            Throwable e = errors.get(0);
            for (Throwable suppress : errors.subList(1, errors.size())) {
                e.addSuppressed(suppress);
            }
            throw e;
        }
        logger.info("--> test done. total ops written [{}]", writtenOps.size());
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) Set(java.util.Set) HashSet(java.util.HashSet) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) Location(org.elasticsearch.index.translog.Translog.Location) HashSet(java.util.HashSet) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) EOFException(java.io.EOFException) InvalidPathException(java.nio.file.InvalidPathException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Location(org.elasticsearch.index.translog.Translog.Location) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)
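
Besides logging each failure through a ParameterizedMessage supplier, the test collects the exceptions from its writer and reader threads and, in the finally block, rethrows the first one with the rest attached via addSuppressed so that no failure is silently dropped. A condensed, stand-alone sketch of that aggregation pattern; the error messages are made up:

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

// Illustrative sketch, not Elasticsearch code.
public class ErrorAggregationSketch {
    public static void main(String[] args) throws Throwable {
        // Worker threads record failures here instead of throwing across threads.
        List<Exception> errors = new CopyOnWriteArrayList<>();
        errors.add(new IllegalStateException("writer_0 had an error"));
        errors.add(new IllegalStateException("reader_1 had an error"));

        if (errors.isEmpty() == false) {
            // Rethrow the first failure and keep the rest visible as suppressed exceptions.
            Throwable first = errors.get(0);
            for (Throwable other : errors.subList(1, errors.size())) {
                first.addSuppressed(other);
            }
            throw first;
        }
    }
}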

Example 100 with ParameterizedMessage

Use of org.apache.logging.log4j.message.ParameterizedMessage in project elasticsearch by elastic.

From the class GceUnicastHostsProvider, method buildDynamicNodes.

/**
     * We build the list of nodes from the GCE Management API.
     * Information can be cached using the `cloud.gce.refresh_interval` property if needed.
     */
@Override
public List<DiscoveryNode> buildDynamicNodes() {
    // We check that needed properties have been set
    if (this.project == null || this.project.isEmpty() || this.zones == null || this.zones.isEmpty()) {
        throw new IllegalArgumentException("one or more gce discovery settings are missing. " + "Check elasticsearch.yml file. Should have [" + GceInstancesService.PROJECT_SETTING.getKey() + "] and [" + GceInstancesService.ZONE_SETTING.getKey() + "].");
    }
    if (refreshInterval.millis() != 0) {
        if (cachedDiscoNodes != null && (refreshInterval.millis() < 0 || (System.currentTimeMillis() - lastRefresh) < refreshInterval.millis())) {
            if (logger.isTraceEnabled())
                logger.trace("using cache to retrieve node list");
            return cachedDiscoNodes;
        }
        lastRefresh = System.currentTimeMillis();
    }
    logger.debug("start building nodes list using GCE API");
    cachedDiscoNodes = new ArrayList<>();
    String ipAddress = null;
    try {
        InetAddress inetAddress = networkService.resolvePublishHostAddresses(null);
        if (inetAddress != null) {
            ipAddress = NetworkAddress.format(inetAddress);
        }
    } catch (IOException e) {
    // We can't find the publish host address... Hmmm. Too bad :-(
    // It simply means the current node won't be filtered out of the list we build below
    }
    try {
        Collection<Instance> instances = gceInstancesService.instances();
        if (instances == null) {
            logger.trace("no instance found for project [{}], zones [{}].", this.project, this.zones);
            return cachedDiscoNodes;
        }
        for (Instance instance : instances) {
            String name = instance.getName();
            String type = instance.getMachineType();
            String status = instance.getStatus();
            logger.trace("gce instance {} with status {} found.", name, status);
            // See https://github.com/elastic/elasticsearch-cloud-gce/issues/3
            if (Status.TERMINATED.equals(status)) {
                logger.debug("node {} is TERMINATED. Ignoring", name);
                continue;
            }
            // see if we need to filter by tag
            boolean filterByTag = false;
            if (tags.isEmpty() == false) {
                logger.trace("start filtering instance {} with tags {}.", name, tags);
                if (instance.getTags() == null || instance.getTags().isEmpty() || instance.getTags().getItems() == null || instance.getTags().getItems().isEmpty()) {
                    // If this instance has no tags, we filter it out
                    logger.trace("no tags for this instance but we asked for tags. {} won't be part of the cluster.", name);
                    filterByTag = true;
                } else {
                    // check that all tags listed are there on the instance
                    logger.trace("comparing instance tags {} with tags filter {}.", instance.getTags().getItems(), tags);
                    for (String tag : tags) {
                        boolean found = false;
                        for (String instancetag : instance.getTags().getItems()) {
                            if (instancetag.equals(tag)) {
                                found = true;
                                break;
                            }
                        }
                        if (!found) {
                            filterByTag = true;
                            break;
                        }
                    }
                }
            }
            if (filterByTag) {
                logger.trace("filtering out instance {} based tags {}, not part of {}", name, tags, instance.getTags() == null || instance.getTags().getItems() == null ? "" : instance.getTags());
                continue;
            } else {
                logger.trace("instance {} with tags {} is added to discovery", name, tags);
            }
            String ip_public = null;
            String ip_private = null;
            List<NetworkInterface> interfaces = instance.getNetworkInterfaces();
            for (NetworkInterface networkInterface : interfaces) {
                if (ip_public == null) {
                    // Trying to get Public IP Address (For future use)
                    if (networkInterface.getAccessConfigs() != null) {
                        for (AccessConfig accessConfig : networkInterface.getAccessConfigs()) {
                            if (Strings.hasText(accessConfig.getNatIP())) {
                                ip_public = accessConfig.getNatIP();
                                break;
                            }
                        }
                    }
                }
                if (ip_private == null) {
                    ip_private = networkInterface.getNetworkIP();
                }
                // If we have both public and private, we can stop here
                if (ip_private != null && ip_public != null)
                    break;
            }
            try {
                if (ip_private.equals(ipAddress)) {
                    // We found the current node.
                    // We can ignore it in the list of DiscoveryNode
                    logger.trace("current node found. Ignoring {} - {}", name, ip_private);
                } else {
                    String address = ip_private;
                    // Test if we have es_port metadata defined here
                    if (instance.getMetadata() != null && instance.getMetadata().containsKey("es_port")) {
                        Object es_port = instance.getMetadata().get("es_port");
                        logger.trace("es_port is defined with {}", es_port);
                        if (es_port instanceof String) {
                            address = address.concat(":").concat((String) es_port);
                        } else {
                            // Ignoring other values
                            logger.trace("es_port is instance of {}. Ignoring...", es_port.getClass().getName());
                        }
                    }
                    // ip_private is a single IP Address. We need to build a TransportAddress from it
                    // If user has set `es_port` metadata, we don't need to ping all ports
                    // we limit to 1 address; it makes no sense to ping 100 ports
                    TransportAddress[] addresses = transportService.addressesFromString(address, 1);
                    for (TransportAddress transportAddress : addresses) {
                        logger.trace("adding {}, type {}, address {}, transport_address {}, status {}", name, type, ip_private, transportAddress, status);
                        cachedDiscoNodes.add(new DiscoveryNode("#cloud-" + name + "-" + 0, transportAddress, emptyMap(), emptySet(), Version.CURRENT.minimumCompatibilityVersion()));
                    }
                }
            } catch (Exception e) {
                final String finalIpPrivate = ip_private;
                logger.warn((Supplier<?>) () -> new ParameterizedMessage("failed to add {}, address {}", name, finalIpPrivate), e);
            }
        }
    } catch (Exception e) {
        logger.warn("exception caught during discovery", e);
    }
    logger.debug("{} node(s) added", cachedDiscoNodes.size());
    logger.debug("using dynamic discovery nodes {}", cachedDiscoNodes);
    return cachedDiscoNodes;
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) Instance(com.google.api.services.compute.model.Instance) TransportAddress(org.elasticsearch.common.transport.TransportAddress) NetworkInterface(com.google.api.services.compute.model.NetworkInterface) IOException(java.io.IOException) AccessConfig(com.google.api.services.compute.model.AccessConfig) IOException(java.io.IOException) Supplier(org.apache.logging.log4j.util.Supplier) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) InetAddress(java.net.InetAddress)
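
As a closing note on the idiom that ties these examples together: the explicit (Supplier<?>) cast plus ParameterizedMessage was the convention in this codebase, but the Log4j 2 API also accepts the message pattern and arguments directly, treating a trailing Throwable as the event's exception when there are more arguments than {} placeholders. A small sketch contrasting the two forms, with illustrative names:

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;

// Illustrative sketch, not Elasticsearch code.
public class EquivalentFormsSketch {
    private static final Logger logger = LogManager.getLogger(EquivalentFormsSketch.class);

    void demo(String name, String ip, Exception e) {
        // Form used in the example above: explicit Supplier plus ParameterizedMessage,
        // deferring message construction until WARN is known to be enabled.
        logger.warn((Supplier<?>) () ->
                new ParameterizedMessage("failed to add {}, address {}", name, ip), e);

        // Eager equivalent: with more arguments than placeholders, Log4j records the
        // trailing Throwable as the log event's exception rather than a parameter.
        logger.warn("failed to add {}, address {}", name, ip, e);
    }
}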

Aggregations

ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage): 131 uses
Supplier (org.apache.logging.log4j.util.Supplier): 90 uses
IOException (java.io.IOException): 75 uses
ElasticsearchException (org.elasticsearch.ElasticsearchException): 38 uses
ArrayList (java.util.ArrayList): 28 uses
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 26 uses
ClusterState (org.elasticsearch.cluster.ClusterState): 25 uses
HashMap (java.util.HashMap): 16 uses
TimeValue (org.elasticsearch.common.unit.TimeValue): 14 uses
TransportException (org.elasticsearch.transport.TransportException): 14 uses
List (java.util.List): 13 uses
Supplier (java.util.function.Supplier): 13 uses
Map (java.util.Map): 12 uses
CountDownLatch (java.util.concurrent.CountDownLatch): 12 uses
ExecutionException (java.util.concurrent.ExecutionException): 12 uses
Settings (org.elasticsearch.common.settings.Settings): 12 uses
EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException): 12 uses
AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable): 11 uses
Index (org.elasticsearch.index.Index): 11 uses
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 10 uses