Use of org.apache.logging.log4j.util.Supplier in project elasticsearch by elastic.
In class DiscoveryWithServiceDisruptionsIT, method testStaleMasterNotHijackingMajority.
/**
 * Test that emulates a frozen elected master node that unfreezes and pushes its cluster state to other nodes
 * that are already following another elected master node. These nodes should reject this cluster state and
 * refuse to follow the stale master.
 */
@TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.test.disruption:TRACE")
public void testStaleMasterNotHijackingMajority() throws Exception {
    // 3 node cluster with unicast discovery and minimum_master_nodes set to 2:
    final List<String> nodes = startCluster(3, 2);
    // Save the current master node as the old master node, because that node will get frozen
    final String oldMasterNode = internalCluster().getMasterName();
    for (String node : nodes) {
        ensureStableCluster(3, node);
    }
    assertMaster(oldMasterNode, nodes);
    // Simulate a painful gc by suspending all threads for a long time on the currently elected master node.
    SingleNodeDisruption masterNodeDisruption = new LongGCDisruption(random(), oldMasterNode);
    // Save the majority side
    final List<String> majoritySide = new ArrayList<>(nodes);
    majoritySide.remove(oldMasterNode);
    // Keep track of the previous and current master whenever a master node transition takes place on each node of the majority side:
    final Map<String, List<Tuple<String, String>>> masters = Collections.synchronizedMap(new HashMap<String, List<Tuple<String, String>>>());
    for (final String node : majoritySide) {
        masters.put(node, new ArrayList<Tuple<String, String>>());
        internalCluster().getInstance(ClusterService.class, node).addListener(event -> {
            DiscoveryNode previousMaster = event.previousState().nodes().getMasterNode();
            DiscoveryNode currentMaster = event.state().nodes().getMasterNode();
            if (!Objects.equals(previousMaster, currentMaster)) {
                logger.info("node {} received new cluster state: {} \n and had previous cluster state: {}", node, event.state(), event.previousState());
                String previousMasterNodeName = previousMaster != null ? previousMaster.getName() : null;
                String currentMasterNodeName = currentMaster != null ? currentMaster.getName() : null;
                masters.get(node).add(new Tuple<>(previousMasterNodeName, currentMasterNodeName));
            }
        });
    }
    final CountDownLatch oldMasterNodeSteppedDown = new CountDownLatch(1);
    internalCluster().getInstance(ClusterService.class, oldMasterNode).addListener(event -> {
        if (event.state().nodes().getMasterNodeId() == null) {
            oldMasterNodeSteppedDown.countDown();
        }
    });
    internalCluster().setDisruptionScheme(masterNodeDisruption);
    logger.info("freezing node [{}]", oldMasterNode);
    masterNodeDisruption.startDisrupting();
    // Wait for the majority side to get stable
    assertDifferentMaster(majoritySide.get(0), oldMasterNode);
    assertDifferentMaster(majoritySide.get(1), oldMasterNode);
    // The test periodically trips on the following assertion. To find out which threads are blocking the nodes from making
    // progress we print a stack dump
    boolean failed = true;
    try {
        assertDiscoveryCompleted(majoritySide);
        failed = false;
    } finally {
        if (failed) {
            logger.error("discovery failed to complete, probably caused by a blocked thread: {}", new HotThreads().busiestThreads(Integer.MAX_VALUE).ignoreIdleThreads(false).detect());
        }
    }
    // The old master node is frozen, but here we submit a cluster state update task that doesn't get executed,
    // but will be queued and, once the old master node unfreezes, it gets executed.
    // The old master node will send this update plus the cluster state where it is flagged as master to the other
    // nodes that follow the new master. These nodes should ignore this update.
    internalCluster().getInstance(ClusterService.class, oldMasterNode).submitStateUpdateTask("sneaky-update", new ClusterStateUpdateTask(Priority.IMMEDIATE) {
        @Override
        public ClusterState execute(ClusterState currentState) throws Exception {
            return ClusterState.builder(currentState).build();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.warn((Supplier<?>) () -> new ParameterizedMessage("failure [{}]", source), e);
        }
    });
    // Save the newly elected master node
    final String newMasterNode = internalCluster().getMasterName(majoritySide.get(0));
    logger.info("new detected master node [{}]", newMasterNode);
    // Stop disruption
    logger.info("Unfreeze node [{}]", oldMasterNode);
    masterNodeDisruption.stopDisrupting();
    oldMasterNodeSteppedDown.await(30, TimeUnit.SECONDS);
    // Make sure that the end state is consistent on all nodes:
    assertDiscoveryCompleted(nodes);
    assertMaster(newMasterNode, nodes);
    assertThat(masters.size(), equalTo(2));
    for (Map.Entry<String, List<Tuple<String, String>>> entry : masters.entrySet()) {
        String nodeName = entry.getKey();
        List<Tuple<String, String>> recordedMasterTransition = entry.getValue();
        assertThat("[" + nodeName + "] Each node should only record two master node transitions", recordedMasterTransition.size(), equalTo(2));
        assertThat("[" + nodeName + "] First transition's previous master should be [" + oldMasterNode + "]", recordedMasterTransition.get(0).v1(), equalTo(oldMasterNode));
        assertThat("[" + nodeName + "] First transition's current master should be [null]", recordedMasterTransition.get(0).v2(), nullValue());
        assertThat("[" + nodeName + "] Second transition's previous master should be [null]", recordedMasterTransition.get(1).v1(), nullValue());
        assertThat("[" + nodeName + "] Second transition's current master should be [" + newMasterNode + "]", recordedMasterTransition.get(1).v2(), equalTo(newMasterNode));
    }
}
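
Every snippet on this page uses the same lazy-logging idiom: a lambda is cast to org.apache.logging.log4j.util.Supplier<?> so that the logger overload taking a message supplier is selected, and the ParameterizedMessage is only built when the corresponding log level is enabled. A minimal, self-contained sketch of that idiom (the class name, message text, and exception below are illustrative, not taken from the Elasticsearch sources):

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;

public class LazyLoggingExample {

    private static final Logger logger = LogManager.getLogger(LazyLoggingExample.class);

    public static void main(String[] args) {
        String source = "sneaky-update";
        Exception cause = new IllegalStateException("node is no longer master");
        // The cast selects the warn(Supplier<?>, Throwable) overload, so the
        // ParameterizedMessage is only constructed if WARN is enabled for this logger.
        logger.warn((Supplier<?>) () -> new ParameterizedMessage("failure [{}]", source), cause);
    }
}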
Use of org.apache.logging.log4j.util.Supplier in project elasticsearch by elastic.
In class AbstractS3SnapshotRestoreTest, method cleanRepositoryFiles.
/**
 * Deletes content of the repository files in the bucket
 */
public void cleanRepositoryFiles(String basePath) {
    Settings settings = internalCluster().getInstance(Settings.class);
    Settings[] buckets = {
        settings.getByPrefix("repositories.s3."),
        settings.getByPrefix("repositories.s3.private-bucket."),
        settings.getByPrefix("repositories.s3.remote-bucket."),
        settings.getByPrefix("repositories.s3.external-bucket.")
    };
    for (Settings bucket : buckets) {
        String bucketName = bucket.get("bucket");
        // We check that settings has been set in elasticsearch.yml integration test file
        // as described in README
        assertThat("Your settings in elasticsearch.yml are incorrects. Check README file.", bucketName, notNullValue());
        AmazonS3 client = internalCluster().getInstance(AwsS3Service.class).client(Settings.EMPTY, null, randomBoolean(), null);
        try {
            ObjectListing prevListing = null;
            // From http://docs.amazonwebservices.com/AmazonS3/latest/dev/DeletingMultipleObjectsUsingJava.html
            // we can do at most 1K objects per delete
            // We don't know the bucket name until first object listing
            DeleteObjectsRequest multiObjectDeleteRequest = null;
            ArrayList<DeleteObjectsRequest.KeyVersion> keys = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            while (true) {
                ObjectListing list;
                if (prevListing != null) {
                    list = client.listNextBatchOfObjects(prevListing);
                } else {
                    list = client.listObjects(bucketName, basePath);
                    multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
                }
                for (S3ObjectSummary summary : list.getObjectSummaries()) {
                    keys.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                    // Every 500 objects batch the delete request
                    if (keys.size() > 500) {
                        multiObjectDeleteRequest.setKeys(keys);
                        client.deleteObjects(multiObjectDeleteRequest);
                        multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
                        keys.clear();
                    }
                }
                if (list.isTruncated()) {
                    prevListing = list;
                } else {
                    break;
                }
            }
            if (!keys.isEmpty()) {
                multiObjectDeleteRequest.setKeys(keys);
                client.deleteObjects(multiObjectDeleteRequest);
            }
        } catch (Exception ex) {
            logger.warn((Supplier<?>) () -> new ParameterizedMessage("Failed to delete S3 repository [{}]", bucketName), ex);
        }
    }
}
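
The cleanup loop above interleaves listing, batching, and deleting, and carries extra state because the bucket name is only confirmed by the first listing. As a rough standalone sketch of the same pagination-and-batching pattern, assuming the bucket name and prefix are already known (the class and method names below are made up for illustration; the 500-key batch size simply mirrors the test and stays under the 1,000-key limit of the multi-object delete API):

import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;

public class S3PrefixCleaner {

    /**
     * Deletes every object under the given prefix, batching keys so that each
     * DeleteObjectsRequest stays well below the 1,000-key API limit.
     */
    public static void deletePrefix(AmazonS3 client, String bucketName, String prefix) {
        List<DeleteObjectsRequest.KeyVersion> keys = new ArrayList<>();
        ObjectListing listing = client.listObjects(bucketName, prefix);
        while (true) {
            for (S3ObjectSummary summary : listing.getObjectSummaries()) {
                keys.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                if (keys.size() >= 500) {
                    client.deleteObjects(new DeleteObjectsRequest(bucketName).withKeys(keys));
                    keys.clear();
                }
            }
            if (!listing.isTruncated()) {
                break;
            }
            listing = client.listNextBatchOfObjects(listing);
        }
        if (!keys.isEmpty()) {
            client.deleteObjects(new DeleteObjectsRequest(bucketName).withKeys(keys));
        }
    }
}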
Use of org.apache.logging.log4j.util.Supplier in project elasticsearch by elastic.
In class TaskResultsService, method taskResultIndexMapping.
public String taskResultIndexMapping() {
    try (InputStream is = getClass().getResourceAsStream(TASK_RESULT_INDEX_MAPPING_FILE)) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Streams.copy(is, out);
        return out.toString(IOUtils.UTF_8);
    } catch (Exception e) {
        logger.error((Supplier<?>) () -> new ParameterizedMessage("failed to create tasks results index template [{}]", TASK_RESULT_INDEX_MAPPING_FILE), e);
        throw new IllegalStateException("failed to create tasks results index template [" + TASK_RESULT_INDEX_MAPPING_FILE + "]", e);
    }
}
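
taskResultIndexMapping reads a mapping file from the classpath into a UTF-8 string with Elasticsearch's Streams.copy helper and rethrows any failure as an IllegalStateException after logging it. The same resource-to-string step can be sketched with plain JDK classes, without the Elasticsearch utilities (the names below are illustrative):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;

public class ClasspathResourceReader {

    /** Reads a classpath resource into a UTF-8 string, failing fast if it cannot be loaded. */
    public static String readResource(Class<?> owner, String resourcePath) {
        try (InputStream is = owner.getResourceAsStream(resourcePath)) {
            if (is == null) {
                throw new IllegalStateException("resource [" + resourcePath + "] not found on classpath");
            }
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int read;
            while ((read = is.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new UncheckedIOException("failed to read resource [" + resourcePath + "]", e);
        }
    }
}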
Use of org.apache.logging.log4j.util.Supplier in project elasticsearch by elastic.
In class TcpTransport, method onException.
protected void onException(Channel channel, Exception e) throws IOException {
    if (!lifecycle.started()) {
        // just close and ignore - we are already stopped and just need to make sure we release all resources
        disconnectFromNodeChannel(channel, e);
        return;
    }
    if (isCloseConnectionException(e)) {
        logger.trace((Supplier<?>) () -> new ParameterizedMessage("close connection exception caught on transport layer [{}], disconnecting from relevant node", channel), e);
        // close the channel, which will cause a node to be disconnected if relevant
        disconnectFromNodeChannel(channel, e);
    } else if (isConnectException(e)) {
        logger.trace((Supplier<?>) () -> new ParameterizedMessage("connect exception caught on transport layer [{}]", channel), e);
        // close the channel as safe measure, which will cause a node to be disconnected if relevant
        disconnectFromNodeChannel(channel, e);
    } else if (e instanceof BindException) {
        logger.trace((Supplier<?>) () -> new ParameterizedMessage("bind exception caught on transport layer [{}]", channel), e);
        // close the channel as safe measure, which will cause a node to be disconnected if relevant
        disconnectFromNodeChannel(channel, e);
    } else if (e instanceof CancelledKeyException) {
        logger.trace((Supplier<?>) () -> new ParameterizedMessage("cancelled key exception caught on transport layer [{}], disconnecting from relevant node", channel), e);
        // close the channel as safe measure, which will cause a node to be disconnected if relevant
        disconnectFromNodeChannel(channel, e);
    } else if (e instanceof TcpTransport.HttpOnTransportException) {
        // in case we are able to return data, serialize the exception content and send it back to the client
        if (isOpen(channel)) {
            final Runnable closeChannel = () -> {
                try {
                    closeChannels(Collections.singletonList(channel));
                } catch (IOException e1) {
                    logger.debug("failed to close httpOnTransport channel", e1);
                }
            };
            boolean success = false;
            try {
                sendMessage(channel, new BytesArray(e.getMessage().getBytes(StandardCharsets.UTF_8)), closeChannel);
                success = true;
            } finally {
                if (success == false) {
                    // it's fine to call this more than once
                    closeChannel.run();
                }
            }
        }
    } else {
        logger.warn((Supplier<?>) () -> new ParameterizedMessage("exception caught on transport layer [{}], closing connection", channel), e);
        // close the channel, which will cause a node to be disconnected if relevant
        disconnectFromNodeChannel(channel, e);
    }
}
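
onException grades failures by how expected they are: connection teardown, connect, bind, and cancelled-key exceptions are logged at TRACE with lazily built messages, while anything unexpected is logged at WARN before the channel is closed. For comparison, the Supplier cast used above is roughly equivalent to guarding an eagerly built message with an explicit level check, as in this sketch (the channel object and message here are placeholders, not the Elasticsearch types):

import java.io.IOException;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;

public class EagerVsLazyTraceExample {

    private static final Logger logger = LogManager.getLogger(EagerVsLazyTraceExample.class);

    public static void main(String[] args) {
        Object channel = "channel#1"; // stand-in for the real transport channel
        Exception e = new IOException("connection reset by peer");
        // Explicit guard: only build the message when TRACE is enabled. The
        // (Supplier<?>) () -> new ParameterizedMessage(...) form used in onException
        // achieves the same effect without the if statement.
        if (logger.isTraceEnabled()) {
            logger.trace(new ParameterizedMessage("close connection exception caught on transport layer [{}]", channel), e);
        }
    }
}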
Use of org.apache.logging.log4j.util.Supplier in project elasticsearch by elastic.
In class TcpTransport, method connectToNode.
@Override
public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile, CheckedBiConsumer<Connection, ConnectionProfile, IOException> connectionValidator) throws ConnectTransportException {
    connectionProfile = resolveConnectionProfile(connectionProfile, defaultConnectionProfile);
    if (node == null) {
        throw new ConnectTransportException(null, "can't connect to a null node");
    }
    // ensure we don't open connections while we are closing
    globalLock.readLock().lock();
    try {
        ensureOpen();
        try (Releasable ignored = connectionLock.acquire(node.getId())) {
            NodeChannels nodeChannels = connectedNodes.get(node);
            if (nodeChannels != null) {
                return;
            }
            try {
                try {
                    nodeChannels = openConnection(node, connectionProfile);
                    connectionValidator.accept(nodeChannels, connectionProfile);
                } catch (Exception e) {
                    logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to connect to [{}], cleaning dangling connections", node), e);
                    IOUtils.closeWhileHandlingException(nodeChannels);
                    throw e;
                }
                // we acquire a connection lock, so no way there is an existing connection
                connectedNodes.put(node, nodeChannels);
                if (logger.isDebugEnabled()) {
                    logger.debug("connected to node [{}]", node);
                }
                transportServiceAdapter.onNodeConnected(node);
            } catch (ConnectTransportException e) {
                throw e;
            } catch (Exception e) {
                throw new ConnectTransportException(node, "general node connection failure", e);
            }
        }
    } finally {
        globalLock.readLock().unlock();
    }
}
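
connectToNode combines a global read lock (to block new connections while the transport is closing) with a per-node lock, a re-check of connectedNodes under that lock, and cleanup of dangling channels if validation fails. A loose, generic sketch of that connect-under-per-key-lock shape, with none of the Elasticsearch types (all names below are invented for illustration):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import java.util.function.Function;

public class PerKeyConnector<K, C> {

    private final Map<K, C> connected = new ConcurrentHashMap<>();
    private final Map<K, Object> locks = new ConcurrentHashMap<>();

    public C connect(K key, Function<K, C> opener, Consumer<C> validator, Consumer<C> closer) {
        C existing = connected.get(key);
        if (existing != null) {
            return existing;
        }
        synchronized (locks.computeIfAbsent(key, k -> new Object())) {
            existing = connected.get(key);
            if (existing != null) {
                // another caller finished connecting while we waited for the per-key lock
                return existing;
            }
            C connection = opener.apply(key);
            try {
                validator.accept(connection);
            } catch (RuntimeException e) {
                // clean up the dangling connection before rethrowing, as connectToNode does
                closer.accept(connection);
                throw e;
            }
            connected.put(key, connection);
            return connection;
        }
    }
}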