use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.
the class FsHealthServiceTests method testLoggingOnHungIO.
/**
 * Verifies that a WARN-level message is logged for every node data path when a health check run takes
 * longer than the configured slow-path logging threshold.
 */
@TestLogging(value = "org.opensearch.monitor.fs:WARN", reason = "to ensure that we log on hung IO at WARN level")
public void testLoggingOnHungIO() throws Exception {
    final long warnThresholdMillis = randomLongBetween(100, 200);
    final Settings settings = Settings.builder()
        .put(FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING.getKey(), warnThresholdMillis + "ms")
        .build();
    final FileSystem defaultFileSystem = PathUtils.getDefaultFileSystem();
    final TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    // Always strictly above the warn threshold. Presumably the fsync delay (in millis) that the provider
    // injects -- confirm against FileSystemFsyncHungProvider.
    final long providerDelay = randomLongBetween(warnThresholdMillis + 1, 400);
    final FileSystemFsyncHungProvider hungFsyncProvider = new FileSystemFsyncHungProvider(
        defaultFileSystem,
        providerDelay,
        testThreadPool
    );
    PathUtilsForTesting.installMock(hungFsyncProvider.getFileSystem(null));
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (
        MockLogAppender mockAppender = MockLogAppender.createForLoggers(LogManager.getLogger(FsHealthService.class));
        NodeEnvironment env = newNodeEnvironment()
    ) {
        final FsHealthService service = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        final Path[] dataPaths = env.nodeDataPaths();
        // Register one expectation per data path; expectation names are 1-based ("test1", "test2", ...).
        for (int i = 0; i < dataPaths.length; i++) {
            final String expectedMessage = "health check of [" + dataPaths[i] + "] took [*ms] which is above the warn threshold*";
            mockAppender.addExpectation(
                new MockLogAppender.SeenEventExpectation(
                    "test" + (i + 1),
                    FsHealthService.class.getCanonicalName(),
                    Level.WARN,
                    expectedMessage
                )
            );
        }

        // Turn on the disruption, then run the monitor directly on the test thread.
        hungFsyncProvider.injectIODelay.set(true);
        service.new FsHealthMonitor().run();

        // Every data path must have been hit by the disruption, and every expected warning must show up.
        assertEquals(dataPaths.length, hungFsyncProvider.getInjectedPathCount());
        assertBusy(mockAppender::assertAllExpectationsMatched);
    } finally {
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.
the class FsHealthServiceTests method testFailsHealthOnUnexpectedLockFileSize.
/**
 * Verifies that the health status flips from HEALTHY to UNHEALTHY, with a "broken node lock" message,
 * once the mocked file system starts injecting an unexpected lock file size.
 */
public void testFailsHealthOnUnexpectedLockFileSize() throws IOException {
    final FileSystem defaultFileSystem = PathUtils.getDefaultFileSystem();
    final Settings settings = Settings.EMPTY;
    final TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    final FileSystemUnexpectedLockFileSizeProvider lockFileSizeProvider = new FileSystemUnexpectedLockFileSizeProvider(
        defaultFileSystem,
        1,
        testThreadPool
    );
    PathUtilsForTesting.installMock(lockFileSizeProvider.getFileSystem(null));
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment env = newNodeEnvironment()) {
        // Baseline: with no injection active, a monitor run reports a healthy file system.
        final FsHealthService undisturbedService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        undisturbedService.new FsHealthMonitor().run();
        assertEquals(HEALTHY, undisturbedService.getHealth().getStatus());
        assertEquals("health check passed", undisturbedService.getHealth().getInfo());

        // enabling unexpected file size injection
        lockFileSizeProvider.injectUnexpectedFileSize.set(true);

        // A fresh service instance running against the disrupted file system must report the broken lock.
        final FsHealthService disturbedService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        disturbedService.new FsHealthMonitor().run();
        assertEquals(UNHEALTHY, disturbedService.getHealth().getStatus());
        assertThat(disturbedService.getHealth().getInfo(), is("health check failed due to broken node lock"));
        assertEquals(1, lockFileSizeProvider.getInjectedPathCount());
    } finally {
        // Switch the injection off before the mock file system is torn down.
        lockFileSizeProvider.injectUnexpectedFileSize.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.
the class FsHealthServiceTests method testFailsHealthOnSinglePathWriteFailure.
/**
 * Verifies that an injected IO exception restricted to one randomly chosen node data path is enough to
 * make the health check report UNHEALTHY and name that path in the status info.
 */
public void testFailsHealthOnSinglePathWriteFailure() throws IOException {
    final FileSystem defaultFileSystem = PathUtils.getDefaultFileSystem();
    final FileSystemIOExceptionProvider ioExceptionProvider = new FileSystemIOExceptionProvider(defaultFileSystem);
    PathUtilsForTesting.installMock(ioExceptionProvider.getFileSystem(null));
    final Settings settings = Settings.EMPTY;
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    final TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    try (NodeEnvironment env = newNodeEnvironment()) {
        final Path[] dataPaths = env.nodeDataPaths();

        // Baseline: with no injection active, a monitor run reports a healthy file system.
        final FsHealthService undisturbedService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        undisturbedService.new FsHealthMonitor().run();
        assertEquals(HEALTHY, undisturbedService.getHealth().getStatus());
        assertEquals("health check passed", undisturbedService.getHealth().getInfo());

        // disrupt file system writes on single path
        final String failingPath = randomFrom(dataPaths).toString();
        ioExceptionProvider.restrictPathPrefix(failingPath);
        ioExceptionProvider.injectIOException.set(true);

        // A fresh service instance must now fail, and the failure must be attributed to the chosen path only.
        final FsHealthService disturbedService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        disturbedService.new FsHealthMonitor().run();
        assertEquals(UNHEALTHY, disturbedService.getHealth().getStatus());
        assertThat(disturbedService.getHealth().getInfo(), is("health check failed on [" + failingPath + "]"));
        assertEquals(1, ioExceptionProvider.getInjectedPathCount());
    } finally {
        // Switch the injection off before the mock file system is torn down.
        ioExceptionProvider.injectIOException.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.
the class FsHealthServiceTests method testSchedulesHealthCheckAtRefreshIntervals.
/**
 * Verifies that, once started, the service schedules its health check on the deterministic task queue at
 * multiples of the configured refresh interval, and that stopping the service leaves no runnable or
 * deferred tasks behind.
 */
public void testSchedulesHealthCheckAtRefreshIntervals() throws Exception {
    final long refreshInterval = randomLongBetween(1000, 12000);
    final Settings settings = Settings.builder()
        .put(FsHealthService.REFRESH_INTERVAL_SETTING.getKey(), refreshInterval + "ms")
        .build();
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment env = newNodeEnvironment()) {
        final FsHealthService fsHealthService = new FsHealthService(
            settings,
            clusterSettings,
            deterministicTaskQueue.getThreadPool(),
            env
        );
        final long startTimeMillis = deterministicTaskQueue.getCurrentTimeMillis();
        fsHealthService.doStart();

        // Starting the service must only defer work; nothing is runnable until time advances.
        assertFalse(deterministicTaskQueue.hasRunnableTasks());
        assertTrue(deterministicTaskQueue.hasDeferredTasks());

        // Draw the number of steps exactly once. Calling randomIntBetween in the loop condition would pick
        // a new bound on every pass, skewing the step count and consuming extra random values.
        final int steps = randomIntBetween(5, 10);
        int rescheduledCount = 0;
        for (int i = 1; i <= steps; i++) {
            if (deterministicTaskQueue.hasRunnableTasks()) {
                deterministicTaskQueue.runRandomTask();
            } else {
                // The next deferred run is expected one refresh interval after the last one. The expectation
                // is expressed relative to startTimeMillis, matching the elapsed-time assertion below.
                assertThat(
                    deterministicTaskQueue.getLatestDeferredExecutionTime(),
                    is(startTimeMillis + refreshInterval * (rescheduledCount + 1))
                );
                deterministicTaskQueue.advanceTime();
                rescheduledCount++;
            }
            assertThat(deterministicTaskQueue.getCurrentTimeMillis() - startTimeMillis, is(refreshInterval * rescheduledCount));
        }

        // After stopping, draining the queue must leave nothing scheduled.
        fsHealthService.doStop();
        deterministicTaskQueue.runAllTasksInTimeOrder();
        assertFalse(deterministicTaskQueue.hasRunnableTasks());
        assertFalse(deterministicTaskQueue.hasDeferredTasks());
    }
}
use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.
the class FsHealthServiceTests method testFailsHealthOnHungIOBeyondHealthyTimeout.
/**
 * Verifies that a started service turns UNHEALTHY with "healthy threshold breached" while IO delay is
 * injected, and returns to HEALTHY after the injection is switched off, without the injected path count
 * growing further.
 */
public void testFailsHealthOnHungIOBeyondHealthyTimeout() throws Exception {
    final long healthyTimeoutThreshold = randomLongBetween(500, 1000);
    final long refreshInterval = randomLongBetween(500, 1000);
    final long slowLogThreshold = randomLongBetween(100, 200);
    final long delayBetweenChecks = 100;
    final Settings settings = Settings.builder()
        .put(FsHealthService.HEALTHY_TIMEOUT_SETTING.getKey(), healthyTimeoutThreshold + "ms")
        .put(FsHealthService.REFRESH_INTERVAL_SETTING.getKey(), refreshInterval + "ms")
        .put(FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING.getKey(), slowLogThreshold + "ms")
        // we need to verify exact time
        .put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0)
        .build();
    final FileSystem defaultFileSystem = PathUtils.getDefaultFileSystem();
    final TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    final FileSystemFsyncHungProvider hungFsyncProvider = new FileSystemFsyncHungProvider(defaultFileSystem, testThreadPool);
    PathUtilsForTesting.installMock(hungFsyncProvider.getFileSystem(null));
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment env = newNodeEnvironment()) {
        final FsHealthService manualService = new FsHealthService(settings, clusterSettings, testThreadPool, env);

        logger.info("--> Initial health status prior to the first monitor run");
        final StatusInfo initialHealth = manualService.getHealth();
        assertEquals(HEALTHY, initialHealth.getStatus());
        assertEquals("health check passed", initialHealth.getInfo());

        logger.info("--> First monitor run");
        manualService.new FsHealthMonitor().run();
        final StatusInfo healthAfterFirstRun = manualService.getHealth();
        assertEquals(HEALTHY, healthAfterFirstRun.getStatus());
        assertEquals("health check passed", healthAfterFirstRun.getInfo());

        logger.info("--> Disrupt file system");
        hungFsyncProvider.injectIODelay.set(true);
        // A second, started service performs its checks on the thread pool while the disruption is active.
        final FsHealthService scheduledService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        scheduledService.doStart();
        final long unhealthyWaitMillis = healthyTimeoutThreshold + (2 * refreshInterval);
        waitUntil(() -> scheduledService.getHealth().getStatus() == UNHEALTHY, unhealthyWaitMillis, TimeUnit.MILLISECONDS);
        final StatusInfo disruptedHealth = scheduledService.getHealth();
        assertEquals(UNHEALTHY, disruptedHealth.getStatus());
        assertEquals("healthy threshold breached", disruptedHealth.getInfo());
        final int disruptedPathCount = hungFsyncProvider.getInjectedPathCount();
        assertThat(disruptedPathCount, equalTo(1));

        logger.info("--> Fix file system disruption");
        hungFsyncProvider.injectIODelay.set(false);
        final long recoveryWaitMillis = delayBetweenChecks + (4 * refreshInterval);
        waitUntil(() -> scheduledService.getHealth().getStatus() == HEALTHY, recoveryWaitMillis, TimeUnit.MILLISECONDS);
        final StatusInfo recoveredHealth = scheduledService.getHealth();
        assertEquals(HEALTHY, recoveredHealth.getStatus());
        assertEquals("health check passed", recoveredHealth.getInfo());
        // No further path may have been disrupted once the injection was switched off.
        assertEquals(disruptedPathCount, hungFsyncProvider.getInjectedPathCount());
        scheduledService.doStop();
    } finally {
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
Aggregations