Search in sources:

Example 26 with NodeEnvironment

use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.

the class FsHealthServiceTests method testLoggingOnHungIO.

/**
 * Verifies that a health check slowed down by hung IO is logged at WARN level for every data path.
 *
 * <p>The test installs a {@code FileSystemFsyncHungProvider} as the mock file system, registers one
 * {@code SeenEventExpectation} per node data path, switches the injected IO delay on, runs a single
 * {@code FsHealthMonitor} pass and then checks that the provider counted one injected path per data
 * path and that every expected WARN message was seen.
 *
 * @throws Exception propagated from environment setup, the monitor run or {@code assertBusy}
 */
@TestLogging(value = "org.opensearch.monitor.fs:WARN", reason = "to ensure that we log on hung IO at WARN level")
public void testLoggingOnHungIO() throws Exception {
    long slowLogThreshold = randomLongBetween(100, 200);
    final Settings settings = Settings.builder()
        .put(FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING.getKey(), slowLogThreshold + "ms")
        .build();
    FileSystem fileSystem = PathUtils.getDefaultFileSystem();
    TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    // NOTE(review): the second argument is presumably the injected delay in ms; it is drawn strictly
    // above the slow-log threshold so the warning should fire -- confirm against the provider.
    FileSystemFsyncHungProvider disruptFileSystemProvider = new FileSystemFsyncHungProvider(
        fileSystem,
        randomLongBetween(slowLogThreshold + 1, 400),
        testThreadPool
    );
    fileSystem = disruptFileSystemProvider.getFileSystem(null);
    PathUtilsForTesting.installMock(fileSystem);
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (
        MockLogAppender mockAppender = MockLogAppender.createForLoggers(LogManager.getLogger(FsHealthService.class));
        NodeEnvironment env = newNodeEnvironment()
    ) {
        FsHealthService fsHealthService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        // One expectation per data path, each with a unique name ("test1", "test2", ...).
        int counter = 0;
        for (Path path : env.nodeDataPaths()) {
            mockAppender.addExpectation(
                new MockLogAppender.SeenEventExpectation(
                    "test" + ++counter,
                    FsHealthService.class.getCanonicalName(),
                    Level.WARN,
                    "health check of [" + path + "] took [*ms] which is above the warn threshold*"
                )
            );
        }
        // disrupt file system
        disruptFileSystemProvider.injectIODelay.set(true);
        fsHealthService.new FsHealthMonitor().run();
        assertEquals(env.nodeDataPaths().length, disruptFileSystemProvider.getInjectedPathCount());
        assertBusy(mockAppender::assertAllExpectationsMatched);
    } finally {
        // Always lift the injected delay, mirroring the cleanup of the sibling fault-injection tests,
        // so the disruption is never left switched on once the test body has exited.
        disruptFileSystemProvider.injectIODelay.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
Also used : Path(java.nio.file.Path) ClusterSettings(org.opensearch.common.settings.ClusterSettings) MockLogAppender(org.opensearch.test.MockLogAppender) NodeEnvironment(org.opensearch.env.NodeEnvironment) TestThreadPool(org.opensearch.threadpool.TestThreadPool) FileSystem(java.nio.file.FileSystem) ClusterSettings(org.opensearch.common.settings.ClusterSettings) Settings(org.opensearch.common.settings.Settings) TestLogging(org.opensearch.test.junit.annotations.TestLogging)

Example 27 with NodeEnvironment

use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.

the class FsHealthServiceTests method testFailsHealthOnUnexpectedLockFileSize.

/**
 * Checks that the health service reports UNHEALTHY with a "broken node lock" message once the
 * mock file system starts injecting an unexpected lock file size.
 *
 * <p>A first monitor run without injection must report HEALTHY / "health check passed". After
 * the injection flag is switched on, a freshly created service is run once and must report
 * UNHEALTHY, with exactly one injected path counted by the provider.
 *
 * @throws IOException declared for the node environment created and closed by the test
 */
public void testFailsHealthOnUnexpectedLockFileSize() throws IOException {
    final FileSystem defaultFs = PathUtils.getDefaultFileSystem();
    final Settings nodeSettings = Settings.EMPTY;
    final TestThreadPool pool = new TestThreadPool(getClass().getName(), nodeSettings);
    final FileSystemUnexpectedLockFileSizeProvider lockSizeProvider = new FileSystemUnexpectedLockFileSizeProvider(
        defaultFs,
        1,
        pool
    );
    final FileSystem mockedFs = lockSizeProvider.getFileSystem(null);
    PathUtilsForTesting.installMock(mockedFs);
    final ClusterSettings builtInClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment nodeEnv = newNodeEnvironment()) {
        // Baseline: no injection yet, so a single monitor pass must leave the service healthy.
        final FsHealthService baselineService = new FsHealthService(nodeSettings, builtInClusterSettings, pool, nodeEnv);
        baselineService.new FsHealthMonitor().run();
        assertEquals(HEALTHY, baselineService.getHealth().getStatus());
        assertEquals("health check passed", baselineService.getHealth().getInfo());

        // enabling unexpected file size injection
        lockSizeProvider.injectUnexpectedFileSize.set(true);
        final FsHealthService disruptedService = new FsHealthService(nodeSettings, builtInClusterSettings, pool, nodeEnv);
        disruptedService.new FsHealthMonitor().run();
        assertEquals(UNHEALTHY, disruptedService.getHealth().getStatus());
        assertThat(disruptedService.getHealth().getInfo(), is("health check failed due to broken node lock"));
        assertEquals(1, lockSizeProvider.getInjectedPathCount());
    } finally {
        // Undo the injection and the mock file system, then shut the pool down.
        lockSizeProvider.injectUnexpectedFileSize.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(pool, 500, TimeUnit.MILLISECONDS);
    }
}
Also used : ClusterSettings(org.opensearch.common.settings.ClusterSettings) NodeEnvironment(org.opensearch.env.NodeEnvironment) FileSystem(java.nio.file.FileSystem) TestThreadPool(org.opensearch.threadpool.TestThreadPool) ClusterSettings(org.opensearch.common.settings.ClusterSettings) Settings(org.opensearch.common.settings.Settings)

Example 28 with NodeEnvironment

use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.

the class FsHealthServiceTests method testFailsHealthOnSinglePathWriteFailure.

/**
 * Checks that an injected IO exception on a single data path makes the health service report
 * UNHEALTHY and name that path in its info message.
 *
 * <p>A first monitor run without injection must report HEALTHY / "health check passed". The test
 * then picks one data path at random, restricts the provider to that path prefix, enables
 * exception injection and runs a freshly created service once; the provider must have counted
 * exactly one injected path.
 *
 * @throws IOException declared for the node environment created and closed by the test
 */
public void testFailsHealthOnSinglePathWriteFailure() throws IOException {
    final FileSystem defaultFs = PathUtils.getDefaultFileSystem();
    final FileSystemIOExceptionProvider ioFailureProvider = new FileSystemIOExceptionProvider(defaultFs);
    final FileSystem failingFs = ioFailureProvider.getFileSystem(null);
    PathUtilsForTesting.installMock(failingFs);
    final Settings nodeSettings = Settings.EMPTY;
    final ClusterSettings builtInClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    final TestThreadPool pool = new TestThreadPool(getClass().getName(), nodeSettings);
    try (NodeEnvironment nodeEnv = newNodeEnvironment()) {
        final Path[] dataPaths = nodeEnv.nodeDataPaths();

        // Baseline: no injection yet, so a single monitor pass must leave the service healthy.
        final FsHealthService baselineService = new FsHealthService(nodeSettings, builtInClusterSettings, pool, nodeEnv);
        baselineService.new FsHealthMonitor().run();
        assertEquals(HEALTHY, baselineService.getHealth().getStatus());
        assertEquals("health check passed", baselineService.getHealth().getInfo());

        // disrupt file system writes on single path
        final String failingPath = randomFrom(dataPaths).toString();
        ioFailureProvider.restrictPathPrefix(failingPath);
        ioFailureProvider.injectIOException.set(true);
        final FsHealthService disruptedService = new FsHealthService(nodeSettings, builtInClusterSettings, pool, nodeEnv);
        disruptedService.new FsHealthMonitor().run();
        assertEquals(UNHEALTHY, disruptedService.getHealth().getStatus());
        final String expectedInfo = "health check failed on [" + failingPath + "]";
        assertThat(disruptedService.getHealth().getInfo(), is(expectedInfo));
        assertEquals(1, ioFailureProvider.getInjectedPathCount());
    } finally {
        // Undo the injection and the mock file system, then shut the pool down.
        ioFailureProvider.injectIOException.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(pool, 500, TimeUnit.MILLISECONDS);
    }
}
Also used : Path(java.nio.file.Path) ClusterSettings(org.opensearch.common.settings.ClusterSettings) NodeEnvironment(org.opensearch.env.NodeEnvironment) FileSystem(java.nio.file.FileSystem) TestThreadPool(org.opensearch.threadpool.TestThreadPool) ClusterSettings(org.opensearch.common.settings.ClusterSettings) Settings(org.opensearch.common.settings.Settings)

Example 29 with NodeEnvironment

use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.

the class FsHealthServiceTests method testSchedulesHealthCheckAtRefreshIntervals.

/**
 * Verifies that the health check is rescheduled at exact multiples of the configured refresh interval.
 *
 * <p>The service is started on the thread pool of {@code deterministicTaskQueue}. For a randomly
 * chosen number of steps the test either runs a runnable task or advances time to the next deferred
 * task, asserting after every step that the elapsed time equals
 * {@code refreshInterval * rescheduledCount}. After {@code doStop()} and draining the queue, no
 * runnable or deferred tasks may remain.
 *
 * @throws Exception propagated from environment setup or the service under test
 */
public void testSchedulesHealthCheckAtRefreshIntervals() throws Exception {
    long refreshInterval = randomLongBetween(1000, 12000);
    final Settings settings = Settings.builder().put(FsHealthService.REFRESH_INTERVAL_SETTING.getKey(), refreshInterval + "ms").build();
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment env = newNodeEnvironment()) {
        FsHealthService fsHealthService = new FsHealthService(settings, clusterSettings, deterministicTaskQueue.getThreadPool(), env);
        final long startTimeMillis = deterministicTaskQueue.getCurrentTimeMillis();
        fsHealthService.doStart();
        // Starting the service must only schedule a deferred check, not make one immediately runnable.
        assertFalse(deterministicTaskQueue.hasRunnableTasks());
        assertTrue(deterministicTaskQueue.hasDeferredTasks());
        // Draw the step count once: a randomIntBetween call inside the loop condition would re-roll
        // the upper bound on every pass instead of fixing the number of steps up front.
        final int steps = randomIntBetween(5, 10);
        int rescheduledCount = 0;
        for (int i = 1; i <= steps; i++) {
            if (deterministicTaskQueue.hasRunnableTasks()) {
                deterministicTaskQueue.runRandomTask();
            } else {
                // The next deferred execution must sit exactly one refresh interval after the previous one.
                assertThat(deterministicTaskQueue.getLatestDeferredExecutionTime(), is(refreshInterval * (rescheduledCount + 1)));
                deterministicTaskQueue.advanceTime();
                rescheduledCount++;
            }
            assertThat(deterministicTaskQueue.getCurrentTimeMillis() - startTimeMillis, is(refreshInterval * rescheduledCount));
        }
        fsHealthService.doStop();
        deterministicTaskQueue.runAllTasksInTimeOrder();
        // Once stopped and drained, nothing may be left scheduled.
        assertFalse(deterministicTaskQueue.hasRunnableTasks());
        assertFalse(deterministicTaskQueue.hasDeferredTasks());
    }
}
Also used : ClusterSettings(org.opensearch.common.settings.ClusterSettings) NodeEnvironment(org.opensearch.env.NodeEnvironment) ClusterSettings(org.opensearch.common.settings.ClusterSettings) Settings(org.opensearch.common.settings.Settings)

Example 30 with NodeEnvironment

use of org.opensearch.env.NodeEnvironment in project OpenSearch by opensearch-project.

the class FsHealthServiceTests method testFailsHealthOnHungIOBeyondHealthyTimeout.

/**
 * Verifies that hung IO lasting beyond the healthy timeout marks the node UNHEALTHY and that the
 * node becomes HEALTHY again once the disruption is lifted.
 *
 * <p>Steps: (1) a new service reports HEALTHY before any monitor run; (2) it is still HEALTHY after
 * one undisturbed monitor run; (3) with the IO delay injected, a second, started service must turn
 * UNHEALTHY with "healthy threshold breached" and exactly one injected path; (4) after the delay is
 * removed the same service must return to HEALTHY / "health check passed" without the injected-path
 * count changing.
 *
 * @throws Exception propagated from environment setup, {@code waitUntil} or the service under test
 */
public void testFailsHealthOnHungIOBeyondHealthyTimeout() throws Exception {
    long healthyTimeoutThreshold = randomLongBetween(500, 1000);
    long refreshInterval = randomLongBetween(500, 1000);
    long slowLogThreshold = randomLongBetween(100, 200);
    long delayBetweenChecks = 100;
    final Settings settings = Settings.builder()
        .put(FsHealthService.HEALTHY_TIMEOUT_SETTING.getKey(), healthyTimeoutThreshold + "ms")
        .put(FsHealthService.REFRESH_INTERVAL_SETTING.getKey(), refreshInterval + "ms")
        .put(FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING.getKey(), slowLogThreshold + "ms")
        // we need to verify exact time
        .put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0)
        .build();
    FileSystem fileSystem = PathUtils.getDefaultFileSystem();
    TestThreadPool testThreadPool = new TestThreadPool(getClass().getName(), settings);
    FileSystemFsyncHungProvider disruptFileSystemProvider = new FileSystemFsyncHungProvider(fileSystem, testThreadPool);
    fileSystem = disruptFileSystemProvider.getFileSystem(null);
    PathUtilsForTesting.installMock(fileSystem);
    final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    try (NodeEnvironment env = newNodeEnvironment()) {
        FsHealthService fsHealthService = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        logger.info("--> Initial health status prior to the first monitor run");
        StatusInfo fsHealth = fsHealthService.getHealth();
        assertEquals(HEALTHY, fsHealth.getStatus());
        assertEquals("health check passed", fsHealth.getInfo());
        logger.info("--> First monitor run");
        fsHealthService.new FsHealthMonitor().run();
        fsHealth = fsHealthService.getHealth();
        assertEquals(HEALTHY, fsHealth.getStatus());
        assertEquals("health check passed", fsHealth.getInfo());
        logger.info("--> Disrupt file system");
        disruptFileSystemProvider.injectIODelay.set(true);
        final FsHealthService fsHealthSrvc = new FsHealthService(settings, clusterSettings, testThreadPool, env);
        fsHealthSrvc.doStart();
        // The result of waitUntil is deliberately ignored: the assertions right after it give a
        // clearer failure message than a bare boolean would.
        waitUntil(() -> fsHealthSrvc.getHealth().getStatus() == UNHEALTHY, healthyTimeoutThreshold + (2 * refreshInterval), TimeUnit.MILLISECONDS);
        fsHealth = fsHealthSrvc.getHealth();
        assertEquals(UNHEALTHY, fsHealth.getStatus());
        assertEquals("healthy threshold breached", fsHealth.getInfo());
        int disruptedPathCount = disruptFileSystemProvider.getInjectedPathCount();
        assertThat(disruptedPathCount, equalTo(1));
        logger.info("--> Fix file system disruption");
        disruptFileSystemProvider.injectIODelay.set(false);
        waitUntil(() -> fsHealthSrvc.getHealth().getStatus() == HEALTHY, delayBetweenChecks + (4 * refreshInterval), TimeUnit.MILLISECONDS);
        fsHealth = fsHealthSrvc.getHealth();
        assertEquals(HEALTHY, fsHealth.getStatus());
        assertEquals("health check passed", fsHealth.getInfo());
        // No further path may have been disrupted after the delay was switched off.
        assertEquals(disruptedPathCount, disruptFileSystemProvider.getInjectedPathCount());
        fsHealthSrvc.doStop();
    } finally {
        // Always lift the injected delay, mirroring the cleanup of the sibling fault-injection tests:
        // an assertion failing while the delay is on must not leave the disruption active while the
        // thread pool is being shut down.
        disruptFileSystemProvider.injectIODelay.set(false);
        PathUtilsForTesting.teardown();
        ThreadPool.terminate(testThreadPool, 500, TimeUnit.MILLISECONDS);
    }
}
Also used : ClusterSettings(org.opensearch.common.settings.ClusterSettings) NodeEnvironment(org.opensearch.env.NodeEnvironment) StatusInfo(org.opensearch.monitor.StatusInfo) FileSystem(java.nio.file.FileSystem) TestThreadPool(org.opensearch.threadpool.TestThreadPool) ClusterSettings(org.opensearch.common.settings.ClusterSettings) Settings(org.opensearch.common.settings.Settings)

Aggregations

NodeEnvironment (org.opensearch.env.NodeEnvironment): 62; Settings (org.opensearch.common.settings.Settings): 36; Path (java.nio.file.Path): 32; Matchers.containsString (org.hamcrest.Matchers.containsString): 22; ClusterState (org.opensearch.cluster.ClusterState): 21; IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 19; ClusterSettings (org.opensearch.common.settings.ClusterSettings): 17; IndexWriter (org.apache.lucene.index.IndexWriter): 16; Metadata (org.opensearch.cluster.metadata.Metadata): 16; Writer (org.opensearch.gateway.PersistedClusterStateService.Writer): 16; Index (org.opensearch.index.Index): 13; IOException (java.io.IOException): 12; FilterDirectory (org.apache.lucene.store.FilterDirectory): 9; Environment (org.opensearch.env.Environment): 9; ArrayList (java.util.ArrayList): 7; Directory (org.apache.lucene.store.Directory): 7; TestThreadPool (org.opensearch.threadpool.TestThreadPool): 7; FileSystem (java.nio.file.FileSystem): 6; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 6; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 6