Search in sources :

Example 1 with OzoneManagerRatisServer

use of org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer in project ozone by apache.

the class TestOzoneManagerHAMetadataOnly method testJMXMetrics.

/**
 * Checks that at least one Ratis metric, the log worker's
 * {@code flushCount}, is registered with the platform MBeanServer and that
 * its {@code Count} attribute can be read as a non-negative long.
 */
@Test
public void testJMXMetrics() throws Exception {
    OzoneManagerRatisServer omRatisServer = getCluster().getOzoneManager(0).getOmRatisServer();
    // Metric name layout: <app>.log_worker.<peerId>@<groupId>.flushCount
    String metricName = RATIS_APPLICATION_NAME_METRICS
        + ".log_worker."
        + omRatisServer.getRaftPeerId().toString()
        + "@"
        + omRatisServer.getRaftGroup().getGroupId()
        + ".flushCount";
    ObjectName metricObjectName = new ObjectName(RATIS_APPLICATION_NAME_METRICS, "name", metricName);

    MBeanServer platformServer = ManagementFactory.getPlatformMBeanServer();
    // The lookup itself fails with an exception if nothing is registered
    // under this name, so reaching the assertion already proves exposure.
    MBeanInfo metricInfo = platformServer.getMBeanInfo(metricObjectName);
    Assert.assertNotNull(metricInfo);

    Object countAttribute = platformServer.getAttribute(metricObjectName, "Count");
    Assert.assertTrue((long) countAttribute >= 0);
}
Also used : MBeanInfo(javax.management.MBeanInfo) OzoneManagerRatisServer(org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer) ObjectName(javax.management.ObjectName) MBeanServer(javax.management.MBeanServer) Test(org.junit.Test)

Example 2 with OzoneManagerRatisServer

use of org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer in project ozone by apache.

the class TestOzoneManagerHAMetadataOnly method testOMRetryCache.

/**
 * Verifies the Ratis retry cache on the leader OM.
 *
 * <p>The same create-volume request (same client id and call id) is
 * submitted three times directly to the leader's RaftServer:
 * <ol>
 *   <li>first submission: executed, the volume is created;</li>
 *   <li>second submission: must be answered from the retry cache, so no
 *       "Volume creation failed" message may be logged;</li>
 *   <li>third submission, sent after the retry cache duration has elapsed:
 *       executed again, so it is expected to log "Volume creation failed"
 *       because the volume already exists.</li>
 * </ol>
 */
@Test
public void testOMRetryCache() throws Exception {
    ObjectStore objectStore = getObjectStore();
    objectStore.createVolume(UUID.randomUUID().toString());
    OMFailoverProxyProvider omFailoverProxyProvider = OmFailoverProxyUtil.getFailoverProxyProvider(objectStore.getClientProxy());
    String currentLeaderNodeId = omFailoverProxyProvider.getCurrentProxyOMNodeId();
    OzoneManagerRatisServer ozoneManagerRatisServer = getCluster().getOzoneManager(currentLeaderNodeId).getOmRatisServer();
    RaftServer raftServer = ozoneManagerRatisServer.getServer();
    ClientId clientId = ClientId.randomId();
    long callId = 2000L;
    String userName = UserGroupInformation.getCurrentUser().getUserName();
    String volumeName = UUID.randomUUID().toString();
    GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.captureLogs(OMVolumeCreateRequest.getLogger());
    try {
        OMRequest omRequest = OMRequest.newBuilder()
            .setCreateVolumeRequest(CreateVolumeRequest.newBuilder()
                .setVolumeInfo(VolumeInfo.newBuilder()
                    .setOwnerName(userName)
                    .setAdminName(userName)
                    .setVolume(volumeName)
                    .build())
                .build())
            .setClientId(UUID.randomUUID().toString())
            .setCmdType(OzoneManagerProtocolProtos.Type.CreateVolume)
            .build();

        // First submission: the request is executed and the volume created.
        RaftClientReply raftClientReply = raftServer.submitClientRequest(
            newWriteRequest(raftServer, ozoneManagerRatisServer, clientId, callId, omRequest));
        Assert.assertTrue(raftClientReply.isSuccess());
        Assert.assertTrue(logCapturer.getOutput().contains("created volume:" + volumeName));
        logCapturer.clearOutput();

        // Second submission with the same client id and call id: the request
        // should not be executed again; the ratis server should reply from
        // its retry cache. If it were executed a second time it would log
        // "Volume creation failed", so check that this did not happen.
        raftClientReply = raftServer.submitClientRequest(
            newWriteRequest(raftServer, ozoneManagerRatisServer, clientId, callId, omRequest));
        Assert.assertTrue(raftClientReply.isSuccess());
        Assert.assertFalse(logCapturer.getOutput().contains("Volume creation failed"));

        // Sleep for a little longer than the retry cache duration so that the
        // cached entry is cleared.
        Thread.sleep(getRetryCacheDuration().toMillis() + 5000);

        // Third submission with the same client id and call id: the cache
        // entry has expired, so the ratis server should execute the request
        // again, which fails because the volume already exists.
        raftClientReply = raftServer.submitClientRequest(
            newWriteRequest(raftServer, ozoneManagerRatisServer, clientId, callId, omRequest));
        Assert.assertTrue(raftClientReply.isSuccess());
        Assert.assertTrue(logCapturer.getOutput().contains("Volume creation failed"));
    } finally {
        // Detach the capturer so it does not keep collecting output from
        // tests that run afterwards.
        logCapturer.stopCapturing();
    }
}

/**
 * Builds a Ratis write request that wraps {@code omRequest}, addressed to
 * {@code raftServer} within the Raft group of {@code omRatisServer}.
 * Calling this repeatedly with the same {@code clientId} and {@code callId}
 * yields requests that differ only in being separate objects.
 */
private static RaftClientRequest newWriteRequest(RaftServer raftServer, OzoneManagerRatisServer omRatisServer, ClientId clientId, long callId, OMRequest omRequest) throws Exception {
    return RaftClientRequest.newBuilder()
        .setClientId(clientId)
        .setServerId(raftServer.getId())
        .setGroupId(omRatisServer.getRaftGroup().getGroupId())
        .setCallId(callId)
        .setMessage(Message.valueOf(OMRatisHelper.convertRequestToByteString(omRequest)))
        .setType(RaftClientRequest.writeRequestType())
        .build();
}
Also used : OMRequest(org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest) ObjectStore(org.apache.hadoop.ozone.client.ObjectStore) OMFailoverProxyProvider(org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) RaftServer(org.apache.ratis.server.RaftServer) ClientId(org.apache.ratis.protocol.ClientId) GenericTestUtils(org.apache.ozone.test.GenericTestUtils) OzoneManagerRatisServer(org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer) Test(org.junit.Test)

Example 3 with OzoneManagerRatisServer

use of org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer in project ozone by apache.

the class TestAddRemoveOzoneManager method getRatisLogFiles.

/**
 * Lists the files whose name starts with {@code "log"} inside the given
 * OM's Ratis directory, i.e.
 * {@code <ratisStorageDir>/<raftGroupUuid>/<Storage.STORAGE_DIR_CURRENT>}.
 *
 * @param om the OzoneManager whose Ratis server storage is inspected
 * @return the matching files, or whatever {@link File#listFiles(FileFilter)}
 *         yields for that directory (it returns {@code null} when the path
 *         is not a readable directory)
 */
private File[] getRatisLogFiles(OzoneManager om) {
    OzoneManagerRatisServer ratisServer = om.getOmRatisServer();
    String groupDirName = ratisServer.getRaftGroupId().getUuid().toString();
    File groupDir = new File(ratisServer.getRatisStorageDir(), groupDirName);
    File currentDir = new File(groupDir, Storage.STORAGE_DIR_CURRENT);
    // Typed explicitly so the FileFilter overload of listFiles is chosen.
    FileFilter logFilesOnly = candidate -> candidate.getName().startsWith("log");
    return currentDir.listFiles(logFilesOnly);
}
Also used : OzoneManagerRatisServer(org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer) FileFilter(java.io.FileFilter) File(java.io.File)

Example 4 with OzoneManagerRatisServer

use of org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer in project ozone by apache.

the class OMAdminProtocolServerSideImpl method decommission.

/**
 * Handles a request to decommission an OM node by removing it from the
 * Ratis ring.
 *
 * <p>Failures that this method detects itself (Ratis disabled, unknown
 * node, IOException during removal) are reported through a response with
 * {@code success == false} and an error message rather than by throwing.
 *
 * @param controller RPC controller; not used by this method
 * @param request the decommission request carrying the node id
 * @return {@code null} when {@code request} is {@code null}; otherwise a
 *         response whose success flag reflects the outcome
 * @throws ServiceException declared by the protocol; presumably raised by
 *         {@code checkLeaderStatus} when this OM is not the leader (its
 *         body is not visible here, so confirm)
 */
@Override
public DecommissionOMResponse decommission(RpcController controller, DecommissionOMRequest request) throws ServiceException {
    if (request == null) {
        return null;
    }

    if (!ozoneManager.isRatisEnabled()) {
        return DecommissionOMResponse.newBuilder()
            .setSuccess(false)
            .setErrorMsg("OM node cannot be decommissioned as Ratis is " + "not enabled.")
            .build();
    }

    OzoneManagerRatisServer ratisServer = ozoneManager.getOmRatisServer();
    checkLeaderStatus(ratisServer);

    // The node must be one of this OM's known peers.
    OMNodeDetails nodeToRemove = ozoneManager.getPeerNode(request.getNodeId());
    if (nodeToRemove == null) {
        return DecommissionOMResponse.newBuilder()
            .setSuccess(false)
            .setErrorMsg("OM node not present in the OM peer list.")
            .build();
    }

    try {
        ratisServer.removeOMFromRatisRing(nodeToRemove);
        return DecommissionOMResponse.newBuilder()
            .setSuccess(true)
            .build();
    } catch (IOException removalFailure) {
        // Surface the failure to the caller inside the response.
        return DecommissionOMResponse.newBuilder()
            .setSuccess(false)
            .setErrorMsg(removalFailure.getMessage())
            .build();
    }
}
Also used : OMNodeDetails(org.apache.hadoop.ozone.om.helpers.OMNodeDetails) IOException(java.io.IOException) OzoneManagerRatisServer(org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer)

Example 5 with OzoneManagerRatisServer

use of org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer in project ozone by apache.

the class TestOMRatisSnapshots method testInstallSnapshot.

/**
 * Installs the leader OM's DB checkpoint on an OM that was inactive while
 * the leader processed transactions, then verifies that the follower's
 * last-applied term/index reach the checkpoint's values and that the
 * follower's DB contains the volume, bucket and keys written meanwhile.
 */
@Test
public void testInstallSnapshot() throws Exception {
    // Resolve the current leader OM through the client's failover proxy.
    String leaderOMNodeId = OmFailoverProxyUtil.getFailoverProxyProvider(objectStore.getClientProxy()).getCurrentProxyOMNodeId();
    OzoneManager leaderOM = cluster.getOzoneManager(leaderOMNodeId);
    OzoneManagerRatisServer leaderRatisServer = leaderOM.getOmRatisServer();

    // Pick the inactive OM: use the leader's first peer unless it is
    // active, in which case use the second peer.
    String firstPeerNodeId = leaderOM.getPeerNodes().get(0).getNodeId();
    String followerNodeId = cluster.isOMActive(firstPeerNodeId)
        ? leaderOM.getPeerNodes().get(1).getNodeId()
        : firstPeerNodeId;
    OzoneManager followerOM = cluster.getOzoneManager(followerNodeId);

    // Write some keys so that the leader's log index increases.
    List<String> writtenKeys = writeKeysToIncreaseLogIndex(leaderRatisServer, 200);

    // Record the leader's current term/index and take its latest DB
    // checkpoint.
    TransactionInfo leaderTxnInfo = TransactionInfo.readTransactionInfo(leaderOM.getMetadataManager());
    TermIndex leaderTermIndex = TermIndex.valueOf(leaderTxnInfo.getTerm(), leaderTxnInfo.getTransactionIndex());
    long snapshotIndex = leaderTermIndex.getIndex();
    long snapshotTerm = leaderTermIndex.getTerm();
    DBCheckpoint leaderDbCheckpoint = leaderOM.getMetadataManager().getStore().getCheckpoint(false);

    // Start the inactive OM.
    cluster.startInactiveOM(followerNodeId);

    // The freshly started OM should be lagging behind the leader. Wait for
    // the follower to apply transactions up to the leader snapshot index;
    // this times out if the follower does not catch up within 3s.
    GenericTestUtils.waitFor(
        () -> followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= snapshotIndex - 1,
        100, 3000);
    long followerLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex();
    assertTrue(followerLastAppliedIndex >= snapshotIndex - 1);

    // Install the leader OM's DB checkpoint on the lagging OM.
    followerOM.installCheckpoint(leaderOMNodeId, leaderDbCheckpoint);

    // After the new checkpoint is installed, the follower's lastAppliedIndex
    // must be >= the snapshot index of the checkpoint. It can be greater
    // than the snapshot index if there is any conf entry from ratis.
    followerLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex();
    assertTrue(followerLastAppliedIndex >= snapshotIndex);
    assertTrue(followerOM.getOmRatisServer().getLastAppliedTermIndex().getTerm() >= snapshotTerm);

    // Verify that the follower OM's DB contains the transactions which were
    // made while it was inactive.
    OMMetadataManager followerMetadata = followerOM.getMetadataManager();
    Assert.assertNotNull(followerMetadata.getVolumeTable().get(followerMetadata.getVolumeKey(volumeName)));
    Assert.assertNotNull(followerMetadata.getBucketTable().get(followerMetadata.getBucketKey(volumeName, bucketName)));
    for (String writtenKey : writtenKeys) {
        String ozoneKey = followerMetadata.getOzoneKey(volumeName, bucketName, writtenKey);
        Assert.assertNotNull(followerMetadata.getKeyTable(getDefaultBucketLayout()).get(ozoneKey));
    }
}
Also used : DBCheckpoint(org.apache.hadoop.hdds.utils.db.DBCheckpoint) TransactionInfo(org.apache.hadoop.hdds.utils.TransactionInfo) OzoneManagerRatisServer(org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer) TermIndex(org.apache.ratis.server.protocol.TermIndex) Test(org.junit.jupiter.api.Test)

Aggregations

OzoneManagerRatisServer (org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer)9 DBCheckpoint (org.apache.hadoop.hdds.utils.db.DBCheckpoint)3 GenericTestUtils (org.apache.ozone.test.GenericTestUtils)3 Test (org.junit.jupiter.api.Test)3 File (java.io.File)2 IOException (java.io.IOException)2 TransactionInfo (org.apache.hadoop.hdds.utils.TransactionInfo)2 OMRequest (org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest)2 RaftServer (org.apache.ratis.server.RaftServer)2 TermIndex (org.apache.ratis.server.protocol.TermIndex)2 Test (org.junit.Test)2 FileFilter (java.io.FileFilter)1 Path (java.nio.file.Path)1 Duration (java.time.Duration)1 MBeanInfo (javax.management.MBeanInfo)1 MBeanServer (javax.management.MBeanServer)1 ObjectName (javax.management.ObjectName)1 ObjectStore (org.apache.hadoop.ozone.client.ObjectStore)1 OzoneManager (org.apache.hadoop.ozone.om.OzoneManager)1 OMException (org.apache.hadoop.ozone.om.exceptions.OMException)1