Search in sources :

Example 61 with HelixManager

use of org.apache.helix.HelixManager in project pinot by linkedin.

the class BaseClusterIntegrationTest method setupSegmentCountCountDownLatch.

/**
 * Connects a spectator Helix manager and registers an external-view listener that releases
 * the returned latch once a matching table has all of its segments online.
 *
 * <p>An external view matches when its id contains {@code tableName}. The latch is counted
 * down when that view has exactly {@code expectedSegmentCount} partitions and every one of
 * them has at least one entry in its state map equal to {@code "ONLINE"}.
 *
 * <p>The Helix manager created here is connected but not disconnected by this method.
 *
 * @param tableName substring looked up in each external view id
 * @param expectedSegmentCount number of partitions that must be present and online
 * @return a latch with count 1 that is counted down when the condition above is first met
 * @throws Exception if connecting the manager or registering the listener fails
 */
protected CountDownLatch setupSegmentCountCountDownLatch(final String tableName, final int expectedSegmentCount) throws Exception {
    final CountDownLatch latch = new CountDownLatch(1);
    HelixManager manager = HelixManagerFactory.getZKHelixManager(getHelixClusterName(), "test_instance", InstanceType.SPECTATOR, ZkStarter.DEFAULT_ZK_STR);
    manager.connect();
    manager.addExternalViewChangeListener(new ExternalViewChangeListener() {

        // Set once the latch has been released so later notifications are ignored.
        private boolean _hasBeenTriggered = false;

        @Override
        public void onExternalViewChange(List<ExternalView> externalViewList, NotificationContext changeContext) {
            if (_hasBeenTriggered) {
                return;
            }
            for (ExternalView view : externalViewList) {
                // Skip views that belong to other tables.
                if (!view.getId().contains(tableName)) {
                    continue;
                }
                Set<String> segments = view.getPartitionSet();
                // Not all segments are listed yet (or too many are); keep waiting.
                if (segments.size() != expectedSegmentCount) {
                    continue;
                }
                int onlineSegments = 0;
                for (String segment : segments) {
                    if (view.getStateMap(segment).containsValue("ONLINE")) {
                        onlineSegments++;
                    }
                }
                if (onlineSegments == expectedSegmentCount) {
                    latch.countDown();
                    _hasBeenTriggered = true;
                }
            }
        }
    });
    return latch;
}
Also used : ExternalView(org.apache.helix.model.ExternalView) NotificationContext(org.apache.helix.NotificationContext) HelixManager(org.apache.helix.HelixManager) ResultSet(java.sql.ResultSet) Set(java.util.Set) HashSet(java.util.HashSet) ExternalViewChangeListener(org.apache.helix.ExternalViewChangeListener) CountDownLatch(java.util.concurrent.CountDownLatch) Map(java.util.Map) HashMap(java.util.HashMap)

Example 62 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class HelixTaskExecutor method onMessage.

/**
 * Processes a batch of messages delivered for {@code instanceName}.
 *
 * <p>Flow, in order:
 * <ol>
 *   <li>On a FINALIZE notification, calls {@code reset()} and returns; on INIT, calls
 *       {@code init()} and continues.</li>
 *   <li>If {@code messages} is null or empty, reads new messages from ZK.</li>
 *   <li>If the executor is shutting down, logs the message ids and returns without
 *       processing them.</li>
 *   <li>Reports the backlog size to the queue monitor (when one is set) and sorts the
 *       messages by creation time.</li>
 *   <li>Filters each message: NO_OP, expired, and session-mismatched messages are reported
 *       as DISCARDED and removed; PARTICIPANT_SESSION_CHANGE messages received by a
 *       controller trigger a live-instance refresh; messages not in NEW state are skipped;
 *       cancellation messages that successfully cancel a not-yet-started transition are
 *       consumed.</li>
 *   <li>Creates a handler for each remaining message, marks the message as read, and
 *       collects current-state metadata for resources that do not have a current-state
 *       node yet.</li>
 *   <li>Batch-creates the missing current-state nodes, updates the state of the read
 *       messages, and schedules one task per collected handler.</li>
 * </ol>
 *
 * @param instanceName name of the instance the messages are addressed to
 * @param messages messages delivered with the callback; may be null or empty, in which case
 *                 they are read from ZK
 * @param changeContext notification context; supplies the manager, the notification type and
 *                      the change type
 */
@Override
@PreFetch(enabled = false)
public void onMessage(String instanceName, List<Message> messages, NotificationContext changeContext) {
    HelixManager manager = changeContext.getManager();
    // TODO: see if we should have a separate notification call for resetting
    if (changeContext.getType() == Type.FINALIZE) {
        reset();
        return;
    }
    if (changeContext.getType() == Type.INIT) {
        init();
        // no return here: continue to process messages after initialization
    }
    // if prefetch is disabled in MessageListenerCallback, we need to read all new messages from zk.
    if (messages == null || messages.isEmpty()) {
        // If no messages are given, check and read all new messages.
        messages = readNewMessagesFromZK(manager, instanceName, changeContext.getChangeType());
    }
    // While shutting down, nothing is scheduled: only the ids of the pending messages are logged.
    if (_isShuttingDown) {
        StringBuilder sb = new StringBuilder();
        for (Message message : messages) {
            sb.append(message.getMsgId() + ",");
        }
        LOG.info("Helix task executor is shutting down, discard unprocessed messages : " + sb.toString());
        return;
    }
    // Update message count
    if (_messageQueueMonitor != null) {
        _messageQueueMonitor.setMessageQueueBacklog(messages.size());
    }
    if (messages.isEmpty()) {
        LOG.info("No Messages to process");
        return;
    }
    // sort message by creation timestamp, so message created earlier is processed first
    Collections.sort(messages, Message.CREATE_TIME_COMPARATOR);
    HelixDataAccessor accessor = manager.getHelixDataAccessor();
    Builder keyBuilder = accessor.keyBuilder();
    // message handlers created; state-transition handlers are keyed by message target
    // (resource + partition), so a later put for the same target replaces the earlier handler
    Map<String, MessageHandler> stateTransitionHandlers = new HashMap<>();
    List<MessageHandler> nonStateTransitionHandlers = new ArrayList<>();
    // messages that were marked as read in this pass
    List<Message> readMsgs = new ArrayList<>();
    String sessionId = manager.getSessionId();
    // resources that already have a current-state node for this instance/session
    List<String> curResourceNames = accessor.getChildNames(keyBuilder.currentStates(instanceName, sessionId));
    // parallel lists: keys and metadata of the current-state nodes to create in one batch
    List<PropertyKey> createCurStateKeys = new ArrayList<>();
    List<CurrentState> metaCurStates = new ArrayList<>();
    // de-duplicates resources already queued for current-state creation in this pass
    Set<String> createCurStateNames = new HashSet<>();
    for (Message message : messages) {
        // NO_OP messages carry no work and are simply dropped; per the original comment they are
        // used in situations such as register a new message handler factory
        if (message.getMsgType().equalsIgnoreCase(MessageType.NO_OP.toString())) {
            LOG.info("Dropping NO-OP message. mid: " + message.getId() + ", from: " + message.getMsgSrc());
            reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
            continue;
        }
        if (message.isExpired()) {
            LOG.info("Dropping expired message. mid: " + message.getId() + ", from: " + message.getMsgSrc() + " relayed from: " + message.getRelaySrcHost());
            reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
            continue;
        }
        String tgtSessionId = message.getTgtSessionId();
        // sessionId mismatch normally means message comes from expired session, just remove it
        // ("*" as target session id is accepted for any session)
        // NOTE(review): tgtSessionId.equals("*") would throw if getTgtSessionId() can return null — confirm
        if (!sessionId.equals(tgtSessionId) && !tgtSessionId.equals("*")) {
            String warningMessage = "SessionId does NOT match. expected sessionId: " + sessionId + ", tgtSessionId in message: " + tgtSessionId + ", messageId: " + message.getMsgId();
            LOG.warn(warningMessage);
            reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
            _statusUpdateUtil.logWarning(message, HelixStateMachineEngine.class, warningMessage, manager);
            // For participants, sync the session to the controller upon session mismatch after a
            // new session is established (i.e. the message was created after this session started)
            if (manager.getInstanceType() == InstanceType.PARTICIPANT || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) {
                if (message.getCreateTimeStamp() > manager.getSessionStartTime()) {
                    syncSessionToController(manager);
                }
            }
            continue;
        }
        // A controller that receives PARTICIPANT_SESSION_CHANGE re-reads the live instances and
        // forwards them to the controller's live-instance callback, then completes the message.
        if ((manager.getInstanceType() == InstanceType.CONTROLLER || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) && MessageType.PARTICIPANT_SESSION_CHANGE.name().equals(message.getMsgType())) {
            LOG.info(String.format("Controller received PARTICIPANT_SESSION_CHANGE msg from src: %s", message.getMsgSrc()));
            PropertyKey key = new Builder(manager.getClusterName()).liveInstances();
            List<LiveInstance> liveInstances = manager.getHelixDataAccessor().getChildValues(key);
            _controller.onLiveInstanceChange(liveInstances, changeContext);
            reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.COMPLETED);
            continue;
        }
        // don't process message that is of READ or UNPROCESSABLE state
        if (MessageState.NEW != message.getMsgState()) {
            // check for the status and ignore if its already read
            if (LOG.isTraceEnabled()) {
                LOG.trace("Message already read. msgId: " + message.getMsgId());
            }
            continue;
        }
        // State Transition Cancellation: if the targeted transition has not started yet and is
        // cancelled here, the message is done; otherwise it falls through and gets its own handler
        if (message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
            boolean success = cancelNotStartedStateTransition(message, stateTransitionHandlers, accessor, instanceName);
            if (success) {
                continue;
            }
        }
        _monitor.reportReceivedMessage(message);
        // create message handlers, if handlers not found, leave its state as NEW
        try {
            MessageHandler createHandler = createMessageHandler(message, changeContext);
            if (createHandler == null) {
                continue;
            }
            if (message.getMsgType().equals(MessageType.STATE_TRANSITION.name()) || message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
                stateTransitionHandlers.put(getMessageTarget(message.getResourceName(), message.getPartitionName()), createHandler);
            } else {
                nonStateTransitionHandlers.add(createHandler);
            }
        } catch (Exception e) {
            // Handler creation failed: mark the message UNPROCESSABLE, remove it from ZK and
            // report it as discarded so it is not retried.
            LOG.error("Failed to create message handler for " + message.getMsgId(), e);
            String error = "Failed to create message handler for " + message.getMsgId() + ", exception: " + e;
            _statusUpdateUtil.logError(message, HelixStateMachineEngine.class, e, error, manager);
            message.setMsgState(MessageState.UNPROCESSABLE);
            removeMessageFromZK(accessor, message, instanceName);
            LOG.error("Message cannot be processed: " + message.getRecord(), e);
            _monitor.reportProcessedMessage(message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
            continue;
        }
        markReadMessage(message, changeContext, manager);
        readMsgs.add(message);
        // Collect current-state metadata for resources that have no current-state node yet;
        // do it for non-controller and state transition messages only
        if (!message.isControlerMsg() && message.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
            String resourceName = message.getResourceName();
            if (!curResourceNames.contains(resourceName) && !createCurStateNames.contains(resourceName)) {
                createCurStateNames.add(resourceName);
                createCurStateKeys.add(keyBuilder.currentState(instanceName, sessionId, resourceName));
                CurrentState metaCurState = new CurrentState(resourceName);
                metaCurState.setBucketSize(message.getBucketSize());
                metaCurState.setStateModelDefRef(message.getStateModelDef());
                metaCurState.setSessionId(sessionId);
                metaCurState.setBatchMessageMode(message.getBatchMessageMode());
                String ftyName = message.getStateModelFactoryName();
                // fall back to the default factory name when the message does not specify one
                if (ftyName != null) {
                    metaCurState.setStateModelFactoryName(ftyName);
                } else {
                    metaCurState.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
                }
                metaCurStates.add(metaCurState);
            }
        }
    }
    // batch create curState meta; a failure is logged and processing continues
    if (createCurStateKeys.size() > 0) {
        try {
            accessor.createChildren(createCurStateKeys, metaCurStates);
        } catch (Exception e) {
            LOG.error("fail to create cur-state znodes for messages: " + readMsgs, e);
        }
    }
    // update message state to READ in batch and schedule all read messages
    // (handlers are scheduled only when at least one message was marked as read)
    if (readMsgs.size() > 0) {
        updateMessageState(readMsgs, accessor, instanceName);
        for (MessageHandler handler : stateTransitionHandlers.values()) {
            HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
            scheduleTask(task);
        }
        for (MessageHandler handler : nonStateTransitionHandlers) {
            HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
            scheduleTask(task);
        }
    }
}
Also used : HelixStateMachineEngine(org.apache.helix.participant.HelixStateMachineEngine) Message(org.apache.helix.model.Message) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) HelixConfigScopeBuilder(org.apache.helix.model.builder.HelixConfigScopeBuilder) Builder(org.apache.helix.PropertyKey.Builder) ArrayList(java.util.ArrayList) LiveInstance(org.apache.helix.model.LiveInstance) CurrentState(org.apache.helix.model.CurrentState) HashSet(java.util.HashSet) HelixManager(org.apache.helix.HelixManager) HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) PropertyKey(org.apache.helix.PropertyKey) PreFetch(org.apache.helix.api.listeners.PreFetch)

Example 63 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class HelixTaskExecutor method scheduleTask.

/**
 * Submits a message-handling task to the executor service selected for its message and
 * records it in the task map.
 *
 * <p>When the message has a positive execution timeout, a {@code MessageTimeoutTask} is also
 * scheduled on the timer with that delay. A task whose id is already present in the task map
 * is not submitted again; a status-update warning is logged instead.
 *
 * @param task the task to schedule
 * @return {@code true} if the task was submitted and recorded; {@code false} if it was already
 *         scheduled, no executor service was found for the message, or an exception occurred
 */
@Override
public boolean scheduleTask(MessageTask task) {
    String taskId = task.getTaskId();
    Message message = task.getMessage();
    HelixManager manager = task.getNotificationContext().getManager();
    try {
        // Check to see if dedicate thread pool for handling state transition messages is configured or provided.
        updateStateTransitionMessageThreadPool(message, manager);
        LOG.info("Scheduling message: " + taskId);
        _statusUpdateUtil.logInfo(message, HelixTaskExecutor.class, "Message handling task scheduled", manager);
        // The duplicate check, the submission and the bookkeeping all happen under the lock.
        synchronized (_lock) {
            if (_taskMap.containsKey(taskId)) {
                _statusUpdateUtil.logWarning(message, HelixTaskExecutor.class, "Message handling task already sheduled for " + taskId, manager);
                return false;
            }
            ExecutorService pool = findExecutorServiceForMsg(message);
            if (pool == null) {
                LOG.warn(String.format("Threadpool is null for type %s of message %s", message.getMsgType(), message.getMsgId()));
                return false;
            }
            LOG.info("Submit task: " + taskId + " to pool: " + pool);
            Future<HelixTaskResult> pending = pool.submit(task);
            _messageTaskMap.putIfAbsent(getMessageTarget(message.getResourceName(), message.getPartitionName()), taskId);
            // A timeout task is only created for messages that declare a positive timeout.
            final TimerTask timeoutTask;
            if (message.getExecutionTimeout() > 0) {
                timeoutTask = new MessageTimeoutTask(this, task);
                _timer.schedule(timeoutTask, message.getExecutionTimeout());
                LOG.info("Message starts with timeout " + message.getExecutionTimeout() + " MsgId: " + task.getTaskId());
            } else {
                timeoutTask = null;
                LOG.debug("Message does not have timeout. MsgId: " + task.getTaskId());
            }
            _taskMap.put(taskId, new MessageTaskInfo(task, pending, timeoutTask));
            LOG.info("Message: " + taskId + " handling task scheduled");
            return true;
        }
    } catch (Exception e) {
        LOG.error("Error while executing task. " + message, e);
        _statusUpdateUtil.logError(message, HelixTaskExecutor.class, e, "Error while executing task " + e, manager);
    }
    return false;
}
Also used : NotificationContext(org.apache.helix.NotificationContext) HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) TimerTask(java.util.TimerTask) ExecutorService(java.util.concurrent.ExecutorService) HelixException(org.apache.helix.HelixException)

Example 64 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestAutoRebalancePartitionLimit method beforeClass.

/**
 * Builds the test cluster: recreates the cluster namespace, adds the {@code TEST_DB} resource
 * and {@code NODE_NR} instances, starts the controller, then starts the mock participants one
 * at a time and checks the external view's partition count after each start.
 *
 * <p>The expected partition count is {@code 25 * (i + 1)} for the first three participants and
 * 100 from then on.
 *
 * @throws Exception if cluster setup, participant start-up or verification fails
 */
@Override
@BeforeClass
public void beforeClass() throws Exception {
    System.out.println("START " + CLASS_NAME + " at " + new Date());
    // Start from a clean cluster namespace.
    String clusterPath = "/" + CLUSTER_NAME;
    if (_gZkClient.exists(clusterPath)) {
        _gZkClient.deleteRecursively(clusterPath);
    }
    _setupTool = new ClusterSetup(ZK_ADDR);
    // setup storage cluster
    _setupTool.addCluster(CLUSTER_NAME, true);
    _setupTool.addResourceToCluster(CLUSTER_NAME, TEST_DB, 100, "OnlineOffline", RebalanceMode.FULL_AUTO.toString(), 0, 25);
    for (int node = 0; node < NODE_NR; node++) {
        _setupTool.addInstanceToCluster(CLUSTER_NAME, PARTICIPANT_PREFIX + "_" + (START_PORT + node));
    }
    _setupTool.rebalanceStorageCluster(CLUSTER_NAME, TEST_DB, 1);
    // start controller
    _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
    _controller.syncStart();
    HelixManager manager = _controller;
    HelixDataAccessor accessor = manager.getHelixDataAccessor();
    // start dummy participants, verifying the partition count after each one joins
    for (int node = 0; node < NODE_NR; node++) {
        String participantName = PARTICIPANT_PREFIX + "_" + (START_PORT + node);
        _participants[node] = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, participantName);
        _participants[node].syncStart();
        Thread.sleep(2000);
        boolean balanced = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
        Assert.assertTrue(balanced);
        ExternalView view = manager.getHelixDataAccessor().getProperty(accessor.keyBuilder().externalView(TEST_DB));
        int partitionCount = view.getPartitionSet().size();
        System.out.println(partitionCount);
        int expectedCount = (node < 3) ? 25 * (node + 1) : 100;
        Assert.assertEquals(partitionCount, expectedCount);
    }
    boolean finalBalanced = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
    Assert.assertTrue(finalBalanced);
}
Also used : ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) ExternalView(org.apache.helix.model.ExternalView) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixManager(org.apache.helix.HelixManager) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) ClusterSetup(org.apache.helix.tools.ClusterSetup) Date(java.util.Date) BeforeClass(org.testng.annotations.BeforeClass)

Example 65 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestAutoRebalancePartitionLimit method testAutoRebalanceWithMaxPartitionPerNode.

/**
 * Stops two participants in turn and checks the external view's partition count after each
 * stop (100, then 75), then adds and starts two new participants and verifies that the
 * external view is balanced again.
 *
 * @throws Exception if stopping/starting participants or verification fails
 */
@Test()
public void testAutoRebalanceWithMaxPartitionPerNode() throws Exception {
    HelixManager manager = _controller;
    // kill 1 node
    _participants[0].syncStop();
    boolean balancedAfterFirstStop = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
    Assert.assertTrue(balancedAfterFirstStop);
    HelixDataAccessor accessor = manager.getHelixDataAccessor();
    ExternalView viewAfterFirstStop = manager.getHelixDataAccessor().getProperty(accessor.keyBuilder().externalView(TEST_DB));
    Assert.assertEquals(viewAfterFirstStop.getPartitionSet().size(), 100);
    // kill a second node
    _participants[1].syncStop();
    boolean balancedAfterSecondStop = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
    Assert.assertTrue(balancedAfterSecondStop);
    ExternalView viewAfterSecondStop = manager.getHelixDataAccessor().getProperty(accessor.keyBuilder().externalView(TEST_DB));
    Assert.assertEquals(viewAfterSecondStop.getPartitionSet().size(), 75);
    // add 2 nodes
    for (int added = 0; added < 2; added++) {
        String nodeName = PARTICIPANT_PREFIX + "_" + (1000 + added);
        _setupTool.addInstanceToCluster(CLUSTER_NAME, nodeName);
        String instanceName = nodeName.replace(':', '_');
        MockParticipantManager newParticipant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, instanceName);
        newParticipant.syncStart();
    }
    Thread.sleep(1000);
    boolean balancedAfterAdd = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
    Assert.assertTrue(balancedAfterAdd);
}
Also used : ExternalView(org.apache.helix.model.ExternalView) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixManager(org.apache.helix.HelixManager) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) Test(org.testng.annotations.Test)

Aggregations

HelixManager (org.apache.helix.HelixManager) 105, Test (org.testng.annotations.Test) 44, HelixDataAccessor (org.apache.helix.HelixDataAccessor) 35, ZNRecord (org.apache.helix.ZNRecord) 27, Message (org.apache.helix.model.Message) 23, PropertyKey (org.apache.helix.PropertyKey) 20, Date (java.util.Date) 18, ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor) 17, Builder (org.apache.helix.PropertyKey.Builder) 16, ArrayList (java.util.ArrayList) 14, HashMap (java.util.HashMap) 12, ExternalView (org.apache.helix.model.ExternalView) 11, NotificationContext (org.apache.helix.NotificationContext) 10, LiveInstance (org.apache.helix.model.LiveInstance) 10, IdealState (org.apache.helix.model.IdealState) 9, List (java.util.List) 8, Criteria (org.apache.helix.Criteria) 8, HelixAdmin (org.apache.helix.HelixAdmin) 8, ZKHelixManager (org.apache.helix.manager.zk.ZKHelixManager) 8, StringWriter (java.io.StringWriter) 7