Search in sources :

Example 16 with NoNodeException

use of org.apache.zookeeper.KeeperException.NoNodeException in project helios by spotify.

the class ZooKeeperMasterModel method rollingUpdate.

/**
 * Starts a rolling update of {@code deploymentGroup} to the job {@code jobId}.
 *
 * <p>A copy of the group carrying the new job id, rollout options and the MANUAL
 * rolling-update reason is written to the group's config path together with the
 * operations returned by {@code getInitRollingUpdateOps}, all in one ZooKeeper
 * transaction. The events of that op are emitted once the transaction succeeds.
 *
 * @param deploymentGroup the group to update; must not be null
 * @param jobId           the job to roll out
 * @param options         rollout options stored on the updated group
 * @throws JobDoesNotExistException             if {@code getJob(jobId)} returns null
 * @throws DeploymentGroupDoesNotExistException if ZooKeeper reports a missing node
 * @throws HeliosRuntimeException               on any other {@code KeeperException}
 */
@Override
public void rollingUpdate(final DeploymentGroup deploymentGroup, final JobId jobId, final RolloutOptions options) throws DeploymentGroupDoesNotExistException, JobDoesNotExistException {
    checkNotNull(deploymentGroup, "deploymentGroup");
    final String groupName = deploymentGroup.getName();
    log.info("preparing to initiate rolling-update on deployment-group: name={}, jobId={}", groupName, jobId);

    final DeploymentGroup updated = deploymentGroup.toBuilder()
        .setJobId(jobId)
        .setRolloutOptions(options)
        .setRollingUpdateReason(MANUAL)
        .build();

    // Refuse to roll out a job that is not registered.
    if (getJob(jobId) == null) {
        throw new JobDoesNotExistException(jobId);
    }

    final ZooKeeperClient client = provider.get("rollingUpdate");
    final List<ZooKeeperOperation> operations = Lists.newArrayList();
    // The first operation persists the updated group definition itself.
    operations.add(set(Paths.configDeploymentGroup(updated.getName()), updated));

    try {
        final RollingUpdateOp initOp = getInitRollingUpdateOps(updated, client);
        operations.addAll(initOp.operations());
        log.info("starting zookeeper transaction for rolling-update on " + "deployment-group name={} jobId={}. List of operations: {}", groupName, jobId, operations);
        client.transaction(operations);
        // Events are only published after the transaction went through.
        emitEvents(deploymentGroupEventTopic, initOp.events());
        log.info("initiated rolling-update on deployment-group: name={}, jobId={}", groupName, jobId);
    } catch (final NoNodeException e) {
        throw new DeploymentGroupDoesNotExistException(groupName);
    } catch (final KeeperException e) {
        throw new HeliosRuntimeException("rolling-update on deployment-group " + groupName + " failed", e);
    }
}
Also used : RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 17 with NoNodeException

use of org.apache.zookeeper.KeeperException.NoNodeException in project otter by alibaba.

the class ArbitrateViewServiceImpl method listProcesses.

/**
 * Builds stage statistics for every process znode found under the process root of the
 * given channel/pipeline.
 *
 * <p>Process ids are visited in ascending order. For each process the child stage nodes are
 * read and converted to {@code StageStat} entries, and one extra entry is appended for the
 * stage that follows the last recorded one. Iterations that hit a {@code continue} are
 * left out of the result, because {@code continue} skips the {@code processStats.add} call
 * at the end of the loop body.
 *
 * @param channelId  channel id used to build the znode paths
 * @param pipelineId pipeline id used to build the znode paths and stored on each stat
 * @return the collected process statistics
 * @throws ArbitrateException if ZooKeeper raises a {@code KeeperException} other than
 *                            {@code NoNodeException}
 */
public List<ProcessStat> listProcesses(Long channelId, Long pipelineId) {
    List<ProcessStat> processStats = new ArrayList<ProcessStat>();
    String processRoot = ManagePathUtils.getProcessRoot(channelId, pipelineId);
    IZkConnection connection = zookeeper.getConnection();
    // zkclient keeps fetching stat information separate from its normal operations,
    // so the native ZooKeeper client is used directly as an optimization
    ZooKeeper orginZk = ((ZooKeeperx) connection).getZookeeper();
    // fetch the list of all process nodes
    List<String> processNodes = zookeeper.getChildren(processRoot);
    List<Long> processIds = new ArrayList<Long>();
    for (String processNode : processNodes) {
        processIds.add(ManagePathUtils.getProcessId(processNode));
    }
    Collections.sort(processIds);
    for (int i = 0; i < processIds.size(); i++) {
        Long processId = processIds.get(i);
        // the current process may change while it is being read
        ProcessStat processStat = new ProcessStat();
        processStat.setPipelineId(pipelineId);
        processStat.setProcessId(processId);
        List<StageStat> stageStats = new ArrayList<StageStat>();
        processStat.setStageStats(stageStats);
        try {
            String processPath = ManagePathUtils.getProcess(channelId, pipelineId, processId);
            Stat zkProcessStat = new Stat();
            List<String> stages = orginZk.getChildren(processPath, false, zkProcessStat);
            Collections.sort(stages, new StageComparator());
            StageStat prev = null;
            for (String stage : stages) {
                // iterate over every stage node under the process
                String stagePath = processPath + "/" + stage;
                Stat zkStat = new Stat();
                StageStat stageStat = new StageStat();
                stageStat.setPipelineId(pipelineId);
                stageStat.setProcessId(processId);
                byte[] bytes = orginZk.getData(stagePath, false, zkStat);
                if (bytes != null && bytes.length > 0) {
                    // Special handling of the data stored in ZooKeeper: the manager has no class matching the
                    // PipeKey object written by the node, so deserialization would fail; the workaround is to
                    // strip the '@' characters before unmarshalling
                    String json = StringUtils.remove(new String(bytes, "UTF-8"), '@');
                    EtlEventData data = JsonUtils.unmarshalFromString(json, EtlEventData.class);
                    stageStat.setNumber(data.getNumber());
                    stageStat.setSize(data.getSize());
                    Map exts = new HashMap();
                    if (!CollectionUtils.isEmpty(data.getExts())) {
                        exts.putAll(data.getExts());
                    }
                    exts.put("currNid", data.getCurrNid());
                    exts.put("nextNid", data.getNextNid());
                    exts.put("desc", data.getDesc());
                    stageStat.setExts(exts);
                }
                if (prev != null) {
                    // the start time is the end time of the previous stage node
                    stageStat.setStartTime(prev.getEndTime());
                } else {
                    // first stage (select): use the last modification time of the process node
                    stageStat.setStartTime(zkProcessStat.getMtime());
                // the USED flag is set after a successful await
                }
                // the end time of a stage is the mtime of its own znode
                stageStat.setEndTime(zkStat.getMtime());
                if (ArbitrateConstants.NODE_SELECTED.equals(stage)) {
                    stageStat.setStage(StageType.SELECT);
                } else if (ArbitrateConstants.NODE_EXTRACTED.equals(stage)) {
                    stageStat.setStage(StageType.EXTRACT);
                } else if (ArbitrateConstants.NODE_TRANSFORMED.equals(stage)) {
                    stageStat.setStage(StageType.TRANSFORM);
                // } else if
                // (ArbitrateConstants.NODE_LOADED.equals(stage)) {
                // stageStat.setStage(StageType.LOAD);
                }
                prev = stageStat;
                stageStats.add(stageStat);
            }
            // add one more entry for the stage that is currently being processed
            StageStat currentStageStat = new StageStat();
            currentStageStat.setPipelineId(pipelineId);
            currentStageStat.setProcessId(processId);
            if (prev == null) {
                // no stage node exists yet: decide from the process node's own data
                byte[] bytes = orginZk.getData(processPath, false, zkProcessStat);
                if (bytes == null || bytes.length == 0) {
                    // treat the process as unused and skip it (it is not added to the result)
                    continue;
                }
                ProcessNodeEventData nodeData = JsonUtils.unmarshalFromByte(bytes, ProcessNodeEventData.class);
                if (nodeData.getStatus().isUnUsed()) {
                    // skip this process (it is not added to the result)
                    continue;
                } else {
                    // the select operation is in progress
                    currentStageStat.setStage(StageType.SELECT);
                    currentStageStat.setStartTime(zkProcessStat.getMtime());
                }
            } else {
                // inspect the previous stage node to determine the current stage
                // NOTE(review): setStage is only called for the three node names handled above; if a stage
                // node matched none of them, prev.getStage() may never have been set - confirm it cannot be
                // null here
                StageType stage = prev.getStage();
                if (stage.isSelect()) {
                    currentStageStat.setStage(StageType.EXTRACT);
                } else if (stage.isExtract()) {
                    currentStageStat.setStage(StageType.TRANSFORM);
                } else if (stage.isTransform()) {
                    currentStageStat.setStage(StageType.LOAD);
                } else if (stage.isLoad()) {
                    // already at the last stage; the process is not added to the result
                    continue;
                }
                // the start time is the end time of the previous stage node
                currentStageStat.setStartTime(prev.getEndTime());
            }
            if (currentStageStat.getStage().isLoad()) {
                // a pending load is only reported for the first process in the sorted list
                if (i == 0) {
                    stageStats.add(currentStageStat);
                }
            } else {
                // every other pending stage is always added
                stageStats.add(currentStageStat);
            }
        } catch (NoNodeException e) {
        // ignore: a node disappeared while reading; the process is still added below with whatever
        // stage stats were collected so far
        } catch (KeeperException e) {
            throw new ArbitrateException(e);
        } catch (InterruptedException e) {
        // ignore
        // NOTE(review): the interrupt is swallowed without restoring the thread's interrupt flag -
        // confirm this is intended
        } catch (UnsupportedEncodingException e) {
        // ignore
        }
        processStats.add(processStat);
    }
    return processStats;
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) Stat(org.apache.zookeeper.data.Stat) StageStat(com.alibaba.otter.shared.common.model.statistics.stage.StageStat) StageType(com.alibaba.otter.shared.common.model.config.enums.StageType) ZooKeeperx(com.alibaba.otter.shared.common.utils.zookeeper.ZooKeeperx) ProcessNodeEventData(com.alibaba.otter.shared.arbitrate.model.ProcessNodeEventData) StageComparator(com.alibaba.otter.shared.arbitrate.impl.setl.helper.StageComparator) IZkConnection(org.I0Itec.zkclient.IZkConnection) UnsupportedEncodingException(java.io.UnsupportedEncodingException) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) ZooKeeper(org.apache.zookeeper.ZooKeeper) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) ArbitrateException(com.alibaba.otter.shared.arbitrate.exception.ArbitrateException) StageStat(com.alibaba.otter.shared.common.model.statistics.stage.StageStat) HashMap(java.util.HashMap) Map(java.util.Map) KeeperException(org.apache.zookeeper.KeeperException)

Example 18 with NoNodeException

use of org.apache.zookeeper.KeeperException.NoNodeException in project commons by twitter.

the class AngryBirdZooKeeperServer method getSessionIdFromHostPair.

/**
 * Returns the session whose corresponding znode encodes "host:port".
 *
 * <p>Every ephemeral znode of every session in the ZK database is read, parsed into a
 * {@code TestEndpoint} and compared against the requested pair. Znodes that have
 * disappeared, carry no data, or cannot be parsed are logged and skipped so that one bad
 * znode does not abort the scan.
 *
 * @param host ip address of the endpoint
 * @param port endpoint port
 * @return session id of the corresponding zk session if a match is found, otherwise absent.
 */
private Optional<Long> getSessionIdFromHostPair(String host, int port) {
    // TODO(vinod): Instead of (host, port) args use the more generic byte[] as args
    // so that comparison can be made on znodes that are ServerSet ephemerals
    ZKDatabase zkDb = zooKeeperServer.getZKDatabase();
    for (long sessionId : zkDb.getSessions()) {
        for (String path : zkDb.getEphemerals(sessionId)) {
            LOG.info("SessionId:" + sessionId + " Path:" + path);
            try {
                byte[] rawData = zkDb.getData(path, new Stat(), null);
                if (rawData == null) {
                    // A znode may be created without data; there is nothing to compare against.
                    LOG.info("No data in znode, skipping Path:" + path);
                    continue;
                }
                // Decode with an explicit charset instead of the platform default so the result
                // does not depend on the JVM's environment.
                String data = new String(rawData, java.nio.charset.Charset.forName("UTF-8"));
                LOG.info("Data in znode: " + data);
                TestEndpoint endpoint = parseEndpoint(data);
                LOG.info("Extracted endpoint " + endpoint);
                if (endpoint.getHost().equals(host) && endpoint.getPort() == port) {
                    LOG.info(String.format("Matching session id %s found for endpoint %s:%s", sessionId, host, port));
                    return Optional.of(sessionId);
                }
            } catch (NoNodeException e) {
                // The ephemeral can vanish between listing and reading it.
                LOG.severe("Exception getting data for Path:" + path + " : " + e);
            } catch (ParseException e) {
                LOG.severe("Exception parsing data: " + e);
            } catch (NumberFormatException e) {
                LOG.severe("Exception in url format " + e);
            }
        }
    }
    return Optional.absent();
}
Also used : TestEndpoint(com.twitter.common.zookeeper.testing.angrybird.gen.TestEndpoint) Stat(org.apache.zookeeper.data.Stat) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ParseException(java.text.ParseException) ZKDatabase(org.apache.zookeeper.server.ZKDatabase)

Example 19 with NoNodeException

use of org.apache.zookeeper.KeeperException.NoNodeException in project pulsar by yahoo.

the class LeaderElectionService method elect.

/**
 * Runs one round of leader election.
 *
 * <p>We try to get the data in the ELECTION_ROOT node. If the node is present (i.e. leader is present), we store
 * it in the currentLeader and keep a watch on the election node. If we lose the leader, then watch gets triggered
 * and we do the election again. If the node does not exist while getting the data, we get NoNodeException. This
 * means, there is no leader and we create the node at ELECTION_ROOT and write the leader broker's service URL in
 * the node. Once the leader is known, we call the listener method so that leader can take further actions.
 *
 * <p>Any other failure while reading or creating the election node shuts the broker down with exit code -1.
 */
private void elect() {
    try {
        byte[] data = zkClient.getData(ELECTION_ROOT, new Watcher() {

            @Override
            public void process(WatchedEvent event) {
                log.warn("Type of the event is [{}] and path is [{}]", event.getType(), event.getPath());
                switch(event.getType()) {
                    case NodeDeleted:
                        log.warn("Election node {} is deleted, attempting re-election...", event.getPath());
                        // Constant-first comparison so an event without a path cannot throw here
                        if (ELECTION_ROOT.equals(event.getPath())) {
                            log.info("This should call elect again...");
                            executor.execute(new Runnable() {

                                @Override
                                public void run() {
                                    // If the node is deleted, attempt the re-election
                                    log.info("Broker [{}] is calling re-election from the thread", pulsar.getWebServiceAddress());
                                    elect();
                                }
                            });
                        }
                        break;
                    default:
                        log.warn("Got something wrong on watch: {}", event);
                        break;
                }
            }
        }, null);
        LeaderBroker leaderBroker = jsonMapper.readValue(data, LeaderBroker.class);
        currentLeader.set(leaderBroker);
        isLeader.set(false);
        leaderListener.brokerIsAFollowerNow();
        // If broker comes here it is a follower. Do nothing, wait for the watch to trigger
        log.info("Broker [{}] is the follower now. Waiting for the watch to trigger...", pulsar.getWebServiceAddress());
    } catch (NoNodeException nne) {
        // There's no leader yet... try to become the leader
        try {
            // Create the root node and add current broker's URL as its contents
            LeaderBroker leaderBroker = new LeaderBroker(pulsar.getWebServiceAddress());
            ZkUtils.createFullPathOptimistic(pulsar.getLocalZkCache().getZooKeeper(), ELECTION_ROOT, jsonMapper.writeValueAsBytes(leaderBroker), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
            // Update the current leader and set the flag to true
            currentLeader.set(new LeaderBroker(leaderBroker.getServiceUrl()));
            isLeader.set(true);
            // Notify the listener that this broker is now the leader so that it can collect usage and start load
            // manager.
            log.info("Broker [{}] is the leader now, notifying the listener...", pulsar.getWebServiceAddress());
            leaderListener.brokerIsTheLeaderNow();
        } catch (NodeExistsException nee) {
            // Another broker created the node first. Re-elect the new leader
            log.warn("Got exception [{}] while creating election node because it already exists. Attempting re-election...", nee.getMessage());
            executor.execute(new Runnable() {

                @Override
                public void run() {
                    elect();
                }
            });
        } catch (Exception e) {
            // Kill the broker because this broker's session with zookeeper might be stale. Killing the broker will
            // make sure that we get the fresh zookeeper session.
            // The exception is passed as the trailing argument so the stack trace is logged before shutdown.
            log.error("Got exception [{}] while creating the election node", e.getMessage(), e);
            pulsar.getShutdownService().shutdown(-1);
        }
    } catch (Exception e) {
        // Kill the broker
        // Message fills the second placeholder; the trailing throwable supplies the stack trace.
        log.error("Could not get the content of [{}], got exception [{}]. Shutting down the broker...", ELECTION_ROOT, e.getMessage(), e);
        pulsar.getShutdownService().shutdown(-1);
    }
}
Also used : WatchedEvent(org.apache.zookeeper.WatchedEvent) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) Watcher(org.apache.zookeeper.Watcher) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException)

Example 20 with NoNodeException

use of org.apache.zookeeper.KeeperException.NoNodeException in project lucene-solr by apache.

the class CreateCollectionCmd method getConfName.

/**
 * Resolves the config set name for a collection and stores it in {@code collectionProps}
 * under {@code ZkController.CONFIGNAME_PROP} when it has to be inferred.
 *
 * <p>Resolution order on each attempt:
 * <ol>
 *   <li>the collection znode already carries a config name - nothing to do;</li>
 *   <li>exactly one config set exists under {@code ZkConfigManager.CONFIGS_ZKNODE} - use it;</li>
 *   <li>a config set named like the collection exists - use it.</li>
 * </ol>
 * Up to five attempts are made, pausing three seconds after each unsuccessful one.
 *
 * @param zkClient        client used to read the collection znode and the config set list
 * @param collection      collection name, also tried as a config set name
 * @param collectionPath  znode path of the collection
 * @param collectionProps properties to inspect and, if needed, to update
 * @throws ZooKeeperException   if no config name could be determined after all attempts
 * @throws KeeperException      on ZooKeeper errors other than a missing configs node
 * @throws InterruptedException if interrupted while talking to ZooKeeper or sleeping
 */
private static void getConfName(SolrZkClient zkClient, String collection, String collectionPath, Map<String, Object> collectionProps) throws KeeperException, InterruptedException {
    // check for configName
    log.debug("Looking for collection configName");
    if (collectionProps.containsKey("configName")) {
        log.info("configName was passed as a param {}", collectionProps.get("configName"));
        return;
    }

    final int attemptLimit = 6;
    List<String> knownConfigs = null;
    int attempt = 1;
    while (attempt < attemptLimit) {
        // The collection node may already name its config set.
        if (zkClient.exists(collectionPath, true)) {
            ZkNodeProps storedProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true));
            if (storedProps.containsKey(ZkController.CONFIGNAME_PROP)) {
                return;
            }
        }

        try {
            knownConfigs = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null, true);
        } catch (NoNodeException e) {
            // configs node not there yet - keep the previous listing and try again
        }

        if (knownConfigs != null) {
            if (knownConfigs.size() == 1) {
                // no config set named, but there is only 1 - use it
                log.info("Only one config set found in zk - using it:" + knownConfigs.get(0));
                collectionProps.put(ZkController.CONFIGNAME_PROP, knownConfigs.get(0));
                return;
            }
            if (knownConfigs.contains(collection)) {
                log.info("Could not find explicit collection configName, but found config name matching collection name - using that set.");
                collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
                return;
            }
        }

        log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + attempt);
        Thread.sleep(3000);
        attempt++;
    }

    // Only reached when every attempt failed.
    log.error("Could not find configName for collection " + collection);
    throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Could not find configName for collection " + collection + " found:" + knownConfigs);
}
Also used : ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) StrUtils.formatString(org.apache.solr.common.util.StrUtils.formatString)

Aggregations

NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)44 KeeperException (org.apache.zookeeper.KeeperException)30 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)16 IOException (java.io.IOException)12 Stat (org.apache.zookeeper.data.Stat)12 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)11 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)9 Job (com.spotify.helios.common.descriptors.Job)8 ConnectionLossException (org.apache.zookeeper.KeeperException.ConnectionLossException)8 JobId (com.spotify.helios.common.descriptors.JobId)6 UnsupportedEncodingException (java.io.UnsupportedEncodingException)5 UnknownHostException (java.net.UnknownHostException)5 HashMap (java.util.HashMap)5 Map (java.util.Map)5 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)5 NodeExistsException (org.apache.zookeeper.KeeperException.NodeExistsException)5 SessionExpiredException (org.apache.zookeeper.KeeperException.SessionExpiredException)5 DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup)4 UUID (java.util.UUID)4 TimeoutException (java.util.concurrent.TimeoutException)4