use of com.torodb.mongodb.commands.pojos.MemberHeartbeatData in project torodb by torodb.
the class TopologyCoordinator method lookForSyncSource.
/**
 * Looks for the best sync source to replicate from among the members we hold heartbeat data for.
 *
 * <p>A member is a candidate when its heartbeat data reports it as up, its state is readable, its
 * optime is after {@code lastOpAppliedOp} (or after {@link OpTime#EPOCH} when that is absent) and
 * it is not blacklisted at {@code now}. When {@code onlyOptimal} is true a candidate must
 * additionally have an optime equal to or after {@code oldestSyncOpTime}, must not be hidden and
 * must have a slave delay strictly lower than {@code slaveDelaySecs}.
 *
 * <p>The intended usage is a first attempt with {@code onlyOptimal} set, followed by a second
 * attempt without it in case the less desirable nodes are the only ones we can reach.
 *
 * <p>When several candidates remain, the one with the lowest ping is chosen; on equal pings the
 * candidate that appears later wins.
 *
 * @param now              the current time, used to evaluate the blacklist
 * @param lastOpAppliedOp  the last OpTime this node has applied, if any
 * @param onlyOptimal      if true, members with too much slave delay, hidden members and
 *                         excessively lagged members are ignored
 * @param oldestSyncOpTime the oldest optime considered not excessively lagged. Only used if
 *                         {@code onlyOptimal} is true.
 * @return the chosen sync source, which is not {@link Optional#isPresent() present} if no member
 *         qualifies
 */
private Optional<MemberConfig> lookForSyncSource(Instant now, Optional<OpTime> lastOpAppliedOp,
    boolean onlyOptimal, OpTime oldestSyncOpTime) {
  final OpTime appliedOpTime = lastOpAppliedOp.orElse(OpTime.EPOCH);

  // The "optimal only" restrictions are expressed as predicates that always pass when
  // onlyOptimal is false, so a single pipeline serves both attempts.
  // NOTE(review): the slave delay check is strict, so a member whose delay equals slaveDelaySecs
  // is rejected on the optimal attempt -- confirm that this is intended.
  return _hbdata.stream()
      .filter(MemberHeartbeatData::isUp)
      .filter(hb -> hb.getState().isReadable())
      .filter(hb -> hb.getOpTime().isAfter(appliedOpTime))
      .filter(hb -> !onlyOptimal || hb.getOpTime().isEqualOrAfter(oldestSyncOpTime))
      .map(this::getMemberConfig)
      .filter(member -> !isBlacklistedMember(member, now))
      .filter(member -> !onlyOptimal || !member.isHidden())
      .filter(member -> !onlyOptimal || member.getSlaveDelay() < slaveDelaySecs)
      .reduce((earlier, later) -> {
        // Keep the earlier candidate only when its ping is strictly lower.
        long earlierPing = getPing(earlier.getHostAndPort());
        long laterPing = getPing(later.getHostAndPort());
        return earlierPing < laterPing ? earlier : later;
      });
}
use of com.torodb.mongodb.commands.pojos.MemberHeartbeatData in project torodb by torodb.
the class TopologyCoordinator method processHeartbeatResponse.
/**
 * Processes a heartbeat response from "target" that arrived around "now", having spent
 * "networkRoundTripTime" on the network.
 * <p>
 * Updates internal topology coordinator state, and returns instructions about what action to take
 * next.
 * <p>
 * If the next action is {@link HeartbeatResponseAction#makeNoAction() "NoAction"} then nothing
 * has to be done.
 * <p>
 * If the next action indicates {@link HeartbeatResponseAction#makeReconfigAction() "Reconfig"},
 * the caller should verify the configuration in hbResponse is acceptable, perform any other
 * reconfiguration actions it must, and call
 * {@link #updateConfig(
 * com.eightkdata.mongowp.mongoserver.api.safe.library.v3m0.pojos.ReplicaSetConfig,
 * java.time.Instant, com.eightkdata.mongowp.OpTime) updateConfig}
 * with the appropriate arguments.
 * <p>
 * This call should be paired (with intervening network communication) with a call to
 * prepareHeartbeatRequest for the same "target".
 *
 * @param now                  the approximate time at which the response was received
 * @param networkRoundTripTime the time spent on the network
 * @param target               the host that sent the response
 * @param hbResponse           the heartbeat response received from {@code target}
 * @return the action to take next; every returned action carries the delay to wait before the
 *         next heartbeat
 * @throws IllegalStateException if no heartbeat start date is recorded for {@code target}
 */
HeartbeatResponseAction processHeartbeatResponse(Instant now, Duration networkRoundTripTime,
    HostAndPort target, RemoteCommandResponse<ReplSetHeartbeatReply> hbResponse) {
  final PingStats pingStats = getPingOrDefault(target);
  Preconditions.checkState(pingStats.getLastHeartbeatStartDate() != null,
      "It seems that a hb response has been " + "recived before it has been prepared");

  // Record the outcome first: the failure counter read below must include this response.
  final boolean responseOk = hbResponse.isOk();
  if (responseOk) {
    pingStats.hit(networkRoundTripTime);
  } else {
    pingStats.miss();
  }

  final boolean authFailure = (hbResponse.getErrorCode() == ErrorCode.UNAUTHORIZED)
      || (hbResponse.getErrorCode() == ErrorCode.AUTHENTICATION_FAILED);
  final Duration elapsedSinceStart =
      Duration.between(pingStats.getLastHeartbeatStartDate(), now);

  // Determine the next start time: retry right away only while we have a config, retries are
  // left, the heartbeat timeout has not been reached and the failure is not an auth issue.
  // Otherwise wait for the regular heartbeat interval.
  final boolean retryImmediately = _rsConfig != null
      && (pingStats.getNumFailuresSinceLastStart() <= MAX_HEARTBEAT_RETRIES)
      && (elapsedSinceStart.toMillis() < _rsConfig.getHeartbeatTimeoutPeriod())
      && !authFailure;
  final Duration nextHeartbeatDelay = retryImmediately ? Duration.ZERO : HEARTBEAT_INTERVAL;

  final Optional<ReplSetHeartbeatReply> reply = hbResponse.getCommandReply();
  if (responseOk && reply.get().getConfig().isPresent()) {
    // -2 stands for "no config yet".
    final long ourConfigVersion = _rsConfig != null ? _rsConfig.getConfigVersion() : -2;
    final ReplicaSetConfig receivedConfig = reply.get().getConfig().get();
    assert receivedConfig != null;

    if (receivedConfig.getConfigVersion() > ourConfigVersion) {
      return HeartbeatResponseAction.makeReconfigAction()
          .setNextHeartbeatDelay(nextHeartbeatDelay);
    }
    // Could be we got a newer config from somewhere else in the meantime, or the target
    // erroneously sent us one even though it isn't newer.
    if (receivedConfig.getConfigVersion() < ourConfigVersion) {
      LOGGER.debug("Config version from heartbeat was older than ours.");
      LOGGER.trace("Current config: {}. Config from heartbeat: {}", _rsConfig, receivedConfig);
    } else {
      LOGGER.trace("Config from heartbeat response was same as ours.");
    }
  }

  // Without a config we cannot check whether the heartbeat target belongs to it, so there is
  // nothing left to do: return early.
  if (_rsConfig == null) {
    HeartbeatResponseAction noConfigAction = HeartbeatResponseAction.makeNoAction();
    noConfigAction.setNextHeartbeatDelay(nextHeartbeatDelay);
    return noConfigAction;
  }

  final OptionalInt memberIndexOpt = _rsConfig.findMemberIndexByHostAndPort(target);
  if (!memberIndexOpt.isPresent()) {
    LOGGER.debug("replset: Could not find {} in current config so ignoring -- "
        + "current config: {}", target, _rsConfig);
    HeartbeatResponseAction unknownMemberAction = HeartbeatResponseAction.makeNoAction();
    unknownMemberAction.setNextHeartbeatDelay(nextHeartbeatDelay);
    return unknownMemberAction;
  }

  // The same index addresses both the heartbeat data and the member configuration.
  final int memberIndex = memberIndexOpt.getAsInt();
  final MemberHeartbeatData hbData = _hbdata.get(memberIndex);
  assert hbData != null;
  final MemberConfig member = _rsConfig.getMembers().get(memberIndex);

  if (responseOk) {
    final ReplSetHeartbeatReply okReply = reply.get();
    LOGGER.trace("setUpValues: heartbeat response good for member _id:{}, msg: {}",
        member.getId(), okReply.getHbmsg());
    hbData.setUpValues(now, member.getHostAndPort(), okReply);
  } else if (authFailure) {
    LOGGER.debug("setAuthIssue: heartbeat response failed due to authentication "
        + "issue for member _id: {}", member.getId());
    hbData.setAuthIssue(now);
  } else if (pingStats.getNumFailuresSinceLastStart() > MAX_HEARTBEAT_RETRIES
      || elapsedSinceStart.toMillis() >= _rsConfig.getHeartbeatTimeoutPeriod()) {
    // Out of retries or out of time: mark the member as down.
    LOGGER.debug("setDownValues: heartbeat response failed for member _id:{}, "
        + "msg: {}", member.getId(), hbResponse.getErrorDesc());
    hbData.setDownValues(now, hbResponse.getErrorDesc());
  } else {
    LOGGER.trace("Bad heartbeat response from {}; trying again; Retries left: {}; {} ms "
        + "have already elapsed", target,
        MAX_HEARTBEAT_RETRIES - pingStats.getNumFailuresSinceLastStart(),
        elapsedSinceStart.toMillis());
  }

  final HeartbeatResponseAction result = updateHeartbeatDataImpl(memberIndex, now);
  result.setNextHeartbeatDelay(nextHeartbeatDelay);
  return result;
}
use of com.torodb.mongodb.commands.pojos.MemberHeartbeatData in project torodb by torodb.
the class TopologyCoordinator method updateHeartbeatDataImpl.
/**
 * Re-evaluates {@link #_currentPrimaryIndex} against {@link #_hbdata} after the heartbeat data of
 * the member at {@code updatedConfigIndex} has changed. Called at the end of
 * {@link #processHeartbeatResponse}.
 *
 * @param updatedConfigIndex the index of the member whose heartbeat data has just been updated
 * @param now                the current time, handed over to {@code ifTwoPrimariesChecks}
 * @return the action produced by {@code ifTwoPrimariesChecks} when it is not null, or a
 *         {@link HeartbeatResponseAction#makeNoAction() "NoAction"} otherwise
 */
private HeartbeatResponseAction updateHeartbeatDataImpl(int updatedConfigIndex, Instant now) {
  // If we believe the node whose data was just updated is primary, confirm that the updated data
  // supports that notion. If not, erase our notion of who is primary.
  if (updatedConfigIndex == _currentPrimaryIndex) {
    final MemberHeartbeatData refreshed = _hbdata.get(updatedConfigIndex);
    assert refreshed != null;
    final boolean stillPrimary =
        refreshed.isUp() && refreshed.getState() == MemberState.RS_PRIMARY;
    if (!stillPrimary) {
      _currentPrimaryIndex = -1;
    }
  }

  final HeartbeatResponseAction twoPrimariesAction = ifTwoPrimariesChecks(now);
  if (twoPrimariesAction != null) {
    return twoPrimariesAction;
  }

  // From here on we do not believe that any remote is primary.
  assert _hbdata.stream()
      .noneMatch(hb -> hb.isUp() && hb.getState() == MemberState.RS_PRIMARY);
  assert _currentPrimaryIndex == -1;
  return HeartbeatResponseAction.makeNoAction();
}
use of com.torodb.mongodb.commands.pojos.MemberHeartbeatData in project torodb by torodb.
the class TopologyCoordinator method getMaybeUpHostAndPorts.
/**
 * Retrieves the {@link HostAndPort} of every member that is not DOWN, that is, every member
 * whose heartbeat data answers true to {@code maybeUp()}.
 *
 * <p>Heartbeat data and member configuration are looked up with the same index, so the entry at
 * position {@code i} of {@link #_hbdata} is taken to describe the member at position {@code i}
 * of the current replica set configuration.
 *
 * @return a new list with the hosts of the members that may be up, in configuration order
 */
List<HostAndPort> getMaybeUpHostAndPorts() {
  List<HostAndPort> upHosts = new ArrayList<>(_hbdata.size());
  for (int i = 0; i < _hbdata.size(); i++) {
    MemberHeartbeatData it = _hbdata.get(i);
    // The previous condition was inverted: it skipped the members that may be up and collected
    // only the DOWN ones.
    if (!it.maybeUp()) {
      // skip DOWN nodes
      continue;
    }
    upHosts.add(_rsConfig.getMembers().get(i).getHostAndPort());
  }
  return upHosts;
}
use of com.torodb.mongodb.commands.pojos.MemberHeartbeatData in project torodb by torodb.
the class TopologyCoordinator method shouldChangeSyncSource.
/**
 * Determines if a new sync source should be chosen because a better candidate is available.
 *
 * <p>It returns true if there exists a viable member (up, readable and not blacklisted at
 * {@code now}) whose optime is more than the max sync source lag, in seconds, ahead of the
 * current source's optime. It also returns true if a forced sync source index is set (see
 * {@link #setForceSyncSourceIndex(int)}) or if {@code currentSource} is not found in the current
 * configuration. It returns false when no optime is known yet for the current source.
 *
 * @param currentSource the host we are currently syncing from
 * @param now           is used to skip over currently blacklisted sync sources
 * @return true if the sync source should be changed, false otherwise
 */
boolean shouldChangeSyncSource(HostAndPort currentSource, Instant now) {
  // If the user requested a sync source change, return true.
  if (_forceSyncSourceIndex != -1) {
    return true;
  }

  // A source that is not part of the current configuration must be replaced.
  final OptionalInt sourceIndex = _rsConfig.findMemberIndexByHostAndPort(currentSource);
  if (!sourceIndex.isPresent()) {
    return true;
  }

  final MemberHeartbeatData sourceHbData = _hbdata.get(sourceIndex.getAsInt());
  assert sourceHbData != null;
  final OpTime currentOpTime = sourceHbData.getOpTime();
  if (currentOpTime == null) {
    // No optime is known for the current sync source yet, so we cannot tell whether we should
    // change.
    return false;
  }

  final long currentSecs = currentOpTime.getSecs();
  final long goalSecs = currentSecs + _maxSyncSourceLagSecs;

  final int memberCount = _hbdata.size();
  for (int idx = 0; idx < memberCount; idx++) {
    final MemberHeartbeatData candidateHbData = _hbdata.get(idx);
    final MemberConfig candidateConfig = _rsConfig.getMembers().get(idx);
    final OpTime candidateOpTime = candidateHbData.getOpTime();

    // Skip members that are not viable or not far enough ahead of the current source.
    if (candidateOpTime == null
        || !candidateHbData.isUp()
        || !candidateHbData.getState().isReadable()
        || isBlacklistedMember(candidateConfig, now)
        || goalSecs >= candidateOpTime.getSecs()) {
      continue;
    }

    LOGGER.info("changing sync target because current sync target's most recent OpTime is {} "
        + "which is more than {} seconds behind member {} whose most recent OpTime is {} ",
        currentOpTime, _maxSyncSourceLagSecs, candidateConfig.getHostAndPort(),
        candidateOpTime);
    return true;
  }
  return false;
}
Aggregations