Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
In the class MpPromoteAlgo, the method repairSurvivors:
/** Send missed-messages to survivors. Exciting! */
public void repairSurvivors() {
// Send out corrections!
if (this.m_promotionResult.isCancelled()) {
tmLog.debug(m_whoami + "skipping repair message creation for cancelled Term.");
return;
}
tmLog.debug(m_whoami + "received all repair logs and is repairing surviving replicas.");
for (Iv2RepairLogResponseMessage li : m_repairLogUnion) {
// send the repair log union to all the survivors. SPIs will ignore
// CompleteTransactionMessages for transactions which have already
// completed, so this has the effect of making sure that any holes
// in the repair log are filled without explicitly having to
// discover and track them.
VoltMessage repairMsg = createRepairMessage(li);
tmLog.debug(m_whoami + "repairing: " + m_survivors + " with: " + TxnEgo.txnIdToString(li.getTxnId()));
if (tmLog.isTraceEnabled()) {
tmLog.trace(m_whoami + "repairing with message: " + repairMsg);
}
m_mailbox.repairReplicasWith(m_survivors, repairMsg);
}
m_promotionResult.set(new RepairResult(m_maxSeenTxnId));
}
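The repair loop above works only because re-delivery is idempotent: survivors drop messages for transactions they have already completed, so the whole repair-log union can be resent blindly. Below is a minimal, self-contained sketch of that idempotent-resend idea, using hypothetical LogEntry and Replica types rather than VoltDB's classes:
import java.util.HashSet;
import java.util.List;
import java.util.Set;
class RepairSketch {
    static class LogEntry {
        final long txnId;
        LogEntry(long txnId) { this.txnId = txnId; }
    }
    static class Replica {
        // Transactions this replica has already applied.
        final Set<Long> applied = new HashSet<Long>();
        // Re-delivery is safe: already-applied entries are dropped,
        // just as SPIs ignore CompleteTransactionMessages for
        // transactions that have already completed.
        void deliver(LogEntry e) {
            if (!applied.add(e.txnId)) {
                return; // duplicate, ignore
            }
            // ...apply the entry...
        }
    }
    // Resend the whole union to every survivor: holes get filled and
    // duplicates are no-ops, so nobody has to discover or track gaps.
    static void repair(List<LogEntry> union, List<Replica> survivors) {
        for (LogEntry e : union) {
            for (Replica r : survivors) {
                r.deliver(e);
            }
        }
    }
}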
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
In the class SpScheduler, the method updateReplicas:
// This is going to run in the BabySitter's thread. This and deliver are synchronized by
// virtue of both being called on InitiatorMailbox and not directly called.
// (That is, InitiatorMailbox's API, used by BabySitter, is synchronized on the same
// lock deliver() is synchronized on.)
@Override
public void updateReplicas(List<Long> replicas, Map<Integer, Long> partitionMasters) {
// First - correct the official replica set.
m_replicaHSIds = replicas;
// Update the list of remote replicas that we'll need to send to
List<Long> sendToHSIds = new ArrayList<Long>(m_replicaHSIds);
sendToHSIds.remove(m_mailbox.getHSId());
m_sendToHSIds = Longs.toArray(sendToHSIds);
// Cleanup duplicate counters and collect DONE counters
// in this list for further processing.
List<DuplicateCounterKey> doneCounters = new LinkedList<DuplicateCounterKey>();
for (Entry<DuplicateCounterKey, DuplicateCounter> entry : m_duplicateCounters.entrySet()) {
DuplicateCounter counter = entry.getValue();
int result = counter.updateReplicas(m_replicaHSIds);
if (result == DuplicateCounter.DONE) {
doneCounters.add(entry.getKey());
}
}
// Maintain the CI invariant that responses arrive in txnid order.
Collections.sort(doneCounters);
for (DuplicateCounterKey key : doneCounters) {
DuplicateCounter counter = m_duplicateCounters.remove(key);
final TransactionState txn = m_outstandingTxns.get(key.m_txnId);
if (txn == null || txn.isDone()) {
m_outstandingTxns.remove(key.m_txnId);
// for MP write txns, we should use its first SpHandle in the TransactionState
// for SP write txns, we can just use the SpHandle from the DuplicateCounterKey
long safeSpHandle = txn == null ? key.m_spHandle : txn.m_spHandle;
setRepairLogTruncationHandle(safeSpHandle);
}
VoltMessage resp = counter.getLastResponse();
if (resp != null) {
// Make sure we write our executor site id into the message getting sent to the MPI
if (resp instanceof FragmentResponseMessage) {
FragmentResponseMessage fresp = (FragmentResponseMessage) resp;
fresp.setExecutorSiteId(m_mailbox.getHSId());
}
m_mailbox.send(counter.m_destinationId, resp);
} else {
hostLog.warn("TXN " + counter.getTxnId() + " lost all replicas and " + "had no responses. This should be impossible?");
}
}
SettableFuture<Boolean> written = writeIv2ViableReplayEntry();
// Get the fault log status here to ensure the leader has written it to disk
// before initiating transactions again.
blockFaultLogWriteStatus(written);
}
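The DONE handling above is a two-phase drain: collect finished keys while scanning, so the map is never mutated mid-iteration, then sort the keys so responses leave in txnid order. Here is a stand-alone sketch of that pattern, with a hypothetical Key class standing in for DuplicateCounterKey and a plain status map standing in for the counters:
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
class DrainSketch {
    enum Status { WAITING, DONE }
    static final class Key implements Comparable<Key> {
        final long txnId;
        Key(long txnId) { this.txnId = txnId; }
        @Override
        public int compareTo(Key o) { return Long.compare(txnId, o.txnId); }
    }
    static void drain(Map<Key, Status> counters) {
        // Phase 1: collect DONE keys; never remove while iterating.
        List<Key> done = new ArrayList<Key>();
        for (Map.Entry<Key, Status> e : counters.entrySet()) {
            if (e.getValue() == Status.DONE) {
                done.add(e.getKey());
            }
        }
        // Phase 2: respond in ascending txnId order, mirroring the
        // invariant that responses must arrive in txnid order.
        Collections.sort(done);
        for (Key k : done) {
            counters.remove(k);
            System.out.println("responding for txn " + k.txnId);
        }
    }
}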
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
In the class SpPromoteAlgo, the method prepareForFaultRecovery:
/** Start fixing survivors: setup scoreboard and request repair logs. */
void prepareForFaultRecovery() {
for (Long hsid : m_survivors) {
m_replicaRepairStructs.put(hsid, new ReplicaRepairStruct());
}
tmLog.info(m_whoami + "found (including self) " + m_survivors.size() + " surviving replicas to repair. Survivors: " + CoreUtils.hsIdCollectionToString(m_survivors));
VoltMessage logRequest = new Iv2RepairLogRequestMessage(m_requestId, Iv2RepairLogRequestMessage.SPREQUEST);
m_mailbox.send(com.google_voltpatches.common.primitives.Longs.toArray(m_survivors), logRequest);
}
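The broadcast itself just flattens the survivor set into a primitive long[] and hands one message to every destination. A sketch of the same step without the Guava-patched Longs.toArray, using a hypothetical Mailbox interface in place of VoltDB's m_mailbox:
import java.util.Set;
class BroadcastSketch {
    interface Mailbox {
        void send(long[] destinationHSIds, Object message);
    }
    static void requestRepairLogs(Mailbox mailbox, Set<Long> survivors, Object logRequest) {
        // Equivalent of Longs.toArray(m_survivors) in plain Java.
        long[] dests = survivors.stream().mapToLong(Long::longValue).toArray();
        mailbox.send(dests, logRequest);
    }
}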
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
In the class StreamSnapshotAckReceiver, the method run:
@Override
public void run() {
rejoinLog.trace("Starting ack receiver thread");
try {
while (true) {
rejoinLog.trace("Blocking on receiving mailbox");
// Wait for 10 minutes
VoltMessage msg = m_mb.recvBlocking(10 * 60 * 1000);
if (msg == null) {
rejoinLog.warn("No stream snapshot ack message was received in the past 10 minutes or the thread was interrupted (expected eofs: " + m_expectedEOFs.get() + ")");
continue;
}
// TestMidRejoinDeath ignores acks to trigger the watchdog
if (StreamSnapshotDataTarget.m_rejoinDeathTestMode && (m_msgFactory.getAckTargetId(msg) == 1)) {
continue;
}
SerializableException se = m_msgFactory.getException(msg);
if (se != null) {
m_lastException = se;
rejoinLog.error("Received exception in ack receiver", se);
return;
}
AckCallback ackCallback = m_callbacks.get(m_msgFactory.getAckTargetId(msg));
if (ackCallback == null) {
rejoinLog.error("Unknown target ID " + m_msgFactory.getAckTargetId(msg) + " in stream snapshot ack message");
} else if (m_msgFactory.getAckBlockIndex(msg) != -1) {
ackCallback.receiveAck(m_msgFactory.getAckBlockIndex(msg));
}
if (m_msgFactory.isAckEOS(msg)) {
// The ack mailbox is shared by multiple data targets, and each target
// sends its own end-of-stream message, so wait until all expected EOS
// messages are received before terminating the thread.
if (m_expectedEOFs.decrementAndGet() == 0) {
return;
}
}
}
} catch (Exception e) {
m_lastException = e;
rejoinLog.error("Error reading a message from a recovery stream", e);
} finally {
rejoinLog.trace("Ack receiver thread exiting");
}
}
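The loop combines a bounded blocking receive, so a stalled stream produces a warning instead of a hang, with an end-of-stream countdown of one marker per data target. A self-contained sketch of that shape, using a BlockingQueue as a stand-in for the VoltDB mailbox:
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
class AckLoopSketch implements Runnable {
    static final Object EOS = new Object(); // sentinel end-of-stream marker
    final BlockingQueue<Object> mailbox = new LinkedBlockingQueue<Object>();
    final AtomicInteger expectedEos;
    AckLoopSketch(int senderCount) {
        expectedEos = new AtomicInteger(senderCount);
    }
    @Override
    public void run() {
        try {
            while (true) {
                // Bounded wait, mirroring recvBlocking(10 * 60 * 1000).
                Object msg = mailbox.poll(10, TimeUnit.MINUTES);
                if (msg == null) {
                    System.err.println("no ack in 10 minutes; still waiting");
                    continue;
                }
                if (msg == EOS && expectedEos.decrementAndGet() == 0) {
                    return; // every sender has finished
                }
                // ...dispatch ordinary acks to their callbacks here...
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}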
Use of org.voltcore.messaging.VoltMessage in project voltdb by VoltDB.
In the class MeshArbiter, the method notifyOnKill:
/**
* Notify all survivors that we are closing links to the failed nodes.
* @param hsIds the sites whose last known safe transaction ids are
* reported to the survivors
* @param decision map whose keys are the kill sites and whose values
* are their last known safe transaction ids
* @return true if successfully confirmed that all survivors
* agree on the decision, false otherwise.
*/
protected boolean notifyOnKill(Set<Long> hsIds, Map<Long, Long> decision) {
SiteFailureMessage.Builder sfmb = SiteFailureMessage.builder().decisions(decision.keySet()).failures(decision.keySet());
Set<Long> dests = Sets.filter(m_seeker.getSurvivors(), not(equalTo(m_hsId)));
if (dests.isEmpty())
return true;
sfmb.survivors(Sets.difference(m_seeker.getSurvivors(), decision.keySet()));
sfmb.safeTxnIds(getSafeTxnIdsForSites(hsIds));
SiteFailureMessage sfm = sfmb.build();
m_mailbox.send(Longs.toArray(dests), sfm);
m_recoveryLog.info("Agreement, Sending [" + CoreUtils.hsIdCollectionToString(dests) + "] " + sfm);
// If the current decision matches a previous local decision, it means
// that we've entered a loop, exit here.
if (m_localHistoricDecisions.size() >= 100) {
// Too many decisions have been made without converging
RateLimitedLogger.tryLogForMessage(System.currentTimeMillis(), 10, TimeUnit.SECONDS, m_recoveryLog, Level.WARN, "Agreement, %d local decisions have been made without converging", m_localHistoricDecisions.size());
}
for (SiteFailureMessage lhd : m_localHistoricDecisions) {
if (lhd.m_survivors.equals(sfm.m_survivors)) {
m_recoveryLog.info("Agreement, detected decision loop. Exiting");
return true;
}
}
m_localHistoricDecisions.add(sfm);
// Wait for all survivors in the local decision to send their decisions over.
// If one of the hosts' decisions conflicts with ours, remove that host's link
// and repeat the decision process.
final Set<Long> expectedSurvivors = Sets.filter(sfm.m_survivors, not(equalTo(m_hsId)));
m_recoveryLog.info("Agreement, Waiting for agreement on decision from survivors " + CoreUtils.hsIdCollectionToString(expectedSurvivors));
final Iterator<SiteFailureMessage> iter = m_decidedSurvivors.values().iterator();
while (iter.hasNext()) {
final SiteFailureMessage remoteDecision = iter.next();
if (expectedSurvivors.contains(remoteDecision.m_sourceHSId)) {
if (remoteDecision.m_decision.contains(m_hsId)) {
iter.remove();
m_recoveryLog.info("Agreement, Received inconsistent decision from " + CoreUtils.hsIdToString(remoteDecision.m_sourceHSId) + ", " + remoteDecision);
final FaultMessage localFault = new FaultMessage(m_hsId, remoteDecision.m_sourceHSId);
localFault.m_sourceHSId = m_hsId;
m_mailbox.deliverFront(localFault);
return false;
}
}
}
long start = System.currentTimeMillis();
boolean allDecisionsMatch = true;
do {
final VoltMessage msg = m_mailbox.recvBlocking(receiveSubjects, 5);
if (msg == null) {
// Send a heartbeat to keep the dead host timeout active.
m_meshAide.sendHeartbeats(m_seeker.getSurvivors());
final long duration = System.currentTimeMillis() - start;
if (duration > 20000) {
m_recoveryLog.error("Agreement, Still waiting for decisions from " + CoreUtils.hsIdCollectionToString(Sets.difference(expectedSurvivors, m_decidedSurvivors.keySet())) + " after " + TimeUnit.MILLISECONDS.toSeconds(duration) + " seconds");
start = System.currentTimeMillis();
}
continue;
}
if (m_hsId != msg.m_sourceHSId && !expectedSurvivors.contains(msg.m_sourceHSId)) {
// Ignore messages from failed sites
continue;
}
if (msg.getSubject() == Subject.SITE_FAILURE_UPDATE.getId()) {
final SiteFailureMessage fm = (SiteFailureMessage) msg;
if (!fm.m_decision.isEmpty()) {
if (expectedSurvivors.contains(fm.m_sourceHSId)) {
if (fm.m_decision.contains(m_hsId)) {
m_decidedSurvivors.remove(fm.m_sourceHSId);
// The remote host has decided that we are gone, remove the remote host
final FaultMessage localFault = new FaultMessage(m_hsId, fm.m_sourceHSId);
localFault.m_sourceHSId = m_hsId;
m_mailbox.deliverFront(localFault);
return false;
} else {
m_decidedSurvivors.put(fm.m_sourceHSId, fm);
}
}
} else {
m_mailbox.deliverFront(fm);
return false;
}
} else if (msg.getSubject() == Subject.FAILURE.getId()) {
final FaultMessage fm = (FaultMessage) msg;
if (!fm.decided) {
// An undecided concurrent fault arrived; requeue it and redo the arbitration
m_mailbox.deliverFront(msg);
return false;
} else if (mayIgnore(hsIds, fm) == Discard.DoNot) {
m_mailbox.deliverFront(msg);
return false;
}
}
for (SiteFailureMessage remoteDecision : m_decidedSurvivors.values()) {
if (!sfm.m_survivors.equals(remoteDecision.m_survivors)) {
allDecisionsMatch = false;
}
}
} while (!m_decidedSurvivors.keySet().containsAll(expectedSurvivors) && allDecisionsMatch);
return true;
}
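The convergence logic hinges on remembering every survivor set already proposed: a proposal that repeats an earlier one means arbitration has cycled and can stop. A minimal sketch of that loop detection with plain sets, independent of VoltDB's SiteFailureMessage:
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
class LoopDetectSketch {
    // Survivor sets this node has proposed so far.
    final List<Set<Long>> history = new ArrayList<Set<Long>>();
    /** Returns true if this survivor set was already proposed, i.e. a loop. */
    boolean proposeSurvivors(Set<Long> survivors) {
        for (Set<Long> prior : history) {
            if (prior.equals(survivors)) {
                return true; // same decision as before: exit the cycle
            }
        }
        history.add(survivors);
        return false;
    }
}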