Use of org.voltdb.dtxn.TransactionState in project voltdb by VoltDB: class VoltSystemProcedure, method executeSysProcPlanFragmentsAsync.
/**
 * Produce work units, possibly on all sites, for a list of plan fragments.
 * The final plan fragment must aggregate intermediate results and produce a
 * single output dependency; the matching blocking call collects that
 * aggregate output as its result.
 *
 * @param pfs an array of synthesized plan fragments
 */
public void executeSysProcPlanFragmentsAsync(SynthesizedPlanFragment[] pfs) {
    TransactionState txnState = m_runner.getTxnState();
    for (SynthesizedPlanFragment pf : pfs) {
        assert (pf.parameters != null);
        // check the output dep id makes sense given the number of sites to
        // run this on
        if (pf.multipartition) {
            assert ((pf.outputDepId & DtxnConstants.MULTIPARTITION_DEPENDENCY) == DtxnConstants.MULTIPARTITION_DEPENDENCY);
        }
        FragmentTaskMessage task = FragmentTaskMessage.createWithOneFragment(
                txnState.initiatorHSId,
                m_site.getCorrespondingSiteId(),
                txnState.txnId,
                txnState.uniqueId,
                txnState.isReadOnly(),
                fragIdToHash(pf.fragmentId),
                pf.outputDepId,
                pf.parameters,
                false,
                txnState.isForReplay());
        if (pf.inputDepIds != null) {
            for (int depId : pf.inputDepIds) {
                task.addInputDepId(0, depId);
            }
        }
        task.setFragmentTaskType(FragmentTaskMessage.SYS_PROC_PER_SITE);
        if (pf.suppressDuplicates) {
            task.setFragmentTaskType(FragmentTaskMessage.SYS_PROC_PER_PARTITION);
        }
        if (pf.multipartition) {
            // create a workunit for every execution site
            txnState.createAllParticipatingFragmentWork(task);
        } else {
            // create one workunit for the current site
            if (pf.siteId == -1) {
                txnState.createLocalFragmentWork(task, false);
            } else {
                txnState.createFragmentWork(new long[] { pf.siteId }, task);
            }
        }
    }
}
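For context, callers conventionally build the pfs array as a two-fragment pattern: a multipartition fragment that fans out to every site, followed by a local aggregator fragment that consumes the per-site dependencies. A minimal sketch of that pattern; the fragment ids (PF_doWork, PF_doWorkAggregate) and dependency ids (DEP_doWork, DEP_doWorkAggregate) are hypothetical placeholders, not VoltDB identifiers:

// Hypothetical sketch of the conventional two-fragment sysproc pattern.
// PF_doWork / PF_doWorkAggregate and the DEP_* ids are placeholders.
SynthesizedPlanFragment[] pfs = new SynthesizedPlanFragment[2];

// Fragment 0: fans out to every site; each site emits one dependency,
// so the output dep id carries the multipartition marker bit.
pfs[0] = new SynthesizedPlanFragment();
pfs[0].fragmentId = SysProcFragmentId.PF_doWork;
pfs[0].outputDepId = DEP_doWork | DtxnConstants.MULTIPARTITION_DEPENDENCY;
pfs[0].inputDepIds = new int[] {};
pfs[0].multipartition = true;
pfs[0].parameters = ParameterSet.emptyParameterSet();

// Fragment 1: runs only on the local site and aggregates the per-site
// results delivered under the fragment-0 dependency id.
pfs[1] = new SynthesizedPlanFragment();
pfs[1].fragmentId = SysProcFragmentId.PF_doWorkAggregate;
pfs[1].outputDepId = DEP_doWorkAggregate;
pfs[1].inputDepIds = new int[] { DEP_doWork | DtxnConstants.MULTIPARTITION_DEPENDENCY };
pfs[1].multipartition = false;
pfs[1].parameters = ParameterSet.emptyParameterSet();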
Use of org.voltdb.dtxn.TransactionState in project voltdb by VoltDB: class VoltSystemProcedure, method executeSysProcPlanFragments.
/**
 * Produce work units, possibly on all sites, for a list of plan fragments.
 * The final plan fragment must aggregate intermediate results and produce a
 * single output dependency. This aggregate output is returned as the
 * result.
 *
 * @param pfs an array of synthesized plan fragments
 * @param aggregatorOutputDependencyId
 *            the dependency id produced by the aggregating plan fragment;
 *            this is the id of the table returned as the result of this
 *            procedure.
 * @return the resulting VoltTable as a length-one array.
 */
public VoltTable[] executeSysProcPlanFragments(SynthesizedPlanFragment[] pfs,
        int aggregatorOutputDependencyId) {
    TransactionState txnState = m_runner.getTxnState();
    // the stack frame drop terminates the recursion and resumes
    // execution of the current stored procedure.
    assert (txnState != null);
    txnState.setupProcedureResume(false, new int[] { aggregatorOutputDependencyId });
    final ArrayList<VoltTable> results = new ArrayList<>();
    executeSysProcPlanFragmentsAsync(pfs);
    // execute the tasks that just got queued.
    // recursively call recursableRun and don't allow it to shutdown
    Map<Integer, List<VoltTable>> mapResults = m_site.recursableRun(txnState);
    if (mapResults != null) {
        List<VoltTable> matchingTablesForId = mapResults.get(aggregatorOutputDependencyId);
        if (matchingTablesForId == null) {
            log.error("Sysproc received a stale fragment response message from before the " +
                    "transaction restart.");
            throw new MpTransactionState.FragmentFailureException();
        } else {
            results.add(matchingTablesForId.get(0));
        }
    }
    return results.toArray(new VoltTable[0]);
}
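A typical sysproc then hands the array plus the aggregator's dependency id to this blocking variant and returns its single-table result. A short sketch, reusing the hypothetical DEP_doWorkAggregate id from the earlier example:

// Blocking call: queues the fragments, runs them via recursableRun(),
// and returns the aggregator's output table as a one-element array.
VoltTable[] results = executeSysProcPlanFragments(pfs, DEP_doWorkAggregate);
VoltTable aggregated = results[0];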
Use of org.voltdb.dtxn.TransactionState in project voltdb by VoltDB: class SpScheduler, method handleCompleteTransactionMessage.
private void handleCompleteTransactionMessage(CompleteTransactionMessage message) {
    CompleteTransactionMessage msg = message;
    if (m_isLeader) {
        msg = new CompleteTransactionMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), message);
        // Set the spHandle so that on repair the new master will set the max seen spHandle
        // correctly
        advanceTxnEgo();
        msg.setSpHandle(getCurrentTxnId());
        if (m_sendToHSIds.length > 0 && !msg.isReadOnly()) {
            m_mailbox.send(m_sendToHSIds, msg);
        }
    } else {
        setMaxSeenTxnId(msg.getSpHandle());
    }
    logRepair(msg);
    TransactionState txn = m_outstandingTxns.get(msg.getTxnId());
    // We can currently receive CompleteTransactionMessages for multipart procedures
    // which only use the buddy site (replicated table read). Ignore them for
    // now, fix that later.
    if (txn != null) {
        CompleteTransactionMessage finalMsg = msg;
        final VoltTrace.TraceEventBatch traceLog = VoltTrace.log(VoltTrace.Category.SPI);
        if (traceLog != null) {
            traceLog.add(() -> VoltTrace.instant("recvCompleteTxn",
                    "txnId", TxnEgo.txnIdToString(finalMsg.getTxnId()),
                    "partition", Integer.toString(m_partitionId),
                    "hsId", CoreUtils.hsIdToString(m_mailbox.getHSId())));
        }
        final boolean isSysproc = ((FragmentTaskMessage) txn.getNotice()).isSysProcTask();
        if (m_sendToHSIds.length > 0 && !msg.isRestart() && (!msg.isReadOnly() || isSysproc)) {
            DuplicateCounter counter =
                    new DuplicateCounter(msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, msg);
            safeAddToDuplicateCounterMap(new DuplicateCounterKey(msg.getTxnId(), msg.getSpHandle()), counter);
        }
        Iv2Trace.logCompleteTransactionMessage(msg, m_mailbox.getHSId());
        final CompleteTransactionTask task = new CompleteTransactionTask(m_mailbox, txn, m_pendingTasks, msg);
        queueOrOfferMPTask(task);
    } else {
        // Generate a dummy response message when this site has not seen the previous
        // FragmentTaskMessage; the leader may have started to wait for replicas'
        // response messages. This can happen in the early phase of site rejoin, before
        // the replica receives the snapshot initiation; it also means this
        // CompleteTransactionMessage will be dropped because it arrives after the snapshot.
        final CompleteTransactionResponseMessage resp = new CompleteTransactionResponseMessage(msg);
        resp.m_sourceHSId = m_mailbox.getHSId();
        handleCompleteTransactionResponseMessage(resp);
    }
}
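The DuplicateCounter created above acts as a latch over the replica set: the leader holds the completion until every replica has acknowledged, keyed by (txnId, spHandle). The following is a simplified, illustrative stand-in for that bookkeeping, not VoltDB's actual DuplicateCounter class:

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.voltcore.messaging.VoltMessage;

// Simplified stand-in for DuplicateCounter's core bookkeeping (illustrative only).
final class ReplicaAckLatch {
    static final int DONE = 0, WAITING = 1;
    private final Set<Long> m_expectedHSIds;  // replicas still owing a response
    private VoltMessage m_lastResponse;

    ReplicaAckLatch(List<Long> replicaHSIds) {
        m_expectedHSIds = new HashSet<>(replicaHSIds);
    }

    // Record one replica's response; DONE once all expected replicas answered.
    int offerResponse(long sourceHSId, VoltMessage resp) {
        m_expectedHSIds.remove(sourceHSId);
        m_lastResponse = resp;
        return m_expectedHSIds.isEmpty() ? DONE : WAITING;
    }

    // Drop failed replicas from the expected set (cf. updateReplicas below).
    int updateReplicas(List<Long> liveReplicas) {
        m_expectedHSIds.retainAll(liveReplicas);
        return m_expectedHSIds.isEmpty() ? DONE : WAITING;
    }

    VoltMessage getLastResponse() {
        return m_lastResponse;
    }
}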
Use of org.voltdb.dtxn.TransactionState in project voltdb by VoltDB: class SpScheduler, method updateReplicas.
// This is going to run in the BabySitter's thread. This and deliver are synchronized by
// virtue of both being called on InitiatorMailbox and not directly called.
// (That is, InitiatorMailbox's API, used by BabySitter, is synchronized on the same
// lock deliver() is synchronized on.)
@Override
public void updateReplicas(List<Long> replicas, Map<Integer, Long> partitionMasters) {
    // First - correct the official replica set.
    m_replicaHSIds = replicas;
    // Update the list of remote replicas that we'll need to send to
    List<Long> sendToHSIds = new ArrayList<Long>(m_replicaHSIds);
    sendToHSIds.remove(m_mailbox.getHSId());
    m_sendToHSIds = Longs.toArray(sendToHSIds);
    // Cleanup duplicate counters and collect DONE counters
    // in this list for further processing.
    List<DuplicateCounterKey> doneCounters = new LinkedList<DuplicateCounterKey>();
    for (Entry<DuplicateCounterKey, DuplicateCounter> entry : m_duplicateCounters.entrySet()) {
        DuplicateCounter counter = entry.getValue();
        int result = counter.updateReplicas(m_replicaHSIds);
        if (result == DuplicateCounter.DONE) {
            doneCounters.add(entry.getKey());
        }
    }
    // Maintain the CI invariant that responses arrive in txnid order.
    Collections.sort(doneCounters);
    for (DuplicateCounterKey key : doneCounters) {
        DuplicateCounter counter = m_duplicateCounters.remove(key);
        final TransactionState txn = m_outstandingTxns.get(key.m_txnId);
        if (txn == null || txn.isDone()) {
            m_outstandingTxns.remove(key.m_txnId);
            // for MP write txns, we should use its first SpHandle in the TransactionState
            // for SP write txns, we can just use the SpHandle from the DuplicateCounterKey
            long safeSpHandle = txn == null ? key.m_spHandle : txn.m_spHandle;
            setRepairLogTruncationHandle(safeSpHandle);
        }
        VoltMessage resp = counter.getLastResponse();
        if (resp != null) {
            // MPI is tracking deps per partition HSID. We need to make
            // sure we write ours into the message getting sent to the MPI
            if (resp instanceof FragmentResponseMessage) {
                FragmentResponseMessage fresp = (FragmentResponseMessage) resp;
                fresp.setExecutorSiteId(m_mailbox.getHSId());
            }
            m_mailbox.send(counter.m_destinationId, resp);
        } else {
            hostLog.warn("TXN " + counter.getTxnId() + " lost all replicas and had no responses. This should be impossible?");
        }
    }
    SettableFuture<Boolean> written = writeIv2ViableReplayEntry();
    // Get the fault log status here to ensure the leader has written it to disk
    // before initiating transactions again.
    blockFaultLogWriteStatus(written);
}
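The Collections.sort call above relies on DuplicateCounterKey ordering by transaction id first, so completed counters are flushed to the MPI in txnid order. A minimal sketch of a key with that invariant, assuming the real class compares (txnId, spHandle) lexicographically:

// Illustrative key ordered by txnId, then spHandle, so sorting a batch of
// DONE counters preserves the "responses leave in txnid order" invariant.
final class CounterKey implements Comparable<CounterKey> {
    final long m_txnId;
    final long m_spHandle;

    CounterKey(long txnId, long spHandle) {
        m_txnId = txnId;
        m_spHandle = spHandle;
    }

    @Override
    public int compareTo(CounterKey o) {
        int byTxn = Long.compare(m_txnId, o.m_txnId);
        return byTxn != 0 ? byTxn : Long.compare(m_spHandle, o.m_spHandle);
    }
}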
Use of org.voltdb.dtxn.TransactionState in project voltdb by VoltDB: class SpScheduler, method doLocalFragmentOffer.
/**
* Do the work necessary to turn the FragmentTaskMessage into a
* TransactionTask which can be queued to the TransactionTaskQueue.
* This is reused by both the normal message handling path and the repair
* path, and assumes that the caller has dealt with or ensured that the
* necessary ID, SpHandles, and replication issues are resolved.
*/
private void doLocalFragmentOffer(FragmentTaskMessage msg) {
    // Thread name has to be materialized here
    final String threadName = Thread.currentThread().getName();
    final VoltTrace.TraceEventBatch traceLog = VoltTrace.log(VoltTrace.Category.SPI);
    if (traceLog != null) {
        traceLog.add(() -> VoltTrace.meta("process_name", "name", CoreUtils.getHostnameOrAddress()))
                .add(() -> VoltTrace.meta("thread_name", "name", threadName))
                .add(() -> VoltTrace.meta("thread_sort_index", "sort_index", Integer.toString(10000)))
                .add(() -> VoltTrace.beginAsync("recvfragment",
                        MiscUtils.hsIdPairTxnIdToString(m_mailbox.getHSId(), m_mailbox.getHSId(), msg.getSpHandle(), msg.getTxnId()),
                        "txnId", TxnEgo.txnIdToString(msg.getTxnId()),
                        "partition", m_partitionId,
                        "hsId", CoreUtils.hsIdToString(m_mailbox.getHSId()),
                        "final", msg.isFinalTask()));
    }
    TransactionState txn = m_outstandingTxns.get(msg.getTxnId());
    boolean logThis = false;
    // bit of a hack... we will probably not want to create and offer
    // FragmentTasks for txn ids that don't match if we have
    // something in progress already
    if (txn == null) {
        txn = new ParticipantTransactionState(msg.getSpHandle(), msg, msg.isReadOnly());
        m_outstandingTxns.put(msg.getTxnId(), txn);
        // Only want to send things to the command log if it satisfies this predicate
        // AND we've never seen anything for this transaction before. We can't
        // actually log until we create a TransactionTask, though, so just keep track
        // of whether it needs to be done.
        // Like SP, we should log writes and safe reads.
        // Fast reads can be directly put on the task queue.
        boolean shortcutRead = msg.isReadOnly() && (m_defaultConsistencyReadLevel == ReadLevel.FAST);
        logThis = !shortcutRead;
    }
    // If this is the final task for a read-only txn we can close it out here rather
    // than waiting for a CompleteTransactionMessage; tolerate the overlap with that
    // handling for now, but if this scope grows then it should get refactored out
    if (msg.isFinalTask() && txn.isReadOnly()) {
        m_outstandingTxns.remove(msg.getTxnId());
    }
    TransactionTask task;
    if (msg.isSysProcTask()) {
        task = new SysprocFragmentTask(m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, msg, null);
    } else {
        task = new FragmentTask(m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, msg, null);
    }
    if (logThis) {
        ListenableFuture<Object> durabilityBackpressureFuture =
                m_cl.log(msg.getInitiateTask(), msg.getSpHandle(),
                        Ints.toArray(msg.getInvolvedPartitions()),
                        m_durabilityListener, task);
        if (traceLog != null && durabilityBackpressureFuture != null) {
            traceLog.add(() -> VoltTrace.beginAsync("durability",
                    MiscUtils.hsIdTxnIdToString(m_mailbox.getHSId(), msg.getSpHandle()),
                    "txnId", TxnEgo.txnIdToString(msg.getTxnId()),
                    "partition", Integer.toString(m_partitionId)));
        }
        // Async command logging has to offer the task immediately with a Future for backpressure
        if (m_cl.canOfferTask()) {
            m_pendingTasks.offer(task.setDurabilityBackpressureFuture(durabilityBackpressureFuture));
        } else {
            /* Getting here means that the task is the first fragment of an MP txn and
             * synchronous command logging is on, so create a backlog for future tasks of
             * this MP that arrive before it's marked durable.
             *
             * This is important for synchronous command logging and MP txn restart. Without
             * this, a restarted MP txn may not be gated by logging of the first fragment.
             */
            assert !m_mpsPendingDurability.containsKey(task.getTxnId());
            m_mpsPendingDurability.put(task.getTxnId(), new ArrayDeque<TransactionTask>());
        }
    } else {
        queueOrOfferMPTask(task);
    }
}
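When the synchronous-logging branch above creates an entry in m_mpsPendingDurability, later fragments of the same MP txn are parked in that queue until the first fragment is marked durable, then drained into m_pendingTasks. A hedged sketch of that gating, assuming the surrounding SpScheduler fields; the method names here are illustrative, not VoltDB's actual API:

// Hypothetical sketch of the backlog gate; method names are illustrative.
// Park a task if its MP txn is still waiting on first-fragment durability.
private void offerOrBacklog(TransactionTask task) {
    Deque<TransactionTask> backlog = m_mpsPendingDurability.get(task.getTxnId());
    if (backlog != null) {
        backlog.offer(task);           // first fragment not durable yet: queue behind it
    } else {
        m_pendingTasks.offer(task);    // no gate for this txn: run normally
    }
}

// Called once the durability listener reports the first fragment durable.
private void onFirstFragmentDurable(long txnId) {
    Deque<TransactionTask> backlog = m_mpsPendingDurability.remove(txnId);
    if (backlog != null) {
        for (TransactionTask queued : backlog) {
            m_pendingTasks.offer(queued);  // release parked tasks in arrival order
        }
    }
}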