use of org.neo4j.time.Stopwatch in project neo4j by neo4j.
the class CheckerContext method timeOperation.
private void timeOperation(String description, ThrowingRunnable action, boolean linePadding) throws Exception {
Stopwatch stopwatch = Stopwatch.start();
try {
debug(linePadding, "STARTED %s", description);
action.doRun();
debug(linePadding, "COMPLETED %s, took %s", description, duration(stopwatch.elapsed(TimeUnit.MILLISECONDS)));
} catch (Exception e) {
debug(linePadding, "FAILED %s after %s", description, duration(stopwatch.elapsed(TimeUnit.MILLISECONDS)));
throw e;
}
}
use of org.neo4j.time.Stopwatch in project neo4j by neo4j.
the class TransactionIdTracker method awaitUpToDate.
/**
* Wait for a specific transaction (the Oldest Acceptable Transaction - OAT) to have been applied before
* continuing. This method is useful in a clustered deployment, where different members of the cluster are expected
* to apply transactions at slightly different times.
* <p>
* We assume the OAT will always have been applied on one member of the cluster, therefore it is sensible to wait
* for it to be applied on this member.
* <p>
* The effect is either:
* <ol>
* <li>If the transaction in question has already been applied, return immediately.
* This is the most common case because we expect the interval between dependent requests from the client
* to be longer than the replication lag between cluster members.</li>
* <li>The transaction has not yet been applied, block until the background replication process has applied it,
* or timeout.</li>
* </ol>
*
* @param namedDatabaseId id of the database to find the transaction id.
* @param oldestAcceptableTxId id of the Oldest Acceptable Transaction (OAT) that must have been applied before
* continuing work.
* @param timeout maximum duration to wait for OAT to be applied
*/
public void awaitUpToDate(NamedDatabaseId namedDatabaseId, long oldestAcceptableTxId, Duration timeout) {
var db = database(namedDatabaseId);
if (oldestAcceptableTxId <= BASE_TX_ID) {
return;
}
var lastTransactionId = -1L;
try {
Stopwatch startTime = clock.startStopWatch();
do {
if (isNotAvailable(db)) {
throw databaseUnavailable(db);
}
lastTransactionId = currentTransactionId(db);
if (oldestAcceptableTxId <= lastTransactionId) {
return;
}
waitWhenNotUpToDate();
} while (!startTime.hasTimedOut(timeout));
throw unreachableDatabaseVersion(db, lastTransactionId, oldestAcceptableTxId);
} catch (RuntimeException e) {
if (isNotAvailable(db)) {
throw databaseUnavailable(db, e);
}
throw unreachableDatabaseVersion(db, lastTransactionId, oldestAcceptableTxId, e);
}
}
use of org.neo4j.time.Stopwatch in project neo4j by neo4j.
the class CheckPointerImpl method doCheckPoint.
private long doCheckPoint(TriggerInfo triggerInfo) throws IOException {
var databaseTracer = tracers.getDatabaseTracer();
var pageCacheTracer = tracers.getPageCacheTracer();
var versionContext = versionContextSupplier.createVersionContext();
try (var cursorContext = new CursorContext(pageCacheTracer.createPageCursorTracer(CHECKPOINT_TAG), versionContext);
LogCheckPointEvent event = databaseTracer.beginCheckPoint()) {
long[] lastClosedTransaction = metadataProvider.getLastClosedTransaction();
long lastClosedTransactionId = lastClosedTransaction[0];
versionContext.initWrite(lastClosedTransactionId);
LogPosition logPosition = new LogPosition(lastClosedTransaction[1], lastClosedTransaction[2]);
String checkpointReason = triggerInfo.describe(lastClosedTransactionId);
/*
* Check kernel health before going into waiting for transactions to be closed, to avoid
* getting into a scenario where we would await a condition that would potentially never
* happen.
*/
databaseHealth.assertHealthy(IOException.class);
/*
* First we flush the store. If we fail now or during the flush, on recovery we'll find the
* earlier check point and replay from there all the log entries. Everything will be ok.
*/
msgLog.info(checkpointReason + " checkpoint started...");
Stopwatch startTime = Stopwatch.start();
forceOperation.flushAndForce(cursorContext);
/*
* Check kernel health before going to write the next check point. In case of a panic this check point
* will be aborted, which is the safest alternative so that the next recovery will have a chance to
* repair the damages.
*/
databaseHealth.assertHealthy(IOException.class);
checkpointAppender.checkPoint(event, logPosition, clock.instant(), checkpointReason);
threshold.checkPointHappened(lastClosedTransactionId);
long durationMillis = startTime.elapsed(MILLISECONDS);
msgLog.info(checkpointReason + " checkpoint completed in " + duration(durationMillis));
event.checkpointCompleted(durationMillis);
/*
* Prune up to the version pointed from the latest check point,
* since it might be an earlier version than the current log version.
*/
logPruning.pruneLogs(logPosition.getLogVersion());
lastCheckPointedTx = lastClosedTransactionId;
return lastClosedTransactionId;
} catch (Throwable t) {
// Why only log failure here? It's because check point can potentially be made from various
// points of execution e.g. background thread triggering check point if needed and during
// shutdown where it's better to have more control over failure handling.
msgLog.error("Checkpoint failed", t);
throw t;
}
}
use of org.neo4j.time.Stopwatch in project neo4j by neo4j.
the class TransactionLogsRecovery method init.
@Override
public void init() throws Exception {
RecoveryStartInformation recoveryStartInformation = recoveryService.getRecoveryStartInformation();
if (!recoveryStartInformation.isRecoveryRequired()) {
schemaLife.init();
return;
}
Stopwatch recoveryStartTime = Stopwatch.start();
LogPosition recoveryStartPosition = recoveryStartInformation.getTransactionLogPosition();
monitor.recoveryRequired(recoveryStartPosition);
LogPosition recoveryToPosition = recoveryStartPosition;
LogPosition lastTransactionPosition = recoveryStartPosition;
CommittedTransactionRepresentation lastTransaction = null;
CommittedTransactionRepresentation lastReversedTransaction = null;
if (!recoveryStartInformation.isMissingLogs()) {
try {
long lowestRecoveredTxId = TransactionIdStore.BASE_TX_ID;
try (var transactionsToRecover = recoveryService.getTransactionsInReverseOrder(recoveryStartPosition);
var recoveryVisitor = recoveryService.getRecoveryApplier(REVERSE_RECOVERY, pageCacheTracer, REVERSE_RECOVERY_TAG)) {
while (transactionsToRecover.next()) {
recoveryStartupChecker.checkIfCanceled();
CommittedTransactionRepresentation transaction = transactionsToRecover.get();
if (lastReversedTransaction == null) {
lastReversedTransaction = transaction;
initProgressReporter(recoveryStartInformation, lastReversedTransaction);
}
recoveryVisitor.visit(transaction);
lowestRecoveredTxId = transaction.getCommitEntry().getTxId();
reportProgress();
}
}
monitor.reverseStoreRecoveryCompleted(lowestRecoveredTxId);
// We cannot initialise the schema (tokens, schema cache, indexing service, etc.) until we have returned the store to a consistent state.
// We need to be able to read the store before we can even figure out what indexes, tokens, etc. we have. Hence we defer the initialisation
// of the schema life until after we've done the reverse recovery.
schemaLife.init();
try (var transactionsToRecover = recoveryService.getTransactions(recoveryStartPosition);
var recoveryVisitor = recoveryService.getRecoveryApplier(RECOVERY, pageCacheTracer, RECOVERY_TAG)) {
while (transactionsToRecover.next()) {
recoveryStartupChecker.checkIfCanceled();
lastTransaction = transactionsToRecover.get();
long txId = lastTransaction.getCommitEntry().getTxId();
recoveryVisitor.visit(lastTransaction);
monitor.transactionRecovered(txId);
numberOfRecoveredTransactions++;
lastTransactionPosition = transactionsToRecover.position();
recoveryToPosition = lastTransactionPosition;
reportProgress();
}
recoveryToPosition = transactionsToRecover.position();
}
} catch (Error | ClosedByInterruptException | DatabaseStartAbortedException e) {
// the users are able to workaround this if truncations is really needed.
throw e;
} catch (Throwable t) {
if (failOnCorruptedLogFiles) {
throwUnableToCleanRecover(t);
}
if (lastTransaction != null) {
LogEntryCommit commitEntry = lastTransaction.getCommitEntry();
monitor.failToRecoverTransactionsAfterCommit(t, commitEntry, recoveryToPosition);
} else {
monitor.failToRecoverTransactionsAfterPosition(t, recoveryStartPosition);
}
}
progressReporter.completed();
logsTruncator.truncate(recoveryToPosition);
}
try (var cursorContext = new CursorContext(pageCacheTracer.createPageCursorTracer(RECOVERY_COMPLETED_TAG))) {
final boolean missingLogs = recoveryStartInformation.isMissingLogs();
recoveryService.transactionsRecovered(lastTransaction, lastTransactionPosition, recoveryToPosition, recoveryStartInformation.getCheckpointPosition(), missingLogs, cursorContext);
}
monitor.recoveryCompleted(numberOfRecoveredTransactions, recoveryStartTime.elapsed(MILLISECONDS));
}
use of org.neo4j.time.Stopwatch in project neo4j by neo4j.
the class WindowsBootloaderOs method runProcess.
private void runProcess(List<String> command, ProcessManager.Behaviour behaviour) {
List<String> entireCommand = asExternalCommand(command);
ctx.processManager().run(entireCommand, behaviour);
if (entireCommand.stream().anyMatch(cmd -> cmd.equals(powershellCmd())) && command.stream().anyMatch(cmd -> cmd.endsWith(PRUNSRV_I_386_EXE) || cmd.endsWith(PRUNSRV_AMD_64_EXE))) {
// This is special condition where we run a command with our prunsrv windows-service util and we have to run it with powershell,
// probably because we're running a command which exceeds 2000 characters which is the limit of cmd.exe.
// Since it seems to be really hard to make powershell wait for completion of commands that it runs (we've certainly tried)
// then we have to try and wait the completion manually here. The general idea is to see if there's any prunsrv process running
// and we're simply waiting until there is none. Now this is somewhat risky because if there's any other process with the exact same name
// we'll wait here for the max time. Although know that the PS1 scripts that this was ported from doesn't even have the option to run
// these prunsrv commands in powershell.exe, it always ran them in cmd.exe. The main cause of a command line being too long for cmd.exe
// is that the classpath is too long since other things are somewhat fixed and doesn't exceed this limit on any sane environment.
// And the main reason the classpath is too long is that we're currently running in a test environment, because in a real-world
// packaging environment the classpath is a couple of wildcard directories.
Stopwatch stopwatch = Stopwatch.start();
do {
try {
resultFromPowerShellCommand("Get-Process", PRUNSRV_AMD_64_EXE + "," + PRUNSRV_I_386_EXE);
// If this command completes normally there's at least one running process containing that name
try {
Thread.sleep(100);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
break;
}
} catch (BootFailureException e) {
// If this command returns exit code != 0 it typically means that there's no processes of this name running
break;
}
} while (!stopwatch.hasTimedOut(Bootloader.DEFAULT_NEO4J_SHUTDOWN_TIMEOUT, TimeUnit.SECONDS));
}
}
Aggregations