use of org.apache.ignite.internal.processors.cache.persistence.CorruptedDataStructureException in project ignite by apache.
the class DiagnosticProcessor method onFailure.
/**
 * Logs diagnostic information for a failure reported on this node.
 * <p>
 * When the page-lock dump flag is set, the page lock tracker state is written to the log first.
 * If the failure was caused by a {@link CorruptedDataStructureException} that carries page ids
 * (and a file I/O factory is available), the corrupted page ids are written to a file and a
 * ready-to-run {@code IgniteWalConverter} command line is logged as a warning.
 *
 * @param failureCtx Failure context holding the error to analyze.
 */
public void onFailure(FailureContext failureCtx) {
    // Dump data structures page locks.
    if (IGNITE_DUMP_PAGE_LOCK_ON_FAILURE)
        ctx.cache().context().diagnostic().pageLockTracker().dumpLocksToLog();

    CorruptedDataStructureException corruptedEx =
        X.cause(failureCtx.error(), CorruptedDataStructureException.class);

    // Nothing more to report unless this is a data structure corruption with known pages.
    if (corruptedEx == null || F.isEmpty(corruptedEx.pageIds()) || fileIOFactory == null)
        return;

    File[] dirs = walDirs(ctx);

    if (F.isEmpty(dirs)) {
        if (log.isInfoEnabled())
            log.info("Skipping dump diagnostic info due to WAL not configured");

        return;
    }

    try {
        File pagesFile = corruptedPagesFile(
            diagnosticPath,
            fileIOFactory,
            corruptedEx.groupId(),
            corruptedEx.pageIds()
        );

        String dirsStr = Arrays.stream(dirs)
            .map(File::getAbsolutePath)
            .collect(joining(", ", "[", "]"));

        // Command line arguments for the WAL converter utility.
        StringBuilder cmdArgs = new StringBuilder("walDir=").append(dirs[0].getAbsolutePath());

        if (dirs.length != 1)
            cmdArgs.append(" walArchiveDir=").append(dirs[1].getAbsolutePath());

        // Page size is passed only when it differs from the default one.
        if (ctx.config().getDataStorageConfiguration().getPageSize() != DFLT_PAGE_SIZE)
            cmdArgs.append(" pageSize=").append(ctx.config().getDataStorageConfiguration().getPageSize());

        cmdArgs.append(" pages=").append(pagesFile.getAbsolutePath());

        log.warning(corruptedEx.getClass().getSimpleName()
            + " has occurred. To diagnose it, make a backup of the following directories: "
            + dirsStr
            + ". Then, run the following command: java -cp <classpath> org.apache.ignite.development.utils.IgniteWalConverter "
            + cmdArgs);
    }
    catch (Throwable err) {
        // Could not produce the dump: at least log the affected pages as "groupId:pageId" lines.
        String pageList = LongStream.of(corruptedEx.pageIds())
            .mapToObj(id -> corruptedEx.groupId() + ":" + id)
            .collect(joining("\n"));

        log.error("Failed to dump diagnostic info of partition corruption. Page ids:\n" + pageList, err);
    }
}
use of org.apache.ignite.internal.processors.cache.persistence.CorruptedDataStructureException in project ignite by apache.
the class FailureProcessor method process.
/**
 * Handles a failure by logging it, emitting diagnostics and delegating to the given failure handler.
 * <p>
 * Once a previous call has invalidated the node (a failure context is already stored),
 * further failures are not processed and {@code false} is returned immediately.
 *
 * @param failureCtx Failure context.
 * @param hnd Failure handler.
 * @return {@code True} If this very call led to Ignite node invalidation.
 */
public synchronized boolean process(FailureContext failureCtx, FailureHandler hnd) {
    assert failureCtx != null;
    assert hnd != null;

    // Node already terminating, no reason to process more errors.
    if (this.failureCtx != null)
        return false;

    if (failureTypeIgnored(failureCtx, hnd))
        U.quietAndWarn(
            ignite.log(),
            IGNORED_FAILURE_LOG_MSG + "[hnd=" + hnd + ", failureCtx=" + failureCtx + ']',
            failureCtx.error()
        );
    else
        U.error(
            ignite.log(),
            FAILURE_LOG_MSG + "[hnd=" + hnd + ", failureCtx=" + failureCtx + ']',
            failureCtx.error()
        );

    // Drop the reference to the reserve buffer when the failure was caused by an OOM.
    if (reserveBuf != null && X.hasCause(failureCtx.error(), OutOfMemoryError.class))
        reserveBuf = null;

    CorruptedDataStructureException corruptedEx =
        X.cause(failureCtx.error(), CorruptedDataStructureException.class);

    CacheGroupContext grp = corruptedEx == null ? null : ctx.cache().cacheGroup(corruptedEx.groupId());

    if (grp != null && grp.dataRegion() != null) {
        if (!grp.dataRegion().config().isPersistenceEnabled())
            log.error("A critical problem with in-memory data structures was detected.");
        else {
            log.error("A critical problem with persistence data structures was detected. Please make backup of persistence storage and WAL files for further analysis. Persistence storage path: "
                + ctx.config().getDataStorageConfiguration().getStoragePath()
                + " WAL path: "
                + ctx.config().getDataStorageConfiguration().getWalPath()
                + " WAL archive path: "
                + ctx.config().getDataStorageConfiguration().getWalArchivePath());
        }
    }

    if (igniteDumpThreadsOnFailure && !throttleThreadDump(failureCtx.type()))
        U.dumpThreads(log, !failureTypeIgnored(failureCtx, hnd));

    DiagnosticProcessor diagProc = ctx.diagnostic();

    if (diagProc != null)
        diagProc.onFailure(failureCtx);

    // The handler decides whether this failure invalidates the node.
    if (!hnd.onFailure(ignite, failureCtx))
        return false;

    this.failureCtx = failureCtx;

    log.error("Ignite node is in invalid state due to a critical failure.");

    return true;
}
use of org.apache.ignite.internal.processors.cache.persistence.CorruptedDataStructureException in project ignite by apache.
the class BPlusTree method findFirst.
/**
 * Returns a value mapped to the lowest key, or {@code null} if tree is empty or no entry matches the passed filter.
 * <p>
 * The scan starts at the first page of level 0 (looked up through the meta page) and walks forward
 * through the pages linked by {@code io.getForward(...)}. The next page is read-locked before the
 * current one is unlocked and released.
 *
 * @param filter Filter closure. When {@code null}, the first row encountered is returned.
 * @return First row accepted by the filter, or {@code null} if the forward chain ends without a match.
 * @throws IgniteCheckedException If failed. A {@code CorruptedDataStructureException} is rethrown unchanged,
 *      any other {@code IgniteCheckedException} is wrapped, and runtime/assertion errors are converted
 *      through {@code corruptedTreeException(...)}.
 */
public T findFirst(TreeRowClosure<L, T> filter) throws IgniteCheckedException {
checkDestroyed();
// Declared outside of the try block so that the catch clause can report the pages being processed.
long curPageId = 0L;
long nextPageId = 0L;
try {
// Outer loop: (re)starts the scan from the first bottom-level page.
for (; ; ) {
long metaPage = acquirePage(metaPageId);
try {
// Level 0 is always at the bottom.
curPageId = getFirstPageId(metaPageId, metaPage, 0);
} finally {
releasePage(metaPageId, metaPage);
}
long curPage = acquirePage(curPageId);
try {
long curPageAddr = readLock(curPageId, curPage);
if (curPageAddr == 0)
// The first page has gone: restart scan.
// The enclosing finally block still releases curPage before the next iteration.
continue;
try {
BPlusIO<L> io = io(curPageAddr);
assert io.isLeaf();
// Inner loop: walks through the pages following forward links.
for (; ; ) {
int cnt = io.getCount(curPageAddr);
for (int i = 0; i < cnt; ++i) {
// A null filter accepts any row, so the very first row is returned.
if (filter == null || filter.apply(this, io, curPageAddr, i))
return getRow(io, curPageAddr, i);
}
nextPageId = io.getForward(curPageAddr);
// Zero forward link: no more pages to scan, nothing matched.
if (nextPageId == 0)
return null;
long nextPage = acquirePage(nextPageId);
try {
long nextPageAddr = readLock(nextPageId, nextPage);
// In the current implementation the next page can't change when the current page is locked.
assert nextPageAddr != 0 : nextPageAddr;
try {
long pa = curPageAddr;
// Set to zero to avoid double unlocking in the finally block.
curPageAddr = 0;
readUnlock(curPageId, curPage, pa);
long p = curPage;
// Set to zero to avoid double release in the finally block.
curPage = 0;
releasePage(curPageId, p);
// The next page becomes the current one; its lock and page handle are now tracked by the cur* variables.
curPageId = nextPageId;
curPage = nextPage;
curPageAddr = nextPageAddr;
// Zeroed so that the finally blocks below do not unlock/release the page that was just handed over.
nextPage = 0;
nextPageAddr = 0;
} finally {
// Non-zero only if the hand-over above did not complete.
if (nextPageAddr != 0)
readUnlock(nextPageId, nextPage, nextPageAddr);
}
} finally {
// Non-zero only if the hand-over above did not complete.
if (nextPage != 0)
releasePage(nextPageId, nextPage);
}
}
} finally {
// Unlock whichever page is current at exit, unless it was already unlocked (address zeroed).
if (curPageAddr != 0)
readUnlock(curPageId, curPage, curPageAddr);
}
} finally {
// Release whichever page is current at exit, unless it was already released (handle zeroed).
if (curPage != 0)
releasePage(curPageId, curPage);
}
}
} catch (CorruptedDataStructureException e) {
// Already describes a corruption: propagate as is, without extra wrapping.
throw e;
} catch (IgniteCheckedException e) {
throw new IgniteCheckedException("Runtime failure on first row lookup", e);
} catch (RuntimeException | AssertionError e) {
// Unexpected runtime/assertion failure: reported together with the group id and the pages involved.
throw corruptedTreeException("Runtime failure on first row lookup", e, grpId, curPageId, nextPageId);
} finally {
// Same check as on entry, repeated on every exit path.
checkDestroyed();
}
}
use of org.apache.ignite.internal.processors.cache.persistence.CorruptedDataStructureException in project ignite by apache.
the class BPlusTree method findOne.
/**
 * Looks up a single row that exactly matches the given lookup row.
 *
 * @param row Lookup row for exact match.
 * @param c Tree row closure passed to the lookup operation.
 * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row.
 * @return Found result or {@code null}.
 * @throws IgniteCheckedException If failed.
 */
public final <R> R findOne(L row, TreeRowClosure<L, T> c, Object x) throws IgniteCheckedException {
    checkDestroyed();

    GetOne lookup = new GetOne(row, c, x, false);

    try {
        doFind(lookup);

        return (R) lookup.row;
    }
    catch (CorruptedDataStructureException corrupted) {
        // Corruption is already reported in its final form: pass it through untouched.
        throw corrupted;
    }
    catch (IgniteCheckedException cause) {
        throw new IgniteCheckedException("Runtime failure on lookup row: " + row, cause);
    }
    catch (RuntimeException | AssertionError cause) {
        throw corruptedTreeException("Runtime failure on lookup row: " + row, cause, grpId, lookup.pageId);
    }
    finally {
        checkDestroyed();
    }
}
use of org.apache.ignite.internal.processors.cache.persistence.CorruptedDataStructureException in project ignite by apache.
the class BPlusTree method doPut.
/**
 * Puts the given row into the tree, retrying the descent from the root until the operation completes.
 *
 * @param row New value.
 * @param needOld {@code True} If need return old value.
 * @return Old row.
 * @throws IgniteCheckedException If failed.
 */
private T doPut(T row, boolean needOld) throws IgniteCheckedException {
    checkDestroyed();

    Put op = new Put(row, needOld);

    try {
        while (true) {
            // Go down with retries.
            op.init();

            Result res = putDown(op, op.rootId, 0L, op.rootLvl);

            switch (res) {
                case RETRY:
                case RETRY_ROOT:
                    checkInterrupted();

                    continue;

                case FOUND:
                    if (op.isFinished())
                        return op.oldRow;

                    // We may need to perform an inner replace on the upper level.
                    res = op.finishTail();

                    if (res != RETRY && res != NOT_FOUND)
                        return op.oldRow;

                    // If not found, then the root split has happened and operation should be retried from the actual root.
                    op.releaseTail();

                    assert op.checkTailLevel(getRootLevel()) : "tail=" + op.tail + ", res=" + res;

                    checkInterrupted();

                    continue;

                default:
                    throw new IllegalStateException("Result: " + res);
            }
        }
    }
    catch (CorruptedDataStructureException corrupted) {
        // Corruption is already reported in its final form: pass it through untouched.
        throw corrupted;
    }
    catch (IgniteCheckedException cause) {
        throw new IgniteCheckedException("Runtime failure on row: " + row, cause);
    }
    catch (RuntimeException | AssertionError cause) {
        throw corruptedTreeException("Runtime failure on row: " + row, cause, grpId, op.pageId);
    }
    finally {
        checkDestroyed();
    }
}
Aggregations