use of alluxio.retry.TimeoutRetry in project alluxio by Alluxio.
the class RocksStore method createDb.
/**
 * Opens the RocksDB database at {@code mDbPath}, then stores the opened database's checkpoint
 * and column family handles in the corresponding fields.
 *
 * <p>The open call is retried under a {@link TimeoutRetry} policy. When the database field is
 * still {@code null} after the retries and at least one attempt failed, the most recent failure
 * is rethrown.
 *
 * @throws RocksDBException the failure of the last open attempt, if the database was not opened
 */
private void createDb() throws RocksDBException {
  // The default column family goes first, followed by the configured column families.
  List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();
  descriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
  descriptors.addAll(mColumnFamilyDescriptors);
  // Receives the column family handles once the db is opened.
  List<ColumnFamilyHandle> handles = new ArrayList<>();

  RocksDBException openFailure = null;
  boolean opened = false;
  final TimeoutRetry openRetry = new TimeoutRetry(ROCKS_OPEN_RETRY_TIMEOUT, 100);
  while (!opened && openRetry.attempt()) {
    try {
      mDb = RocksDB.open(mDbOpts, mDbPath, descriptors, handles);
      opened = true;
    } catch (RocksDBException e) {
      // sometimes the previous terminated process's lock may not have been fully cleared yet
      // retry until timeout to make sure that isn't the case
      openFailure = e;
    }
  }
  if (mDb == null && openFailure != null) {
    throw openFailure;
  }

  mCheckpoint = Checkpoint.create(mDb);
  // Handle 0 belongs to the default column family and is skipped, so handle k fills slot k - 1.
  for (int k = 1; k < handles.size(); k++) {
    mColumnHandles.get(k - 1).set(handles.get(k));
  }
  LOG.info("Opened rocks database under path {}", mDbPath);
}
use of alluxio.retry.TimeoutRetry in project alluxio by Alluxio.
the class AbstractMaster method waitForJournalFlush.
/**
 * Waits for the journal context's flush counter to be flushed to the journal, retrying whenever
 * the flush throws an {@link IOException}. If the counter is {@link #INVALID_FLUSH_COUNTER},
 * this is a noop.
 *
 * <p>When the retry policy is exhausted without a successful flush, an error is logged and the
 * process exits with status -1. If {@link PropertyKey#TEST_MODE} is enabled, a
 * {@link RuntimeException} is thrown instead of exiting.
 *
 * @param journalContext the journal context
 */
private void waitForJournalFlush(JournalContext journalContext) {
  // The invalid-counter check deliberately comes before the writer precondition.
  if (journalContext.getFlushCounter() == INVALID_FLUSH_COUNTER) {
    return;
  }
  Preconditions.checkNotNull(mAsyncJournalWriter, PreconditionMessage.ASYNC_JOURNAL_WRITER_NULL);

  int attempts = 0;
  RetryPolicy flushRetry = new TimeoutRetry(JOURNAL_FLUSH_RETRY_TIMEOUT_MS, Constants.SECOND_MS);
  while (flushRetry.attemptRetry()) {
    attempts++;
    try {
      mAsyncJournalWriter.flush(journalContext.getFlushCounter());
      return;
    } catch (IOException e) {
      LOG.warn("Journal flush failed. retrying...", e);
    }
  }

  // Every attempt failed: give up rather than continue with an unflushed journal.
  LOG.error("Journal flush failed after {} attempts. Terminating process to prevent inconsistency.", attempts);
  if (!Configuration.getBoolean(PropertyKey.TEST_MODE)) {
    System.exit(-1);
  } else {
    throw new RuntimeException("Journal flush failed after " + attempts + " attempts. Terminating process to prevent inconsistency.");
  }
}
use of alluxio.retry.TimeoutRetry in project alluxio by Alluxio.
the class RetryHandlingBlockWorkerClient method lockUfsBlock.
/**
 * Locks the block via {@code lockBlock}, retrying under a {@link TimeoutRetry} policy for as
 * long as the lock result reports that the UFS token was not acquired.
 *
 * @param blockId the ID of the block to lock
 * @param options the lock options, also used for the UFS path in the failure message
 * @return the first lock resource whose status does not report a missing UFS token
 * @throws UfsBlockAccessTokenUnavailableException if the retry policy gives up first
 */
@Override
public LockBlockResource lockUfsBlock(final long blockId, final LockBlockOptions options) throws IOException, AlluxioException {
  final long openTimeoutMs = Configuration.getLong(PropertyKey.USER_UFS_BLOCK_OPEN_TIMEOUT_MS);
  RetryPolicy tokenRetry = new TimeoutRetry(openTimeoutMs, Constants.SECOND_MS);
  do {
    LockBlockResource lockResource = lockBlock(blockId, options);
    boolean tokenMissing = lockResource.getResult().getLockBlockStatus().ufsTokenNotAcquired();
    if (!tokenMissing) {
      return lockResource;
    }
    LOG.debug("Failed to acquire a UFS read token because of contention for block {} with " + "LockBlockOptions {}", blockId, options);
  } while (tokenRetry.attemptRetry());
  throw new UfsBlockAccessTokenUnavailableException(ExceptionMessage.UFS_BLOCK_ACCESS_TOKEN_UNAVAILABLE, blockId, options.getUfsPath());
}
use of alluxio.retry.TimeoutRetry in project alluxio by Alluxio.
the class DefaultBlockWorker method createBlockReader.
/**
 * Creates a reader for the requested block, trying the local block first and falling back to
 * the UFS block when the request indicates the block is available in UFS.
 *
 * <p>The active-client metric is incremented only after a reader has actually been obtained, on
 * both the local and the UFS path, so a failed UFS open does not leave the counter inflated.
 *
 * @param request the block read request
 * @return a reader for the block
 * @throws BlockDoesNotExistException if there is no local reader and the request does not mark
 *         the block as persisted or present in the UFS tier
 * @throws IOException if the UFS reader cannot be created (reported as an
 *         {@code UnavailableException} carrying the original failure as its cause)
 */
@Override
public BlockReader createBlockReader(BlockReadRequest request) throws BlockDoesNotExistException, IOException {
  long sessionId = request.getSessionId();
  long blockId = request.getId();
  RetryPolicy retryPolicy = new TimeoutRetry(UFS_BLOCK_OPEN_TIMEOUT_MS, Constants.SECOND_MS);
  // Every path through the loop body returns or throws, so the body runs at most once.
  while (retryPolicy.attempt()) {
    BlockReader reader = createLocalBlockReader(sessionId, blockId, request.getStart());
    if (reader != null) {
      Metrics.WORKER_ACTIVE_CLIENTS.inc();
      return reader;
    }
    boolean checkUfs = request.isPersisted() || (request.getOpenUfsBlockOptions() != null && request.getOpenUfsBlockOptions().hasBlockInUfsTier() && request.getOpenUfsBlockOptions().getBlockInUfsTier());
    if (!checkUfs) {
      throw new BlockDoesNotExistException(ExceptionMessage.NO_BLOCK_ID_FOUND, blockId);
    }
    // When the block does not exist in Alluxio but exists in UFS, try to open the UFS block.
    try {
      BlockReader ufsReader = createUfsBlockReader(request.getSessionId(), request.getId(), request.getStart(), request.isPositionShort(), request.getOpenUfsBlockOptions());
      // Count the client only once the reader exists, matching the local path above.
      Metrics.WORKER_ACTIVE_CLIENTS.inc();
      return ufsReader;
    } catch (Exception e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new UnavailableException(String.format("Failed to read block ID=%s from tiered storage and UFS tier: %s", request.getId(), e.toString()), e);
    }
  }
  // Only reached if the retry policy refuses the very first attempt.
  throw new UnavailableException(ExceptionMessage.UFS_BLOCK_ACCESS_TOKEN_UNAVAILABLE.getMessage(request.getId(), request.getOpenUfsBlockOptions().getUfsPath()));
}
use of alluxio.retry.TimeoutRetry in project alluxio by Alluxio.
the class MasterJournalContext method waitForJournalFlush.
/**
 * Waits for the flush counter to be flushed to the journal. If the counter is
 * {@link #INVALID_FLUSH_COUNTER}, this is a noop.
 *
 * <p>Flush failures reported as {@link IOException} (including {@code AlluxioStatusException})
 * are logged and retried under a {@link TimeoutRetry} policy. Any other {@link Throwable}, or
 * exhausting the retry policy, is reported through {@code ProcessUtils.fatalError}.
 *
 * @throws UnavailableException if the flush fails with a {@code NotLeaderException} or a
 *         {@code JournalClosedException}
 */
private void waitForJournalFlush() throws UnavailableException {
  if (mFlushCounter == INVALID_FLUSH_COUNTER) {
    // Nothing was journaled under this context, so there is nothing to wait for.
    return;
  }
  RetryPolicy retry = new TimeoutRetry(FLUSH_RETRY_TIMEOUT_MS, FLUSH_RETRY_INTERVAL_MS);
  while (retry.attempt()) {
    try {
      mAsyncJournalWriter.flush(mFlushCounter);
      return;
    } catch (NotLeaderException | JournalClosedException e) {
      throw new UnavailableException(String.format("Failed to complete request: %s", e.getMessage()), e);
    } catch (AlluxioStatusException e) {
      // A cancelled RPC is only logged and the flush is retried like any other failure.
      // NOTE(review): presumably the flush cannot be abandoned because the entries may be
      // partially written already - confirm.
      if (e.getStatus().equals(Status.CANCELLED)) {
        LOG.warn("Journal flush interrupted because the RPC was cancelled. ", e);
      } else {
        LOG.warn("Journal flush failed. retrying...", e);
      }
    } catch (IOException e) {
      // AlluxioStatusException is handled by the clause above, so only other IOExceptions
      // arrive here; they are always retried.
      LOG.warn("Journal flush failed. retrying...", e);
    } catch (Throwable e) {
      ProcessUtils.fatalError(LOG, e, "Journal flush failed");
    }
  }
  ProcessUtils.fatalError(LOG, "Journal flush failed after %d attempts", retry.getAttemptCount());
}
Aggregations