Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The class AlluxioFileInStreamTest, method positionedReadRetry.
@Test
public void positionedReadRetry() throws Exception {
  TestBlockInStream workingStream = mInStreams.get(0);
  TestBlockInStream brokenStream = mock(TestBlockInStream.class);
  // The first attempt to open block 0 returns the broken stream; the retry gets the working one.
  when(mBlockStore.getInStream(eq(0L), any(InStreamOptions.class), any()))
      .thenReturn(brokenStream).thenReturn(workingStream);
  when(brokenStream.positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt()))
      .thenThrow(new UnavailableException("test exception"));

  byte[] b = new byte[(int) BLOCK_LENGTH * 2];
  mTestStream.positionedRead(BLOCK_LENGTH / 2, b, 0, b.length);

  doReturn(0).when(brokenStream)
      .positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt());
  // The broken stream should have been used exactly once before the fallback took over.
  verify(brokenStream, times(1))
      .positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt());
  assertArrayEquals(
      BufferUtils.getIncreasingByteArray((int) BLOCK_LENGTH / 2, (int) BLOCK_LENGTH * 2), b);
}
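The test above relies on the file stream falling back to a second block stream after the first one throws UnavailableException. As a general illustration of that pattern (not Alluxio code: PositionedReader, ServiceUnavailableException, and readWithFallback are hypothetical names introduced here), a minimal self-contained sketch:

import java.io.IOException;

/** Illustrative fallback: retry a positioned read on a fresh source when one is unavailable. */
public final class PositionedReadFallback {

  /** Hypothetical minimal read interface, standing in for a block input stream. */
  interface PositionedReader {
    int positionedRead(long pos, byte[] buf, int off, int len) throws IOException;
  }

  /** Hypothetical "service unavailable" exception, standing in for UnavailableException. */
  static class ServiceUnavailableException extends IOException {
    ServiceUnavailableException(String message) {
      super(message);
    }
  }

  /**
   * Tries the primary reader once and falls back to the secondary reader if the primary
   * reports it is unavailable. Any other IOException is propagated to the caller.
   */
  static int readWithFallback(PositionedReader primary, PositionedReader fallback,
      long pos, byte[] buf, int off, int len) throws IOException {
    try {
      return primary.positionedRead(pos, buf, off, len);
    } catch (ServiceUnavailableException e) {
      // The primary source is temporarily unreachable; retry against the fallback.
      return fallback.positionedRead(pos, buf, off, len);
    }
  }

  public static void main(String[] args) throws IOException {
    PositionedReader broken = (pos, buf, off, len) -> {
      throw new ServiceUnavailableException("test exception");
    };
    PositionedReader working = (pos, buf, off, len) -> {
      for (int i = 0; i < len; i++) {
        buf[off + i] = (byte) (pos + i); // increasing bytes, mirroring the test's expectation
      }
      return len;
    };
    byte[] b = new byte[8];
    int n = readWithFallback(broken, working, 4, b, 0, b.length);
    System.out.println("read " + n + " bytes");
  }
}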
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The class RaftJournalSystem, method getQuorumServerInfoList.
/**
 * Gets information about the internal RAFT quorum.
 *
 * @return a list of information for the servers participating in the RAFT quorum
 */
public synchronized List<QuorumServerInfo> getQuorumServerInfoList() throws IOException {
  List<QuorumServerInfo> quorumMemberStateList = new LinkedList<>();
  GroupInfoReply groupInfo = getGroupInfo();
  if (groupInfo == null) {
    throw new UnavailableException("Cannot get raft group info");
  }
  if (groupInfo.getException() != null) {
    throw groupInfo.getException();
  }
  RaftProtos.RoleInfoProto roleInfo = groupInfo.getRoleInfoProto();
  if (roleInfo == null) {
    throw new UnavailableException("Cannot get server role info");
  }
  RaftProtos.LeaderInfoProto leaderInfo = roleInfo.getLeaderInfo();
  if (leaderInfo == null) {
    throw new UnavailableException("Cannot get server leader info");
  }
  for (RaftProtos.ServerRpcProto member : leaderInfo.getFollowerInfoList()) {
    HostAndPort hp = HostAndPort.fromString(member.getId().getAddress());
    NetAddress memberAddress = NetAddress.newBuilder()
        .setHost(hp.getHost())
        .setRpcPort(hp.getPort())
        .build();
    // A follower that has not responded within the election timeout is reported as UNAVAILABLE.
    quorumMemberStateList.add(QuorumServerInfo.newBuilder()
        .setIsLeader(false)
        .setPriority(member.getId().getPriority())
        .setServerAddress(memberAddress)
        .setServerState(member.getLastRpcElapsedTimeMs() > mConf.getMaxElectionTimeoutMs()
            ? QuorumServerState.UNAVAILABLE : QuorumServerState.AVAILABLE)
        .build());
  }
  InetSocketAddress localAddress = mConf.getLocalAddress();
  NetAddress self = NetAddress.newBuilder()
      .setHost(localAddress.getHostString())
      .setRpcPort(localAddress.getPort())
      .build();
  // This node is the current leader, so it is always reported as AVAILABLE.
  quorumMemberStateList.add(QuorumServerInfo.newBuilder()
      .setIsLeader(true)
      .setPriority(roleInfo.getSelf().getPriority())
      .setServerAddress(self)
      .setServerState(QuorumServerState.AVAILABLE)
      .build());
  quorumMemberStateList.sort(Comparator.comparing(info -> info.getServerAddress().toString()));
  return quorumMemberStateList;
}
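As a usage note (not from the Alluxio source), a hedged sketch of how a caller might scan the returned list for followers the leader reports as unavailable; the getIsLeader, getServerState, and getServerAddress getters are assumed to be generated from the builder setters used above, so adapt to the actual generated API if it differs:

// Hedged sketch: flag followers reported as UNAVAILABLE. Getter names are assumed
// from the protobuf builder setters used in getQuorumServerInfoList above.
public static void logUnavailableFollowers(List<QuorumServerInfo> servers, org.slf4j.Logger log) {
  for (QuorumServerInfo info : servers) {
    if (!info.getIsLeader() && info.getServerState() == QuorumServerState.UNAVAILABLE) {
      log.warn("Raft follower {} has not responded within the election timeout",
          info.getServerAddress());
    }
  }
}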
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The class MasterJournalContext, method waitForJournalFlush.
/**
* Waits for the flush counter to be flushed to the journal. If the counter is
* {@link #INVALID_FLUSH_COUNTER}, this is a noop.
*/
private void waitForJournalFlush() throws UnavailableException {
  if (mFlushCounter == INVALID_FLUSH_COUNTER) {
    // Check this before the precondition.
    return;
  }
  RetryPolicy retry = new TimeoutRetry(FLUSH_RETRY_TIMEOUT_MS, FLUSH_RETRY_INTERVAL_MS);
  while (retry.attempt()) {
    try {
      mAsyncJournalWriter.flush(mFlushCounter);
      return;
    } catch (NotLeaderException | JournalClosedException e) {
      throw new UnavailableException(
          String.format("Failed to complete request: %s", e.getMessage()), e);
    } catch (AlluxioStatusException e) {
      // written already
      if (e.getStatus().equals(Status.CANCELLED)) {
        LOG.warn("Journal flush interrupted because the RPC was cancelled. ", e);
      } else {
        LOG.warn("Journal flush failed. retrying...", e);
      }
    } catch (IOException e) {
      if (e instanceof AlluxioStatusException
          && ((AlluxioStatusException) e).getStatusCode() == Status.Code.CANCELLED) {
        throw new UnavailableException(
            String.format("Failed to complete request: %s", e.getMessage()), e);
      }
      LOG.warn("Journal flush failed. retrying...", e);
    } catch (Throwable e) {
      ProcessUtils.fatalError(LOG, e, "Journal flush failed");
    }
  }
  ProcessUtils.fatalError(LOG, "Journal flush failed after %d attempts", retry.getAttemptCount());
}
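The loop above is driven by Alluxio's RetryPolicy/TimeoutRetry. As a standalone illustration of the same idea, here is a minimal sketch of a timeout-plus-interval policy; SimpleTimeoutRetry is a hypothetical class written for this note, not Alluxio's TimeoutRetry:

/** Hypothetical timeout-based retry policy: attempt() returns true until the deadline passes. */
public final class SimpleTimeoutRetry {
  private final long mDeadlineMs;
  private final long mIntervalMs;
  private int mAttemptCount = 0;

  public SimpleTimeoutRetry(long timeoutMs, long intervalMs) {
    mDeadlineMs = System.currentTimeMillis() + timeoutMs;
    mIntervalMs = intervalMs;
  }

  /** Returns true if another attempt may be made, sleeping between attempts. */
  public boolean attempt() {
    if (mAttemptCount == 0) {
      mAttemptCount++;
      return true; // first attempt is immediate
    }
    if (System.currentTimeMillis() >= mDeadlineMs) {
      return false; // deadline exceeded, caller should give up
    }
    try {
      Thread.sleep(mIntervalMs);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return false;
    }
    mAttemptCount++;
    return true;
  }

  public int getAttemptCount() {
    return mAttemptCount;
  }
}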
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The class ReplicationChecker, method check.
private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode)
    throws InterruptedException {
  Set<Long> processedFileIds = new HashSet<>();
  for (long inodeId : inodes) {
    if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
      return processedFileIds;
    }
    if (mActiveJobToInodeID.containsValue(inodeId)) {
      continue;
    }
    Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
    // Throw if interrupted.
    if (Thread.interrupted()) {
      throw new InterruptedException("ReplicationChecker interrupted.");
    }
    // Ideally we would avoid locking the entire path and lock just the inode file,
    // since this access is read-only.
    try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
      InodeFile file = inodePath.getInodeFile();
      for (long blockId : file.getBlockIds()) {
        BlockInfo blockInfo = null;
        try {
          blockInfo = mBlockMaster.getBlockInfo(blockId);
        } catch (BlockInfoException e) {
          // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
        } catch (UnavailableException e) {
          // The block master is not available, wait for the next heartbeat
          LOG.warn("The block master is not available: {}", e.toString());
          return processedFileIds;
        }
        int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
        switch (mode) {
          case EVICT:
            int maxReplicas = file.getReplicationMax();
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > maxReplicas) {
              maxReplicas = file.getReplicationDurable();
            }
            if (currentReplicas > maxReplicas) {
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  currentReplicas - maxReplicas));
            }
            break;
          case REPLICATE:
            int minReplicas = file.getReplicationMin();
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > minReplicas) {
              minReplicas = file.getReplicationDurable();
            }
            if (currentReplicas < minReplicas) {
              // if this file is not persisted and block master thinks it is lost, no effort made
              if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                continue;
              }
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  minReplicas - currentReplicas));
            }
            break;
          default:
            LOG.warn("Unexpected replication mode {}.", mode);
        }
      }
    } catch (FileDoesNotExistException e) {
      LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
    }
    for (Triple<AlluxioURI, Long, Integer> entry : requests) {
      AlluxioURI uri = entry.getLeft();
      long blockId = entry.getMiddle();
      int numReplicas = entry.getRight();
      try {
        long jobId;
        switch (mode) {
          case EVICT:
            jobId = handler.evict(uri, blockId, numReplicas);
            break;
          case REPLICATE:
            jobId = handler.replicate(uri, blockId, numReplicas);
            break;
          default:
            throw new RuntimeException(String.format("Unexpected replication mode %s.", mode));
        }
        processedFileIds.add(inodeId);
        mActiveJobToInodeID.put(jobId, inodeId);
      } catch (JobDoesNotExistException | ResourceExhaustedException e) {
        LOG.warn("The job service is busy, will retry later. {}", e.toString());
        return processedFileIds;
      } catch (UnavailableException e) {
        LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
        return processedFileIds;
      } catch (Exception e) {
        SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={},"
            + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
        LOG.debug("Job service unexpected exception: ", e);
      }
    }
  }
  return processedFileIds;
}
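The replica-count arithmetic inside the EVICT and REPLICATE branches can be read as two small pure functions. Below is a hedged, self-contained sketch of that logic; the class and method names are illustrative and do not exist in Alluxio:

/** Illustrative helper mirroring the replica-count logic in ReplicationChecker.check. */
public final class ReplicaMath {

  /**
   * Returns how many replicas to evict (positive) when the block is over-replicated,
   * or 0 otherwise. For files still waiting to be persisted, the durable replication
   * setting takes precedence when it is larger than the configured maximum.
   */
  public static int replicasToEvict(int currentReplicas, int replicationMax,
      int replicationDurable, boolean toBePersisted) {
    int maxReplicas = replicationMax;
    if (toBePersisted && replicationDurable > maxReplicas) {
      maxReplicas = replicationDurable;
    }
    return Math.max(0, currentReplicas - maxReplicas);
  }

  /**
   * Returns how many replicas to create (positive) when the block is under-replicated,
   * or 0 otherwise. The durable replication setting again takes precedence for files
   * that are still waiting to be persisted.
   */
  public static int replicasToCreate(int currentReplicas, int replicationMin,
      int replicationDurable, boolean toBePersisted) {
    int minReplicas = replicationMin;
    if (toBePersisted && replicationDurable > minReplicas) {
      minReplicas = replicationDurable;
    }
    return Math.max(0, minReplicas - currentReplicas);
  }
}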
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The class ReplicationChecker, method checkMisreplicated.
private void checkMisreplicated(Set<Long> inodes, ReplicationHandler handler)
    throws InterruptedException {
  for (long inodeId : inodes) {
    if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
      return;
    }
    if (mActiveJobToInodeID.containsValue(inodeId)) {
      continue;
    }
    // Throw if interrupted.
    if (Thread.interrupted()) {
      throw new InterruptedException("ReplicationChecker interrupted.");
    }
    try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
      InodeFile file = inodePath.getInodeFile();
      for (long blockId : file.getBlockIds()) {
        BlockInfo blockInfo = null;
        try {
          blockInfo = mBlockMaster.getBlockInfo(blockId);
        } catch (BlockInfoException e) {
          // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
        } catch (UnavailableException e) {
          // The block master is not available, wait for the next heartbeat
          LOG.warn("The block master is not available: {}", e.toString());
          return;
        }
        if (blockInfo == null) {
          // no block info available, we simply log and return
          LOG.warn("Block info is null");
          return;
        }
        for (Map.Entry<String, String> entry : findMisplacedBlock(file, blockInfo).entrySet()) {
          try {
            final long jobId =
                handler.migrate(inodePath.getUri(), blockId, entry.getKey(), entry.getValue());
            mActiveJobToInodeID.put(jobId, inodeId);
          } catch (Exception e) {
            LOG.warn("Unexpected exception encountered when starting a migration job (uri={},"
                + " block ID={}, workerHost= {}) : {}", inodePath.getUri(), blockId,
                entry.getKey(), e.toString());
            LOG.debug("Exception: ", e);
          }
        }
      }
    } catch (FileDoesNotExistException e) {
      LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
    }
  }
}