Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The method getInStream of the class AlluxioBlockStore.
/**
 * Opens a stream that reads the given block from the beginning. The worker to read from and
 * the kind of source are chosen by
 * {@link #getDataSourceAndType(BlockInfo, URIStatus, BlockLocationPolicy, Map)}.
 * See also {@link #getInStream(long, InStreamOptions, Map)}.
 *
 * <p>If creating the stream fails with an {@link UnavailableException}, the chosen worker is
 * recorded in {@code failedWorkers} with the current time before the exception is rethrown,
 * so that a later attempt can try to avoid that worker.
 *
 * @param info the block info
 * @param options the options associated with the read request
 * @param failedWorkers the map of workers address to most recent failure time; updated in
 *        place when stream creation fails with {@link UnavailableException}
 * @return a stream which reads from the beginning of the block
 * @throws UnavailableException if the stream cannot be created on the selected worker
 * @throws IOException if selecting the data source or creating the stream fails otherwise
 */
public BlockInStream getInStream(BlockInfo info, InStreamOptions options, Map<WorkerNetAddress, Long> failedWorkers) throws IOException {
  final Pair<WorkerNetAddress, BlockInStreamSource> selection =
      getDataSourceAndType(info, options.getStatus(), options.getUfsReadLocationPolicy(), failedWorkers);
  final WorkerNetAddress worker = selection.getFirst();
  final BlockInStreamSource sourceType = selection.getSecond();
  try {
    return BlockInStream.create(mContext, info, worker, sourceType, options);
  } catch (UnavailableException unavailable) {
    // Remember when this worker failed so the caller's next attempt can steer away from it.
    failedWorkers.put(worker, System.currentTimeMillis());
    throw unavailable;
  }
}
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The method getDataSourceAndType of the class AlluxioBlockStore.
/**
 * Gets the data source and type of data source of a block.
 *
 * <p>The candidate workers are either all cached workers (when the file is persisted or marked
 * {@code TO_BE_PERSISTED}) or the workers listed in the block's locations. The candidates are
 * then narrowed by {@code handleFailedWorkers} using the supplied map of failed workers and
 * their most recent failure times. A location on a remaining worker is preferred; if there is
 * none, the given policy picks a worker to read the block from the UFS.
 *
 * @param info the info of the block to read
 * @param status the URIStatus associated with the read request
 * @param policy the policy determining the Alluxio worker location; must be non-null when the
 *        block has to be read from the UFS
 * @param failedWorkers the map of workers address to most recent failure time
 * @return the data source and type of data source of the block
 * @throws UnavailableException if no worker is available, or if the block is not persisted and
 *         has no locations
 * @throws IOException if one of the invoked helpers fails
 */
public Pair<WorkerNetAddress, BlockInStreamSource> getDataSourceAndType(BlockInfo info, URIStatus status, BlockLocationPolicy policy, Map<WorkerNetAddress, Long> failedWorkers) throws IOException {
  List<BlockLocation> locations = info.getLocations();
  // Typed empty list (not the raw Collections.EMPTY_LIST) so no unchecked conversion is needed.
  List<BlockWorkerInfo> blockWorkerInfo = Collections.emptyList();
  // Initial target workers to read the block given the block locations.
  Set<WorkerNetAddress> workerPool;
  // Note that, it is possible that the blocks have been written as UFS blocks
  if (status.isPersisted() || status.getPersistenceState().equals("TO_BE_PERSISTED")) {
    blockWorkerInfo = mContext.getCachedWorkers();
    if (blockWorkerInfo.isEmpty()) {
      throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
    }
    workerPool = blockWorkerInfo.stream().map(BlockWorkerInfo::getNetAddress).collect(toSet());
  } else {
    if (locations.isEmpty()) {
      // The cached workers are fetched here only to choose the more precise error message.
      blockWorkerInfo = mContext.getCachedWorkers();
      if (blockWorkerInfo.isEmpty()) {
        throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
      }
      throw new UnavailableException(ExceptionMessage.BLOCK_UNAVAILABLE.getMessage(info.getBlockId()));
    }
    workerPool = locations.stream().map(BlockLocation::getWorkerAddress).collect(toSet());
  }
  // Workers to read the block, after considering failed workers.
  Set<WorkerNetAddress> workers = handleFailedWorkers(workerPool, failedWorkers);
  // TODO(calvin, jianjian): Consider containing these two variables in one object
  BlockInStreamSource dataSourceType = null;
  WorkerNetAddress dataSource = null;
  // Keep only the locations that live on a worker which is still a candidate.
  locations = locations.stream()
      .filter(location -> workers.contains(location.getWorkerAddress()))
      .collect(toList());
  // First try to read data from Alluxio
  if (!locations.isEmpty()) {
    // TODO(calvin): Get location via a policy
    List<WorkerNetAddress> tieredLocations =
        locations.stream().map(BlockLocation::getWorkerAddress).collect(toList());
    Collections.shuffle(tieredLocations);
    Optional<Pair<WorkerNetAddress, Boolean>> nearest =
        BlockLocationUtils.nearest(mTieredIdentity, tieredLocations, mContext.getClusterConf());
    if (nearest.isPresent()) {
      dataSource = nearest.get().getFirst();
      // The boolean of the pair selects between a local source type and REMOTE.
      if (!nearest.get().getSecond()) {
        dataSourceType = BlockInStreamSource.REMOTE;
      } else if (mContext.hasProcessLocalWorker()) {
        dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
      } else {
        dataSourceType = BlockInStreamSource.NODE_LOCAL;
      }
    }
  }
  // Can't get data from Alluxio, get it from the UFS instead
  if (dataSource == null) {
    dataSourceType = BlockInStreamSource.UFS;
    Preconditions.checkNotNull(policy, PreconditionMessage.UFS_READ_LOCATION_POLICY_UNSPECIFIED);
    blockWorkerInfo = blockWorkerInfo.stream()
        .filter(workerInfo -> workers.contains(workerInfo.getNetAddress()))
        .collect(toList());
    GetWorkerOptions getWorkerOptions = GetWorkerOptions.defaults()
        .setBlockInfo(new BlockInfo().setBlockId(info.getBlockId()).setLength(info.getLength()).setLocations(locations))
        .setBlockWorkerInfos(blockWorkerInfo);
    dataSource = policy.getWorker(getWorkerOptions);
    if (dataSource != null) {
      if (mContext.hasProcessLocalWorker() && dataSource.equals(mContext.getNodeLocalWorker())) {
        dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
        LOG.debug("Create BlockInStream to read data from UFS through process local worker {}", dataSource);
      } else {
        LOG.debug("Create BlockInStream to read data from UFS through worker {} " + "(client embedded in local worker process: {}," + "client co-located with worker in different processes: {}, " + "local worker address: {})", dataSource, mContext.hasProcessLocalWorker(), mContext.hasNodeLocalWorker(), mContext.hasNodeLocalWorker() ? mContext.getNodeLocalWorker() : "N/A");
      }
    }
  }
  // Neither a usable location nor the policy produced a worker.
  if (dataSource == null) {
    throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
  }
  return new Pair<>(dataSource, dataSourceType);
}
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The method connect of the class AbstractClient.
/**
* Connects with the remote.
*
* <p>Returns immediately when already connected. Otherwise calls {@code disconnect()} and then
* retries, as long as the retry policy allows, to resolve the address, build the gRPC channel,
* create the version service stub and verify the service version. If no attempt succeeds, the
* last channel (if any) is shut down and an exception describing the failure is thrown.
*
* @throws FailedPreconditionException if the client is found closed at the start of an attempt
* @throws UnauthenticatedException if the last connection failure was an authentication failure
* @throws NotFoundException if the last connection failure was a {@code NotFoundException}
* @throws UnavailableException if no address could be determined, or connecting failed for any
*         other reason
*/
@Override
public synchronized void connect() throws AlluxioStatusException {
if (mConnected) {
return;
}
disconnect();
Preconditions.checkState(!mClosed, "Client is closed, will not try to connect.");
// Most recent IOException raised while connecting; decides which exception is thrown below.
IOException lastConnectFailure = null;
RetryPolicy retryPolicy = mRetryPolicySupplier.get();
while (retryPolicy.attempt()) {
if (mClosed) {
throw new FailedPreconditionException("Failed to connect: client has been closed");
}
// Re-resolve the address on every attempt (presumably because it can change, e.g. on
// failover). If it cannot be determined, skip straight to the next attempt.
try {
mAddress = getAddress();
} catch (UnavailableException e) {
LOG.debug("Failed to determine {} rpc address ({}): {}", getServiceName(), retryPolicy.getAttemptCount(), e.toString());
continue;
}
try {
beforeConnect();
LOG.debug("Alluxio client (version {}) is trying to connect with {} @ {}", RuntimeConstants.VERSION, getServiceName(), mAddress);
mChannel = GrpcChannelBuilder.newBuilder(GrpcServerAddress.create(mAddress), mContext.getClusterConf()).setSubject(mContext.getSubject()).setClientType(getServiceName()).build();
// Create stub for version service on host
mVersionService = ServiceVersionClientServiceGrpc.newBlockingStub(mChannel);
// NOTE(review): mConnected is set before afterConnect()/checkVersion(); if either of them
// throws, nothing in this method resets it - confirm this is intended.
mConnected = true;
afterConnect();
checkVersion(getServiceVersion());
LOG.debug("Alluxio client (version {}) is connected with {} @ {}", RuntimeConstants.VERSION, getServiceName(), mAddress);
return;
} catch (IOException e) {
LOG.debug("Failed to connect ({}) with {} @ {}", retryPolicy.getAttemptCount(), getServiceName(), mAddress, e);
lastConnectFailure = e;
if (e instanceof UnauthenticatedException) {
// If there has been a failure in opening GrpcChannel, it's possible because
// the authentication credential has expired. Relogin.
mContext.getUserState().relogin();
}
if (e instanceof NotFoundException) {
// service is not found in the server, skip retry
break;
}
}
}
// Reaching this point means no attempt succeeded: release the channel, then report why.
if (mChannel != null) {
mChannel.shutdown();
}
// mAddress is still null here only if getAddress() never returned a value.
if (mAddress == null) {
throw new UnavailableException(String.format("Failed to determine address for %s after %s attempts", getServiceName(), retryPolicy.getAttemptCount()));
}
/*
* Throw as-is if {@link UnauthenticatedException} occurred.
*/
if (lastConnectFailure instanceof UnauthenticatedException) {
throw (AlluxioStatusException) lastConnectFailure;
}
// A missing service is rethrown as NotFoundException with a ServiceNotFoundException cause.
if (lastConnectFailure instanceof NotFoundException) {
throw new NotFoundException(lastConnectFailure.getMessage(), new ServiceNotFoundException(lastConnectFailure.getMessage(), lastConnectFailure));
}
// Any other failure is reported as unavailable, keeping the last failure as the cause.
throw new UnavailableException(String.format("Failed to connect to master (%s) after %s attempts." + "Please check if Alluxio master is currently running on \"%s\". Service=\"%s\"", mAddress, retryPolicy.getAttemptCount(), mAddress, getServiceName()), lastConnectFailure);
}
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The method startMastersThrowsUnavailableException of the class AlluxioMasterProcessTest.
/**
 * Starts a spied master whose {@code startMasters(true)} always throws
 * {@link UnavailableException} and asserts that this exception does not escape from
 * {@code start()} within the wait window.
 */
@Test
public void startMastersThrowsUnavailableException() throws InterruptedException, IOException {
  ControllablePrimarySelector selector = new ControllablePrimarySelector();
  selector.setState(PrimarySelector.State.PRIMARY);
  ServerConfiguration.set(PropertyKey.MASTER_JOURNAL_EXIT_ON_DEMOTION, true);
  FaultTolerantAlluxioMasterProcess realMaster =
      new FaultTolerantAlluxioMasterProcess(new NoopJournalSystem(), selector);
  FaultTolerantAlluxioMasterProcess spiedMaster = PowerMockito.spy(realMaster);
  PowerMockito.doAnswer(invocation -> {
    throw new UnavailableException("unavailable");
  }).when(spiedMaster).startMasters(true);

  // Stays true unless an UnavailableException propagates out of start().
  AtomicBoolean noUnavailableEscaped = new AtomicBoolean(true);
  Runnable startTask = () -> {
    try {
      spiedMaster.start();
    } catch (UnavailableException unavailable) {
      noUnavailableEscaped.set(false);
    } catch (Exception other) {
      throw new RuntimeException(other);
    }
  };
  Thread starter = new Thread(startTask);
  starter.start();
  // Give start() up to 500 ms to surface the exception, then interrupt the thread.
  starter.join(500);
  starter.interrupt();
  Assert.assertTrue(noUnavailableEscaped.get());
}
Use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.
The method throwTwoUnavailableExceptions of the class RpcContextTest.
/**
 * Makes both the block deletion context and the journal context fail on close and checks that
 * closing the RPC context throws the journal context's exception with the block deletion
 * context's exception attached as suppressed.
 */
@Test
public void throwTwoUnavailableExceptions() throws Throwable {
  Exception blockDeletionFailure = new UnavailableException("block deletion context exception");
  Exception journalFailure = new UnavailableException("journal context exception");
  doThrow(blockDeletionFailure).when(mMockBDC).close();
  doThrow(journalFailure).when(mMockJC).close();
  try {
    mRpcContext.close();
    fail("Expected an exception to be thrown");
  } catch (UnavailableException thrown) {
    assertEquals(journalFailure, thrown);
    // journal context is closed first, so the block deletion context exception should be
    // suppressed.
    Throwable[] suppressed = thrown.getSuppressed();
    assertEquals(blockDeletionFailure, suppressed[0]);
  }
}
Aggregations