use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.
the class AlluxioBlockStore method getDataSourceAndType.
/**
 * Gets the data source and type of data source of a block.
 *
 * <p>The candidate worker pool is all cached workers when the file is persisted (or marked
 * {@code TO_BE_PERSISTED}), otherwise only the workers listed in the block's locations. The pool
 * is then narrowed by {@code handleFailedWorkers} using the supplied failed-worker map. A worker
 * that already holds the block is preferred; if none qualifies, the location policy picks a
 * worker to read the block through the UFS.
 *
 * @param info the info of the block to read
 * @param status the URIStatus associated with the read request
 * @param policy the policy determining the Alluxio worker location; must be non-null when the
 *        block has to be read from the UFS
 * @param failedWorkers the map of workers address to most recent failure time
 * @return the data source and type of data source of the block
 * @throws IOException declared by the signature; the failure paths visible here throw
 *         {@code UnavailableException} when no worker is available or the block has no location
 */
public Pair<WorkerNetAddress, BlockInStreamSource> getDataSourceAndType(BlockInfo info,
    URIStatus status, BlockLocationPolicy policy, Map<WorkerNetAddress, Long> failedWorkers)
    throws IOException {
  List<BlockLocation> locations = info.getLocations();
  // Typed empty list instead of the raw Collections.EMPTY_LIST (avoids an unchecked conversion).
  List<BlockWorkerInfo> blockWorkerInfo = Collections.emptyList();
  // Initial target workers to read the block given the block locations.
  Set<WorkerNetAddress> workerPool;
  // Note that, it is possible that the blocks have been written as UFS blocks
  if (status.isPersisted() || status.getPersistenceState().equals("TO_BE_PERSISTED")) {
    blockWorkerInfo = mContext.getCachedWorkers();
    if (blockWorkerInfo.isEmpty()) {
      throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
    }
    workerPool = blockWorkerInfo.stream().map(BlockWorkerInfo::getNetAddress).collect(toSet());
  } else {
    if (locations.isEmpty()) {
      // Nothing to read from: report "no worker" if the cluster has none, otherwise report
      // that this particular block is unavailable.
      blockWorkerInfo = mContext.getCachedWorkers();
      if (blockWorkerInfo.isEmpty()) {
        throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
      }
      throw new UnavailableException(
          ExceptionMessage.BLOCK_UNAVAILABLE.getMessage(info.getBlockId()));
    }
    workerPool = locations.stream().map(BlockLocation::getWorkerAddress).collect(toSet());
  }
  // Workers to read the block, after considering failed workers.
  Set<WorkerNetAddress> workers = handleFailedWorkers(workerPool, failedWorkers);
  // TODO(calvin, jianjian): Consider containing these two variables in one object
  BlockInStreamSource dataSourceType = null;
  WorkerNetAddress dataSource = null;
  // Keep only the block locations whose worker survived the failed-worker filtering.
  locations = locations.stream()
      .filter(location -> workers.contains(location.getWorkerAddress()))
      .collect(toList());
  // First try to read data from Alluxio
  if (!locations.isEmpty()) {
    // TODO(calvin): Get location via a policy
    List<WorkerNetAddress> tieredLocations =
        locations.stream().map(BlockLocation::getWorkerAddress).collect(toList());
    Collections.shuffle(tieredLocations);
    Optional<Pair<WorkerNetAddress, Boolean>> nearest =
        BlockLocationUtils.nearest(mTieredIdentity, tieredLocations, mContext.getClusterConf());
    if (nearest.isPresent()) {
      Pair<WorkerNetAddress, Boolean> nearestWorker = nearest.get();
      dataSource = nearestWorker.getFirst();
      // The Boolean of the pair selects between a local source and a remote one.
      if (!nearestWorker.getSecond()) {
        dataSourceType = BlockInStreamSource.REMOTE;
      } else if (mContext.hasProcessLocalWorker()) {
        dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
      } else {
        dataSourceType = BlockInStreamSource.NODE_LOCAL;
      }
    }
  }
  // Can't get data from Alluxio, get it from the UFS instead
  if (dataSource == null) {
    dataSourceType = BlockInStreamSource.UFS;
    Preconditions.checkNotNull(policy, PreconditionMessage.UFS_READ_LOCATION_POLICY_UNSPECIFIED);
    blockWorkerInfo = blockWorkerInfo.stream()
        .filter(workerInfo -> workers.contains(workerInfo.getNetAddress()))
        .collect(toList());
    // Hand the policy a trimmed copy of the block info plus the eligible workers.
    BlockInfo policyBlockInfo = new BlockInfo()
        .setBlockId(info.getBlockId())
        .setLength(info.getLength())
        .setLocations(locations);
    GetWorkerOptions getWorkerOptions = GetWorkerOptions.defaults()
        .setBlockInfo(policyBlockInfo)
        .setBlockWorkerInfos(blockWorkerInfo);
    dataSource = policy.getWorker(getWorkerOptions);
    if (dataSource != null) {
      if (mContext.hasProcessLocalWorker() && dataSource.equals(mContext.getNodeLocalWorker())) {
        dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
        LOG.debug("Create BlockInStream to read data from UFS through process local worker {}", dataSource);
      } else {
        LOG.debug("Create BlockInStream to read data from UFS through worker {} " + "(client embedded in local worker process: {}," + "client co-located with worker in different processes: {}, " + "local worker address: {})", dataSource, mContext.hasProcessLocalWorker(), mContext.hasNodeLocalWorker(), mContext.hasNodeLocalWorker() ? mContext.getNodeLocalWorker() : "N/A");
      }
    }
  }
  if (dataSource == null) {
    throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
  }
  return new Pair<>(dataSource, dataSourceType);
}
use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.
the class RemoteReadIntegrationTest method readTest4.
/**
 * Tests the single byte read API from a remote location when the data is in an Alluxio worker.
 *
 * <p>For each file size, the file is written, its first block is opened as a {@code REMOTE}
 * {@link BlockInStream} against the first reported block location, and the block is drained one
 * byte at a time.
 */
@Test
public void readTest4() throws Exception {
  String uniqPath = PathUtils.uniqPath();
  for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
    AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
    FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
    URIStatus status = mFileSystem.getStatus(uri);
    InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
    long blockId = status.getBlockIds().get(0);
    AlluxioBlockStore blockStore =
        AlluxioBlockStore.create(FileSystemContext.create(ServerConfiguration.global()));
    BlockInfo info = blockStore.getInfo(blockId);
    WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
    byte[] ret = new byte[k];
    int cnt = 0;
    // try-with-resources so the stream is released even when a read or an assertion fails.
    try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId),
        workerAddr, BlockInStreamSource.REMOTE, options)) {
      for (int value = is.read(); value != -1; value = is.read()) {
        // A single-byte read must yield an unsigned byte value.
        Assert.assertTrue(value >= 0);
        Assert.assertTrue(value < 256);
        ret[cnt++] = (byte) value;
      }
      // JUnit's argument order is (expected, actual).
      Assert.assertEquals(k, cnt);
      Assert.assertTrue(BufferUtils.equalIncreasingByteArray(k, ret));
    }
    FileSystemUtils.waitForAlluxioPercentage(mFileSystem, uri, 100);
  }
}
use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.
the class RemoteReadIntegrationTest method readTest6.
/**
 * Tests the batch read API with offset and length from a remote location when the data is in an
 * Alluxio worker.
 *
 * <p>For each file size, the first half of the first block is read through a {@code REMOTE}
 * {@link BlockInStream} using {@code read(byte[], int, int)} until the buffer is full.
 */
@Test
public void readTest6() throws Exception {
  String uniqPath = PathUtils.uniqPath();
  for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
    AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
    FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
    URIStatus status = mFileSystem.getStatus(uri);
    InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
    long blockId = status.getBlockIds().get(0);
    BlockInfo info = AlluxioBlockStore
        .create(FileSystemContext.create(ServerConfiguration.global())).getInfo(blockId);
    WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
    byte[] ret = new byte[k / 2];
    int read = 0;
    // try-with-resources so the stream is released even when a read or an assertion fails.
    try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId),
        workerAddr, BlockInStreamSource.REMOTE, options)) {
      while (read < ret.length) {
        int bytesRead = is.read(ret, read, ret.length - read);
        // Without this check a premature -1 would be added to the counter, moving it
        // backwards instead of failing the test.
        Assert.assertTrue("Unexpected end of stream after " + read + " bytes", bytesRead != -1);
        read += bytesRead;
      }
      Assert.assertTrue(BufferUtils.equalIncreasingByteArray(read, ret));
    }
    FileSystemUtils.waitForAlluxioPercentage(mFileSystem, uri, 100);
  }
}
use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.
the class RemoteReadIntegrationTest method remoteReadLock.
/**
 * Tests remote reads lock blocks correctly.
 *
 * <p>A stream is opened on the first block, the file is deleted while the stream is still open,
 * and the stream is then expected to keep returning data. After the stream is closed, a second
 * attempt to open the same block is made.
 */
@Test
public void remoteReadLock() throws Exception {
  String uniqPath = PathUtils.uniqPath();
  for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
    AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
    FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
    URIStatus status = mFileSystem.getStatus(uri);
    InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
    long blockId = status.getBlockIds().get(0);
    BlockInfo info = AlluxioBlockStore
        .create(FileSystemContext.create(ServerConfiguration.global())).getInfo(blockId);
    WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
    // try-with-resources so the stream is released even if one of the assertions fails.
    try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId),
        workerAddr, BlockInStreamSource.REMOTE, options)) {
      Assert.assertEquals(0, is.read());
      mFileSystem.delete(uri);
      // The file has been deleted.
      Assert.assertFalse(mFileSystem.exists(uri));
      // The already-open stream must still serve data after the delete; the premise of this
      // test is that the open read holds a lock on the block.
      byte[] ret = new byte[k / 2];
      Assert.assertTrue(is.read(ret, 0, k / 2) > 0);
    }
    Assert.assertFalse(mFileSystem.exists(uri));
    // Try to create an in stream again, and it should fail.
    // NOTE(review): a successful creation is tolerated here (the stream is just closed);
    // only NotFoundException is treated as the expected failure.
    BlockInStream is2 = null;
    try {
      is2 = BlockInStream.create(mFsContext, options.getBlockInfo(blockId), workerAddr,
          BlockInStreamSource.REMOTE, options);
    } catch (NotFoundException e) {
      // Expected since the file has been deleted.
    } finally {
      if (is2 != null) {
        is2.close();
      }
    }
  }
}
use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.
the class AlluxioFileInStreamTest method blockInStreamOutOfSync.
/**
 * Verifies that a block stream whose content disagrees with the metadata about the block length
 * makes the read fail with an exception instead of leaving the client hanging. Such a mismatch
 * may happen if the file in Alluxio and UFS is out of sync.
 */
@Test
public void blockInStreamOutOfSync() throws Exception {
  // Every requested block stream is backed by a one-byte array but built with BLOCK_LENGTH.
  Answer<BlockInStream> inconsistentStream = new Answer<BlockInStream>() {
    @Override
    public BlockInStream answer(InvocationOnMock invocation) throws Throwable {
      return new TestBlockInStream(new byte[1], 0, BLOCK_LENGTH, false, mBlockSource);
    }
  };
  when(mBlockStore.getInStream(any(BlockInfo.class), any(InStreamOptions.class), any()))
      .thenAnswer(inconsistentStream);

  int length = (int) BLOCK_LENGTH;
  byte[] buffer = new byte[length];
  try {
    mTestStream.read(buffer, 0, length);
  } catch (IllegalStateException e) {
    // expect an exception to throw
    return;
  }
  fail("BlockInStream is inconsistent, an Exception is expected");
}
Aggregations