Search in sources :

Example 6 with BlockInStream

use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.

the class AlluxioBlockStore method getDataSourceAndType.

/**
 * Determines which worker a block should be read from and what kind of source that worker is.
 *
 * <p>Candidate workers are the block's known locations, or all cached workers when the file is
 * persisted (or about to be persisted). The candidates are passed through
 * {@code handleFailedWorkers} together with {@code failedWorkers} so that recently failed workers
 * can be taken into account. If one of the remaining workers already holds the block, the nearest
 * one is chosen; otherwise the given policy picks a worker to read the block from the UFS.
 *
 * @param info the info of the block to read
 * @param status the URIStatus associated with the read request
 * @param policy the policy determining the Alluxio worker location; must be non-null when the
 *        block has to be read from the UFS
 * @param failedWorkers the map of workers address to most recent failure time
 * @return the data source and type of data source of the block
 * @throws UnavailableException if no worker is available, or if the block is not persisted and
 *         has no known location
 * @throws IOException declared for callers; see the checks above for what this method raises
 */
public Pair<WorkerNetAddress, BlockInStreamSource> getDataSourceAndType(BlockInfo info, URIStatus status, BlockLocationPolicy policy, Map<WorkerNetAddress, Long> failedWorkers) throws IOException {
    List<BlockLocation> locations = info.getLocations();
    // Typed empty list rather than the raw Collections.EMPTY_LIST (avoids an unchecked conversion).
    List<BlockWorkerInfo> blockWorkerInfo = Collections.emptyList();
    // Initial target workers to read the block given the block locations.
    Set<WorkerNetAddress> workerPool;
    // Note that, it is possible that the blocks have been written as UFS blocks.
    // Constant-first equals keeps the check safe if the persistence state is null.
    if (status.isPersisted() || "TO_BE_PERSISTED".equals(status.getPersistenceState())) {
        blockWorkerInfo = mContext.getCachedWorkers();
        if (blockWorkerInfo.isEmpty()) {
            throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
        }
        workerPool = blockWorkerInfo.stream().map(BlockWorkerInfo::getNetAddress).collect(toSet());
    } else {
        if (locations.isEmpty()) {
            // Both outcomes throw; the worker list is only consulted to pick the more precise message.
            if (mContext.getCachedWorkers().isEmpty()) {
                throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
            }
            throw new UnavailableException(ExceptionMessage.BLOCK_UNAVAILABLE.getMessage(info.getBlockId()));
        }
        workerPool = locations.stream().map(BlockLocation::getWorkerAddress).collect(toSet());
    }
    // Workers to read the block, after considering failed workers.
    Set<WorkerNetAddress> workers = handleFailedWorkers(workerPool, failedWorkers);
    // TODO(calvin, jianjian): Consider containing these two variables in one object
    BlockInStreamSource dataSourceType = null;
    WorkerNetAddress dataSource = null;
    // Keep only the block locations whose worker survived the failed-worker handling.
    locations = locations.stream().filter(location -> workers.contains(location.getWorkerAddress())).collect(toList());
    // First try to read data from Alluxio
    if (!locations.isEmpty()) {
        // TODO(calvin): Get location via a policy
        List<WorkerNetAddress> tieredLocations = locations.stream().map(BlockLocation::getWorkerAddress).collect(toList());
        // Shuffle before asking for the nearest worker so the input order is randomized.
        Collections.shuffle(tieredLocations);
        Optional<Pair<WorkerNetAddress, Boolean>> nearest = BlockLocationUtils.nearest(mTieredIdentity, tieredLocations, mContext.getClusterConf());
        if (nearest.isPresent()) {
            dataSource = nearest.get().getFirst();
            // The boolean in the pair selects a local source type (true) or REMOTE (false).
            if (!nearest.get().getSecond()) {
                dataSourceType = BlockInStreamSource.REMOTE;
            } else if (mContext.hasProcessLocalWorker()) {
                dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
            } else {
                dataSourceType = BlockInStreamSource.NODE_LOCAL;
            }
        }
    }
    // Can't get data from Alluxio, get it from the UFS instead
    if (dataSource == null) {
        dataSourceType = BlockInStreamSource.UFS;
        Preconditions.checkNotNull(policy, PreconditionMessage.UFS_READ_LOCATION_POLICY_UNSPECIFIED);
        blockWorkerInfo = blockWorkerInfo.stream().filter(workerInfo -> workers.contains(workerInfo.getNetAddress())).collect(toList());
        GetWorkerOptions getWorkerOptions = GetWorkerOptions.defaults().setBlockInfo(new BlockInfo().setBlockId(info.getBlockId()).setLength(info.getLength()).setLocations(locations)).setBlockWorkerInfos(blockWorkerInfo);
        dataSource = policy.getWorker(getWorkerOptions);
        if (dataSource != null) {
            if (mContext.hasProcessLocalWorker() && dataSource.equals(mContext.getNodeLocalWorker())) {
                dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
                LOG.debug("Create BlockInStream to read data from UFS through process local worker {}", dataSource);
            } else {
                LOG.debug("Create BlockInStream to read data from UFS through worker {} " + "(client embedded in local worker process: {}," + "client co-located with worker in different processes: {}, " + "local worker address: {})", dataSource, mContext.hasProcessLocalWorker(), mContext.hasNodeLocalWorker(), mContext.hasNodeLocalWorker() ? mContext.getNodeLocalWorker() : "N/A");
            }
        }
    }
    // Neither an Alluxio location nor the policy produced a worker.
    if (dataSource == null) {
        throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
    }
    return new Pair<>(dataSource, dataSourceType);
}
Also used : BlockLocationPolicy(alluxio.client.block.policy.BlockLocationPolicy) WorkerNetAddress(alluxio.wire.WorkerNetAddress) LoggerFactory(org.slf4j.LoggerFactory) BlockInfo(alluxio.wire.BlockInfo) TieredIdentity(alluxio.wire.TieredIdentity) HashMap(java.util.HashMap) BlockOutStream(alluxio.client.block.stream.BlockOutStream) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) Map(java.util.Map) BlockLocationUtils(alluxio.client.block.util.BlockLocationUtils) DataWriter(alluxio.client.block.stream.DataWriter) PreconditionMessage(alluxio.exception.PreconditionMessage) TieredIdentityFactory(alluxio.network.TieredIdentityFactory) Collectors.toSet(java.util.stream.Collectors.toSet) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) BlockInStream(alluxio.client.block.stream.BlockInStream) InStreamOptions(alluxio.client.file.options.InStreamOptions) ExceptionMessage(alluxio.exception.ExceptionMessage) OutStreamOptions(alluxio.client.file.options.OutStreamOptions) Set(java.util.Set) IOException(java.io.IOException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) Pair(alluxio.collections.Pair) GetWorkerOptions(alluxio.client.block.policy.options.GetWorkerOptions) BlockLocation(alluxio.wire.BlockLocation) Collectors.toList(java.util.stream.Collectors.toList) URIStatus(alluxio.client.file.URIStatus) List(java.util.List) FileSystemContext(alluxio.client.file.FileSystemContext) BlockInStreamSource(alluxio.client.block.stream.BlockInStream.BlockInStreamSource) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) WriteType(alluxio.client.WriteType) Collections(java.util.Collections) UnavailableException(alluxio.exception.status.UnavailableException) UnavailableException(alluxio.exception.status.UnavailableException) 
BlockInStreamSource(alluxio.client.block.stream.BlockInStream.BlockInStreamSource) GetWorkerOptions(alluxio.client.block.policy.options.GetWorkerOptions) BlockLocation(alluxio.wire.BlockLocation) WorkerNetAddress(alluxio.wire.WorkerNetAddress) BlockInfo(alluxio.wire.BlockInfo) Pair(alluxio.collections.Pair)

Example 7 with BlockInStream

use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.

the class RemoteReadIntegrationTest method readTest4.

/**
 * Tests the single byte read API from a remote location when the data is in an Alluxio worker.
 *
 * <p>For each file length, the file is created, its first block is read one byte at a time
 * through a {@code REMOTE} {@link BlockInStream}, and the bytes are checked against the expected
 * increasing sequence.
 */
@Test
public void readTest4() throws Exception {
    String uniqPath = PathUtils.uniqPath();
    for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
        AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
        FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
        URIStatus status = mFileSystem.getStatus(uri);
        InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
        long blockId = status.getBlockIds().get(0);
        AlluxioBlockStore blockStore = AlluxioBlockStore.create(FileSystemContext.create(ServerConfiguration.global()));
        BlockInfo info = blockStore.getInfo(blockId);
        WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
        // try-with-resources: the stream is closed even if an assertion fails while reading.
        try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId), workerAddr, BlockInStreamSource.REMOTE, options)) {
            byte[] ret = new byte[k];
            int cnt = 0;
            int value = is.read();
            while (value != -1) {
                Assert.assertTrue(value >= 0);
                Assert.assertTrue(value < 256);
                // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
                Assert.assertTrue("stream returned more than " + k + " bytes", cnt < k);
                ret[cnt++] = (byte) value;
                value = is.read();
            }
            // JUnit convention: expected value first, actual value second.
            Assert.assertEquals(k, cnt);
            Assert.assertTrue(BufferUtils.equalIncreasingByteArray(k, ret));
        }
        FileSystemUtils.waitForAlluxioPercentage(mFileSystem, uri, 100);
    }
}
Also used : BlockInStream(alluxio.client.block.stream.BlockInStream) BlockInfo(alluxio.wire.BlockInfo) WorkerNetAddress(alluxio.wire.WorkerNetAddress) URIStatus(alluxio.client.file.URIStatus) AlluxioBlockStore(alluxio.client.block.AlluxioBlockStore) AlluxioURI(alluxio.AlluxioURI) InStreamOptions(alluxio.client.file.options.InStreamOptions) BaseIntegrationTest(alluxio.testutils.BaseIntegrationTest) Test(org.junit.Test)

Example 8 with BlockInStream

use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.

the class RemoteReadIntegrationTest method readTest6.

/**
 * Tests the batch read API with offset and length from a remote location when the data is in an
 * Alluxio worker.
 *
 * <p>For each file length {@code k}, the first {@code k / 2} bytes of the first block are read
 * through a {@code REMOTE} {@link BlockInStream} and checked against the expected increasing
 * sequence.
 */
@Test
public void readTest6() throws Exception {
    String uniqPath = PathUtils.uniqPath();
    for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
        AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
        FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
        URIStatus status = mFileSystem.getStatus(uri);
        InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
        long blockId = status.getBlockIds().get(0);
        BlockInfo info = AlluxioBlockStore.create(FileSystemContext.create(ServerConfiguration.global())).getInfo(blockId);
        WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
        // try-with-resources: the stream is closed even if an assertion fails while reading.
        try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId), workerAddr, BlockInStreamSource.REMOTE, options)) {
            int target = k / 2;
            byte[] ret = new byte[target];
            int read = 0;
            while (read < target) {
                int bytesRead = is.read(ret, read, target - read);
                // A -1 (end of stream) would otherwise decrement the counter and never terminate.
                Assert.assertTrue("unexpected end of stream after " + read + " of " + target + " bytes", bytesRead != -1);
                read += bytesRead;
            }
            Assert.assertTrue(BufferUtils.equalIncreasingByteArray(read, ret));
        }
        FileSystemUtils.waitForAlluxioPercentage(mFileSystem, uri, 100);
    }
}
Also used : BlockInStream(alluxio.client.block.stream.BlockInStream) BlockInfo(alluxio.wire.BlockInfo) WorkerNetAddress(alluxio.wire.WorkerNetAddress) URIStatus(alluxio.client.file.URIStatus) AlluxioURI(alluxio.AlluxioURI) InStreamOptions(alluxio.client.file.options.InStreamOptions) BaseIntegrationTest(alluxio.testutils.BaseIntegrationTest) Test(org.junit.Test)

Example 9 with BlockInStream

use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.

the class RemoteReadIntegrationTest method remoteReadLock.

/**
 * Tests remote reads lock blocks correctly.
 *
 * <p>For each file length, a {@code REMOTE} {@link BlockInStream} is opened on the first block,
 * the file is deleted while the stream is still open, and the test asserts that the open stream
 * can still return data. After the stream is closed, a second stream is opened on the same block;
 * a {@link NotFoundException} from that attempt is tolerated.
 */
@Test
public void remoteReadLock() throws Exception {
    String uniqPath = PathUtils.uniqPath();
    for (int k = MIN_LEN + DELTA; k <= MAX_LEN; k += DELTA) {
        AlluxioURI uri = new AlluxioURI(uniqPath + "/file_" + k);
        FileSystemTestUtils.createByteFile(mFileSystem, uri, mWriteAlluxio, k);
        URIStatus status = mFileSystem.getStatus(uri);
        InStreamOptions options = new InStreamOptions(status, ServerConfiguration.global());
        long blockId = status.getBlockIds().get(0);
        BlockInfo info = AlluxioBlockStore.create(FileSystemContext.create(ServerConfiguration.global())).getInfo(blockId);
        WorkerNetAddress workerAddr = info.getLocations().get(0).getWorkerAddress();
        // try-with-resources: the stream is closed even if one of the assertions below fails.
        try (BlockInStream is = BlockInStream.create(mFsContext, options.getBlockInfo(blockId), workerAddr, BlockInStreamSource.REMOTE, options)) {
            Assert.assertEquals(0, is.read());
            mFileSystem.delete(uri);
            // The file has been deleted.
            Assert.assertFalse(mFileSystem.exists(uri));
            // Look! We can still read the deleted file since we have a lock!
            byte[] ret = new byte[k / 2];
            Assert.assertTrue(is.read(ret, 0, k / 2) > 0);
        }
        Assert.assertFalse(mFileSystem.exists(uri));
        // Try to create an in stream again; a NotFoundException is accepted here.
        // The explicit finally (rather than try-with-resources) keeps any exception thrown by
        // close() outside the catch clause, so it still propagates and fails the test.
        BlockInStream is2 = null;
        try {
            is2 = BlockInStream.create(mFsContext, options.getBlockInfo(blockId), workerAddr, BlockInStreamSource.REMOTE, options);
        } catch (NotFoundException expected) {
            // Expected since the file has been deleted.
        } finally {
            if (is2 != null) {
                is2.close();
            }
        }
    }
}
Also used : BlockInStream(alluxio.client.block.stream.BlockInStream) BlockInfo(alluxio.wire.BlockInfo) WorkerNetAddress(alluxio.wire.WorkerNetAddress) NotFoundException(alluxio.exception.status.NotFoundException) URIStatus(alluxio.client.file.URIStatus) AlluxioURI(alluxio.AlluxioURI) InStreamOptions(alluxio.client.file.options.InStreamOptions) BaseIntegrationTest(alluxio.testutils.BaseIntegrationTest) Test(org.junit.Test)

Example 10 with BlockInStream

use of alluxio.client.block.stream.BlockInStream in project alluxio by Alluxio.

the class AlluxioFileInStreamTest method blockInStreamOutOfSync.

/**
 * Verifies that a read fails fast with an exception, instead of hanging, when the block stream
 * handed out by the block store disagrees with the file metadata about the block length. Such a
 * mismatch may happen if the file in Alluxio and the UFS is out of sync.
 */
@Test
public void blockInStreamOutOfSync() throws Exception {
    // Every requested block stream is backed by a single byte while claiming BLOCK_LENGTH.
    when(mBlockStore.getInStream(any(BlockInfo.class), any(InStreamOptions.class), any()))
        .thenAnswer(invocation -> new TestBlockInStream(new byte[1], 0, BLOCK_LENGTH, false, mBlockSource));
    final int length = (int) BLOCK_LENGTH;
    final byte[] sink = new byte[length];
    try {
        mTestStream.read(sink, 0, length);
        fail("BlockInStream is inconsistent, an Exception is expected");
    } catch (IllegalStateException expected) {
        // The inconsistent stream is expected to surface as an IllegalStateException.
    }
}
Also used : TestBlockInStream(alluxio.client.block.stream.TestBlockInStream) BlockInStream(alluxio.client.block.stream.BlockInStream) BlockInfo(alluxio.wire.BlockInfo) FileBlockInfo(alluxio.wire.FileBlockInfo) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TestBlockInStream(alluxio.client.block.stream.TestBlockInStream) InStreamOptions(alluxio.client.file.options.InStreamOptions) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

BlockInStream (alluxio.client.block.stream.BlockInStream)12 InStreamOptions (alluxio.client.file.options.InStreamOptions)11 BlockInfo (alluxio.wire.BlockInfo)11 Test (org.junit.Test)11 URIStatus (alluxio.client.file.URIStatus)10 WorkerNetAddress (alluxio.wire.WorkerNetAddress)10 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)7 TieredIdentityFactory (alluxio.network.TieredIdentityFactory)6 FileBlockInfo (alluxio.wire.FileBlockInfo)6 FileInfo (alluxio.wire.FileInfo)5 BlockOutStream (alluxio.client.block.stream.BlockOutStream)4 OpenFilePOptions (alluxio.grpc.OpenFilePOptions)4 BlockLocation (alluxio.wire.BlockLocation)4 BlockWorker (alluxio.worker.block.BlockWorker)4 AlluxioURI (alluxio.AlluxioURI)3 WriteType (alluxio.client.WriteType)3 BlockLocationPolicy (alluxio.client.block.policy.BlockLocationPolicy)3 GetWorkerOptions (alluxio.client.block.policy.options.GetWorkerOptions)3 FileSystemContext (alluxio.client.file.FileSystemContext)3 OutStreamOptions (alluxio.client.file.options.OutStreamOptions)3