Search in sources :

Example 1 with BlockInStreamSource

use of alluxio.client.block.stream.BlockInStream.BlockInStreamSource in project alluxio by Alluxio.

the class AlluxioBlockStore method getInStream.

/**
 * Counterpart of {@link #getInStream(long, InStreamOptions, Map)} that takes a {@link BlockInfo}.
 *
 * <p>The worker to read from and the kind of read are resolved through
 * {@code getDataSourceAndType}, and the stream is then created against that worker. If the
 * creation fails with an {@link UnavailableException}, the chosen worker is recorded in
 * {@code failedWorkers} with the current time before the exception is rethrown, so the map
 * passed in must accept new entries.
 *
 * @param info the block info
 * @param options the options associated with the read request
 * @param failedWorkers the map of workers address to most recent failure time
 * @return a stream which reads from the beginning of the block
 * @throws UnavailableException if the stream cannot be created on the selected worker
 * @throws IOException if selecting the data source or creating the stream fails otherwise
 */
public BlockInStream getInStream(BlockInfo info, InStreamOptions options, Map<WorkerNetAddress, Long> failedWorkers) throws IOException {
    final Pair<WorkerNetAddress, BlockInStreamSource> selection = getDataSourceAndType(
            info, options.getStatus(), options.getUfsReadLocationPolicy(), failedWorkers);
    final WorkerNetAddress worker = selection.getFirst();
    final BlockInStreamSource sourceType = selection.getSecond();
    try {
        return BlockInStream.create(mContext, info, worker, sourceType, options);
    } catch (UnavailableException unavailable) {
        // Remember when this worker failed so that the next attempt can try to
        // steer away from it, then let the caller see the original failure.
        final long failureTimeMs = System.currentTimeMillis();
        failedWorkers.put(worker, failureTimeMs);
        throw unavailable;
    }
}
Also used : WorkerNetAddress(alluxio.wire.WorkerNetAddress) BlockInStreamSource(alluxio.client.block.stream.BlockInStream.BlockInStreamSource) UnavailableException(alluxio.exception.status.UnavailableException)

Example 2 with BlockInStreamSource

use of alluxio.client.block.stream.BlockInStream.BlockInStreamSource in project alluxio by Alluxio.

the class AlluxioBlockStore method getDataSourceAndType.

/**
 * Gets the data source and type of data source of a block.
 *
 * <p>The initial pool of candidate workers is every cached worker when the file is persisted or
 * its persistence state is {@code TO_BE_PERSISTED}; otherwise it is only the workers listed in
 * the block's locations. The pool is then narrowed by {@code handleFailedWorkers} using
 * {@code failedWorkers}. If any remaining worker holds the block, one is picked with
 * {@code BlockLocationUtils.nearest}; otherwise the given policy picks a worker through which
 * the block is read from the UFS.
 *
 * <p>This method does not record new failures in {@code failedWorkers} itself; it only hands
 * the map to {@code handleFailedWorkers}.
 *
 * @param info the info of the block to read
 * @param status the URIStatus associated with the read request
 * @param policy the policy determining the Alluxio worker location; must be non-null when the
 *        block has to be read from the UFS
 * @param failedWorkers the map of workers address to most recent failure time
 * @return the data source and type of data source of the block
 * @throws UnavailableException if there are no cached workers, if the block is not persisted
 *         and has no locations, or if no worker could be selected
 * @throws IOException if a lookup performed while selecting the worker fails
 */
public Pair<WorkerNetAddress, BlockInStreamSource> getDataSourceAndType(BlockInfo info, URIStatus status, BlockLocationPolicy policy, Map<WorkerNetAddress, Long> failedWorkers) throws IOException {
    List<BlockLocation> locations = info.getLocations();
    // Typed empty list; the raw Collections.EMPTY_LIST would need an unchecked conversion.
    List<BlockWorkerInfo> blockWorkerInfo = Collections.emptyList();
    // Initial target workers to read the block given the block locations.
    Set<WorkerNetAddress> workerPool;
    // Note that, it is possible that the blocks have been written as UFS blocks.
    // Constant-first comparison so a null persistence state is treated as "not pending"
    // instead of raising a NullPointerException.
    if (status.isPersisted() || "TO_BE_PERSISTED".equals(status.getPersistenceState())) {
        blockWorkerInfo = mContext.getCachedWorkers();
        if (blockWorkerInfo.isEmpty()) {
            throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
        }
        workerPool = blockWorkerInfo.stream().map(BlockWorkerInfo::getNetAddress).collect(toSet());
    } else {
        if (locations.isEmpty()) {
            // Both outcomes throw; the worker lookup only decides which message is reported.
            if (mContext.getCachedWorkers().isEmpty()) {
                throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
            }
            throw new UnavailableException(ExceptionMessage.BLOCK_UNAVAILABLE.getMessage(info.getBlockId()));
        }
        workerPool = locations.stream().map(BlockLocation::getWorkerAddress).collect(toSet());
    }
    // Workers to read the block, after considering failed workers.
    Set<WorkerNetAddress> workers = handleFailedWorkers(workerPool, failedWorkers);
    // TODO(calvin, jianjian): Consider containing these two variables in one object
    BlockInStreamSource dataSourceType = null;
    WorkerNetAddress dataSource = null;
    // Keep only the block locations whose worker is still eligible.
    locations = locations.stream().filter(location -> workers.contains(location.getWorkerAddress())).collect(toList());
    // First try to read data from Alluxio
    if (!locations.isEmpty()) {
        // TODO(calvin): Get location via a policy
        List<WorkerNetAddress> tieredLocations = locations.stream().map(BlockLocation::getWorkerAddress).collect(toList());
        Collections.shuffle(tieredLocations);
        Optional<Pair<WorkerNetAddress, Boolean>> nearest = BlockLocationUtils.nearest(mTieredIdentity, tieredLocations, mContext.getClusterConf());
        if (nearest.isPresent()) {
            Pair<WorkerNetAddress, Boolean> nearestWorker = nearest.get();
            dataSource = nearestWorker.getFirst();
            // The second element selects between the local source types and REMOTE.
            if (nearestWorker.getSecond()) {
                if (mContext.hasProcessLocalWorker()) {
                    dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
                } else {
                    dataSourceType = BlockInStreamSource.NODE_LOCAL;
                }
            } else {
                dataSourceType = BlockInStreamSource.REMOTE;
            }
        }
    }
    // Can't get data from Alluxio, get it from the UFS instead
    if (dataSource == null) {
        dataSourceType = BlockInStreamSource.UFS;
        Preconditions.checkNotNull(policy, PreconditionMessage.UFS_READ_LOCATION_POLICY_UNSPECIFIED);
        // Offer the policy only the workers that survived the failed-worker handling.
        blockWorkerInfo = blockWorkerInfo.stream().filter(workerInfo -> workers.contains(workerInfo.getNetAddress())).collect(toList());
        GetWorkerOptions getWorkerOptions = GetWorkerOptions.defaults().setBlockInfo(new BlockInfo().setBlockId(info.getBlockId()).setLength(info.getLength()).setLocations(locations)).setBlockWorkerInfos(blockWorkerInfo);
        dataSource = policy.getWorker(getWorkerOptions);
        if (dataSource != null) {
            if (mContext.hasProcessLocalWorker() && dataSource.equals(mContext.getNodeLocalWorker())) {
                dataSourceType = BlockInStreamSource.PROCESS_LOCAL;
                LOG.debug("Create BlockInStream to read data from UFS through process local worker {}", dataSource);
            } else {
                LOG.debug("Create BlockInStream to read data from UFS through worker {} " + "(client embedded in local worker process: {}," + "client co-located with worker in different processes: {}, " + "local worker address: {})", dataSource, mContext.hasProcessLocalWorker(), mContext.hasNodeLocalWorker(), mContext.hasNodeLocalWorker() ? mContext.getNodeLocalWorker() : "N/A");
            }
        }
    }
    if (dataSource == null) {
        throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage());
    }
    return new Pair<>(dataSource, dataSourceType);
}
Also used : BlockLocationPolicy(alluxio.client.block.policy.BlockLocationPolicy) WorkerNetAddress(alluxio.wire.WorkerNetAddress) LoggerFactory(org.slf4j.LoggerFactory) BlockInfo(alluxio.wire.BlockInfo) TieredIdentity(alluxio.wire.TieredIdentity) HashMap(java.util.HashMap) BlockOutStream(alluxio.client.block.stream.BlockOutStream) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) CloseableResource(alluxio.resource.CloseableResource) Map(java.util.Map) BlockLocationUtils(alluxio.client.block.util.BlockLocationUtils) DataWriter(alluxio.client.block.stream.DataWriter) PreconditionMessage(alluxio.exception.PreconditionMessage) TieredIdentityFactory(alluxio.network.TieredIdentityFactory) Collectors.toSet(java.util.stream.Collectors.toSet) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) BlockInStream(alluxio.client.block.stream.BlockInStream) InStreamOptions(alluxio.client.file.options.InStreamOptions) ExceptionMessage(alluxio.exception.ExceptionMessage) OutStreamOptions(alluxio.client.file.options.OutStreamOptions) Set(java.util.Set) IOException(java.io.IOException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) Pair(alluxio.collections.Pair) GetWorkerOptions(alluxio.client.block.policy.options.GetWorkerOptions) BlockLocation(alluxio.wire.BlockLocation) Collectors.toList(java.util.stream.Collectors.toList) URIStatus(alluxio.client.file.URIStatus) List(java.util.List) FileSystemContext(alluxio.client.file.FileSystemContext) BlockInStreamSource(alluxio.client.block.stream.BlockInStream.BlockInStreamSource) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) WriteType(alluxio.client.WriteType) Collections(java.util.Collections) UnavailableException(alluxio.exception.status.UnavailableException) UnavailableException(alluxio.exception.status.UnavailableException) 
BlockInStreamSource(alluxio.client.block.stream.BlockInStream.BlockInStreamSource) GetWorkerOptions(alluxio.client.block.policy.options.GetWorkerOptions) BlockLocation(alluxio.wire.BlockLocation) WorkerNetAddress(alluxio.wire.WorkerNetAddress) BlockInfo(alluxio.wire.BlockInfo) Pair(alluxio.collections.Pair)

Aggregations

BlockInStreamSource (alluxio.client.block.stream.BlockInStream.BlockInStreamSource)2 UnavailableException (alluxio.exception.status.UnavailableException)2 WorkerNetAddress (alluxio.wire.WorkerNetAddress)2 WriteType (alluxio.client.WriteType)1 BlockLocationPolicy (alluxio.client.block.policy.BlockLocationPolicy)1 GetWorkerOptions (alluxio.client.block.policy.options.GetWorkerOptions)1 BlockInStream (alluxio.client.block.stream.BlockInStream)1 BlockOutStream (alluxio.client.block.stream.BlockOutStream)1 DataWriter (alluxio.client.block.stream.DataWriter)1 BlockLocationUtils (alluxio.client.block.util.BlockLocationUtils)1 FileSystemContext (alluxio.client.file.FileSystemContext)1 URIStatus (alluxio.client.file.URIStatus)1 InStreamOptions (alluxio.client.file.options.InStreamOptions)1 OutStreamOptions (alluxio.client.file.options.OutStreamOptions)1 Pair (alluxio.collections.Pair)1 ExceptionMessage (alluxio.exception.ExceptionMessage)1 PreconditionMessage (alluxio.exception.PreconditionMessage)1 TieredIdentityFactory (alluxio.network.TieredIdentityFactory)1 CloseableResource (alluxio.resource.CloseableResource)1 BlockInfo (alluxio.wire.BlockInfo)1