Search in sources :

Example 71 with LockResource

use of alluxio.resource.LockResource in project alluxio by Alluxio.

the class BufferedJournalApplier method resume.

/**
 * Resumes the applier. This method will apply all buffered entries before returning.
 *
 * The state lock is not held for the whole drain: it is acquired only once the buffer is
 * nearly empty or the drain has been running for too long. Whatever the outcome, the
 * suspended/resume-in-progress flags are reset under the state lock before returning.
 *
 * @throws IOException declared by the signature; the raising call is not visible in this
 *         excerpt (presumably applying a buffered entry -- confirm against the class)
 * @throws IllegalStateException if the applier is not suspended or a resume is already running
 */
public void resume() throws IOException {
    try (LockResource stateLock = new LockResource(mStateLock)) {
        Preconditions.checkState(mSuspended, "Not suspended");
        Preconditions.checkState(!mResumeInProgress, "Resume in progress");
        mResumeInProgress = true;
        LOG.info("Resuming state machine from sequence: {}", mLastAppliedSequence);
    }
    cancelCatchup();
    /*
     * Applies all buffered entries.
     *
     * It doesn't block state until
     *   -> buffer contains few elements ( RESUME_LOCK_BUFFER_SIZE_WATERMARK )
     *   -> was running for a long time  ( RESUME_LOCK_TIME_LIMIT_MS         )
     */
    try {
        // Mark resume start time.
        long resumeStartTimeMs = System.currentTimeMillis();
        // Lock initially if there are few or no elements in the queue.
        if (mSuspendBuffer.size() <= RESUME_LOCK_BUFFER_SIZE_WATERMARK) {
            mStateLock.lock();
        }
        while (!mSuspendBuffer.isEmpty()) {
            applyToMaster(mSuspendBuffer.remove());
            // Check whether to lock the state now.
            boolean lockSubmission = !mStateLock.isHeldByCurrentThread() && (mSuspendBuffer.size() <= RESUME_LOCK_BUFFER_SIZE_WATERMARK || (System.currentTimeMillis() - resumeStartTimeMs) > RESUME_LOCK_TIME_LIMIT_MS);
            if (lockSubmission) {
                mStateLock.lock();
            }
        }
    } finally {
        // A failure while draining may leave us here without the lock. Take it now so the
        // state reset below is guarded and unlock() cannot throw IllegalMonitorStateException,
        // which would otherwise mask the original exception.
        if (!mStateLock.isHeldByCurrentThread()) {
            mStateLock.lock();
        }
        try {
            mSuspended = false;
            mResumeInProgress = false;
            mCatchupThread = null;
        } finally {
            mStateLock.unlock();
        }
    }
}
Also used : LockResource(alluxio.resource.LockResource)

Example 72 with LockResource

use of alluxio.resource.LockResource in project alluxio by Alluxio.

the class BufferedJournalApplier method suspend.

/**
 * Puts the applier into the suspended state.
 *
 * Once suspended, journal entries are buffered until either {@link #resume()} or
 * {@link #catchup(long)} is invoked.
 *
 * @throws IOException declared by the signature
 * @throws IllegalStateException if the applier is already suspended
 */
public void suspend() throws IOException {
    // The resource is held only for its locking side effect; it is released on scope exit.
    try (LockResource ignored = new LockResource(mStateLock)) {
        // Suspending twice is a caller error.
        Preconditions.checkState(!mSuspended, "Already suspended");
        mSuspended = true;
        LOG.info("Suspended state machine at sequence: {}", mLastAppliedSequence);
    }
}
Also used : LockResource(alluxio.resource.LockResource)

Example 73 with LockResource

use of alluxio.resource.LockResource in project alluxio by Alluxio.

the class DefaultBlockMaster method generateBlockInfo.

/**
 * Builds the {@link BlockInfo} for a block id, including the locations of the workers
 * that hold the block.
 *
 * No lock is taken on {@link MasterWorkerInfo} because only getters of final fields are read.
 *
 * @param blockId a block id
 * @return the block info, or an empty optional if the block store does not know the block
 * @throws UnavailableException if the master is in safe mode
 */
private Optional<BlockInfo> generateBlockInfo(long blockId) throws UnavailableException {
    if (mSafeModeManager.isInSafeMode()) {
        throw new UnavailableException(ExceptionMessage.MASTER_IN_SAFEMODE.getMessage());
    }
    BlockMeta meta;
    List<BlockLocation> storedLocations;
    try (LockResource ignored = lockBlock(blockId)) {
        Optional<BlockMeta> found = mBlockStore.getBlock(blockId);
        if (!found.isPresent()) {
            return Optional.empty();
        }
        meta = found.get();
        // Copy the locations so they can be sorted and read after the block lock is released.
        storedLocations = new ArrayList<>(mBlockStore.getLocations(blockId));
    }
    // Order locations by the ordinal of their tier alias in the master storage tier mapping.
    Comparator<BlockLocation> byTierOrdinal =
        Comparator.comparingInt(loc -> mGlobalStorageTierAssoc.getOrdinal(loc.getTier()));
    storedLocations.sort(byTierOrdinal);
    List<alluxio.wire.BlockLocation> wireLocations = new ArrayList<>();
    for (BlockLocation stored : storedLocations) {
        MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, stored.getWorkerId());
        if (worker == null) {
            // No registered worker with this id; leave the location out of the result.
            continue;
        }
        // Worker metadata is intentionally not locked here because:
        // - it would be an incorrect order (correct order is lock worker first, then block)
        // - only getters of final variables are used
        wireLocations.add(new alluxio.wire.BlockLocation()
            .setWorkerId(stored.getWorkerId())
            .setWorkerAddress(worker.getWorkerAddress())
            .setTierAlias(stored.getTier())
            .setMediumType(stored.getMediumType()));
    }
    BlockInfo info = new BlockInfo()
        .setBlockId(blockId)
        .setLength(meta.getLength())
        .setLocations(wireLocations);
    return Optional.of(info);
}
Also used : Arrays(java.util.Arrays) SystemClock(alluxio.clock.SystemClock) LoadingCache(com.google.common.cache.LoadingCache) Server(alluxio.Server) BlockInfo(alluxio.wire.BlockInfo) PropertyKey(alluxio.conf.PropertyKey) GrpcService(alluxio.grpc.GrpcService) Future(java.util.concurrent.Future) WorkerInfo(alluxio.wire.WorkerInfo) Map(java.util.Map) MetricsMaster(alluxio.master.metrics.MetricsMaster) GetWorkerReportOptions(alluxio.client.block.options.GetWorkerReportOptions) EnumSet(java.util.EnumSet) RegisterLease(alluxio.wire.RegisterLease) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) IndexedSet(alluxio.collections.IndexedSet) ConcurrentHashSet(alluxio.collections.ConcurrentHashSet) Set(java.util.Set) ConfigProperty(alluxio.grpc.ConfigProperty) Block(alluxio.master.metastore.BlockStore.Block) Command(alluxio.grpc.Command) GuardedBy(javax.annotation.concurrent.GuardedBy) ServiceType(alluxio.grpc.ServiceType) BlockStore(alluxio.master.metastore.BlockStore) Metric(alluxio.metrics.Metric) ArrayList(java.util.ArrayList) GrpcUtils(alluxio.grpc.GrpcUtils) BlockInfoException(alluxio.exception.BlockInfoException) BiConsumer(java.util.function.BiConsumer) MetricsSystem(alluxio.metrics.MetricsSystem) Nullable(javax.annotation.Nullable) IdUtils(alluxio.util.IdUtils) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) ExecutorServiceFactory(alluxio.util.executor.ExecutorServiceFactory) Lock(java.util.concurrent.locks.Lock) RegisterWorkerPOptions(alluxio.grpc.RegisterWorkerPOptions) Preconditions(com.google.common.base.Preconditions) ExecutorServiceFactories(alluxio.util.executor.ExecutorServiceFactories) StorageList(alluxio.grpc.StorageList) CommonUtils(alluxio.util.CommonUtils) Address(alluxio.wire.Address) NotThreadSafe(javax.annotation.concurrent.NotThreadSafe) CloseableIterator(alluxio.resource.CloseableIterator) LoggerFactory(org.slf4j.LoggerFactory) 
TimeoutException(java.util.concurrent.TimeoutException) HeartbeatThread(alluxio.heartbeat.HeartbeatThread) MasterStorageTierAssoc(alluxio.MasterStorageTierAssoc) GetRegisterLeasePRequest(alluxio.grpc.GetRegisterLeasePRequest) MetricKey(alluxio.metrics.MetricKey) BlockInfoEntry(alluxio.proto.journal.Block.BlockInfoEntry) InvalidArgumentException(alluxio.exception.status.InvalidArgumentException) IndexDefinition(alluxio.collections.IndexDefinition) ImmutableSet(com.google.common.collect.ImmutableSet) ServerConfiguration(alluxio.conf.ServerConfiguration) StorageTierAssoc(alluxio.StorageTierAssoc) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SuppressFBWarnings(alluxio.annotation.SuppressFBWarnings) CheckpointName(alluxio.master.journal.checkpoint.CheckpointName) Collectors(java.util.stream.Collectors) CacheLoader(com.google.common.cache.CacheLoader) List(java.util.List) CoreMasterContext(alluxio.master.CoreMasterContext) Optional(java.util.Optional) WorkerRange(alluxio.client.block.options.GetWorkerReportOptions.WorkerRange) Gauge(com.codahale.metrics.Gauge) CacheBuilder(com.google.common.cache.CacheBuilder) RegisterWorkerPRequest(alluxio.grpc.RegisterWorkerPRequest) BlockContainerIdGeneratorEntry(alluxio.proto.journal.Block.BlockContainerIdGeneratorEntry) JournalContext(alluxio.master.journal.JournalContext) UnavailableException(alluxio.exception.status.UnavailableException) WorkerLostStorageInfo(alluxio.grpc.WorkerLostStorageInfo) WorkerNetAddress(alluxio.wire.WorkerNetAddress) HashMap(java.util.HashMap) CommandType(alluxio.grpc.CommandType) NetworkAddressUtils(alluxio.util.network.NetworkAddressUtils) Function(java.util.function.Function) MetricInfo(alluxio.metrics.MetricInfo) JournalEntry(alluxio.proto.journal.Journal.JournalEntry) HashSet(java.util.HashSet) Constants(alluxio.Constants) NoSuchElementException(java.util.NoSuchElementException) CoreMaster(alluxio.master.CoreMaster) 
Striped(com.google.common.util.concurrent.Striped) Logger(org.slf4j.Logger) HeartbeatContext(alluxio.heartbeat.HeartbeatContext) Iterator(java.util.Iterator) BlockMeta(alluxio.proto.meta.Block.BlockMeta) ExceptionMessage(alluxio.exception.ExceptionMessage) HeartbeatExecutor(alluxio.heartbeat.HeartbeatExecutor) NotFoundException(alluxio.exception.status.NotFoundException) LockResource(alluxio.resource.LockResource) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) WorkerMetaLockSection(alluxio.master.block.meta.WorkerMetaLockSection) DeleteBlockEntry(alluxio.proto.journal.Block.DeleteBlockEntry) Clock(java.time.Clock) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) BlockLocation(alluxio.proto.meta.Block.BlockLocation) Collections(java.util.Collections) UnavailableException(alluxio.exception.status.UnavailableException) ArrayList(java.util.ArrayList) BlockLocation(alluxio.proto.meta.Block.BlockLocation) LockResource(alluxio.resource.LockResource) BlockInfo(alluxio.wire.BlockInfo) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) BlockMeta(alluxio.proto.meta.Block.BlockMeta)

Example 74 with LockResource

use of alluxio.resource.LockResource in project alluxio by Alluxio.

the class DefaultBlockMaster method processWorkerAddedBlocks.

/**
 * Records blocks newly reported by a worker in both the worker and the block metadata.
 *
 * You should lock externally with {@link MasterWorkerInfo#lockWorkerMeta(EnumSet, boolean)}
 * with {@link WorkerMetaLockSection#BLOCKS} specified.
 * An exclusive lock is required.
 *
 * Blocks unknown to the block store are scheduled for removal from the worker.
 *
 * @param workerInfo The worker metadata object
 * @param addedBlockIds A mapping from block location to the list of block ids added there
 */
private void processWorkerAddedBlocks(MasterWorkerInfo workerInfo, Map<BlockLocation, List<Long>> addedBlockIds) {
    long unrecognizedCount = 0;
    for (Map.Entry<BlockLocation, List<Long>> added : addedBlockIds.entrySet()) {
        BlockLocation location = added.getKey();
        for (long blockId : added.getValue()) {
            try (LockResource ignored = lockBlock(blockId)) {
                if (!mBlockStore.getBlock(blockId).isPresent()) {
                    unrecognizedCount++;
                    // The block is not recognized and should therefore be purged from the worker
                    // The file may have been removed when the worker was lost
                    workerInfo.scheduleRemoveFromWorker(blockId);
                    LOG.debug("Invalid block: {} from worker {}.", blockId, workerInfo.getWorkerAddress().getHost());
                    continue;
                }
                workerInfo.addBlock(blockId);
                // The reported location must belong to the worker that sent the request.
                Preconditions.checkState(location.getWorkerId() == workerInfo.getId(), "BlockLocation has a different workerId %s from the request sender's workerId %s", location.getWorkerId(), workerInfo.getId());
                mBlockStore.addLocation(blockId, location);
                // A block with a live location is dropped from the lost-block set.
                mLostBlocks.remove(blockId);
            }
        }
    }
    if (unrecognizedCount > 0) {
        LOG.warn("{} invalid blocks found on worker {} in total", unrecognizedCount, workerInfo.getWorkerAddress().getHost());
    }
}
Also used : LockResource(alluxio.resource.LockResource) ArrayList(java.util.ArrayList) StorageList(alluxio.grpc.StorageList) List(java.util.List) BlockLocation(alluxio.proto.meta.Block.BlockLocation) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) BlockMeta(alluxio.proto.meta.Block.BlockMeta)

Example 75 with LockResource

use of alluxio.resource.LockResource in project alluxio by Alluxio.

the class DefaultBlockMaster method commitBlock.

// TODO(binfan): check the logic is correct or not when commitBlock is a retry
/**
 * Commits a block reported by a worker: records the block length if it is new (or was of
 * unknown size), adds the worker as a location of the block, and updates the worker's
 * block list and used bytes.
 *
 * @param workerId id of the committing worker
 * @param usedBytesOnTier used bytes to record for the worker on {@code tierAlias}
 * @param tierAlias alias of the storage tier holding the block
 * @param mediumType medium type recorded in the block location
 * @param blockId id of the block being committed
 * @param length length of the block
 * @throws NotFoundException if no worker with {@code workerId} is found
 * @throws UnavailableException declared by the signature; the raising call is not visible here
 */
@Override
public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, String mediumType, long blockId, long length) throws NotFoundException, UnavailableException {
    LOG.debug("Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}", workerId, usedBytesOnTier, blockId, length);
    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    // TODO(peis): Check lost workers as well.
    if (worker == null) {
        throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
    }
    try (JournalContext journalContext = createJournalContext()) {
        // The worker metadata must be locked before the block; resources are released in
        // reverse order, so the block lock is dropped first.
        try (LockResource workerLock = worker.lockWorkerMeta(EnumSet.of(WorkerMetaLockSection.USAGE, WorkerMetaLockSection.BLOCKS), false);
             LockResource blockLock = lockBlock(blockId)) {
            Optional<BlockMeta> existing = mBlockStore.getBlock(blockId);
            boolean sameLengthRecorded = existing.isPresent() && existing.get().getLength() == length;
            if (!sameLengthRecorded) {
                boolean hasKnownLength = existing.isPresent() && existing.get().getLength() != Constants.UNKNOWN_SIZE;
                if (hasKnownLength) {
                    LOG.warn("Rejecting attempt to change block length from {} to {}", existing.get().getLength(), length);
                } else {
                    // New block, or one whose size was unknown: store the length and journal it.
                    BlockMeta newMeta = BlockMeta.newBuilder().setLength(length).build();
                    mBlockStore.putBlock(blockId, newMeta);
                    BlockInfoEntry blockInfo = BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
                    journalContext.append(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
                }
            }
            // Update the block metadata with the new worker location.
            BlockLocation committedLocation = BlockLocation.newBuilder()
                .setWorkerId(workerId)
                .setTier(tierAlias)
                .setMediumType(mediumType)
                .build();
            mBlockStore.addLocation(blockId, committedLocation);
            // This worker has this block, so it is no longer lost.
            mLostBlocks.remove(blockId);
            // Update the worker information for this new block.
            // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not
            // double counted.
            worker.addBlock(blockId);
            worker.updateUsedBytes(tierAlias, usedBytesOnTier);
        }
        worker.updateLastUpdatedTimeMs();
    }
}
Also used : LockResource(alluxio.resource.LockResource) JournalContext(alluxio.master.journal.JournalContext) MasterWorkerInfo(alluxio.master.block.meta.MasterWorkerInfo) NotFoundException(alluxio.exception.status.NotFoundException) BlockMeta(alluxio.proto.meta.Block.BlockMeta) BlockInfoEntry(alluxio.proto.journal.Block.BlockInfoEntry)

Aggregations

LockResource (alluxio.resource.LockResource)116 IOException (java.io.IOException)14 TempBlockMeta (alluxio.worker.block.meta.TempBlockMeta)13 HashMap (java.util.HashMap)12 BlockAlreadyExistsException (alluxio.exception.BlockAlreadyExistsException)11 BlockDoesNotExistException (alluxio.exception.BlockDoesNotExistException)11 Map (java.util.Map)11 AlluxioURI (alluxio.AlluxioURI)10 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)10 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)10 WorkerOutOfSpaceException (alluxio.exception.WorkerOutOfSpaceException)9 ArrayList (java.util.ArrayList)9 MasterWorkerInfo (alluxio.master.block.meta.MasterWorkerInfo)8 InvalidPathException (alluxio.exception.InvalidPathException)7 NotFoundException (alluxio.exception.status.NotFoundException)7 List (java.util.List)7 Lock (java.util.concurrent.locks.Lock)7 ConcurrentHashSet (alluxio.collections.ConcurrentHashSet)6 InvalidWorkerStateException (alluxio.exception.InvalidWorkerStateException)6 UnavailableException (alluxio.exception.status.UnavailableException)6