Search in sources :

Example 31 with UnavailableException

use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.

the class AlluxioFileInStreamTest method positionedReadRetry.

@Test
public void positionedReadRetry() throws Exception {
    TestBlockInStream workingStream = mInStreams.get(0);
    TestBlockInStream brokenStream = mock(TestBlockInStream.class);
    when(mBlockStore.getInStream(eq(0L), any(InStreamOptions.class), any())).thenReturn(brokenStream).thenReturn(workingStream);
    when(brokenStream.positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt())).thenThrow(new UnavailableException("test exception"));
    byte[] b = new byte[(int) BLOCK_LENGTH * 2];
    mTestStream.positionedRead(BLOCK_LENGTH / 2, b, 0, b.length);
    doReturn(0).when(brokenStream).positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt());
    verify(brokenStream, times(1)).positionedRead(anyLong(), any(byte[].class), anyInt(), anyInt());
    assertArrayEquals(BufferUtils.getIncreasingByteArray((int) BLOCK_LENGTH / 2, (int) BLOCK_LENGTH * 2), b);
}
Also used : UnavailableException(alluxio.exception.status.UnavailableException) TestBlockInStream(alluxio.client.block.stream.TestBlockInStream) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 32 with UnavailableException

use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.

the class RaftJournalSystem method getQuorumServerInfoList.

/**
 * Used to get information of internal RAFT quorum.
 *
 * @return list of information for participating servers in RAFT quorum
 */
public synchronized List<QuorumServerInfo> getQuorumServerInfoList() throws IOException {
    List<QuorumServerInfo> quorumMemberStateList = new LinkedList<>();
    GroupInfoReply groupInfo = getGroupInfo();
    if (groupInfo == null) {
        throw new UnavailableException("Cannot get raft group info");
    }
    if (groupInfo.getException() != null) {
        throw groupInfo.getException();
    }
    RaftProtos.RoleInfoProto roleInfo = groupInfo.getRoleInfoProto();
    if (roleInfo == null) {
        throw new UnavailableException("Cannot get server role info");
    }
    RaftProtos.LeaderInfoProto leaderInfo = roleInfo.getLeaderInfo();
    if (leaderInfo == null) {
        throw new UnavailableException("Cannot get server leader info");
    }
    for (RaftProtos.ServerRpcProto member : leaderInfo.getFollowerInfoList()) {
        HostAndPort hp = HostAndPort.fromString(member.getId().getAddress());
        NetAddress memberAddress = NetAddress.newBuilder().setHost(hp.getHost()).setRpcPort(hp.getPort()).build();
        quorumMemberStateList.add(QuorumServerInfo.newBuilder().setIsLeader(false).setPriority(member.getId().getPriority()).setServerAddress(memberAddress).setServerState(member.getLastRpcElapsedTimeMs() > mConf.getMaxElectionTimeoutMs() ? QuorumServerState.UNAVAILABLE : QuorumServerState.AVAILABLE).build());
    }
    InetSocketAddress localAddress = mConf.getLocalAddress();
    NetAddress self = NetAddress.newBuilder().setHost(localAddress.getHostString()).setRpcPort(localAddress.getPort()).build();
    quorumMemberStateList.add(QuorumServerInfo.newBuilder().setIsLeader(true).setPriority(roleInfo.getSelf().getPriority()).setServerAddress(self).setServerState(QuorumServerState.AVAILABLE).build());
    quorumMemberStateList.sort(Comparator.comparing(info -> info.getServerAddress().toString()));
    return quorumMemberStateList;
}
Also used : Arrays(java.util.Arrays) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply) RaftGroup(org.apache.ratis.protocol.RaftGroup) LeaderNotReadyException(org.apache.ratis.protocol.exceptions.LeaderNotReadyException) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PropertyKey(alluxio.conf.PropertyKey) GrpcService(alluxio.grpc.GrpcService) LogUtils(alluxio.util.LogUtils) NetUtils(org.apache.ratis.util.NetUtils) JournalQueryRequest(alluxio.grpc.JournalQueryRequest) MetricKey(alluxio.metrics.MetricKey) Map(java.util.Map) RaftConfigKeys(org.apache.ratis.RaftConfigKeys) CancelledException(alluxio.exception.status.CancelledException) UnsafeByteOperations(org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations) QuorumServerInfo(alluxio.grpc.QuorumServerInfo) ServerConfiguration(alluxio.conf.ServerConfiguration) RaftPeer(org.apache.ratis.protocol.RaftPeer) RetryPolicy(org.apache.ratis.retry.RetryPolicy) Master(alluxio.master.Master) Collection(java.util.Collection) AbstractJournalSystem(alluxio.master.journal.AbstractJournalSystem) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) SupportedRpcType(org.apache.ratis.rpc.SupportedRpcType) CompletionException(java.util.concurrent.CompletionException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) UUID(java.util.UUID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) List(java.util.List) ClientId(org.apache.ratis.protocol.ClientId) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) RaftProperties(org.apache.ratis.conf.RaftProperties) ServiceType(alluxio.grpc.ServiceType) ExponentialBackoffRetry(org.apache.ratis.retry.ExponentialBackoffRetry) Optional(java.util.Optional) PrimarySelector(alluxio.master.PrimarySelector) RatisDropwizardExports(alluxio.metrics.sink.RatisDropwizardExports) AccessDeniedException(java.nio.file.AccessDeniedException) RaftClientConfigKeys(org.apache.ratis.client.RaftClientConfigKeys) UnavailableException(alluxio.exception.status.UnavailableException) TimeDuration(org.apache.ratis.util.TimeDuration) AsyncJournalWriter(alluxio.master.journal.AsyncJournalWriter) GroupInfoRequest(org.apache.ratis.protocol.GroupInfoRequest) CatchupFuture(alluxio.master.journal.CatchupFuture) SetConfigurationRequest(org.apache.ratis.protocol.SetConfigurationRequest) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) JournalEntry(alluxio.proto.journal.Journal.JournalEntry) WaitForOptions(alluxio.util.WaitForOptions) RaftGroupId(org.apache.ratis.protocol.RaftGroupId) AddQuorumServerRequest(alluxio.grpc.AddQuorumServerRequest) Message(org.apache.ratis.protocol.Message) OptionalLong(java.util.OptionalLong) Constants(alluxio.Constants) QuorumServerState(alluxio.grpc.QuorumServerState) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) MetricsSystem(alluxio.metrics.MetricsSystem) LinkedList(java.util.LinkedList) SizeInBytes(org.apache.ratis.util.SizeInBytes) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) NetAddress(alluxio.grpc.NetAddress) Iterator(java.util.Iterator) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) RaftServerConfigKeys(org.apache.ratis.server.RaftServerConfigKeys) ExceptionMessage(alluxio.exception.ExceptionMessage) RaftProtos(org.apache.ratis.proto.RaftProtos) FileUtils(org.apache.commons.io.FileUtils) GrpcConfigKeys(org.apache.ratis.grpc.GrpcConfigKeys) RaftClientRequest(org.apache.ratis.protocol.RaftClientRequest) IOException(java.io.IOException) TransferLeaderMessage(alluxio.grpc.TransferLeaderMessage) HostAndPort(com.google.common.net.HostAndPort) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Parameters(org.apache.ratis.conf.Parameters) AtomicLong(java.util.concurrent.atomic.AtomicLong) LifeCycle(org.apache.ratis.util.LifeCycle) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RaftServer(org.apache.ratis.server.RaftServer) RaftClient(org.apache.ratis.client.RaftClient) Comparator(java.util.Comparator) Journal(alluxio.master.journal.Journal) Collections(java.util.Collections) CommonUtils(alluxio.util.CommonUtils) InetSocketAddress(java.net.InetSocketAddress) UnavailableException(alluxio.exception.status.UnavailableException) QuorumServerInfo(alluxio.grpc.QuorumServerInfo) LinkedList(java.util.LinkedList) NetAddress(alluxio.grpc.NetAddress) HostAndPort(com.google.common.net.HostAndPort) RaftProtos(org.apache.ratis.proto.RaftProtos) GroupInfoReply(org.apache.ratis.protocol.GroupInfoReply)

Example 33 with UnavailableException

use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.

the class MasterJournalContext method waitForJournalFlush.

/**
 * Waits for the flush counter to be flushed to the journal. If the counter is
 * {@link #INVALID_FLUSH_COUNTER}, this is a noop.
 */
private void waitForJournalFlush() throws UnavailableException {
    if (mFlushCounter == INVALID_FLUSH_COUNTER) {
        // Check this before the precondition.
        return;
    }
    RetryPolicy retry = new TimeoutRetry(FLUSH_RETRY_TIMEOUT_MS, FLUSH_RETRY_INTERVAL_MS);
    while (retry.attempt()) {
        try {
            mAsyncJournalWriter.flush(mFlushCounter);
            return;
        } catch (NotLeaderException | JournalClosedException e) {
            throw new UnavailableException(String.format("Failed to complete request: %s", e.getMessage()), e);
        } catch (AlluxioStatusException e) {
            // written already
            if (e.getStatus().equals(Status.CANCELLED)) {
                LOG.warn("Journal flush interrupted because the RPC was cancelled. ", e);
            } else {
                LOG.warn("Journal flush failed. retrying...", e);
            }
        } catch (IOException e) {
            if (e instanceof AlluxioStatusException && ((AlluxioStatusException) e).getStatusCode() == Status.Code.CANCELLED) {
                throw new UnavailableException(String.format("Failed to complete request: %s", e.getMessage()), e);
            }
            LOG.warn("Journal flush failed. retrying...", e);
        } catch (Throwable e) {
            ProcessUtils.fatalError(LOG, e, "Journal flush failed");
        }
    }
    ProcessUtils.fatalError(LOG, "Journal flush failed after %d attempts", retry.getAttemptCount());
}
Also used : NotLeaderException(org.apache.ratis.protocol.exceptions.NotLeaderException) JournalClosedException(alluxio.exception.JournalClosedException) UnavailableException(alluxio.exception.status.UnavailableException) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) TimeoutRetry(alluxio.retry.TimeoutRetry) IOException(java.io.IOException) RetryPolicy(alluxio.retry.RetryPolicy)

Example 34 with UnavailableException

use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.

the class ReplicationChecker method check.

private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode) throws InterruptedException {
    Set<Long> processedFileIds = new HashSet<>();
    for (long inodeId : inodes) {
        if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
            return processedFileIds;
        }
        if (mActiveJobToInodeID.containsValue(inodeId)) {
            continue;
        }
        Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
        // Throw if interrupted.
        if (Thread.interrupted()) {
            throw new InterruptedException("ReplicationChecker interrupted.");
        }
        // locking the entire path but just the inode file since this access is read-only.
        try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
            InodeFile file = inodePath.getInodeFile();
            for (long blockId : file.getBlockIds()) {
                BlockInfo blockInfo = null;
                try {
                    blockInfo = mBlockMaster.getBlockInfo(blockId);
                } catch (BlockInfoException e) {
                // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
                } catch (UnavailableException e) {
                    // The block master is not available, wait for the next heartbeat
                    LOG.warn("The block master is not available: {}", e.toString());
                    return processedFileIds;
                }
                int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
                switch(mode) {
                    case EVICT:
                        int maxReplicas = file.getReplicationMax();
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > maxReplicas) {
                            maxReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas > maxReplicas) {
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, currentReplicas - maxReplicas));
                        }
                        break;
                    case REPLICATE:
                        int minReplicas = file.getReplicationMin();
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > minReplicas) {
                            minReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas < minReplicas) {
                            // if this file is not persisted and block master thinks it is lost, no effort made
                            if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                                continue;
                            }
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, minReplicas - currentReplicas));
                        }
                        break;
                    default:
                        LOG.warn("Unexpected replication mode {}.", mode);
                }
            }
        } catch (FileDoesNotExistException e) {
            LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
        }
        for (Triple<AlluxioURI, Long, Integer> entry : requests) {
            AlluxioURI uri = entry.getLeft();
            long blockId = entry.getMiddle();
            int numReplicas = entry.getRight();
            try {
                long jobId;
                switch(mode) {
                    case EVICT:
                        jobId = handler.evict(uri, blockId, numReplicas);
                        break;
                    case REPLICATE:
                        jobId = handler.replicate(uri, blockId, numReplicas);
                        break;
                    default:
                        throw new RuntimeException(String.format("Unexpected replication mode {}.", mode));
                }
                processedFileIds.add(inodeId);
                mActiveJobToInodeID.put(jobId, inodeId);
            } catch (JobDoesNotExistException | ResourceExhaustedException e) {
                LOG.warn("The job service is busy, will retry later. {}", e.toString());
                return processedFileIds;
            } catch (UnavailableException e) {
                LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
                return processedFileIds;
            } catch (Exception e) {
                SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={}," + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
                LOG.debug("Job service unexpected exception: ", e);
            }
        }
    }
    return processedFileIds;
}
Also used : FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) BlockInfoException(alluxio.exception.BlockInfoException) InodeFile(alluxio.master.file.meta.InodeFile) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfoException(alluxio.exception.BlockInfoException) IOException(java.io.IOException) FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) Triple(org.apache.commons.lang3.tuple.Triple) ImmutableTriple(org.apache.commons.lang3.tuple.ImmutableTriple) LockedInodePath(alluxio.master.file.meta.LockedInodePath) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfo(alluxio.wire.BlockInfo) HashSet(java.util.HashSet) AlluxioURI(alluxio.AlluxioURI)

Example 35 with UnavailableException

use of alluxio.exception.status.UnavailableException in project alluxio by Alluxio.

the class ReplicationChecker method checkMisreplicated.

private void checkMisreplicated(Set<Long> inodes, ReplicationHandler handler) throws InterruptedException {
    for (long inodeId : inodes) {
        if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
            return;
        }
        if (mActiveJobToInodeID.containsValue(inodeId)) {
            continue;
        }
        // Throw if interrupted.
        if (Thread.interrupted()) {
            throw new InterruptedException("ReplicationChecker interrupted.");
        }
        try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
            InodeFile file = inodePath.getInodeFile();
            for (long blockId : file.getBlockIds()) {
                BlockInfo blockInfo = null;
                try {
                    blockInfo = mBlockMaster.getBlockInfo(blockId);
                } catch (BlockInfoException e) {
                // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
                } catch (UnavailableException e) {
                    // The block master is not available, wait for the next heartbeat
                    LOG.warn("The block master is not available: {}", e.toString());
                    return;
                }
                if (blockInfo == null) {
                    // no block info available, we simply log and return;
                    LOG.warn("Block info is null");
                    return;
                }
                for (Map.Entry<String, String> entry : findMisplacedBlock(file, blockInfo).entrySet()) {
                    try {
                        final long jobId = handler.migrate(inodePath.getUri(), blockId, entry.getKey(), entry.getValue());
                        mActiveJobToInodeID.put(jobId, inodeId);
                    } catch (Exception e) {
                        LOG.warn("Unexpected exception encountered when starting a migration job (uri={}," + " block ID={}, workerHost= {}) : {}", inodePath.getUri(), blockId, entry.getKey(), e.toString());
                        LOG.debug("Exception: ", e);
                    }
                }
            }
        } catch (FileDoesNotExistException e) {
            LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
        }
    }
}
Also used : LockedInodePath(alluxio.master.file.meta.LockedInodePath) FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) BlockInfo(alluxio.wire.BlockInfo) UnavailableException(alluxio.exception.status.UnavailableException) BlockInfoException(alluxio.exception.BlockInfoException) InodeFile(alluxio.master.file.meta.InodeFile) HashMap(java.util.HashMap) Map(java.util.Map) HashBiMap(com.google.common.collect.HashBiMap) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfoException(alluxio.exception.BlockInfoException) IOException(java.io.IOException) FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException)

Aggregations

UnavailableException (alluxio.exception.status.UnavailableException)58 IOException (java.io.IOException)25 Test (org.junit.Test)14 ArrayList (java.util.ArrayList)10 AlluxioURI (alluxio.AlluxioURI)9 AlluxioStatusException (alluxio.exception.status.AlluxioStatusException)9 NotFoundException (alluxio.exception.status.NotFoundException)9 InetSocketAddress (java.net.InetSocketAddress)9 BlockInfo (alluxio.wire.BlockInfo)8 FileDoesNotExistException (alluxio.exception.FileDoesNotExistException)7 WorkerNetAddress (alluxio.wire.WorkerNetAddress)7 WorkerInfo (alluxio.wire.WorkerInfo)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 Set (java.util.Set)6 RetryPolicy (alluxio.retry.RetryPolicy)5 TimeoutException (java.util.concurrent.TimeoutException)5 BlockInfoException (alluxio.exception.BlockInfoException)4 ExceptionMessage (alluxio.exception.ExceptionMessage)4 InodeFile (alluxio.master.file.meta.InodeFile)4