Search in sources :

Example 1 with RetryableHazelcastException

use of com.hazelcast.spi.exception.RetryableHazelcastException in project hazelcast-jet by hazelcast.

the class JobExecutionService method verifyClusterInformation.

/**
 * Checks that the local node's view of the cluster is compatible with the
 * coordinator's view before an execution is initialized.
 * <ul><li>
 *     If the coordinator is not the master known locally, {@code failIfNotRunning()}
 *     is invoked and an {@link IllegalStateException} is thrown.
 * </li><li>
 *     If the coordinator's member list version is larger than the local one, a
 *     {@code TriggerMemberListPublishOp} is sent to the master and a
 *     {@code RetryableHazelcastException} is thrown.
 * </li><li>
 *     If any participant cannot be found in the local member list, a
 *     {@code TopologyChangedException} is thrown.
 * </li><li>
 *     If the local address is not among the participants, an
 *     {@link IllegalArgumentException} is thrown.
 * </li></ul>
 *
 * @param jobId                        id of the job being initialized
 * @param executionId                  id of the execution being initialized
 * @param coordinator                  address of the member that sent the init request
 * @param coordinatorMemberListVersion member list version observed by the coordinator
 * @param participants                 members expected to take part in the execution
 */
private void verifyClusterInformation(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants) {
    Address master = nodeEngine.getMasterAddress();
    if (!coordinator.equals(master)) {
        failIfNotRunning();
        throw new IllegalStateException(String.format("Coordinator %s cannot initialize %s. Reason: it is not the master, the master is %s", coordinator, jobAndExecutionId(jobId, executionId), master));
    }

    MembershipManager members = ((ClusterServiceImpl) nodeEngine.getClusterService()).getMembershipManager();
    int localVersion = members.getMemberListVersion();
    Address localAddress = nodeEngine.getThisAddress();

    if (coordinatorMemberListVersion > localVersion) {
        // The master itself can never be behind the version it handed out.
        assert !master.equals(localAddress) : String.format("Local node: %s is master but InitOperation has coordinator member list version: %s larger than " + " local member list version: %s", localAddress, coordinatorMemberListVersion, localVersion);
        // Ask the master to publish its member list, then let the caller retry.
        nodeEngine.getOperationService().send(new TriggerMemberListPublishOp(), master);
        throw new RetryableHazelcastException(String.format("Cannot initialize %s for coordinator %s, local member list version %s," + " coordinator member list version %s", jobAndExecutionId(jobId, executionId), coordinator, localVersion, coordinatorMemberListVersion));
    }

    boolean localIsParticipant = false;
    for (MemberInfo candidate : participants) {
        Address candidateAddress = candidate.getAddress();
        localIsParticipant |= candidateAddress.equals(localAddress);
        if (members.getMember(candidateAddress, candidate.getUuid()) == null) {
            throw new TopologyChangedException(String.format("Cannot initialize %s for coordinator %s: participant %s not found in local member list." + " Local member list version: %s, coordinator member list version: %s", jobAndExecutionId(jobId, executionId), coordinator, candidate, localVersion, coordinatorMemberListVersion));
        }
    }

    if (localIsParticipant) {
        return;
    }
    throw new IllegalArgumentException(String.format("Cannot initialize %s since member %s is not in participants: %s", jobAndExecutionId(jobId, executionId), localAddress, participants));
}
Also used : Address(com.hazelcast.nio.Address) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) MembershipManager(com.hazelcast.internal.cluster.impl.MembershipManager) TriggerMemberListPublishOp(com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 2 with RetryableHazelcastException

use of com.hazelcast.spi.exception.RetryableHazelcastException in project hazelcast-jet by hazelcast.

the class JobExecutionService method initExecution.

/**
 * Initiates the given execution if the local node accepts the coordinator
 * as its master, and has an up-to-date member list information.
 * <ul><li>
 *     If the local node has a stale member list, it retries the init operation
 *     until it receives the new member list from the master.
 * </li><li>
 *     If the local node detects that the member list changed after the init
 *     operation is sent but before executed, then it sends a graceful failure
 *     so that the job init will be retried properly.
 * </li><li>
 *     If there is an already ongoing execution for the given job, then the
 *     init execution is retried.
 * </li></ul>
 *
 * @param jobId                        id of the job to initialize
 * @param executionId                  id of the execution to initialize
 * @param coordinator                  address of the member coordinating the execution
 * @param coordinatorMemberListVersion member list version observed by the coordinator
 * @param participants                 members taking part in the execution
 * @param plan                         execution plan used to initialize the new execution context
 * @throws IllegalStateException       if an execution context is already registered under
 *                                     {@code executionId}
 * @throws RetryableHazelcastException if the job id is already registered locally but no
 *                                     context exists under {@code executionId}
 */
public void initExecution(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants, ExecutionPlan plan) {
    verifyClusterInformation(jobId, executionId, coordinator, coordinatorMemberListVersion, participants);
    failIfNotRunning();
    if (!executionContextJobIds.add(jobId)) {
        // The job id is already registered on this member.
        ExecutionContext current = executionContexts.get(executionId);
        if (current != null) {
            throw new IllegalStateException(String.format("Execution context for %s for coordinator %s already exists for coordinator %s", jobAndExecutionId(jobId, executionId), coordinator, current.coordinator()));
        }
        executionContexts.values().stream().filter(e -> e.jobId() == jobId).forEach(e -> logger.fine(String.format("Execution context for %s for coordinator %s already exists" + " with local execution %s for coordinator %s", jobAndExecutionId(jobId, executionId), coordinator, idToString(e.jobId()), e.coordinator())));
        // Carry the context in the exception itself: the details above are only logged at
        // fine level, which made a message-less retry hard to diagnose.
        throw new RetryableHazelcastException(String.format("Cannot initialize %s for coordinator %s: an execution of the same job is already registered on this member", jobAndExecutionId(jobId, executionId), coordinator));
    }
    Set<Address> addresses = participants.stream().map(MemberInfo::getAddress).collect(toSet());
    ExecutionContext created = new ExecutionContext(nodeEngine, taskletExecutionService, jobId, executionId, coordinator, addresses);
    try {
        created.initialize(plan);
    } finally {
        // The context is registered even when initialize() throws.
        // NOTE(review): presumably so that a later completion/cleanup step can find it - confirm.
        executionContexts.put(executionId, created);
    }
    logger.info("Execution plan for " + jobAndExecutionId(jobId, executionId) + " initialized");
}
Also used : HazelcastInstanceNotActiveException(com.hazelcast.core.HazelcastInstanceNotActiveException) DistributedFunctions.entryValue(com.hazelcast.jet.function.DistributedFunctions.entryValue) MembershipManager(com.hazelcast.internal.cluster.impl.MembershipManager) Address(com.hazelcast.nio.Address) SenderTasklet(com.hazelcast.jet.impl.execution.SenderTasklet) Util.jobAndExecutionId(com.hazelcast.jet.impl.util.Util.jobAndExecutionId) Supplier(java.util.function.Supplier) ConcurrentMap(java.util.concurrent.ConcurrentMap) Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) Collections.newSetFromMap(java.util.Collections.newSetFromMap) Collectors.toMap(java.util.stream.Collectors.toMap) ILogger(com.hazelcast.logging.ILogger) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) TaskletExecutionService(com.hazelcast.jet.impl.execution.TaskletExecutionService) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Collectors.toSet(java.util.stream.Collectors.toSet) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) Operation(com.hazelcast.spi.Operation) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) PrivilegedAction(java.security.PrivilegedAction) DistributedFunctions.entryKey(com.hazelcast.jet.function.DistributedFunctions.entryKey) TriggerMemberListPublishOp(com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) JetClassLoader(com.hazelcast.jet.impl.deployment.JetClassLoader) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) AccessController(java.security.AccessController) ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) 
RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) Address(com.hazelcast.nio.Address)

Example 3 with RetryableHazelcastException

use of com.hazelcast.spi.exception.RetryableHazelcastException in project orientdb by orientechnologies.

the class OHazelcastPlugin method entryUpdated.

/**
 * Reacts to an updated entry of the distributed configuration map, dispatching on the key prefix:
 * <ul>
 * <li>{@code CONFIG_NODE_PREFIX}: refreshes the active-node bookkeeping for the node named in the
 * received configuration document;</li>
 * <li>{@code CONFIG_DBSTATUS_PREFIX}: propagates the database status change and installs the
 * database locally when a remote member reports it ONLINE while the local status is
 * NOT_AVAILABLE;</li>
 * <li>{@code CONFIG_REGISTEREDNODES}: reloads the registered nodes;</li>
 * <li>{@code CONFIG_LOCKMANAGER}: updates the lock manager server.</li>
 * </ul>
 * Events are ignored when Hazelcast is not running or when the originating member cannot be
 * resolved to a node name.
 *
 * @param iEvent the update event; its key selects the handling and its value carries the payload
 */
@Override
public void entryUpdated(final EntryEvent<String, Object> iEvent) {
    if (hazelcastInstance == null || !hazelcastInstance.getLifecycleService().isRunning())
        return;
    try {
        final String key = iEvent.getKey();
        final String eventNodeName = getNodeName(iEvent.getMember());
        if ("?".equals(eventNodeName))
            // MOM ALWAYS SAYS: DON'T ACCEPT CHANGES FROM STRANGERS NODES
            return;
        if (key.startsWith(CONFIG_NODE_PREFIX)) {
            ODistributedServerLog.debug(this, nodeName, eventNodeName, DIRECTION.NONE, "Updated node configuration id=%s name=%s", iEvent.getMember(), eventNodeName);
            final ODocument cfg = (ODocument) iEvent.getValue();
            // Read the configured node name once instead of on every use.
            final String cfgNodeName = (String) cfg.field("name");
            if (!activeNodes.containsKey(cfgNodeName))
                updateLastClusterChange();
            activeNodes.put(cfgNodeName, (Member) iEvent.getMember());
            if (iEvent.getMember().getUuid() != null) {
                activeNodesNamesByUuid.put(iEvent.getMember().getUuid(), cfgNodeName);
                activeNodesUuidByName.put(cfgNodeName, iEvent.getMember().getUuid());
            }
            dumpServersStatus();
        } else if (key.startsWith(CONFIG_DBSTATUS_PREFIX)) {
            ODistributedServerLog.info(this, nodeName, eventNodeName, DIRECTION.IN, "Received updated status %s=%s", key.substring(CONFIG_DBSTATUS_PREFIX.length()), iEvent.getValue());
            // CALL DATABASE EVENT
            // The key suffix has the form "<node>.<database>".
            final String dbNode = key.substring(CONFIG_DBSTATUS_PREFIX.length());
            final int separatorPos = dbNode.indexOf(".");
            // Named dbNodeName so it does not shadow the nodeName field used in the log calls.
            final String dbNodeName = dbNode.substring(0, separatorPos);
            final String databaseName = dbNode.substring(separatorPos + 1);
            onDatabaseEvent(dbNodeName, databaseName, (DB_STATUS) iEvent.getValue());
            invokeOnDatabaseStatusChange(dbNodeName, databaseName, (DB_STATUS) iEvent.getValue());
            if (!iEvent.getMember().equals(hazelcastInstance.getCluster().getLocalMember()) && DB_STATUS.ONLINE.equals(iEvent.getValue())) {
                final DB_STATUS s = getDatabaseStatus(getLocalNodeName(), databaseName);
                if (s == DB_STATUS.NOT_AVAILABLE) {
                    // INSTALL THE DATABASE
                    installDatabase(false, databaseName, false, OGlobalConfiguration.DISTRIBUTED_BACKUP_TRY_INCREMENTAL_FIRST.getValueAsBoolean());
                }
            }
        } else if (key.startsWith(CONFIG_REGISTEREDNODES)) {
            ODistributedServerLog.info(this, nodeName, eventNodeName, DIRECTION.IN, "Received updated about registered nodes");
            reloadRegisteredNodes((String) iEvent.getValue());
        } else if (key.startsWith(CONFIG_LOCKMANAGER)) {
            getLockManagerRequester().setServer((String) iEvent.getValue());
        }
    } catch (HazelcastInstanceNotActiveException e) {
        // Pass the exception along so the cause and stack trace are not lost.
        OLogManager.instance().error(this, "Hazelcast is not running", e);
    } catch (RetryableHazelcastException e) {
        // Same message as before, but the logged exception now shows the actual retryable cause.
        OLogManager.instance().error(this, "Hazelcast is not running", e);
    }
}
Also used : RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) ODocument(com.orientechnologies.orient.core.record.impl.ODocument)

Example 4 with RetryableHazelcastException

use of com.hazelcast.spi.exception.RetryableHazelcastException in project hazelcast by hazelcast.

the class BaseMigrationOperation method verifyPartitionOwner.

/**
 * Ensures the partition returned by {@code getPartition()} is owned by the local member.
 *
 * @throws RetryableHazelcastException if the partition has no owner replica, or if the
 *                                     owner replica is not identical to the local member
 */
private void verifyPartitionOwner() {
    final PartitionReplica ownerReplica = getPartition().getOwnerReplicaOrNull();
    if (ownerReplica == null) {
        throw new RetryableHazelcastException("Cannot migrate at the moment! Owner of the partition is null => " + migrationInfo);
    }

    final boolean ownedLocally = ownerReplica.isIdentical(getNodeEngine().getLocalMember());
    if (ownedLocally) {
        return;
    }
    throw new RetryableHazelcastException("Owner of partition is not this node! => " + toString());
}
Also used : RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) PartitionReplica(com.hazelcast.internal.partition.PartitionReplica) InternalPartition(com.hazelcast.internal.partition.InternalPartition)

Example 5 with RetryableHazelcastException

use of com.hazelcast.spi.exception.RetryableHazelcastException in project hazelcast by hazelcast.

the class FetchPartitionStateOperation method beforeRun.

/**
 * Validates the caller before the operation runs: the caller must be the master address
 * known to the node engine and must also be the master known to the partition service.
 *
 * @throws IllegalStateException       if the caller is not the master address known locally
 * @throws RetryableHazelcastException if the partition service does not consider the caller
 *                                     the master
 */
@Override
public void beforeRun() {
    final Address requester = getCallerAddress();
    final Address knownMaster = getNodeEngine().getMasterAddress();
    final ILogger log = getLogger();

    if (!requester.equals(knownMaster)) {
        final String reason = requester + " requested our partition table but it's not our known master. " + "Master: " + knownMaster;
        log.warning(reason);
        // Master address should be already updated after mastership claim.
        throw new IllegalStateException(reason);
    }

    final InternalPartitionServiceImpl partitionService = getService();
    if (partitionService.isMemberMaster(requester)) {
        return;
    }

    final String reason = requester + " requested our partition table but it's not the master known by migration system.";
    log.warning(reason);
    // It will learn eventually.
    throw new RetryableHazelcastException(reason);
}
Also used : Address(com.hazelcast.cluster.Address) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) InternalPartitionServiceImpl(com.hazelcast.internal.partition.impl.InternalPartitionServiceImpl) ILogger(com.hazelcast.logging.ILogger)

Aggregations

RetryableHazelcastException (com.hazelcast.spi.exception.RetryableHazelcastException)22 InternalPartitionServiceImpl (com.hazelcast.internal.partition.impl.InternalPartitionServiceImpl)6 Address (com.hazelcast.cluster.Address)5 TopologyChangedException (com.hazelcast.jet.core.TopologyChangedException)4 HazelcastInstanceNotActiveException (com.hazelcast.core.HazelcastInstanceNotActiveException)3 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)3 ClusterServiceImpl (com.hazelcast.internal.cluster.impl.ClusterServiceImpl)3 MembershipManager (com.hazelcast.internal.cluster.impl.MembershipManager)3 TriggerMemberListPublishOp (com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp)3 ILogger (com.hazelcast.logging.ILogger)3 Address (com.hazelcast.nio.Address)3 NodeEngineImpl (com.hazelcast.spi.impl.NodeEngineImpl)3 ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest)3 MemberImpl (com.hazelcast.cluster.impl.MemberImpl)2 MemberLeftException (com.hazelcast.core.MemberLeftException)2 MigrationInfo (com.hazelcast.internal.partition.MigrationInfo)2 Predicate (com.hazelcast.query.Predicate)2 NodeEngine (com.hazelcast.spi.impl.NodeEngine)2 QuickTest (com.hazelcast.test.annotation.QuickTest)2 Test (org.junit.Test)2