Examples with TopologyChangedException - com.hazelcast.jet.core.TopologyChangedException

Example 6 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.

the class MasterJobContext method getErrorFromResponses.

/**
 * <ul>
 * <li>Returns {@code null} if there is no failure
 * <li>Returns a {@link CancellationException} if the job is cancelled
 *     forcefully.
 * <li>Returns a {@link JobTerminateRequestedException} if the current
 *     execution is stopped due to a requested termination, except for
 *     CANCEL_GRACEFUL, in which case CancellationException is returned.
 * <li>If there is at least one user failure, such as an exception in user
 *     code (restartable or not), then returns that failure.
 * <li>Otherwise, the failure is because a job participant has left the
 *     cluster. In that case, it returns {@code TopologyChangeException} so
 *     that the job will be restarted
 * </ul>
 */
private Throwable getErrorFromResponses(String opName, Collection<Map.Entry<MemberInfo, Object>> responses) {
    if (isCancelled()) {
        logger.fine(mc.jobIdString() + " to be cancelled after " + opName);
        return new CancellationException();
    }
    Map<Boolean, List<Entry<Address, Object>>> grouped = responses.stream().map(en -> entry(en.getKey().getAddress(), en.getValue())).collect(partitioningBy(e1 -> e1.getValue() instanceof Throwable));
    int successfulMembersCount = grouped.getOrDefault(false, emptyList()).size();
    if (successfulMembersCount == mc.executionPlanMap().size()) {
        logger.fine(opName + " of " + mc.jobIdString() + " was successful");
        return null;
    }
    List<Entry<Address, Object>> failures = grouped.getOrDefault(true, emptyList());
    if (!failures.isEmpty()) {
        logger.fine(opName + " of " + mc.jobIdString() + " has failures: " + failures);
    }
    // other exceptions, ignore this and handle the other exception.
    if (failures.stream().allMatch(entry -> entry.getValue() instanceof TerminatedWithSnapshotException)) {
        assert opName.equals("Execution") : "opName is '" + opName + "', expected 'Execution'";
        logger.fine(opName + " of " + mc.jobIdString() + " terminated after a terminal snapshot");
        TerminationMode mode = requestedTerminationMode;
        assert mode != null && mode.isWithTerminalSnapshot() : "mode=" + mode;
        return mode == CANCEL_GRACEFUL ? new CancellationException() : new JobTerminateRequestedException(mode);
    }
    // If all exceptions are of certain type, treat it as TopologyChangedException
    Map<Boolean, List<Entry<Address, Object>>> splitFailures = failures.stream().collect(Collectors.partitioningBy(e -> e.getValue() instanceof CancellationException || e.getValue() instanceof TerminatedWithSnapshotException || isTopologyException((Throwable) e.getValue())));
    List<Entry<Address, Object>> topologyFailures = splitFailures.getOrDefault(true, emptyList());
    List<Entry<Address, Object>> otherFailures = splitFailures.getOrDefault(false, emptyList());
    if (!otherFailures.isEmpty()) {
        return (Throwable) otherFailures.get(0).getValue();
    } else {
        return new TopologyChangedException("Causes from members: " + topologyFailures);
    }
}

Also used : Address(com.hazelcast.cluster.Address) SUSPEND(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.SUSPEND) NOT_RUNNING(com.hazelcast.jet.core.JobStatus.NOT_RUNNING) GetLocalJobMetricsOperation(com.hazelcast.jet.impl.operation.GetLocalJobMetricsOperation) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) ExceptionUtil.isTopologyException(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologyException) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) RESTART(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.RESTART) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) TerminatedWithSnapshotException(com.hazelcast.jet.impl.exception.TerminatedWithSnapshotException) Collectors.toMap(java.util.stream.Collectors.toMap) Functions.entryKey(com.hazelcast.function.Functions.entryKey) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) JobMetrics(com.hazelcast.jet.core.metrics.JobMetrics) CancellationException(java.util.concurrent.CancellationException) CANCEL_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_GRACEFUL) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) CANCEL_FORCEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL) Objects(java.util.Objects) Util(com.hazelcast.jet.impl.util.Util) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MetricNames(com.hazelcast.jet.core.metrics.MetricNames) Entry(java.util.Map.Entry) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) JetDisabledException(com.hazelcast.jet.impl.exception.JetDisabledException) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) TerminateExecutionOperation(com.hazelcast.jet.impl.operation.TerminateExecutionOperation) ExceptionUtil.isRestartableException(com.hazelcast.jet.impl.util.ExceptionUtil.isRestartableException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LoggingUtil.logFinest(com.hazelcast.jet.impl.util.LoggingUtil.logFinest) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionService(com.hazelcast.spi.impl.executionservice.ExecutionService) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Util.formatJobDuration(com.hazelcast.jet.impl.util.Util.formatJobDuration) ActionAfterTerminate(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) JetException(com.hazelcast.jet.JetException) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) COORDINATOR(com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR) ILogger(com.hazelcast.logging.ILogger) SnapshotValidator.validateSnapshot(com.hazelcast.jet.impl.SnapshotValidator.validateSnapshot) ExceptionUtil.rethrow(com.hazelcast.jet.impl.util.ExceptionUtil.rethrow) Operation(com.hazelcast.spi.impl.operationservice.Operation) Util.entry(com.hazelcast.jet.Util.entry) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) BiConsumer(java.util.function.BiConsumer) MembersView(com.hazelcast.internal.cluster.impl.MembersView) LocalMemberResetException(com.hazelcast.core.LocalMemberResetException) RESTART_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.RESTART_GRACEFUL) Edge(com.hazelcast.jet.core.Edge) Version(com.hazelcast.version.Version) EXPORTED_SNAPSHOTS_PREFIX(com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX) Nonnull(javax.annotation.Nonnull) Tuple2(com.hazelcast.jet.datamodel.Tuple2) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) Measurement(com.hazelcast.jet.core.metrics.Measurement) SUSPENDED_EXPORTING_SNAPSHOT(com.hazelcast.jet.core.JobStatus.SUSPENDED_EXPORTING_SNAPSHOT) Util.toList(com.hazelcast.jet.impl.util.Util.toList) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) MetricTags(com.hazelcast.jet.core.metrics.MetricTags) NONE(com.hazelcast.jet.config.ProcessingGuarantee.NONE) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Tuple2.tuple2(com.hazelcast.jet.datamodel.Tuple2.tuple2) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Collections(java.util.Collections) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) TerminatedWithSnapshotException(com.hazelcast.jet.impl.exception.TerminatedWithSnapshotException) Address(com.hazelcast.cluster.Address) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) Entry(java.util.Map.Entry) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ArrayList(java.util.ArrayList) Util.toList(com.hazelcast.jet.impl.util.Util.toList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException)

Example 7 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.

the class ExecutionPlan method initialize.

/**
 * A method called on the members as part of the InitExecutionOperation.
 * Creates tasklets, inboxes/outboxes and connects these to make them ready
 * for a later StartExecutionOperation.
 */
public void initialize(NodeEngineImpl nodeEngine, long jobId, long executionId, @Nonnull SnapshotContext snapshotContext, ConcurrentHashMap<String, File> tempDirectories, InternalSerializationService jobSerializationService) {
    this.nodeEngine = nodeEngine;
    this.jobClassLoaderService = ((JetServiceBackend) nodeEngine.getService(JetServiceBackend.SERVICE_NAME)).getJobClassLoaderService();
    this.executionId = executionId;
    initProcSuppliers(jobId, tempDirectories, jobSerializationService);
    initDag(jobSerializationService);
    this.ptionArrgmt = new PartitionArrangement(partitionAssignment, nodeEngine.getThisAddress());
    Set<Integer> higherPriorityVertices = VertexDef.getHigherPriorityVertices(vertices);
    for (Address destAddr : remoteMembers.get()) {
        Connection conn = getMemberConnection(nodeEngine, destAddr);
        if (conn == null) {
            throw new TopologyChangedException("no connection to job participant: " + destAddr);
        }
        memberConnections.put(destAddr, conn);
    }
    for (VertexDef vertex : vertices) {
        ClassLoader processorClassLoader = isLightJob ? null : jobClassLoaderService.getProcessorClassLoader(jobId, vertex.name());
        Collection<? extends Processor> processors = doWithClassLoader(processorClassLoader, () -> createProcessors(vertex, vertex.localParallelism()));
        String jobPrefix = prefix(jobConfig.getName(), jobId, vertex.name());
        // create StoreSnapshotTasklet and the queues to it
        ConcurrentConveyor<Object> ssConveyor = null;
        if (!isLightJob) {
            // Note that we create the snapshot queues for all non-light jobs, even if they don't have
            // processing guarantee enabled, because in EE one can request a snapshot also for
            // non-snapshotted jobs.
            @SuppressWarnings("unchecked") QueuedPipe<Object>[] snapshotQueues = new QueuedPipe[vertex.localParallelism()];
            Arrays.setAll(snapshotQueues, i -> new OneToOneConcurrentArrayQueue<>(SNAPSHOT_QUEUE_SIZE));
            ssConveyor = ConcurrentConveyor.concurrentConveyor(null, snapshotQueues);
            ILogger storeSnapshotLogger = prefixedLogger(nodeEngine.getLogger(StoreSnapshotTasklet.class), jobPrefix);
            StoreSnapshotTasklet ssTasklet = new StoreSnapshotTasklet(snapshotContext, ConcurrentInboundEdgeStream.create(ssConveyor, 0, 0, true, jobPrefix + "/ssFrom", null), new AsyncSnapshotWriterImpl(nodeEngine, snapshotContext, vertex.name(), memberIndex, memberCount, jobSerializationService), storeSnapshotLogger, vertex.name(), higherPriorityVertices.contains(vertex.vertexId()));
            tasklets.add(ssTasklet);
        }
        int localProcessorIdx = 0;
        for (Processor processor : processors) {
            int globalProcessorIndex = memberIndex * vertex.localParallelism() + localProcessorIdx;
            String processorPrefix = prefix(jobConfig.getName(), jobId, vertex.name(), globalProcessorIndex);
            ILogger logger = prefixedLogger(nodeEngine.getLogger(processor.getClass()), processorPrefix);
            ProcCtx context = new ProcCtx(nodeEngine, jobId, executionId, getJobConfig(), logger, vertex.name(), localProcessorIdx, globalProcessorIndex, isLightJob, partitionAssignment, vertex.localParallelism(), memberIndex, memberCount, tempDirectories, jobSerializationService, subject, processorClassLoader);
            // createOutboundEdgeStreams() populates localConveyorMap and edgeSenderConveyorMap.
            // Also populates instance fields: senderMap, receiverMap, tasklets.
            List<OutboundEdgeStream> outboundStreams = createOutboundEdgeStreams(vertex, localProcessorIdx, jobPrefix, jobSerializationService);
            List<InboundEdgeStream> inboundStreams = createInboundEdgeStreams(vertex, localProcessorIdx, jobPrefix, globalProcessorIndex);
            OutboundCollector snapshotCollector = ssConveyor == null ? null : new ConveyorCollector(ssConveyor, localProcessorIdx, null);
            // vertices which are only used for snapshot restore will not be marked as "source=true" in metrics
            // also do not consider snapshot restore edges for determining source tag
            boolean isSource = vertex.inboundEdges().stream().allMatch(EdgeDef::isSnapshotRestoreEdge) && !vertex.isSnapshotVertex();
            ProcessorTasklet processorTasklet = new ProcessorTasklet(context, nodeEngine.getExecutionService().getExecutor(TASKLET_INIT_CLOSE_EXECUTOR_NAME), jobSerializationService, processor, inboundStreams, outboundStreams, snapshotContext, snapshotCollector, isSource);
            tasklets.add(processorTasklet);
            this.processors.add(processor);
            localProcessorIdx++;
        }
    }
    List<ReceiverTasklet> allReceivers = receiverMap.values().stream().flatMap(o -> o.values().stream()).flatMap(a -> a.values().stream()).collect(toList());
    tasklets.addAll(allReceivers);
}

Also used : Address(com.hazelcast.cluster.Address) ImdgUtil.getMemberConnection(com.hazelcast.jet.impl.util.ImdgUtil.getMemberConnection) Arrays(java.util.Arrays) SnapshotContext(com.hazelcast.jet.impl.execution.SnapshotContext) Collections.unmodifiableList(java.util.Collections.unmodifiableList) ConcurrentConveyor.concurrentConveyor(com.hazelcast.internal.util.concurrent.ConcurrentConveyor.concurrentConveyor) Processor(com.hazelcast.jet.core.Processor) OutboundCollector.compositeCollector(com.hazelcast.jet.impl.execution.OutboundCollector.compositeCollector) ObjectWithPartitionId(com.hazelcast.jet.impl.util.ObjectWithPartitionId) ProcessorTasklet(com.hazelcast.jet.impl.execution.ProcessorTasklet) ImdgUtil(com.hazelcast.jet.impl.util.ImdgUtil) Collectors.toMap(java.util.stream.Collectors.toMap) ConcurrentConveyor(com.hazelcast.internal.util.concurrent.ConcurrentConveyor) Map(java.util.Map) Util.memoize(com.hazelcast.jet.impl.util.Util.memoize) SerializationServiceAware(com.hazelcast.internal.serialization.SerializationServiceAware) DISTRIBUTE_TO_ALL(com.hazelcast.jet.core.Edge.DISTRIBUTE_TO_ALL) ObjectDataInput(com.hazelcast.nio.ObjectDataInput) TASKLET_INIT_CLOSE_EXECUTOR_NAME(com.hazelcast.jet.impl.execution.TaskletExecutionService.TASKLET_INIT_CLOSE_EXECUTOR_NAME) InboundEdgeStream(com.hazelcast.jet.impl.execution.InboundEdgeStream) PrefixedLogger.prefix(com.hazelcast.jet.impl.util.PrefixedLogger.prefix) Collection(java.util.Collection) IPartitionService(com.hazelcast.internal.partition.IPartitionService) JobConfig(com.hazelcast.jet.config.JobConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ConcurrentInboundEdgeStream(com.hazelcast.jet.impl.execution.ConcurrentInboundEdgeStream) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) DEFAULT_QUEUE_SIZE(com.hazelcast.jet.config.EdgeConfig.DEFAULT_QUEUE_SIZE) StoreSnapshotTasklet(com.hazelcast.jet.impl.execution.StoreSnapshotTasklet) ObjectDataOutput(com.hazelcast.nio.ObjectDataOutput) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) IntStream(java.util.stream.IntStream) ComparatorEx(com.hazelcast.function.ComparatorEx) IdentifiedDataSerializable(com.hazelcast.nio.serialization.IdentifiedDataSerializable) ImdgUtil.writeList(com.hazelcast.jet.impl.util.ImdgUtil.writeList) OutboundEdgeStream(com.hazelcast.jet.impl.execution.OutboundEdgeStream) RoutingPolicy(com.hazelcast.jet.core.Edge.RoutingPolicy) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) SenderTasklet(com.hazelcast.jet.impl.execution.SenderTasklet) HashMap(java.util.HashMap) Supplier(java.util.function.Supplier) ProcSupplierCtx(com.hazelcast.jet.impl.execution.init.Contexts.ProcSupplierCtx) ArrayList(java.util.ArrayList) PrefixedLogger.prefixedLogger(com.hazelcast.jet.impl.util.PrefixedLogger.prefixedLogger) JetException(com.hazelcast.jet.JetException) ConveyorCollector(com.hazelcast.jet.impl.execution.ConveyorCollector) ReceiverTasklet(com.hazelcast.jet.impl.execution.ReceiverTasklet) ILogger(com.hazelcast.logging.ILogger) InternalSerializationService(com.hazelcast.internal.serialization.InternalSerializationService) Nonnull(javax.annotation.Nonnull) ProcessorSupplier(com.hazelcast.jet.core.ProcessorSupplier) QueuedPipe(com.hazelcast.internal.util.concurrent.QueuedPipe) IntFunction(java.util.function.IntFunction) JetConfig(com.hazelcast.jet.config.JetConfig) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) OneToOneConcurrentArrayQueue(com.hazelcast.internal.util.concurrent.OneToOneConcurrentArrayQueue) Connection(com.hazelcast.internal.nio.Connection) Tasklet(com.hazelcast.jet.impl.execution.Tasklet) ProcCtx(com.hazelcast.jet.impl.execution.init.Contexts.ProcCtx) AsyncSnapshotWriterImpl(com.hazelcast.jet.impl.util.AsyncSnapshotWriterImpl) IOException(java.io.IOException) ConveyorCollectorWithPartition(com.hazelcast.jet.impl.execution.ConveyorCollectorWithPartition) Subject(javax.security.auth.Subject) File(java.io.File) ImdgUtil.readList(com.hazelcast.jet.impl.util.ImdgUtil.readList) Collectors.toList(java.util.stream.Collectors.toList) OutboundCollector(com.hazelcast.jet.impl.execution.OutboundCollector) JobClassLoaderService(com.hazelcast.jet.impl.JobClassLoaderService) ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend) Processor(com.hazelcast.jet.core.Processor) Address(com.hazelcast.cluster.Address) AsyncSnapshotWriterImpl(com.hazelcast.jet.impl.util.AsyncSnapshotWriterImpl) OutboundCollector(com.hazelcast.jet.impl.execution.OutboundCollector) ConveyorCollector(com.hazelcast.jet.impl.execution.ConveyorCollector) ProcCtx(com.hazelcast.jet.impl.execution.init.Contexts.ProcCtx) InboundEdgeStream(com.hazelcast.jet.impl.execution.InboundEdgeStream) ConcurrentInboundEdgeStream(com.hazelcast.jet.impl.execution.ConcurrentInboundEdgeStream) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) ILogger(com.hazelcast.logging.ILogger) StoreSnapshotTasklet(com.hazelcast.jet.impl.execution.StoreSnapshotTasklet) ProcessorTasklet(com.hazelcast.jet.impl.execution.ProcessorTasklet) ImdgUtil.getMemberConnection(com.hazelcast.jet.impl.util.ImdgUtil.getMemberConnection) Connection(com.hazelcast.internal.nio.Connection) OutboundEdgeStream(com.hazelcast.jet.impl.execution.OutboundEdgeStream) ReceiverTasklet(com.hazelcast.jet.impl.execution.ReceiverTasklet) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) QueuedPipe(com.hazelcast.internal.util.concurrent.QueuedPipe)

Example 8 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.

the class JobExecutionService method verifyClusterInformation.

private void verifyClusterInformation(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants) {
    Address masterAddress = nodeEngine.getMasterAddress();
    ClusterServiceImpl clusterService = (ClusterServiceImpl) nodeEngine.getClusterService();
    MembershipManager membershipManager = clusterService.getMembershipManager();
    int localMemberListVersion = membershipManager.getMemberListVersion();
    Address thisAddress = nodeEngine.getThisAddress();
    if (coordinatorMemberListVersion > localMemberListVersion) {
        if (masterAddress == null) {
            // elected or split brain merge will happen).
            throw new RetryableHazelcastException(String.format("Cannot initialize %s for coordinator %s, local member list version %s," + " coordinator member list version %s. And also, since the master address" + " is not known to this member, cannot request a new member list from master.", jobIdAndExecutionId(jobId, executionId), coordinator, localMemberListVersion, coordinatorMemberListVersion));
        }
        assert !masterAddress.equals(thisAddress) : String.format("Local node: %s is master but InitOperation has coordinator member list version: %s larger than " + " local member list version: %s", thisAddress, coordinatorMemberListVersion, localMemberListVersion);
        nodeEngine.getOperationService().send(new TriggerMemberListPublishOp(), masterAddress);
        throw new RetryableHazelcastException(String.format("Cannot initialize %s for coordinator %s, local member list version %s," + " coordinator member list version %s", jobIdAndExecutionId(jobId, executionId), coordinator, localMemberListVersion, coordinatorMemberListVersion));
    }
    // If the participant members can receive the new member list before the
    // coordinator, and we can also get into the
    // "coordinatorMemberListVersion < localMemberListVersion" case. If this
    // situation occurs when a job participant leaves, then the job start will
    // fail. Since the unknown participating member situation couldn't
    // be resolved with retrying the InitExecutionOperation for this
    // case, we do nothing here and let it fail below if some participant
    // isn't found.
    // The job start won't fail if this situation occurs when a new member
    // is added to the cluster, because all job participants are known to the
    // other participating members. The only disadvantage of this is that a
    // newly added member will not be a job participant and partition mapping
    // may not be completely proper in this case.
    boolean isLocalMemberParticipant = false;
    for (MemberInfo participant : participants) {
        if (participant.getAddress().equals(thisAddress)) {
            isLocalMemberParticipant = true;
        }
        if (membershipManager.getMember(participant.getAddress(), participant.getUuid()) == null) {
            throw new TopologyChangedException(String.format("Cannot initialize %s for coordinator %s: participant %s not found in local member list." + " Local member list version: %s, coordinator member list version: %s", jobIdAndExecutionId(jobId, executionId), coordinator, participant, localMemberListVersion, coordinatorMemberListVersion));
        }
    }
    if (!isLocalMemberParticipant) {
        throw new IllegalArgumentException(String.format("Cannot initialize %s since member %s is not in participants: %s", jobIdAndExecutionId(jobId, executionId), thisAddress, participants));
    }
}

Also used : Address(com.hazelcast.cluster.Address) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) MembershipManager(com.hazelcast.internal.cluster.impl.MembershipManager) TriggerMemberListPublishOp(com.hazelcast.internal.cluster.impl.operations.TriggerMemberListPublishOp) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 9 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast by hazelcast.

the class AbstractJetMessageTask method getInvocationBuilder.

@Override
protected InvocationBuilder getInvocationBuilder(Operation operation) {
    Address address;
    if (getLightJobCoordinator() != null) {
        MemberImpl member = nodeEngine.getClusterService().getMember(getLightJobCoordinator());
        if (member == null) {
            throw new TopologyChangedException("Light job coordinator left the cluster");
        }
        address = member.getAddress();
    } else {
        address = nodeEngine.getMasterAddress();
        if (address == null) {
            throw new RetryableHazelcastException("master not yet known");
        }
    }
    return nodeEngine.getOperationService().createInvocationBuilder(JetServiceBackend.SERVICE_NAME, operation, address);
}

Also used : Address(com.hazelcast.cluster.Address) RetryableHazelcastException(com.hazelcast.spi.exception.RetryableHazelcastException) MemberImpl(com.hazelcast.cluster.impl.MemberImpl) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Example 10 with TopologyChangedException

use of com.hazelcast.jet.core.TopologyChangedException in project hazelcast-jet by hazelcast.

the class MasterContext method getInitResult.

/**
 * If there is no failure, then returns null. If the job is cancelled, then returns CancellationException.
 * If there is at least one non-restartable failure, such as an exception in user code, then returns that failure.
 * Otherwise, the failure is because a job participant has left the cluster.
 * In that case, TopologyChangeException is returned so that the job will be restarted.
 */
private Throwable getInitResult(Map<MemberInfo, Object> responses) {
    if (cancellationToken.isCompleted()) {
        logger.fine(jobIdString() + " to be cancelled after init");
        return new CancellationException();
    }
    Map<Boolean, List<Entry<MemberInfo, Object>>> grouped = groupResponses(responses);
    Collection<MemberInfo> successfulMembers = grouped.get(false).stream().map(Entry::getKey).collect(toList());
    if (successfulMembers.size() == executionPlanMap.size()) {
        logger.fine("Init of " + jobIdString() + " is successful.");
        return null;
    }
    List<Entry<MemberInfo, Object>> failures = grouped.get(true);
    logger.fine("Init of " + jobIdString() + " failed with: " + failures);
    // otherwise, return TopologyChangedException so that the job will be restarted
    return failures.stream().map(e -> (Throwable) e.getValue()).filter(t -> !isTopologicalFailure(t)).findFirst().map(ExceptionUtil::peel).orElse(new TopologyChangedException());
}

Also used : NO_SNAPSHOT(com.hazelcast.jet.impl.execution.SnapshotContext.NO_SNAPSHOT) SnapshotRepository.snapshotDataMapName(com.hazelcast.jet.impl.SnapshotRepository.snapshotDataMapName) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) Address(com.hazelcast.nio.Address) Util.jobAndExecutionId(com.hazelcast.jet.impl.util.Util.jobAndExecutionId) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) CompletionToken(com.hazelcast.jet.impl.util.CompletionToken) Util.idToString(com.hazelcast.jet.impl.util.Util.idToString) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) ExecutionService(com.hazelcast.spi.ExecutionService) Operation(com.hazelcast.spi.Operation) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Util.getJetInstance(com.hazelcast.jet.impl.util.Util.getJetInstance) JobConfig(com.hazelcast.jet.config.JobConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Collectors(java.util.stream.Collectors) BroadcastKey(com.hazelcast.jet.core.BroadcastKey) List(java.util.List) ExecutionCallback(com.hazelcast.core.ExecutionCallback) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Entry(java.util.Map.Entry) CancelExecutionOperation(com.hazelcast.jet.impl.operation.CancelExecutionOperation) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) InternalCompletableFuture(com.hazelcast.spi.InternalCompletableFuture) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) ILogger(com.hazelcast.logging.ILogger) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) NOT_STARTED(com.hazelcast.jet.core.JobStatus.NOT_STARTED) MembersView(com.hazelcast.internal.cluster.impl.MembersView) DistributedFunction(com.hazelcast.jet.function.DistributedFunction) Edge(com.hazelcast.jet.core.Edge) ClusterServiceImpl(com.hazelcast.internal.cluster.impl.ClusterServiceImpl) Nullable(javax.annotation.Nullable) NodeEngineImpl(com.hazelcast.spi.impl.NodeEngineImpl) RESTARTING(com.hazelcast.jet.core.JobStatus.RESTARTING) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) ExceptionUtil.isTopologicalFailure(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologicalFailure) DistributedFunctions.entryKey(com.hazelcast.jet.function.DistributedFunctions.entryKey) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Collectors.toList(java.util.stream.Collectors.toList) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) CompleteExecutionOperation(com.hazelcast.jet.impl.operation.CompleteExecutionOperation) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) JobRestartRequestedException(com.hazelcast.jet.impl.exception.JobRestartRequestedException) SnapshotOperation(com.hazelcast.jet.impl.operation.SnapshotOperation) Edge.between(com.hazelcast.jet.core.Edge.between) Entry(java.util.Map.Entry) BroadcastEntry(com.hazelcast.jet.impl.execution.BroadcastEntry) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) CancellationException(java.util.concurrent.CancellationException) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException)

Aggregations

TopologyChangedException (com.hazelcast.jet.core.TopologyChangedException)12 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)7 Address (com.hazelcast.nio.Address)6 ClusterServiceImpl (com.hazelcast.internal.cluster.impl.ClusterServiceImpl)5 ILogger (com.hazelcast.logging.ILogger)5 Map (java.util.Map)5 Set (java.util.Set)5 Address (com.hazelcast.cluster.Address)4 ExecutionPlan (com.hazelcast.jet.impl.execution.init.ExecutionPlan)4 ExceptionUtil.withTryCatch (com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch)4 Util.idToString (com.hazelcast.jet.impl.util.Util.idToString)4 NodeEngineImpl (com.hazelcast.spi.impl.NodeEngineImpl)4 MembersView (com.hazelcast.internal.cluster.impl.MembersView)3 JobConfig (com.hazelcast.jet.config.JobConfig)3 ProcessingGuarantee (com.hazelcast.jet.config.ProcessingGuarantee)3 DAG (com.hazelcast.jet.core.DAG)3 Edge (com.hazelcast.jet.core.Edge)3 Edge.between (com.hazelcast.jet.core.Edge.between)3 JobStatus (com.hazelcast.jet.core.JobStatus)3 COMPLETED (com.hazelcast.jet.core.JobStatus.COMPLETED)3