Search in sources:

Example 6 with STARTING

use of com.hazelcast.jet.core.JobStatus.STARTING in project hazelcast by hazelcast.

The class TopologyChangeTest, method when_jobParticipantReceivesStaleInitOperation_then_jobRestarts.

/**
 * Checks that the job gets a new execution id after packet filters that reject
 * {@code INIT_EXECUTION_OP} between {@code instances[0]} and {@code instances[2]}
 * are lifted following a cluster-size change.
 * <p>
 * Flow: an extra member joins, the job is submitted from {@code instances[0]},
 * the test waits until the master context reports {@code STARTING} with a non-zero
 * execution id and every non-lite member except {@code instances[2]} holds an
 * execution context for it. Then the extra member is terminated, the filters are
 * reset, and after {@code job.join()} the execution id must differ from the one
 * captured earlier.
 */
@Test
public void when_jobParticipantReceivesStaleInitOperation_then_jobRestarts() {
    // Given
    HazelcastInstance extraMember = createHazelcastInstance(config);
    for (HazelcastInstance member : instances) {
        assertClusterSizeEventually(NODE_COUNT + 1, member);
    }

    HazelcastInstance coordinator = instances[0];
    HazelcastInstance isolated = instances[2];
    rejectOperationsBetween(coordinator, isolated, JetInitDataSerializerHook.FACTORY_ID,
            singletonList(INIT_EXECUTION_OP));

    DAG dag = new DAG().vertex(new Vertex("test", new MockPS(TestProcessors.Identity::new, nodeCount + 1)));
    Job job = coordinator.getJet().newJob(dag);

    JetServiceBackend jetServiceBackend = getJetServiceBackend(coordinator);
    assertTrueEventually(() ->
            assertFalse(jetServiceBackend.getJobCoordinationService().getMasterContexts().isEmpty()));
    MasterContext masterContext = jetServiceBackend.getJobCoordinationService().getMasterContext(job.getId());
    assertTrueEventually(() -> {
        assertEquals(STARTING, masterContext.jobStatus());
        assertNotEquals(0, masterContext.executionId());
    });

    // When
    long executionId = masterContext.executionId();
    assertTrueEventually(() -> {
        // Every data member other than the isolated one must already know the execution.
        for (HazelcastInstance member : instances) {
            boolean lite = member.getCluster().getLocalMember().isLiteMember();
            if (lite || member == isolated) {
                continue;
            }
            Assert.assertNotNull(
                    getJetServiceBackend(member).getJobExecutionService().getExecutionContext(executionId));
        }
    });

    extraMember.getLifecycleService().terminate();
    for (HazelcastInstance member : instances) {
        assertClusterSizeEventually(NODE_COUNT, member);
    }
    resetPacketFiltersFrom(coordinator);

    // Then
    job.join();
    assertNotEquals(executionId, masterContext.executionId());
}
Also used : HazelcastInstanceNotActiveException(com.hazelcast.core.HazelcastInstanceNotActiveException) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Arrays(java.util.Arrays) INIT_EXECUTION_OP(com.hazelcast.jet.impl.execution.init.JetInitDataSerializerHook.INIT_EXECUTION_OP) Collections.singletonList(java.util.Collections.singletonList) Assert.assertThat(org.junit.Assert.assertThat) PacketFiltersUtil.dropOperationsBetween(com.hazelcast.test.PacketFiltersUtil.dropOperationsBetween) PacketFiltersUtil.rejectOperationsBetween(com.hazelcast.test.PacketFiltersUtil.rejectOperationsBetween) Future(java.util.concurrent.Future) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) Assert.fail(org.junit.Assert.fail) ClusterDataSerializerHook(com.hazelcast.internal.cluster.impl.ClusterDataSerializerHook) Parameterized(org.junit.runners.Parameterized) MockPS(com.hazelcast.jet.core.TestProcessors.MockPS) HazelcastParametrizedRunner(com.hazelcast.test.HazelcastParametrizedRunner) CancellationException(java.util.concurrent.CancellationException) MEMBER_INFO_UPDATE(com.hazelcast.internal.cluster.impl.ClusterDataSerializerHook.MEMBER_INFO_UPDATE) Collection(java.util.Collection) START_EXECUTION_OP(com.hazelcast.jet.impl.execution.init.JetInitDataSerializerHook.START_EXECUTION_OP) JobConfig(com.hazelcast.jet.config.JobConfig) Set(java.util.Set) PartitionDataSerializerHook(com.hazelcast.internal.partition.impl.PartitionDataSerializerHook) JobResult(com.hazelcast.jet.impl.JobResult) TargetNotMemberException(com.hazelcast.spi.exception.TargetNotMemberException) Category(org.junit.experimental.categories.Category) NoOutputSourceP(com.hazelcast.jet.core.TestProcessors.NoOutputSourceP) Assert.assertFalse(org.junit.Assert.assertFalse) MasterContext(com.hazelcast.jet.impl.MasterContext) RunWith(org.junit.runner.RunWith) 
HazelcastSerialParametersRunnerFactory(com.hazelcast.test.HazelcastSerialParametersRunnerFactory) JetInitDataSerializerHook(com.hazelcast.jet.impl.execution.init.JetInitDataSerializerHook) Accessors(com.hazelcast.test.Accessors) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) Version(com.hazelcast.version.Version) ExpectedException(org.junit.rules.ExpectedException) Job(com.hazelcast.jet.Job) Before(org.junit.Before) UseParametersRunnerFactory(org.junit.runners.Parameterized.UseParametersRunnerFactory) JobRepository(com.hazelcast.jet.impl.JobRepository) Config(com.hazelcast.config.Config) HazelcastInstance(com.hazelcast.core.HazelcastInstance) Matchers.empty(org.hamcrest.Matchers.empty) Assert.assertNotNull(org.junit.Assert.assertNotNull) EXACTLY_ONCE(com.hazelcast.jet.config.ProcessingGuarantee.EXACTLY_ONCE) MemberLeftException(com.hazelcast.core.MemberLeftException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) SlowTest(com.hazelcast.test.annotation.SlowTest) NONE(com.hazelcast.jet.config.ProcessingGuarantee.NONE) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) ExecutionException(java.util.concurrent.ExecutionException) Rule(org.junit.Rule) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) JOB_RECORDS_MAP_NAME(com.hazelcast.jet.impl.JobRepository.JOB_RECORDS_MAP_NAME) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Assert(org.junit.Assert) PacketFiltersUtil.resetPacketFiltersFrom(com.hazelcast.test.PacketFiltersUtil.resetPacketFiltersFrom) Collections(java.util.Collections) JobRecord(com.hazelcast.jet.impl.JobRecord) Assert.assertEquals(org.junit.Assert.assertEquals) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend) SHUTDOWN_REQUEST(com.hazelcast.internal.partition.impl.PartitionDataSerializerHook.SHUTDOWN_REQUEST) MockPS(com.hazelcast.jet.core.TestProcessors.MockPS) HazelcastInstance(com.hazelcast.core.HazelcastInstance) Assert(org.junit.Assert) 
Job(com.hazelcast.jet.Job) MasterContext(com.hazelcast.jet.impl.MasterContext) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Test(org.junit.Test) SlowTest(com.hazelcast.test.annotation.SlowTest)

Example 7 with STARTING

use of com.hazelcast.jet.core.JobStatus.STARTING in project hazelcast by hazelcast.

The class SplitBrainTest, method when_minorityMasterBecomesMajorityMaster_then_jobKeepsRunning.

/**
 * Split-brain scenario with a 2-member and a 1-member sub-cluster.
 * <p>
 * Before the split, a job is submitted from {@code instances[2]} and the test waits
 * for {@code NoOutputSourceP.executionStarted}. After the merge, the first two
 * instances are shut down (with a fresh instance started in between), the test
 * asserts that {@code instances[2]} is the master, and then that the job reaches
 * {@code RUNNING} and stays there.
 */
@Test
public void when_minorityMasterBecomesMajorityMaster_then_jobKeepsRunning() {
    final int majoritySize = 2;
    final int minoritySize = 1;
    final int totalSize = majoritySize + minoritySize;

    NoOutputSourceP.executionStarted = new CountDownLatch(minoritySize * PARALLELISM);

    // Single-element holder so the lambdas below can share the submitted job.
    Job[] submittedJob = new Job[1];

    Consumer<HazelcastInstance[]> beforeSplit = members -> {
        DAG dag = new DAG().vertex(new Vertex("test", new MockPS(NoOutputSourceP::new, totalSize)));
        submittedJob[0] = members[2].getJet().newJob(dag);
        assertOpenEventually(NoOutputSourceP.executionStarted);
    };

    Consumer<HazelcastInstance[]> afterMerge = members -> {
        assertEquals(totalSize, members.length);

        logger.info("Shutting down 1st instance");
        members[0].shutdown();
        logger.info("1st instance down, starting another instance");
        createHazelcastInstance(createConfig());
        logger.info("Shutting down 2nd instance");
        members[1].shutdown();

        ClusterService survivingCluster = (ClusterService) members[2].getCluster();
        assertTrue(survivingCluster.isMaster());

        assertJobStatusEventually(submittedJob[0], RUNNING, 10);
        assertTrueAllTheTime(() -> assertEquals(RUNNING, submittedJob[0].getStatus()), 5);
    };

    testSplitBrain(majoritySize, minoritySize, beforeSplit, null, afterMerge);
}
Also used : MasterContext(com.hazelcast.jet.impl.MasterContext) NOT_RUNNING(com.hazelcast.jet.core.JobStatus.NOT_RUNNING) RunWith(org.junit.runner.RunWith) HazelcastSerialClassRunner(com.hazelcast.test.HazelcastSerialClassRunner) ClusterService(com.hazelcast.internal.cluster.ClusterService) Future(java.util.concurrent.Future) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) BiConsumer(java.util.function.BiConsumer) Assert.fail(org.junit.Assert.fail) ExpectedException(org.junit.rules.ExpectedException) Job(com.hazelcast.jet.Job) JobRepository(com.hazelcast.jet.impl.JobRepository) Config(com.hazelcast.config.Config) HazelcastInstance(com.hazelcast.core.HazelcastInstance) MockPS(com.hazelcast.jet.core.TestProcessors.MockPS) NightlyTest(com.hazelcast.test.annotation.NightlyTest) CancellationException(java.util.concurrent.CancellationException) Assert.assertNotNull(org.junit.Assert.assertNotNull) JobConfig(com.hazelcast.jet.config.JobConfig) MAX_BACKUP_COUNT(com.hazelcast.internal.partition.IPartition.MAX_BACKUP_COUNT) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Category(org.junit.experimental.categories.Category) NoOutputSourceP(com.hazelcast.jet.core.TestProcessors.NoOutputSourceP) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) CountDownLatch(java.util.concurrent.CountDownLatch) Rule(org.junit.Rule) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) Lists.newArrayList(org.assertj.core.util.Lists.newArrayList) Assert.assertEquals(org.junit.Assert.assertEquals) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend) MockPS(com.hazelcast.jet.core.TestProcessors.MockPS) ClusterService(com.hazelcast.internal.cluster.ClusterService) CountDownLatch(java.util.concurrent.CountDownLatch) Job(com.hazelcast.jet.Job) NightlyTest(com.hazelcast.test.annotation.NightlyTest) Test(org.junit.Test)

Example 8 with STARTING

use of com.hazelcast.jet.core.JobStatus.STARTING in project hazelcast by hazelcast.

The class MasterJobContext, method onStartExecutionComplete.

/**
 * Handles the combined outcome of the start-execution step and finalizes the job.
 *
 * @param error     failure reported for the execution, or whatever the caller passed
 *                  when there was none; it may be replaced below before finalization
 * @param responses per-member responses; values that are {@code RawJobMetrics} are
 *                  collected and stored as the job metrics
 */
private void onStartExecutionComplete(Throwable error, Collection<Entry<MemberInfo, Object>> responses) {
    JobStatus currentStatus = mc.jobStatus();
    boolean completionExpected = currentStatus == STARTING || currentStatus == RUNNING;
    if (!completionExpected) {
        logCannotComplete(error);
        error = new IllegalStateException("Job coordination failed");
    }

    setJobMetrics(responses.stream()
            .map(Entry::getValue)
            .filter(RawJobMetrics.class::isInstance)
            .map(RawJobMetrics.class::cast)
            .collect(Collectors.toList()));

    if (error instanceof JobTerminateRequestedException
            && ((JobTerminateRequestedException) error).mode().isWithTerminalSnapshot()) {
        Throwable terminationError = error;
        // Members finish the terminal snapshot before they answer StartExecutionOp, but the
        // snapshot operation's response may still be processed later than this one, so
        // finalization is chained onto the terminal snapshot future.
        mc.snapshotContext().terminalSnapshotFuture()
                .whenCompleteAsync(withTryCatch(logger, (result, failure) -> finalizeJob(terminationError)));
        return;
    }

    // An ExecutionNotFoundException from StartExecutionOperation means the execution was cancelled.
    // If the master requested the termination, report it as JobTerminateRequestedException.
    // Otherwise (e.g. a participant left and the target cancelled the execution locally in
    // JobExecutionService.onMemberRemoved()) the exception is kept as-is and the execution
    // completes with failure.
    if (error instanceof ExecutionNotFoundException && requestedTerminationMode != null) {
        error = new JobTerminateRequestedException(requestedTerminationMode).initCause(error);
    }
    finalizeJob(error);
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus) Address(com.hazelcast.cluster.Address) SUSPEND(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.SUSPEND) NOT_RUNNING(com.hazelcast.jet.core.JobStatus.NOT_RUNNING) GetLocalJobMetricsOperation(com.hazelcast.jet.impl.operation.GetLocalJobMetricsOperation) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) ExceptionUtil.isTopologyException(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologyException) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) RESTART(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.RESTART) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) TerminatedWithSnapshotException(com.hazelcast.jet.impl.exception.TerminatedWithSnapshotException) Collectors.toMap(java.util.stream.Collectors.toMap) Functions.entryKey(com.hazelcast.function.Functions.entryKey) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) JobMetrics(com.hazelcast.jet.core.metrics.JobMetrics) CancellationException(java.util.concurrent.CancellationException) CANCEL_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_GRACEFUL) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) CANCEL_FORCEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL) Objects(java.util.Objects) 
Util(com.hazelcast.jet.impl.util.Util) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MetricNames(com.hazelcast.jet.core.metrics.MetricNames) Entry(java.util.Map.Entry) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) JetDisabledException(com.hazelcast.jet.impl.exception.JetDisabledException) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) TerminateExecutionOperation(com.hazelcast.jet.impl.operation.TerminateExecutionOperation) ExceptionUtil.isRestartableException(com.hazelcast.jet.impl.util.ExceptionUtil.isRestartableException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LoggingUtil.logFinest(com.hazelcast.jet.impl.util.LoggingUtil.logFinest) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionService(com.hazelcast.spi.impl.executionservice.ExecutionService) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Util.formatJobDuration(com.hazelcast.jet.impl.util.Util.formatJobDuration) ActionAfterTerminate(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) JetException(com.hazelcast.jet.JetException) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) COORDINATOR(com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR) ILogger(com.hazelcast.logging.ILogger) 
SnapshotValidator.validateSnapshot(com.hazelcast.jet.impl.SnapshotValidator.validateSnapshot) ExceptionUtil.rethrow(com.hazelcast.jet.impl.util.ExceptionUtil.rethrow) Operation(com.hazelcast.spi.impl.operationservice.Operation) Util.entry(com.hazelcast.jet.Util.entry) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) BiConsumer(java.util.function.BiConsumer) MembersView(com.hazelcast.internal.cluster.impl.MembersView) LocalMemberResetException(com.hazelcast.core.LocalMemberResetException) RESTART_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.RESTART_GRACEFUL) Edge(com.hazelcast.jet.core.Edge) Version(com.hazelcast.version.Version) EXPORTED_SNAPSHOTS_PREFIX(com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX) Nonnull(javax.annotation.Nonnull) Tuple2(com.hazelcast.jet.datamodel.Tuple2) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) Measurement(com.hazelcast.jet.core.metrics.Measurement) SUSPENDED_EXPORTING_SNAPSHOT(com.hazelcast.jet.core.JobStatus.SUSPENDED_EXPORTING_SNAPSHOT) Util.toList(com.hazelcast.jet.impl.util.Util.toList) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) MetricTags(com.hazelcast.jet.core.metrics.MetricTags) NONE(com.hazelcast.jet.config.ProcessingGuarantee.NONE) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Tuple2.tuple2(com.hazelcast.jet.datamodel.Tuple2.tuple2) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Collections(java.util.Collections) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) 
JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException)

Aggregations

STARTING (com.hazelcast.jet.core.JobStatus.STARTING)8 Job (com.hazelcast.jet.Job)7 JobConfig (com.hazelcast.jet.config.JobConfig)7 CancellationException (java.util.concurrent.CancellationException)6 COMPLETED (com.hazelcast.jet.core.JobStatus.COMPLETED)5 MockPS (com.hazelcast.jet.core.TestProcessors.MockPS)5 JobRepository (com.hazelcast.jet.impl.JobRepository)5 MasterContext (com.hazelcast.jet.impl.MasterContext)5 Collection (java.util.Collection)5 Future (java.util.concurrent.Future)5 Assert.assertEquals (org.junit.Assert.assertEquals)5 Assert.assertNotNull (org.junit.Assert.assertNotNull)5 Assert.assertTrue (org.junit.Assert.assertTrue)5 Assert.fail (org.junit.Assert.fail)5 Rule (org.junit.Rule)5 Test (org.junit.Test)5 ExpectedException (org.junit.rules.ExpectedException)5 RunWith (org.junit.runner.RunWith)5 RUNNING (com.hazelcast.jet.core.JobStatus.RUNNING)4 HazelcastInstance (com.hazelcast.core.HazelcastInstance)3