Search in sources :

Example 51 with UUID

use of java.util.UUID in project flink by apache.

the class JobClientActorTest method testConnectionTimeoutAfterJobSubmission.

/** Tests that a {@link org.apache.flink.runtime.client.JobClientActorConnectionTimeoutException}
	 * is thrown after a successful job submission if the JobManager dies.
	 *
	 * @throws Exception
	 */
@Test(expected = JobClientActorConnectionTimeoutException.class)
public void testConnectionTimeoutAfterJobSubmission() throws Exception {
    FiniteDuration jobClientActorTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    FiniteDuration timeout = jobClientActorTimeout.$times(2);
    UUID leaderSessionID = UUID.randomUUID();
    ActorRef jobManager = system.actorOf(Props.create(JobAcceptingActor.class, leaderSessionID));
    TestingLeaderRetrievalService testingLeaderRetrievalService = new TestingLeaderRetrievalService(jobManager.path().toString(), leaderSessionID);
    Props jobClientActorProps = JobSubmissionClientActor.createActorProps(testingLeaderRetrievalService, jobClientActorTimeout, false, clientConfig);
    ActorRef jobClientActor = system.actorOf(jobClientActorProps);
    Future<Object> jobExecutionResult = Patterns.ask(jobClientActor, new JobClientMessages.SubmitJobAndWait(testJobGraph), new Timeout(timeout));
    Future<Object> waitFuture = Patterns.ask(jobManager, new RegisterTest(), new Timeout(timeout));
    Await.result(waitFuture, timeout);
    jobManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
    Await.result(jobExecutionResult, timeout);
}
Also used : TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) ActorRef(akka.actor.ActorRef) JobClientMessages(org.apache.flink.runtime.messages.JobClientMessages) Timeout(akka.util.Timeout) FiniteDuration(scala.concurrent.duration.FiniteDuration) UUID(java.util.UUID) Props(akka.actor.Props) Test(org.junit.Test)

Example 52 with UUID

use of java.util.UUID in project flink by apache.

the class JobClientActorTest method testSubmissionTimeout.

/** Tests that a {@link JobClientActorSubmissionTimeoutException} is thrown when the job cannot
	 * be submitted by the JobSubmissionClientActor. This is here the case, because the started JobManager
	 * never replies to a {@link SubmitJob} message.
	 *
	 * @throws Exception
	 */
@Test(expected = JobClientActorSubmissionTimeoutException.class)
public void testSubmissionTimeout() throws Exception {
    FiniteDuration jobClientActorTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    FiniteDuration timeout = jobClientActorTimeout.$times(2);
    UUID leaderSessionID = UUID.randomUUID();
    ActorRef jobManager = system.actorOf(Props.create(PlainActor.class, leaderSessionID));
    TestingLeaderRetrievalService testingLeaderRetrievalService = new TestingLeaderRetrievalService(jobManager.path().toString(), leaderSessionID);
    Props jobClientActorProps = JobSubmissionClientActor.createActorProps(testingLeaderRetrievalService, jobClientActorTimeout, false, clientConfig);
    ActorRef jobClientActor = system.actorOf(jobClientActorProps);
    Future<Object> jobExecutionResult = Patterns.ask(jobClientActor, new JobClientMessages.SubmitJobAndWait(testJobGraph), new Timeout(timeout));
    Await.result(jobExecutionResult, timeout);
}
Also used : TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) ActorRef(akka.actor.ActorRef) JobClientMessages(org.apache.flink.runtime.messages.JobClientMessages) Timeout(akka.util.Timeout) FiniteDuration(scala.concurrent.duration.FiniteDuration) UUID(java.util.UUID) Props(akka.actor.Props) Test(org.junit.Test)

Example 53 with UUID

use of java.util.UUID in project flink by apache.

the class AkkaKvStateLocationLookupServiceTest method testLeaderSessionIdChange.

/**
	 * Tests that messages are properly decorated with the leader session ID.
	 */
@Test
public void testLeaderSessionIdChange() throws Exception {
    TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
    Queue<LookupKvStateLocation> received = new LinkedBlockingQueue<>();
    AkkaKvStateLocationLookupService lookupService = new AkkaKvStateLocationLookupService(leaderRetrievalService, testActorSystem, TIMEOUT, new AkkaKvStateLocationLookupService.DisabledLookupRetryStrategyFactory());
    lookupService.start();
    // Create test actors with random leader session IDs
    KvStateLocation expected1 = new KvStateLocation(new JobID(), new JobVertexID(), 8282, "salt");
    UUID leaderSessionId1 = UUID.randomUUID();
    ActorRef testActor1 = LookupResponseActor.create(received, leaderSessionId1, expected1);
    String testActorAddress1 = AkkaUtils.getAkkaURL(testActorSystem, testActor1);
    KvStateLocation expected2 = new KvStateLocation(new JobID(), new JobVertexID(), 22321, "pepper");
    UUID leaderSessionId2 = UUID.randomUUID();
    ActorRef testActor2 = LookupResponseActor.create(received, leaderSessionId1, expected2);
    String testActorAddress2 = AkkaUtils.getAkkaURL(testActorSystem, testActor2);
    JobID jobId = new JobID();
    //
    // Notify about first leader
    //
    leaderRetrievalService.notifyListener(testActorAddress1, leaderSessionId1);
    KvStateLocation location = Await.result(lookupService.getKvStateLookupInfo(jobId, "rock"), TIMEOUT);
    assertEquals(expected1, location);
    assertEquals(1, received.size());
    verifyLookupMsg(received.poll(), jobId, "rock");
    //
    // Notify about second leader
    //
    leaderRetrievalService.notifyListener(testActorAddress2, leaderSessionId2);
    location = Await.result(lookupService.getKvStateLookupInfo(jobId, "roll"), TIMEOUT);
    assertEquals(expected2, location);
    assertEquals(1, received.size());
    verifyLookupMsg(received.poll(), jobId, "roll");
}
Also used : TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) UUID(java.util.UUID) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 54 with UUID

use of java.util.UUID in project flink by apache.

the class TaskExecutor method offerSlotsToJobManager.

// ------------------------------------------------------------------------
//  Internal job manager connection methods
// ------------------------------------------------------------------------
private void offerSlotsToJobManager(final JobID jobId) {
    final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);
    if (jobManagerConnection == null) {
        log.debug("There is no job manager connection to the leader of job {}.", jobId);
    } else {
        if (taskSlotTable.hasAllocatedSlots(jobId)) {
            log.info("Offer reserved slots to the leader of job {}.", jobId);
            final JobMasterGateway jobMasterGateway = jobManagerConnection.getJobManagerGateway();
            final Iterator<TaskSlot> reservedSlotsIterator = taskSlotTable.getAllocatedSlots(jobId);
            final UUID leaderId = jobManagerConnection.getLeaderId();
            final Collection<SlotOffer> reservedSlots = new HashSet<>(2);
            while (reservedSlotsIterator.hasNext()) {
                SlotOffer offer = reservedSlotsIterator.next().generateSlotOffer();
                try {
                    if (!taskSlotTable.markSlotActive(offer.getAllocationId())) {
                        // the slot is either free or releasing at the moment
                        final String message = "Could not mark slot " + jobId + " active.";
                        log.debug(message);
                        jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message));
                    }
                } catch (SlotNotFoundException e) {
                    final String message = "Could not mark slot " + jobId + " active.";
                    jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message));
                    continue;
                }
                reservedSlots.add(offer);
            }
            Future<Iterable<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots(getResourceID(), reservedSlots, leaderId, taskManagerConfiguration.getTimeout());
            acceptedSlotsFuture.thenAcceptAsync(new AcceptFunction<Iterable<SlotOffer>>() {

                @Override
                public void accept(Iterable<SlotOffer> acceptedSlots) {
                    // check if the response is still valid
                    if (isJobManagerConnectionValid(jobId, leaderId)) {
                        // mark accepted slots active
                        for (SlotOffer acceptedSlot : acceptedSlots) {
                            reservedSlots.remove(acceptedSlot);
                        }
                        final Exception e = new Exception("The slot was rejected by the JobManager.");
                        for (SlotOffer rejectedSlot : reservedSlots) {
                            freeSlot(rejectedSlot.getAllocationId(), e);
                        }
                    } else {
                        // discard the response since there is a new leader for the job
                        log.debug("Discard offer slot response since there is a new leader " + "for the job {}.", jobId);
                    }
                }
            }, getMainThreadExecutor());
            acceptedSlotsFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

                @Override
                public Void apply(Throwable throwable) {
                    if (throwable instanceof TimeoutException) {
                        // We ran into a timeout. Try again.
                        offerSlotsToJobManager(jobId);
                    } else {
                        // We encountered an exception. Free the slots and return them to the RM.
                        for (SlotOffer reservedSlot : reservedSlots) {
                            freeSlot(reservedSlot.getAllocationId(), throwable);
                        }
                    }
                    return null;
                }
            }, getMainThreadExecutor());
        } else {
            log.debug("There are no unassigned slots for the job {}.", jobId);
        }
    }
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) TaskSlot(org.apache.flink.runtime.taskexecutor.slot.TaskSlot) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) TimeoutException(java.util.concurrent.TimeoutException) PartitionException(org.apache.flink.runtime.taskexecutor.exceptions.PartitionException) CheckpointException(org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException) SlotAllocationException(org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) TaskException(org.apache.flink.runtime.taskexecutor.exceptions.TaskException) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) IOException(java.io.IOException) UUID(java.util.UUID) HashSet(java.util.HashSet) TimeoutException(java.util.concurrent.TimeoutException)

Example 55 with UUID

use of java.util.UUID in project flink by apache.

the class ResourceManager method registerJobManager.

// ------------------------------------------------------------------------
//  RPC methods
// ------------------------------------------------------------------------
@RpcMethod
public Future<RegistrationResponse> registerJobManager(final UUID resourceManagerLeaderId, final UUID jobManagerLeaderId, final String jobManagerAddress, final JobID jobId) {
    checkNotNull(resourceManagerLeaderId);
    checkNotNull(jobManagerLeaderId);
    checkNotNull(jobManagerAddress);
    checkNotNull(jobId);
    if (isValid(resourceManagerLeaderId)) {
        if (!jobLeaderIdService.containsJob(jobId)) {
            try {
                jobLeaderIdService.addJob(jobId);
            } catch (Exception e) {
                ResourceManagerException exception = new ResourceManagerException("Could not add the job " + jobId + " to the job id leader service.", e);
                onFatalErrorAsync(exception);
                log.error("Could not add job {} to job leader id service.", jobId, e);
                return FlinkCompletableFuture.completedExceptionally(exception);
            }
        }
        log.info("Registering job manager {}@{} for job {}.", jobManagerLeaderId, jobManagerAddress, jobId);
        Future<UUID> jobLeaderIdFuture;
        try {
            jobLeaderIdFuture = jobLeaderIdService.getLeaderId(jobId);
        } catch (Exception e) {
            // we cannot check the job leader id so let's fail
            // TODO: Maybe it's also ok to skip this check in case that we cannot check the leader id
            ResourceManagerException exception = new ResourceManagerException("Cannot obtain the " + "job leader id future to verify the correct job leader.", e);
            onFatalErrorAsync(exception);
            log.debug("Could not obtain the job leader id future to verify the correct job leader.");
            return FlinkCompletableFuture.completedExceptionally(exception);
        }
        Future<JobMasterGateway> jobMasterGatewayFuture = getRpcService().connect(jobManagerAddress, JobMasterGateway.class);
        Future<RegistrationResponse> registrationResponseFuture = jobMasterGatewayFuture.thenCombineAsync(jobLeaderIdFuture, new BiFunction<JobMasterGateway, UUID, RegistrationResponse>() {

            @Override
            public RegistrationResponse apply(JobMasterGateway jobMasterGateway, UUID jobLeaderId) {
                if (isValid(resourceManagerLeaderId)) {
                    if (jobLeaderId.equals(jobManagerLeaderId)) {
                        if (jobManagerRegistrations.containsKey(jobId)) {
                            JobManagerRegistration oldJobManagerRegistration = jobManagerRegistrations.get(jobId);
                            if (oldJobManagerRegistration.getLeaderID().equals(jobLeaderId)) {
                                // same registration
                                log.debug("Job manager {}@{} was already registered.", jobManagerLeaderId, jobManagerAddress);
                            } else {
                                // tell old job manager that he is no longer the job leader
                                disconnectJobManager(oldJobManagerRegistration.getJobID(), new Exception("New job leader for job " + jobId + " found."));
                                JobManagerRegistration jobManagerRegistration = new JobManagerRegistration(jobId, jobLeaderId, jobMasterGateway);
                                jobManagerRegistrations.put(jobId, jobManagerRegistration);
                            }
                        } else {
                            // new registration for the job
                            JobManagerRegistration jobManagerRegistration = new JobManagerRegistration(jobId, jobLeaderId, jobMasterGateway);
                            jobManagerRegistrations.put(jobId, jobManagerRegistration);
                        }
                        log.info("Registered job manager {}@{} for job {}.", jobManagerLeaderId, jobManagerAddress, jobId);
                        return new JobMasterRegistrationSuccess(resourceManagerConfiguration.getHeartbeatInterval().toMilliseconds(), getLeaderSessionId());
                    } else {
                        log.debug("The job manager leader id {} did not match the job " + "leader id {}.", jobManagerLeaderId, jobLeaderId);
                        return new RegistrationResponse.Decline("Job manager leader id did not match.");
                    }
                } else {
                    log.debug("The resource manager leader id changed {}. Discarding job " + "manager registration from {}.", getLeaderSessionId(), jobManagerAddress);
                    return new RegistrationResponse.Decline("Resource manager leader id changed.");
                }
            }
        }, getMainThreadExecutor());
        // handle exceptions which might have occurred in one of the futures inputs of combine
        return registrationResponseFuture.handleAsync(new BiFunction<RegistrationResponse, Throwable, RegistrationResponse>() {

            @Override
            public RegistrationResponse apply(RegistrationResponse registrationResponse, Throwable throwable) {
                if (throwable != null) {
                    if (log.isDebugEnabled()) {
                        log.debug("Registration of job manager {}@{} failed.", jobManagerLeaderId, jobManagerAddress, throwable);
                    } else {
                        log.info("Registration of job manager {}@{} failed.", jobManagerLeaderId, jobManagerAddress);
                    }
                    return new RegistrationResponse.Decline(throwable.getMessage());
                } else {
                    return registrationResponse;
                }
            }
        }, getRpcService().getExecutor());
    } else {
        log.debug("Discard register job manager message from {}, because the leader id " + "{} did not match the expected leader id {}.", jobManagerAddress, resourceManagerLeaderId, leaderSessionId);
        return FlinkCompletableFuture.<RegistrationResponse>completed(new RegistrationResponse.Decline("Resource manager leader id did not match."));
    }
}
Also used : JobMasterRegistrationSuccess(org.apache.flink.runtime.jobmaster.JobMasterRegistrationSuccess) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) LeaderIdMismatchException(org.apache.flink.runtime.highavailability.LeaderIdMismatchException) JobManagerRegistration(org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration) UUID(java.util.UUID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Aggregations

UUID (java.util.UUID)8697 Test (org.junit.Test)3030 ArrayList (java.util.ArrayList)1077 HashMap (java.util.HashMap)934 List (java.util.List)738 Map (java.util.Map)652 Test (org.testng.annotations.Test)630 HashSet (java.util.HashSet)436 IOException (java.io.IOException)397 LocalDate (org.joda.time.LocalDate)328 Set (java.util.Set)315 BigDecimal (java.math.BigDecimal)284 IdmIdentityDto (eu.bcvsolutions.idm.core.api.dto.IdmIdentityDto)283 UUID.randomUUID (java.util.UUID.randomUUID)281 Collectors (java.util.stream.Collectors)276 ClusterNode (org.apache.ignite.cluster.ClusterNode)265 Test (org.junit.jupiter.api.Test)248 Account (org.killbill.billing.account.api.Account)225 Collection (java.util.Collection)222 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)218