use of org.apache.hadoop.yarn.util.Clock in project apex-core by apache.
the class DelayOperatorTest method testCheckpointUpdate.
@Test
public void testCheckpointUpdate() {
LogicalPlan dag = StramTestSupport.createDAG(testMeta);
TestGeneratorInputOperator opA = dag.addOperator("A", TestGeneratorInputOperator.class);
GenericTestOperator opB = dag.addOperator("B", GenericTestOperator.class);
GenericTestOperator opC = dag.addOperator("C", GenericTestOperator.class);
GenericTestOperator opD = dag.addOperator("D", GenericTestOperator.class);
DefaultDelayOperator<Object> opDelay = dag.addOperator("opDelay", new DefaultDelayOperator<>());
dag.addStream("AtoB", opA.outport, opB.inport1);
dag.addStream("BtoC", opB.outport1, opC.inport1);
dag.addStream("CtoD", opC.outport1, opD.inport1);
dag.addStream("CtoDelay", opC.outport2, opDelay.input);
dag.addStream("DelayToB", opDelay.output, opB.inport2);
dag.validate();
dag.setAttribute(com.datatorrent.api.Context.OperatorContext.STORAGE_AGENT, new MemoryStorageAgent());
StreamingContainerManager scm = new StreamingContainerManager(dag);
PhysicalPlan plan = scm.getPhysicalPlan();
// set all operators as active to enable recovery window id update
for (PTOperator oper : plan.getAllOperators().values()) {
oper.setState(PTOperator.State.ACTIVE);
}
Clock clock = new SystemClock();
PTOperator opA1 = plan.getOperators(dag.getMeta(opA)).get(0);
PTOperator opB1 = plan.getOperators(dag.getMeta(opB)).get(0);
PTOperator opC1 = plan.getOperators(dag.getMeta(opC)).get(0);
PTOperator opDelay1 = plan.getOperators(dag.getMeta(opDelay)).get(0);
PTOperator opD1 = plan.getOperators(dag.getMeta(opD)).get(0);
Checkpoint cp3 = new Checkpoint(3L, 0, 0);
Checkpoint cp5 = new Checkpoint(5L, 0, 0);
Checkpoint cp4 = new Checkpoint(4L, 0, 0);
opB1.checkpoints.add(cp3);
opC1.checkpoints.add(cp3);
opC1.checkpoints.add(cp4);
opDelay1.checkpoints.add(cp3);
opDelay1.checkpoints.add(cp5);
opD1.checkpoints.add(cp5);
// construct grouping that would be supplied through LogicalPlan
Set<OperatorMeta> stronglyConnected = Sets.newHashSet(dag.getMeta(opB), dag.getMeta(opC), dag.getMeta(opDelay));
Map<OperatorMeta, Set<OperatorMeta>> groups = new HashMap<>();
for (OperatorMeta om : stronglyConnected) {
groups.put(om, stronglyConnected);
}
UpdateCheckpointsContext ctx = new UpdateCheckpointsContext(clock, false, groups);
scm.updateRecoveryCheckpoints(opB1, ctx, false);
Assert.assertEquals("checkpoint " + opA1, Checkpoint.INITIAL_CHECKPOINT, opA1.getRecoveryCheckpoint());
Assert.assertEquals("checkpoint " + opB1, cp3, opC1.getRecoveryCheckpoint());
Assert.assertEquals("checkpoint " + opC1, cp3, opC1.getRecoveryCheckpoint());
Assert.assertEquals("checkpoint " + opD1, cp5, opD1.getRecoveryCheckpoint());
}
use of org.apache.hadoop.yarn.util.Clock in project tez by apache.
the class TestTaskAttempt method testNoProgressFail.
@Test(timeout = 5000)
public void testNoProgressFail() throws Exception {
ApplicationId appId = ApplicationId.newInstance(1, 2);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
MockEventHandler eventHandler = spy(new MockEventHandler());
TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener();
Configuration taskConf = new Configuration();
taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
taskConf.setBoolean("fs.file.impl.disable.cache", true);
taskConf.setLong(TezConfiguration.TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS, 75);
locationHint = TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(new String[] { "127.0.0.1" })), null);
Resource resource = Resource.newInstance(1024, 1);
NodeId nid = NodeId.newInstance("127.0.0.1", 0);
@SuppressWarnings("deprecation") ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
Container container = mock(Container.class);
when(container.getId()).thenReturn(contId);
when(container.getNodeId()).thenReturn(nid);
when(container.getNodeHttpAddress()).thenReturn("localhost:0");
AMContainerMap containers = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appCtx);
containers.addContainerIfNew(container, 0, 0, 0);
doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
doReturn(containers).when(appCtx).getAllContainers();
TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class);
Clock mockClock = mock(Clock.class);
TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, taListener, taskConf, mockClock, mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false);
TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID();
ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId));
taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID));
assertEquals("Task attempt is not in the RUNNING state", taImpl.getState(), TaskAttemptState.RUNNING);
verify(mockHeartbeatHandler).register(taskAttemptID);
when(mockClock.getTime()).thenReturn(100l);
taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, true)));
// invocations and time updated
assertEquals(100l, taImpl.lastNotifyProgressTimestamp);
when(mockClock.getTime()).thenReturn(150l);
taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, true)));
// invocations and time updated
assertEquals(150l, taImpl.lastNotifyProgressTimestamp);
when(mockClock.getTime()).thenReturn(200l);
taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false)));
// invocations and time not updated
assertEquals(150l, taImpl.lastNotifyProgressTimestamp);
when(mockClock.getTime()).thenReturn(250l);
taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false)));
// invocations and time not updated
assertEquals(150l, taImpl.lastNotifyProgressTimestamp);
// failed event sent to self
verify(eventHandler, atLeast(1)).handle(arg.capture());
TaskAttemptEventAttemptFailed fEvent = (TaskAttemptEventAttemptFailed) arg.getValue();
assertEquals(taImpl.getTaskAttemptID(), fEvent.getTaskAttemptID());
assertEquals(TaskAttemptTerminationCause.NO_PROGRESS, fEvent.getTerminationCause());
assertEquals(TaskFailureType.NON_FATAL, fEvent.getTaskFailureType());
taImpl.handle(fEvent);
assertEquals("Task attempt is not in the FAIL_IN_PROGRESS state", taImpl.getInternalState(), TaskAttemptStateInternal.FAIL_IN_PROGRESS);
verify(mockHeartbeatHandler).unregister(taskAttemptID);
assertEquals(1, taImpl.getDiagnostics().size());
assertEquals(TaskAttemptTerminationCause.NO_PROGRESS, taImpl.getTerminationCause());
}
use of org.apache.hadoop.yarn.util.Clock in project hadoop by apache.
the class TestSimpleCapacityReplanner method testReplanningPlanCapacityLoss.
@Test
public void testReplanningPlanCapacityLoss() throws PlanningException {
Resource clusterCapacity = Resource.newInstance(100 * 1024, 10);
Resource minAlloc = Resource.newInstance(1024, 1);
Resource maxAlloc = Resource.newInstance(1024 * 8, 8);
ResourceCalculator res = new DefaultResourceCalculator();
long step = 1L;
Clock clock = mock(Clock.class);
ReservationAgent agent = mock(ReservationAgent.class);
SharingPolicy policy = new NoOverCommitPolicy();
policy.init("root.dedicated", null);
QueueMetrics queueMetrics = mock(QueueMetrics.class);
when(clock.getTime()).thenReturn(0L);
SimpleCapacityReplanner enf = new SimpleCapacityReplanner(clock);
ReservationSchedulerConfiguration conf = mock(ReservationSchedulerConfiguration.class);
when(conf.getEnforcementWindow(any(String.class))).thenReturn(6L);
enf.init("blah", conf);
// Initialize the plan with more resources
InMemoryPlan plan = new InMemoryPlan(queueMetrics, policy, agent, clusterCapacity, step, res, minAlloc, maxAlloc, "dedicated", enf, true, clock);
// add reservation filling the plan (separating them 1ms, so we are sure
// s2 follows s1 on acceptance
long ts = System.currentTimeMillis();
ReservationId r1 = ReservationId.newInstance(ts, 1);
int[] f5 = { 20, 20, 20, 20, 20 };
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r1, null, "u3", "dedicated", 0, 0 + f5.length, generateAllocation(0, f5), res, minAlloc)));
when(clock.getTime()).thenReturn(1L);
ReservationId r2 = ReservationId.newInstance(ts, 2);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r2, null, "u4", "dedicated", 0, 0 + f5.length, generateAllocation(0, f5), res, minAlloc)));
when(clock.getTime()).thenReturn(2L);
ReservationId r3 = ReservationId.newInstance(ts, 3);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r3, null, "u5", "dedicated", 0, 0 + f5.length, generateAllocation(0, f5), res, minAlloc)));
when(clock.getTime()).thenReturn(3L);
ReservationId r4 = ReservationId.newInstance(ts, 4);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r4, null, "u6", "dedicated", 0, 0 + f5.length, generateAllocation(0, f5), res, minAlloc)));
when(clock.getTime()).thenReturn(4L);
ReservationId r5 = ReservationId.newInstance(ts, 5);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r5, null, "u7", "dedicated", 0, 0 + f5.length, generateAllocation(0, f5), res, minAlloc)));
int[] f6 = { 50, 50, 50, 50, 50 };
ReservationId r6 = ReservationId.newInstance(ts, 6);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r6, null, "u3", "dedicated", 10, 10 + f6.length, generateAllocation(10, f6), res, minAlloc)));
when(clock.getTime()).thenReturn(6L);
ReservationId r7 = ReservationId.newInstance(ts, 7);
assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r7, null, "u4", "dedicated", 10, 10 + f6.length, generateAllocation(10, f6), res, minAlloc)));
// remove some of the resources (requires replanning)
plan.setTotalCapacity(Resource.newInstance(70 * 1024, 70));
when(clock.getTime()).thenReturn(0L);
// run the replanner
enf.plan(plan, null);
// check which reservation are still present
assertNotNull(plan.getReservationById(r1));
assertNotNull(plan.getReservationById(r2));
assertNotNull(plan.getReservationById(r3));
assertNotNull(plan.getReservationById(r6));
assertNotNull(plan.getReservationById(r7));
// and which ones are removed
assertNull(plan.getReservationById(r4));
assertNull(plan.getReservationById(r5));
// check resources at each moment in time no more exceed capacity
for (int i = 0; i < 20; i++) {
int tot = 0;
for (ReservationAllocation r : plan.getReservationsAtTime(i)) {
tot = r.getResourcesAtTime(i).getMemory();
}
assertTrue(tot <= 70 * 1024);
}
}
use of org.apache.hadoop.yarn.util.Clock in project hadoop by apache.
the class TestSpeculativeExecutionWithMRApp method testSpeculateSuccessfulWithoutUpdateEvents.
@Test
public void testSpeculateSuccessfulWithoutUpdateEvents() throws Exception {
Clock actualClock = new SystemClock();
final ControlledClock clock = new ControlledClock(actualClock);
clock.setTime(System.currentTimeMillis());
MRApp app = new MRApp(NUM_MAPPERS, NUM_REDUCERS, false, "test", true, clock);
Job job = app.submit(new Configuration(), true, true);
app.waitForState(job, JobState.RUNNING);
Map<TaskId, Task> tasks = job.getTasks();
Assert.assertEquals("Num tasks is not correct", NUM_MAPPERS + NUM_REDUCERS, tasks.size());
Iterator<Task> taskIter = tasks.values().iterator();
while (taskIter.hasNext()) {
app.waitForState(taskIter.next(), TaskState.RUNNING);
}
// Process the update events
clock.setTime(System.currentTimeMillis() + 2000);
EventHandler appEventHandler = app.getContext().getEventHandler();
for (Map.Entry<TaskId, Task> mapTask : tasks.entrySet()) {
for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttempt : mapTask.getValue().getAttempts().entrySet()) {
TaskAttemptStatus status = createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.8, TaskAttemptState.RUNNING);
TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status);
appEventHandler.handle(event);
}
}
Random generator = new Random();
Object[] taskValues = tasks.values().toArray();
final Task taskToBeSpeculated = (Task) taskValues[generator.nextInt(taskValues.length)];
// Other than one random task, finish every other task.
for (Map.Entry<TaskId, Task> mapTask : tasks.entrySet()) {
for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttempt : mapTask.getValue().getAttempts().entrySet()) {
if (mapTask.getKey() != taskToBeSpeculated.getID()) {
appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), TaskAttemptEventType.TA_DONE));
appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), TaskAttemptEventType.TA_CONTAINER_COMPLETED));
app.waitForState(taskAttempt.getValue(), TaskAttemptState.SUCCEEDED);
}
}
}
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
if (taskToBeSpeculated.getAttempts().size() != 2) {
clock.setTime(System.currentTimeMillis() + 1000);
return false;
} else {
return true;
}
}
}, 1000, 60000);
// finish 1st TA, 2nd will be killed
TaskAttempt[] ta = makeFirstAttemptWin(appEventHandler, taskToBeSpeculated);
verifySpeculationMessage(app, ta);
app.waitForState(Service.STATE.STOPPED);
}
use of org.apache.hadoop.yarn.util.Clock in project hadoop by apache.
the class TestCommitterEventHandler method testFailure.
@Test
public void testFailure() throws Exception {
AppContext mockContext = mock(AppContext.class);
OutputCommitter mockCommitter = mock(OutputCommitter.class);
Clock mockClock = mock(Clock.class);
CommitterEventHandler handler = new CommitterEventHandler(mockContext, mockCommitter, new TestingRMHeartbeatHandler());
YarnConfiguration conf = new YarnConfiguration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
JobContext mockJobContext = mock(JobContext.class);
ApplicationAttemptId attemptid = ConverterUtils.toApplicationAttemptId("appattempt_1234567890000_0001_0");
JobId jobId = TypeConverter.toYarn(TypeConverter.fromYarn(attemptid.getApplicationId()));
WaitForItHandler waitForItHandler = new WaitForItHandler();
when(mockContext.getApplicationID()).thenReturn(attemptid.getApplicationId());
when(mockContext.getApplicationAttemptId()).thenReturn(attemptid);
when(mockContext.getEventHandler()).thenReturn(waitForItHandler);
when(mockContext.getClock()).thenReturn(mockClock);
doThrow(new YarnRuntimeException("Intentional Failure")).when(mockCommitter).commitJob(any(JobContext.class));
handler.init(conf);
handler.start();
try {
handler.handle(new CommitterJobCommitEvent(jobId, mockJobContext));
String user = UserGroupInformation.getCurrentUser().getShortUserName();
Path startCommitFile = MRApps.getStartJobCommitFile(conf, user, jobId);
Path endCommitSuccessFile = MRApps.getEndJobCommitSuccessFile(conf, user, jobId);
Path endCommitFailureFile = MRApps.getEndJobCommitFailureFile(conf, user, jobId);
Event e = waitForItHandler.getAndClearEvent();
assertNotNull(e);
assertTrue(e instanceof JobCommitFailedEvent);
FileSystem fs = FileSystem.get(conf);
assertTrue(fs.exists(startCommitFile));
assertFalse(fs.exists(endCommitSuccessFile));
assertTrue(fs.exists(endCommitFailureFile));
verify(mockCommitter).commitJob(any(JobContext.class));
} finally {
handler.stop();
}
}
Aggregations