Use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.
The class TestRecoveryParser, method testLastCorruptedRecoveryRecord.
@Test(timeout = 5000)
public void testLastCorruptedRecoveryRecord() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
when(appContext.getApplicationID()).thenReturn(appId);
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// write data in attempt_1
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
// wait until DAGSubmittedEvent is handled in the RecoveryEventHandling thread
rService.await();
rService.outputStreamMap.get(dagID).writeUTF("INVALID_DATA");
rService.stop();
// write data in attempt_2
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/2"));
rService = new RecoveryService(appContext);
rService.init(conf);
rService.start();
rService.handle(new DAGHistoryEvent(dagID, new DAGInitializedEvent(dagID, 1L, "user", dagPlan.getName(), null)));
rService.await();
rService.outputStreamMap.get(dagID).writeUTF("INVALID_DATA");
rService.stop();
// the corrupted last record will be skipped, but the rest of the recovery log is still read
DAGRecoveryData dagData = parser.parseRecoveryData();
assertEquals(false, dagData.isCompleted);
assertEquals(null, dagData.reason);
assertEquals(false, dagData.nonRecoverable);
// verify that DAGSubmittedEvent & DAGInitializedEvent are handled.
verify(mockAppMaster).createDAG(any(DAGPlan.class), any(TezDAGID.class));
assertNotNull(dagData.getDAGInitializedEvent());
}
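The three flags asserted above (isCompleted, reason, nonRecoverable) are the ones a recovering ApplicationMaster would consult after parsing the log. A minimal sketch of that decision, using only the DAGRecoveryData fields exercised by this test; the branch bodies are illustrative comments, not Tez's actual recovery code:
DAGRecoveryData recovered = parser.parseRecoveryData();
if (recovered != null && recovered.nonRecoverable) {
  // cannot resume; fail the DAG and surface recovered.reason to the client
} else if (recovered != null && recovered.isCompleted) {
  // the DAG already finished in a previous attempt; only the final state needs replaying
} else {
  // resume the DAG from the recovered events, as this test expects
}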
Use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.
The class TestRecoveryParser, method testRecoveryData.
@Test(timeout = 5000)
public void testRecoveryData() throws IOException {
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
AppContext appContext = mock(AppContext.class);
when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath + "/1"));
when(appContext.getClock()).thenReturn(new SystemClock());
when(mockDAGImpl.getID()).thenReturn(dagID);
when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
when(appContext.getApplicationID()).thenReturn(appId);
RecoveryService rService = new RecoveryService(appContext);
Configuration conf = new Configuration();
conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
rService.init(conf);
rService.start();
DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
// DAG-level events: DAGSubmittedEvent -> DAGInitializedEvent -> DAGStartedEvent
rService.handle(new DAGHistoryEvent(dagID, new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), null, "user", new Configuration(), null, null)));
DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagID, 100L, "user", "dagName", null);
DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagID, 0L, "user", "dagName");
rService.handle(new DAGHistoryEvent(dagID, dagInitedEvent));
rService.handle(new DAGHistoryEvent(dagID, dagStartedEvent));
// 3 vertices of this dag: v0, v1, v2
TezVertexID v0Id = TezVertexID.getInstance(dagID, 0);
TezVertexID v1Id = TezVertexID.getInstance(dagID, 1);
TezVertexID v2Id = TezVertexID.getInstance(dagID, 2);
// v0 VertexInitializedEvent
VertexInitializedEvent v0InitedEvent = new VertexInitializedEvent(v0Id, "v0", 200L, 400L, 2, null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, v0InitedEvent));
// v1 VertexFinishedEvent(KILLED)
VertexFinishedEvent v1FinishedEvent = new VertexFinishedEvent(v1Id, "v1", 2, 300L, 400L, 500L, 600L, 700L, VertexState.KILLED, "", null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, v1FinishedEvent));
// v2 VertexInitializedEvent -> VertexStartedEvent
List<TezEvent> initGeneratedEvents = Lists.newArrayList(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), null));
VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, "v2", 200L, 300L, 2, null, null, initGeneratedEvents, null);
VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, 0L);
rService.handle(new DAGHistoryEvent(dagID, v2InitedEvent));
rService.handle(new DAGHistoryEvent(dagID, v2StartedEvent));
// 3 tasks of v2
TezTaskID t0v2Id = TezTaskID.getInstance(v2Id, 0);
TezTaskID t1v2Id = TezTaskID.getInstance(v2Id, 1);
TezTaskID t2v2Id = TezTaskID.getInstance(v2Id, 2);
// t0v2 TaskStartedEvent
TaskStartedEvent t0v2StartedEvent = new TaskStartedEvent(t0v2Id, "v2", 400L, 5000L);
rService.handle(new DAGHistoryEvent(dagID, t0v2StartedEvent));
// t1v2 TaskFinishedEvent
TaskFinishedEvent t1v2FinishedEvent = new TaskFinishedEvent(t1v2Id, "v1", 0L, 0L, null, TaskState.KILLED, "", null, 4);
rService.handle(new DAGHistoryEvent(dagID, t1v2FinishedEvent));
// t2v2 TaskStartedEvent -> TaskFinishedEvent
TaskStartedEvent t2v2StartedEvent = new TaskStartedEvent(t2v2Id, "v2", 400L, 500L);
rService.handle(new DAGHistoryEvent(dagID, t2v2StartedEvent));
TaskFinishedEvent t2v2FinishedEvent = new TaskFinishedEvent(t2v2Id, "v1", 0L, 0L, null, TaskState.SUCCEEDED, "", null, 4);
rService.handle(new DAGHistoryEvent(dagID, t2v2FinishedEvent));
// attempts under t0v2
ContainerId containerId = ContainerId.newInstance(appAttemptId, 1);
NodeId nodeId = NodeId.newInstance("localhost", 9999);
TezTaskAttemptID ta0t0v2Id = TezTaskAttemptID.getInstance(t0v2Id, 0);
TaskAttemptStartedEvent ta0t0v2StartedEvent = new TaskAttemptStartedEvent(ta0t0v2Id, "v1", 0L, containerId, nodeId, "", "", "");
rService.handle(new DAGHistoryEvent(dagID, ta0t0v2StartedEvent));
// attempts under t2v2
TezTaskAttemptID ta0t2v2Id = TezTaskAttemptID.getInstance(t2v2Id, 0);
TaskAttemptStartedEvent ta0t2v2StartedEvent = new TaskAttemptStartedEvent(ta0t2v2Id, "v1", 500L, containerId, nodeId, "", "", "");
rService.handle(new DAGHistoryEvent(dagID, ta0t2v2StartedEvent));
TaskAttemptFinishedEvent ta0t2v2FinishedEvent = new TaskAttemptFinishedEvent(ta0t2v2Id, "v1", 500L, 600L, TaskAttemptState.SUCCEEDED, null, null, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
rService.handle(new DAGHistoryEvent(dagID, ta0t2v2FinishedEvent));
rService.stop();
DAGRecoveryData dagData = parser.parseRecoveryData();
assertFalse(dagData.nonRecoverable);
// There is no equals method on the history events, so for simplicity only the init/start/finish times of each event are verified
assertEquals(dagInitedEvent.getInitTime(), dagData.getDAGInitializedEvent().getInitTime());
assertEquals(dagStartedEvent.getStartTime(), dagData.getDAGStartedEvent().getStartTime());
assertNull(dagData.getDAGFinishedEvent());
VertexRecoveryData v0Data = dagData.getVertexRecoveryData(v0Id);
VertexRecoveryData v1Data = dagData.getVertexRecoveryData(v1Id);
VertexRecoveryData v2Data = dagData.getVertexRecoveryData(v2Id);
assertNotNull(v0Data);
assertNotNull(v1Data);
assertNotNull(v2Data);
assertEquals(v0InitedEvent.getInitedTime(), v0Data.getVertexInitedEvent().getInitedTime());
assertNull(v0Data.getVertexStartedEvent());
assertNull(v1Data.getVertexInitedEvent());
assertEquals(v1FinishedEvent.getFinishTime(), v1Data.getVertexFinishedEvent().getFinishTime());
assertEquals(v2InitedEvent.getInitedTime(), v2Data.getVertexInitedEvent().getInitedTime());
assertEquals(v2StartedEvent.getStartTime(), v2Data.getVertexStartedEvent().getStartTime());
TaskRecoveryData t0v2Data = dagData.getTaskRecoveryData(t0v2Id);
TaskRecoveryData t1v2Data = dagData.getTaskRecoveryData(t1v2Id);
TaskRecoveryData t2v2Data = dagData.getTaskRecoveryData(t2v2Id);
assertNotNull(t0v2Data);
assertNotNull(t1v2Data);
assertNotNull(t2v2Data);
assertEquals(t0v2StartedEvent.getStartTime(), t0v2Data.getTaskStartedEvent().getStartTime());
assertNull(t0v2Data.getTaskFinishedEvent());
assertEquals(t1v2FinishedEvent.getFinishTime(), t1v2Data.getTaskFinishedEvent().getFinishTime());
assertNull(t1v2Data.getTaskStartedEvent());
assertEquals(t2v2StartedEvent.getStartTime(), t2v2Data.getTaskStartedEvent().getStartTime());
assertEquals(t2v2FinishedEvent.getFinishTime(), t2v2Data.getTaskFinishedEvent().getFinishTime());
TaskAttemptRecoveryData ta0t0v2Data = dagData.getTaskAttemptRecoveryData(ta0t0v2Id);
TaskAttemptRecoveryData ta0t2v2Data = dagData.getTaskAttemptRecoveryData(ta0t2v2Id);
assertNotNull(ta0t0v2Data);
assertNotNull(ta0t2v2Data);
assertEquals(ta0t0v2StartedEvent.getStartTime(), ta0t0v2Data.getTaskAttemptStartedEvent().getStartTime());
assertNull(ta0t0v2Data.getTaskAttemptFinishedEvent());
assertEquals(ta0t2v2StartedEvent.getStartTime(), ta0t2v2Data.getTaskAttemptStartedEvent().getStartTime());
assertEquals(ta0t2v2FinishedEvent.getFinishTime(), ta0t2v2Data.getTaskAttemptFinishedEvent().getFinishTime());
}
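All of the events above are keyed by Tez's hierarchical IDs. As a quick reference, this is the containment chain the test builds; every call below also appears in the test itself:
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
TezDAGID dagID = TezDAGID.getInstance(appId, 1); // DAG 1 of the application
TezVertexID v2Id = TezVertexID.getInstance(dagID, 2); // vertex v2 of the DAG
TezTaskID t2v2Id = TezTaskID.getInstance(v2Id, 2); // task 2 of v2
TezTaskAttemptID ta0t2v2Id = TezTaskAttemptID.getInstance(t2v2Id, 0); // attempt 0 of that task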
Use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.
The class MapUtils, method createLogicalTask.
public static LogicalIOProcessorRuntimeTask createLogicalTask(FileSystem fs, Path workDir, JobConf jobConf, int mapId, Path mapInput, TezUmbilical umbilical, String dagName, String vertexName, List<InputSpec> inputSpecs, List<OutputSpec> outputSpecs, TezSharedExecutor sharedExecutor) throws Exception {
jobConf.setInputFormat(SequenceFileInputFormat.class);
ProcessorDescriptor mapProcessorDesc = ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));
Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>();
TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 0, mapId, 0), dagName, vertexName, -1, mapProcessorDesc, inputSpecs, outputSpecs, null, null);
Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
String auxiliaryService = jobConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
serviceConsumerMetadata.put(auxiliaryService, ShuffleUtils.convertJobTokenToBytes(shuffleToken));
Map<String, String> envMap = new HashMap<String, String>();
ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000);
AuxiliaryServiceHelper.setServiceDataIntoEnv(auxiliaryService, shufflePortBb, envMap);
LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, jobConf, new String[] { workDir.toString() }, umbilical, serviceConsumerMetadata, envMap, HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
return task;
}
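For reference, this helper is invoked by the TestReduceProcessor example that follows; the call looks like this (variables and arguments taken from that test):
TestUmbilical testUmbilical = new TestUmbilical();
TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, testUmbilical, dagName, mapVertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
mapTask.initialize();
mapTask.run();
mapTask.close();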
Use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.
The class TestReduceProcessor, method testReduceProcessor.
@Test(timeout = 5000)
public void testReduceProcessor() throws Exception {
final String dagName = "mrdag0";
String mapVertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
String reduceVertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
JobConf jobConf = new JobConf(defaultConf);
setUpJobConf(jobConf);
MRHelpers.translateMRConfToTez(jobConf);
jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
Path mapInput = new Path(workDir, "map0");
MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
// Run a map
TestUmbilical testUmbilical = new TestUmbilical();
TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, testUmbilical, dagName, mapVertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
mapTask.initialize();
mapTask.run();
mapTask.close();
// One VertexManagerEvent and one CompositeDataMovementEvent are expected
Assert.assertEquals(2, testUmbilical.getEvents().size());
Assert.assertEquals(EventType.VERTEX_MANAGER_EVENT, testUmbilical.getEvents().get(0).getEventType());
Assert.assertEquals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT, testUmbilical.getEvents().get(1).getEventType());
CompositeDataMovementEvent cdmEvent = (CompositeDataMovementEvent) testUmbilical.getEvents().get(1).getEvent();
Assert.assertEquals(1, cdmEvent.getCount());
DataMovementEvent dme = cdmEvent.getEvents().iterator().next();
dme.setTargetIndex(0);
LOG.info("Starting reduce...");
JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(dagName));
JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager();
Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>(identifier, jobTokenSecretManager);
shuffleToken.setService(identifier.getJobId());
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
ProcessorDescriptor reduceProcessorDesc = ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));
InputSpec reduceInputSpec = new InputSpec(mapVertexName, InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
OutputSpec reduceOutputSpec = new OutputSpec("NullDestinationVertex", OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
// Now run a reduce
TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 1, 0, 0), dagName, reduceVertexName, -1, reduceProcessorDesc, Collections.singletonList(reduceInputSpec), Collections.singletonList(reduceOutputSpec), null, null);
Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
String auxiliaryService = jobConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
serviceConsumerMetadata.put(auxiliaryService, ShuffleUtils.convertJobTokenToBytes(shuffleToken));
Map<String, String> serviceProviderEnvMap = new HashMap<String, String>();
ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000);
AuxiliaryServiceHelper.setServiceDataIntoEnv(auxiliaryService, shufflePortBb, serviceProviderEnvMap);
LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, jobConf, new String[] { workDir.toString() }, new TestUmbilical(), serviceConsumerMetadata, serviceProviderEnvMap, HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
List<Event> destEvents = new LinkedList<Event>();
destEvents.add(dme);
task.initialize();
OrderedGroupedInputLegacy sortedOut = (OrderedGroupedInputLegacy) task.getInputs().values().iterator().next();
sortedOut.handleEvents(destEvents);
task.run();
task.close();
sharedExecutor.shutdownNow();
// MRTask mrTask = (MRTask)t.getProcessor();
// TODO NEWTEZ Verify the partitioner has not been created
// Likely not applicable anymore.
// Assert.assertNull(mrTask.getPartitioner());
// Only a task commit happens, hence the data is still in the temporary directory.
Path reduceOutputDir = new Path(new Path(workDir, "output"), "_temporary/0/" + IDConverter.toMRTaskIdForOutput(TezTestUtils.getMockTaskId(0, 1, 0)));
Path reduceOutputFile = new Path(reduceOutputDir, "part-v001-o000-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(localFs, reduceOutputFile, jobConf);
LongWritable key = new LongWritable();
Text value = new Text();
long prev = Long.MIN_VALUE;
while (reader.next(key, value)) {
  if (prev != Long.MIN_VALUE) {
    // keys must come back in sorted (strictly increasing) order
    Assert.assertTrue(prev < key.get());
  }
  prev = key.get();
}
reader.close();
}
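Both examples above publish the shuffle port into the service environment with AuxiliaryServiceHelper.setServiceDataIntoEnv. On the consuming side the same helper can read it back; a rough sketch only, since the actual shuffle input code path is not part of these examples:
ByteBuffer portData = AuxiliaryServiceHelper.getServiceDataFromEnv(auxiliaryService, serviceProviderEnvMap);
int shufflePort = portData.getInt(0); // 8000 in this test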
Use of org.apache.tez.hadoop.shim.DefaultHadoopShim in project tez by apache.
The class TestTaskExecution2, method createTaskRunner.
private TezTaskRunner2 createTaskRunner(ApplicationId appId, TaskExecutionTestHelpers.TezTaskUmbilicalForTest umbilical, TaskReporter taskReporter, ListeningExecutorService executor, String processorClass, byte[] processorConf, boolean testRunner, boolean updateSysCounters) throws IOException {
TezConfiguration tezConf = new TezConfiguration(defaultConf);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
Path testDir = new Path(workDir, UUID.randomUUID().toString());
String[] localDirs = new String[] { testDir.toString() };
TezDAGID dagId = TezDAGID.getInstance(appId, 1);
TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 1);
ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(processorClass).setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
TaskSpec taskSpec = new TaskSpec(taskAttemptId, "dagName", "vertexName", -1, processorDescriptor, new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null, null);
TezExecutors sharedExecutor = new TezSharedExecutor(tezConf);
TezTaskRunner2 taskRunner;
if (testRunner) {
taskRunner = new TezTaskRunner2ForTest(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, sharedExecutor);
} else {
taskRunner = new TezTaskRunner2(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, new DefaultHadoopShim(), sharedExecutor);
}
return taskRunner;
}
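A sketch of how a test in this class might use the helper above; the processor class and result handling here are illustrative assumptions, not taken from this snippet:
// TestProcessor stands in for whatever processor class the calling test supplies
TezTaskRunner2 runner = createTaskRunner(appId, umbilical, taskReporter, executor, TestProcessor.class.getName(), processorConf, false /* testRunner */, true /* updateSysCounters */);
TaskRunner2Result result = runner.run();
// the returned TaskRunner2Result can then be checked against the expected end state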