Use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.
From the class TestMapProcessor, method testMapProcessor:
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
String dagName = "mrdag0";
String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
JobConf jobConf = new JobConf(defaultConf);
setUpJobConf(jobConf);
MRHelpers.translateMRConfToTez(jobConf);
jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
Path mapInput = new Path(workDir, "map0");
MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
task.initialize();
task.run();
task.close();
sharedExecutor.shutdownNow();
OutputContext outputContext = task.getOutputContexts().iterator().next();
TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier(), outputContext.getDagIdentifier());
// TODO NEWTEZ FIXME OutputCommitter verification
// MRTask mrTask = (MRTask)t.getProcessor();
// Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
// .getCommitter().getClass().getName());
// t.close();
Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
LOG.info("mapOutputFile = " + mapOutputFile);
IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
LongWritable key = new LongWritable();
Text value = new Text();
DataInputBuffer keyBuf = new DataInputBuffer();
DataInputBuffer valueBuf = new DataInputBuffer();
long prev = Long.MIN_VALUE;
while (reader.nextRawKey(keyBuf)) {
  reader.nextRawValue(valueBuf);
  key.readFields(keyBuf);
  value.readFields(valueBuf);
  if (prev != Long.MIN_VALUE) {
    assert (prev <= key.get());
    prev = key.get();
  }
  LOG.info("key = " + key.get() + "; value = " + value);
}
reader.close();
}
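The core pattern this test exercises is the spec construction near the top: an InputSpec or OutputSpec pairs a vertex name with a descriptor whose user payload carries the serialized job configuration, plus a physical edge count. Below is a minimal sketch of that shape, assuming a plain Configuration payload; the class and method names are illustrative and not part of Tez, and note that for MRInputLegacy the test above wraps the configuration in an MRInputUserPayloadProto rather than calling TezUtils.createUserPayloadFromConf directly.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.InputDescriptor;
import org.apache.tez.dag.api.OutputDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.runtime.api.impl.InputSpec;
import org.apache.tez.runtime.api.impl.OutputSpec;

// Hypothetical helper, not part of Tez: shows only the descriptor/payload shape.
public class SpecBuilderSketch {

  // Pair a source vertex name with an Input implementation class whose user payload
  // carries the serialized configuration; the final argument is the physical edge count.
  public static InputSpec buildInputSpec(String sourceVertex, String inputClassName,
      Configuration conf) throws IOException {
    UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
    return new InputSpec(sourceVertex,
        InputDescriptor.create(inputClassName).setUserPayload(payload), 1);
  }

  // Same shape on the output side, keyed by the destination vertex name.
  public static OutputSpec buildOutputSpec(String destVertex, String outputClassName,
      Configuration conf) throws IOException {
    UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
    return new OutputSpec(destVertex,
        OutputDescriptor.create(outputClassName).setUserPayload(payload), 1);
  }
}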
Use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.
From the class TestReduceProcessor, method testReduceProcessor:
@Test(timeout = 5000)
public void testReduceProcessor() throws Exception {
final String dagName = "mrdag0";
String mapVertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
String reduceVertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
JobConf jobConf = new JobConf(defaultConf);
setUpJobConf(jobConf);
MRHelpers.translateMRConfToTez(jobConf);
jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
Path mapInput = new Path(workDir, "map0");
MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);
InputSpec mapInputSpec = new InputSpec("NullSrcVertex", InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build().toByteArray()))), 1);
OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
// Run a map
TestUmbilical testUmbilical = new TestUmbilical();
TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput, testUmbilical, dagName, mapVertexName, Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec), sharedExecutor);
mapTask.initialize();
mapTask.run();
mapTask.close();
// One VME, One DME
Assert.assertEquals(2, testUmbilical.getEvents().size());
Assert.assertEquals(EventType.VERTEX_MANAGER_EVENT, testUmbilical.getEvents().get(0).getEventType());
Assert.assertEquals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT, testUmbilical.getEvents().get(1).getEventType());
CompositeDataMovementEvent cdmEvent = (CompositeDataMovementEvent) testUmbilical.getEvents().get(1).getEvent();
Assert.assertEquals(1, cdmEvent.getCount());
DataMovementEvent dme = cdmEvent.getEvents().iterator().next();
dme.setTargetIndex(0);
LOG.info("Starting reduce...");
JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(dagName));
JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager();
Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>(identifier, jobTokenSecretManager);
shuffleToken.setService(identifier.getJobId());
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir, "localized-resources").toUri().toString());
jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
ProcessorDescriptor reduceProcessorDesc = ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));
InputSpec reduceInputSpec = new InputSpec(mapVertexName, InputDescriptor.create(OrderedGroupedInputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
OutputSpec reduceOutputSpec = new OutputSpec("NullDestinationVertex", OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);
// Now run a reduce
TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 1, 0, 0), dagName, reduceVertexName, -1, reduceProcessorDesc, Collections.singletonList(reduceInputSpec), Collections.singletonList(reduceOutputSpec), null, null);
Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
String auxiliaryService = jobConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT);
serviceConsumerMetadata.put(auxiliaryService, ShuffleUtils.convertJobTokenToBytes(shuffleToken));
Map<String, String> serviceProviderEnvMap = new HashMap<String, String>();
ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000);
AuxiliaryServiceHelper.setServiceDataIntoEnv(auxiliaryService, shufflePortBb, serviceProviderEnvMap);
LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, jobConf, new String[] { workDir.toString() }, new TestUmbilical(), serviceConsumerMetadata, serviceProviderEnvMap, HashMultimap.<String, String>create(), null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor);
List<Event> destEvents = new LinkedList<Event>();
destEvents.add(dme);
task.initialize();
OrderedGroupedInputLegacy sortedOut = (OrderedGroupedInputLegacy) task.getInputs().values().iterator().next();
sortedOut.handleEvents(destEvents);
task.run();
task.close();
sharedExecutor.shutdownNow();
// MRTask mrTask = (MRTask)t.getProcessor();
// TODO NEWTEZ Verify the partitioner has not been created
// Likely not applicable anymore.
// Assert.assertNull(mrTask.getPartitioner());
// Only a task commit happens, hence the data is still in the temporary directory.
Path reduceOutputDir = new Path(new Path(workDir, "output"), "_temporary/0/" + IDConverter.toMRTaskIdForOutput(TezTestUtils.getMockTaskId(0, 1, 0)));
Path reduceOutputFile = new Path(reduceOutputDir, "part-v001-o000-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(localFs, reduceOutputFile, jobConf);
LongWritable key = new LongWritable();
Text value = new Text();
long prev = Long.MIN_VALUE;
while (reader.next(key, value)) {
  if (prev != Long.MIN_VALUE) {
    Assert.assertTrue(prev < key.get());
    prev = key.get();
  }
}
reader.close();
}
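A side note on the verification loops in both tests above: prev is only advanced inside the guard that requires prev to differ from Long.MIN_VALUE, but prev starts at Long.MIN_VALUE and is never advanced outside the guard, so the ordering assertion is effectively never exercised. A standalone sketch of the intended check is below, assuming (LongWritable, Text) records; the class and method names are illustrative, and it checks non-decreasing order, whereas the reduce test intends strictly increasing keys.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical helper, not part of the Tez tests.
public class SortedOutputCheckSketch {

  // Read a SequenceFile of (LongWritable, Text) records and verify the keys are
  // non-decreasing. Unlike the loops above, prev is advanced on every record, so the
  // comparison runs for each adjacent pair of keys.
  public static void assertKeysSorted(FileSystem fs, Path file, Configuration conf)
      throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try {
      LongWritable key = new LongWritable();
      Text value = new Text();
      long prev = Long.MIN_VALUE;
      boolean first = true;
      while (reader.next(key, value)) {
        if (!first && key.get() < prev) {
          throw new IllegalStateException("Keys out of order: " + prev + " then " + key.get());
        }
        prev = key.get();
        first = false;
      }
    } finally {
      reader.close();
    }
  }
}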
Use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.
From the class LogicalIOProcessorRuntimeTask, method initialize:
/**
* @throws Exception
*/
public void initialize() throws Exception {
Preconditions.checkState(this.state.get() == State.NEW, "Already initialized");
this.state.set(State.INITED);
this.processorContext = createProcessorContext();
this.processor = createProcessor(processorDescriptor.getClassName(), processorContext);
if (initializeProcessorFirst || initializeProcessorIOSerially) {
  // Initialize processor in the current thread.
  initializeLogicalIOProcessor();
}
int numTasks = 0;
int inputIndex = 0;
for (InputSpec inputSpec : taskSpec.getInputs()) {
  this.initializerCompletionService.submit(new InitializeInputCallable(inputSpec, inputIndex++));
  numTasks++;
}
int outputIndex = 0;
for (OutputSpec outputSpec : taskSpec.getOutputs()) {
  this.initializerCompletionService.submit(new InitializeOutputCallable(outputSpec, outputIndex++));
  numTasks++;
}
if (!(initializeProcessorFirst || initializeProcessorIOSerially)) {
  // Initialize processor in the current thread.
  initializeLogicalIOProcessor();
}
int completedTasks = 0;
while (completedTasks < numTasks) {
  LOG.info("Waiting for " + (numTasks - completedTasks) + " initializers to finish");
  Future<Void> future = initializerCompletionService.take();
  try {
    future.get();
    completedTasks++;
  } catch (ExecutionException e) {
    if (e.getCause() instanceof Exception) {
      throw (Exception) e.getCause();
    } else {
      throw new Exception(e);
    }
  }
}
LOG.info("All initializers finished");
// group inputs depend on inputs being initialized, so this must be done after.
initializeGroupInputs();
// Register the groups so that appropriate calls can be made.
this.inputReadyTracker.setGroupedInputs(groupInputsMap == null ? null : groupInputsMap.values());
// Grouped input start will be controlled by the start of the GroupedInput
// Construct the set of groupedInputs up front so that start is not invoked on them.
Set<String> groupInputs = Sets.newHashSet();
// first add the group inputs
if (groupInputSpecs != null && !groupInputSpecs.isEmpty()) {
  for (GroupInputSpec groupInputSpec : groupInputSpecs) {
    runInputMap.put(groupInputSpec.getGroupName(), groupInputsMap.get(groupInputSpec.getGroupName()));
    groupInputs.addAll(groupInputSpec.getGroupVertices());
  }
}
initialMemoryDistributor.makeInitialAllocations();
LOG.info("Starting Inputs/Outputs");
int numAutoStarts = 0;
for (InputSpec inputSpec : inputSpecs) {
  if (groupInputs.contains(inputSpec.getSourceVertexName())) {
    LOG.info("Ignoring " + inputSpec.getSourceVertexName() + " for start, since it will be controlled via its Group");
    continue;
  }
  if (!inputAlreadyStarted(taskSpec.getVertexName(), inputSpec.getSourceVertexName())) {
    startedInputsMap.put(taskSpec.getVertexName(), inputSpec.getSourceVertexName());
    numAutoStarts++;
    this.initializerCompletionService.submit(new StartInputCallable(inputsMap.get(inputSpec.getSourceVertexName()), inputSpec.getSourceVertexName()));
    LOG.info("Input: " + inputSpec.getSourceVertexName() + " being auto started by the framework. Subsequent instances will not be auto-started");
  }
}
if (groupInputSpecs != null) {
  for (GroupInputSpec group : groupInputSpecs) {
    if (!inputAlreadyStarted(taskSpec.getVertexName(), group.getGroupName())) {
      numAutoStarts++;
      this.initializerCompletionService.submit(new StartInputCallable(groupInputsMap.get(group.getGroupName()), group.getGroupName()));
      LOG.info("InputGroup: " + group.getGroupName() + " being auto started by the framework. Subsequent instances will not be auto-started");
    }
  }
}
// Shutdown after all tasks complete.
this.initializerExecutor.shutdown();
completedTasks = 0;
LOG.info("Num IOs determined for AutoStart: " + numAutoStarts);
while (completedTasks < numAutoStarts) {
  LOG.info("Waiting for " + (numAutoStarts - completedTasks) + " IOs to start");
  Future<Void> future = initializerCompletionService.take();
  try {
    future.get();
    completedTasks++;
  } catch (ExecutionException e) {
    if (e.getCause() instanceof Exception) {
      throw (Exception) e.getCause();
    } else {
      throw new Exception(e);
    }
  }
}
LOG.info("AutoStartComplete");
// then add the non-grouped inputs
for (InputSpec inputSpec : inputSpecs) {
  if (!groupInputs.contains(inputSpec.getSourceVertexName())) {
    LogicalInput input = inputsMap.get(inputSpec.getSourceVertexName());
    runInputMap.put(inputSpec.getSourceVertexName(), input);
  }
}
for (OutputSpec outputSpec : outputSpecs) {
  LogicalOutput output = outputsMap.get(outputSpec.getDestinationVertexName());
  String outputName = outputSpec.getDestinationVertexName();
  runOutputMap.put(outputName, output);
}
// TODO Maybe close initialized inputs / outputs in case of failure to
// initialize.
startRouterThread();
}
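initialize() drives input, output, and processor setup through a CompletionService: it submits one callable per InputSpec and OutputSpec, counts the submissions, then drains the service and unwraps any ExecutionException into the underlying cause. A self-contained sketch of that pattern is below; the class and method names are illustrative, and the executor here is local to the sketch rather than the task's shared initializer pool.

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

// Hypothetical helper, not part of Tez.
public class ParallelInitSketch {

  // Submit every initializer, then drain the completion service in completion order,
  // rethrowing the first failure (mirroring how initialize() unwraps ExecutionException).
  public static void runAll(List<Callable<Void>> initializers) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(Math.max(1, initializers.size()));
    CompletionService<Void> completionService = new ExecutorCompletionService<Void>(executor);
    try {
      for (Callable<Void> initializer : initializers) {
        completionService.submit(initializer);
      }
      for (int completed = 0; completed < initializers.size(); completed++) {
        Future<Void> future = completionService.take();
        try {
          future.get();
        } catch (ExecutionException e) {
          throw (e.getCause() instanceof Exception) ? (Exception) e.getCause() : new Exception(e);
        }
      }
    } finally {
      executor.shutdownNow();
    }
  }
}

Draining through the CompletionService, as the real method does, surfaces the first failed initializer as soon as it completes instead of blocking on futures in submission order.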
Use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.
From the class TestTaskExecution2, method createTaskRunner:
private TezTaskRunner2 createTaskRunner(ApplicationId appId, TaskExecutionTestHelpers.TezTaskUmbilicalForTest umbilical, TaskReporter taskReporter, ListeningExecutorService executor, String processorClass, byte[] processorConf, boolean testRunner, boolean updateSysCounters) throws IOException {
TezConfiguration tezConf = new TezConfiguration(defaultConf);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
Path testDir = new Path(workDir, UUID.randomUUID().toString());
String[] localDirs = new String[] { testDir.toString() };
TezDAGID dagId = TezDAGID.getInstance(appId, 1);
TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, 1);
ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create(processorClass).setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
TaskSpec taskSpec = new TaskSpec(taskAttemptId, "dagName", "vertexName", -1, processorDescriptor, new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null, null);
TezExecutors sharedExecutor = new TezSharedExecutor(tezConf);
TezTaskRunner2 taskRunner;
if (testRunner) {
  taskRunner = new TezTaskRunner2ForTest(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, sharedExecutor);
} else {
  taskRunner = new TezTaskRunner2(tezConf, ugi, localDirs, taskSpec, 1, new HashMap<String, ByteBuffer>(), new HashMap<String, String>(), HashMultimap.<String, String>create(), taskReporter, executor, null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), updateSysCounters, new DefaultHadoopShim(), sharedExecutor);
}
return taskRunner;
}
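The fixed part of createTaskRunner() is the ID and spec plumbing: a TezTaskAttemptID is built down the ApplicationId to DAG to Vertex to Task to Attempt hierarchy, and the TaskSpec gets empty input and output lists because these tests exercise only the processor. A sketch of just that piece follows, with an illustrative class name and the same hard-coded dag/vertex names and constructor arguments as the method above.

import java.nio.ByteBuffer;
import java.util.ArrayList;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
import org.apache.tez.dag.records.TezVertexID;
import org.apache.tez.runtime.api.impl.InputSpec;
import org.apache.tez.runtime.api.impl.OutputSpec;
import org.apache.tez.runtime.api.impl.TaskSpec;

// Hypothetical helper, not part of Tez.
public class TaskSpecSketch {

  // Build a TaskSpec with no inputs or outputs, as createTaskRunner() does: the ID
  // hierarchy runs ApplicationId -> DAG -> Vertex -> Task -> TaskAttempt.
  public static TaskSpec emptyIoTaskSpec(ApplicationId appId, String processorClass,
      byte[] processorConf) {
    TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(
        TezTaskID.getInstance(TezVertexID.getInstance(TezDAGID.getInstance(appId, 1), 1), 1), 1);
    ProcessorDescriptor processor = ProcessorDescriptor.create(processorClass)
        .setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
    return new TaskSpec(attemptId, "dagName", "vertexName", -1, processor,
        new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null, null);
  }
}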
Use of org.apache.tez.runtime.api.impl.InputSpec in project tez by apache.
From the class TestMockDAGAppMaster, method testBasicCounters:
@Test(timeout = 10000)
public void testBasicCounters() throws Exception {
TezConfiguration tezconf = new TezConfiguration(defaultConf);
MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null, false, false);
tezClient.start();
final String vAName = "A";
final String vBName = "B";
final String procCounterName = "Proc";
final String globalCounterName = "Global";
DAG dag = DAG.create("testBasicCounters");
Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 10);
Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 1);
dag.addVertex(vA).addVertex(vB).addEdge(Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
TezCounters temp = new TezCounters();
temp.findCounter(new String(globalCounterName), new String(globalCounterName)).increment(1);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
DataOutput out = new DataOutputStream(bos);
temp.write(out);
final byte[] payload = bos.toByteArray();
MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
mockLauncher.startScheduling(false);
mockApp.countersDelegate = new CountersDelegate() {
  @Override
  public TezCounters getCounters(TaskSpec taskSpec) {
    String vName = taskSpec.getVertexName();
    TezCounters counters = new TezCounters();
    final DataInputByteBuffer in = new DataInputByteBuffer();
    in.reset(ByteBuffer.wrap(payload));
    try {
      // this ensures that the serde code path is covered.
      // the internal merges of counters cover the constructor code path.
      counters.readFields(in);
    } catch (IOException e) {
      Assert.fail(e.getMessage());
    }
    counters.findCounter(vName, procCounterName).increment(1);
    for (OutputSpec output : taskSpec.getOutputs()) {
      counters.findCounter(vName, output.getDestinationVertexName()).increment(1);
    }
    for (InputSpec input : taskSpec.getInputs()) {
      counters.findCounter(vName, input.getSourceVertexName()).increment(1);
    }
    return counters;
  }
};
mockApp.doSleep = false;
DAGClient dagClient = tezClient.submitDAG(dag);
mockLauncher.waitTillContainersLaunched();
DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
mockLauncher.startScheduling(true);
DAGStatus status = dagClient.waitForCompletion();
Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
TezCounters counters = dagImpl.getAllCounters();
String osName = System.getProperty("os.name").toLowerCase(Locale.ENGLISH);
if (SystemUtils.IS_OS_LINUX) {
Assert.assertTrue(counters.findCounter(DAGCounter.AM_CPU_MILLISECONDS).getValue() > 0);
}
// verify processor counters
Assert.assertEquals(10, counters.findCounter(vAName, procCounterName).getValue());
Assert.assertEquals(1, counters.findCounter(vBName, procCounterName).getValue());
// verify edge counters
Assert.assertEquals(10, counters.findCounter(vAName, vBName).getValue());
Assert.assertEquals(1, counters.findCounter(vBName, vAName).getValue());
// verify global counters
Assert.assertEquals(11, counters.findCounter(globalCounterName, globalCounterName).getValue());
VertexImpl vAImpl = (VertexImpl) dagImpl.getVertex(vAName);
VertexImpl vBImpl = (VertexImpl) dagImpl.getVertex(vBName);
TezCounters vACounters = vAImpl.getAllCounters();
TezCounters vBCounters = vBImpl.getAllCounters();
String vACounterName = vACounters.findCounter(globalCounterName, globalCounterName).getName();
String vBCounterName = vBCounters.findCounter(globalCounterName, globalCounterName).getName();
if (vACounterName != vBCounterName) {
Assert.fail("String counter name objects dont match despite interning.");
}
CounterGroup vaGroup = vACounters.getGroup(globalCounterName);
String vaGrouName = vaGroup.getName();
CounterGroup vBGroup = vBCounters.getGroup(globalCounterName);
String vBGrouName = vBGroup.getName();
if (vaGrouName != vBGrouName) {
Assert.fail("String group name objects dont match despite interning.");
}
tezClient.stop();
}
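The CountersDelegate above deliberately round-trips the counter payload through Writable serialization on every task so the serde code path is covered. That round trip in isolation, with an illustrative helper name, might look like the sketch below.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.tez.common.counters.TezCounters;

// Hypothetical helper, not part of Tez.
public class CounterSerdeSketch {

  // Serialize a TezCounters instance with Writable semantics and read it back,
  // mirroring the round trip the CountersDelegate above performs for every task.
  public static TezCounters roundTrip(TezCounters in) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    in.write(new DataOutputStream(bos));
    DataInputByteBuffer din = new DataInputByteBuffer();
    din.reset(ByteBuffer.wrap(bos.toByteArray()));
    TezCounters out = new TezCounters();
    out.readFields(din);
    return out;
  }
}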