Example use of org.apache.tez.dag.api.InputDescriptor in the Apache Tez project.
From the class VertexInitializedEvent, method toProto.
/**
 * Serializes this event into its recovery protobuf representation.
 *
 * <p>Each entry of {@code additionalInputs} is written as a root-input proto (name, the
 * controller descriptor when one is present, and the IO descriptor). Each entry of
 * {@code initGeneratedEvents} is appended as an event proto. A {@code null} collection is
 * skipped. The scalar fields are set last.
 *
 * @return the populated {@code VertexInitializedProto}
 * @throws IOException declared by the signature; which conversion raises it is not visible
 *     here (presumably the event conversion; confirm against {@code TezEventUtils})
 */
public RecoveryProtos.VertexInitializedProto toProto() throws IOException {
  VertexInitializedProto.Builder proto = VertexInitializedProto.newBuilder();

  if (additionalInputs != null) {
    for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput : additionalInputs.values()) {
      RootInputLeafOutputProto.Builder rootInputProto = RootInputLeafOutputProto.newBuilder();
      rootInputProto.setName(rootInput.getName());
      // The controller (initializer) descriptor is optional; the IO descriptor is always written.
      if (rootInput.getControllerDescriptor() != null) {
        rootInputProto.setControllerDescriptor(
            DagTypeConverters.convertToDAGPlan(rootInput.getControllerDescriptor()));
      }
      rootInputProto.setIODescriptor(DagTypeConverters.convertToDAGPlan(rootInput.getIODescriptor()));
      proto.addInputs(rootInputProto.build());
    }
  }

  if (initGeneratedEvents != null) {
    for (TezEvent generated : initGeneratedEvents) {
      proto.addInitGeneratedEvents(TezEventUtils.toProto(generated));
    }
  }

  proto.setVertexId(vertexID.toString());
  proto.setVertexName(vertexName);
  proto.setInitRequestedTime(initRequestedTime);
  proto.setInitTime(initedTime);
  proto.setNumTasks(numTasks);
  return proto.build();
}
Example use of org.apache.tez.dag.api.InputDescriptor in the Apache Tez project.
From the class CartesianProduct, method createDAG.
/**
 * Builds the three-vertex "CrossProduct" DAG.
 *
 * <p>{@code VERTEX1} and {@code VERTEX2} run {@code TokenProcessor} and both read the same fake
 * data source registered under {@code INPUT}. {@code VERTEX3} runs {@code JoinProcessor}, writes
 * to a fake data sink registered under {@code OUTPUT}, and is managed by
 * {@code CartesianProductVertexManager}. Both upstream vertices connect to {@code VERTEX3}
 * through the same custom edge property, routed by {@code CartesianProductEdgeManager}. The
 * vertex manager and the edge manager receive the same payload, produced from a
 * {@code CartesianProductConfig} over {@code sourceVertices}.
 *
 * @param tezConf configuration used to serialize the cartesian product config into a payload
 * @return the assembled DAG
 * @throws IOException declared by the signature; presumably raised by payload creation (confirm)
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // One fake source descriptor shared by both token vertices.
  DataSourceDescriptor fakeSource = DataSourceDescriptor.create(
      InputDescriptor.create(FakeInput.class.getName()),
      InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
      null);

  Vertex firstTokenVertex =
      Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  firstTokenVertex.addDataSource(INPUT, fakeSource);

  Vertex secondTokenVertex =
      Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  secondTokenVertex.addDataSource(INPUT, fakeSource);

  DataSinkDescriptor fakeSink = DataSinkDescriptor.create(
      OutputDescriptor.create(FakeOutput.class.getName()),
      OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
      null);

  // The same serialized config is handed to the vertex manager and the edge manager.
  UserPayload cartesianPayload =
      new CartesianProductConfig(Arrays.asList(sourceVertices)).toUserPayload(tezConf);

  Vertex joinVertex =
      Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  joinVertex.addDataSink(OUTPUT, fakeSink);
  joinVertex.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
          .setUserPayload(cartesianPayload));

  EdgeManagerPluginDescriptor edgeManager =
      EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  edgeManager.setUserPayload(cartesianPayload);

  EdgeProperty customEdge = UnorderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), RoundRobinPartitioner.class.getName())
      .build()
      .createDefaultCustomEdgeProperty(edgeManager);

  DAG dag = DAG.create("CrossProduct");
  dag.addVertex(firstTokenVertex).addVertex(secondTokenVertex).addVertex(joinVertex);
  dag.addEdge(Edge.create(firstTokenVertex, joinVertex, customEdge));
  return dag.addEdge(Edge.create(secondTokenVertex, joinVertex, customEdge));
}
Example use of org.apache.tez.dag.api.InputDescriptor in the Apache Hive project.
From the class DagUtils, method createVertex.
/**
 * Helper function to create a Vertex from MapWork.
 *
 * <p>The method caches the map work, creates the temporary directories, chooses how input
 * splits are produced, and then assembles the vertex:
 * <ul>
 *   <li>when {@code HIVE_AM_SPLIT_GENERATION} is enabled, the data source is built from
 *       {@code MRInputLegacy} (or {@code MultiMRInput} for
 *       {@code MULTI_INPUT_UNINITIALIZED_EDGES} custom-input vertices), with grouping done by
 *       {@code HiveSplitGenerator} only for a non-custom-input vertex whose input format is
 *       exactly {@code HiveInputFormat};</li>
 *   <li>otherwise splits are generated here in memory and shipped in the input payload, and the
 *       vertex is created with the resulting task count.</li>
 * </ul>
 *
 * @param conf job configuration; it is mutated in place (input format class, input name,
 *     vectorization flags, serialized plan)
 * @param mapWork the map work the vertex executes; a {@code MergeFileWork} additionally gets its
 *     temporary output directory created and uses {@code MergeFileTezProcessor}
 * @param fs not used by this method
 * @param mrScratchDir scratch directory handed to the plan caching/serialization helpers
 * @param ctx not used by this method
 * @param vertexType used to decide whether the vertex has custom input
 * @param localResources task-local files attached to the vertex
 * @return the configured vertex, with exactly one data source named after the work's single alias
 * @throws RuntimeException if the merge temporary output directory cannot be checked or created
 * @throws Exception propagated from the helper calls made here
 */
private Vertex createVertex(JobConf conf, MapWork mapWork, FileSystem fs, Path mrScratchDir, Context ctx, VertexType vertexType, Map<String, LocalResource> localResources) throws Exception {
  // set up the operator plan
  Utilities.cacheMapWork(conf, mapWork, mrScratchDir);
  // create the directories FileSinkOperators need
  Utilities.createTmpDirs(conf, mapWork);

  // use tez to combine splits
  boolean groupSplitsInInputInitializer;
  DataSourceDescriptor dataSource;
  // -1 leaves the task count undecided here; it is only set when splits are generated locally.
  int numTasks = -1;
  @SuppressWarnings("rawtypes") Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class);
  boolean vertexHasCustomInput = VertexType.isCustomInputType(vertexType);
  LOG.info("Vertex has custom input? " + vertexHasCustomInput);
  if (vertexHasCustomInput) {
    groupSplitsInInputInitializer = false;
    // grouping happens in execution phase. The input payload should not enable grouping here,
    // it will be enabled in the CustomVertex.
    inputFormatClass = HiveInputFormat.class;
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
    // this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
  } else {
    // group in the input initializer only when the input format is exactly HiveInputFormat
    groupSplitsInInputInitializer = inputFormatClass == HiveInputFormat.class;
  }

  if (mapWork instanceof MergeFileWork) {
    Path outputPath = ((MergeFileWork) mapWork).getOutputDir();
    // prepare the tmp output directory. The output tmp directory should
    // exist before jobClose (before renaming after job completion)
    Path tempOutPath = Utilities.toTempPath(outputPath);
    try {
      FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
      if (!tmpOutFS.exists(tempOutPath)) {
        tmpOutFS.mkdirs(tempOutPath);
      }
    } catch (IOException e) {
      // Report the directory that was actually being created (the temp path, not the final
      // output path) and keep the original exception as the cause.
      throw new RuntimeException("Can't make path " + tempOutPath + " : " + e.getMessage(), e);
    }
  }

  // remember mapping of plan to input
  conf.set(Utilities.INPUT_NAME, mapWork.getName());
  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
    // set up the operator plan. (before setting up splits on the AM)
    Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
    // splits are generated on the AM, so only the right input plugin needs to be selected here
    if (groupSplitsInInputInitializer) {
      // Not setting a payload, since the MRInput payload is the same and can be accessed.
      InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(HiveSplitGenerator.class.getName());
      dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true).setCustomInitializerDescriptor(descriptor).build();
    } else if (vertexHasCustomInput && vertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) {
      // Not HiveInputFormat, or a custom VertexManager will take care of grouping splits
      // SMB Join.
      dataSource = MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
    } else {
      // Not HiveInputFormat, or a custom VertexManager will take care of grouping splits
      dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
    }
  } else {
    // Setup client side split generation.
    // we need to set this, because with HS2 and client side split
    // generation we end up not finding the map work. This is
    // because of thread local madness (tez split generation is
    // multi-threaded - HS2 plan cache uses thread locals). Setting
    // VECTOR_MODE/USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead
    // of the map work.
    conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode());
    conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat());
    InputSplitInfo inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, 0);
    InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName());
    // The input payload carries both the serialized configuration and the generated splits.
    InputDescriptor inputDescriptor = InputDescriptor.create(MRInputLegacy.class.getName()).setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(conf)).setSplits(inputSplitInfo.getSplitsProto()).build().toByteString().asReadOnlyByteBuffer()));
    dataSource = DataSourceDescriptor.create(inputDescriptor, descriptor, null);
    numTasks = inputSplitInfo.getNumTasks();
    // set up the operator plan. (after generating splits - that changes configs)
    Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
  }

  UserPayload serializedConf = TezUtils.createUserPayloadFromConf(conf);
  String procClassName = MapTezProcessor.class.getName();
  if (mapWork instanceof MergeFileWork) {
    procClassName = MergeFileTezProcessor.class.getName();
  }
  VertexExecutionContext executionContext = createVertexExecutionContext(mapWork);

  // finally create the vertex
  Vertex map = Vertex.create(mapWork.getName(), ProcessorDescriptor.create(procClassName).setUserPayload(serializedConf), numTasks, getContainerResource(conf));
  map.setTaskEnvironment(getContainerEnvironment(conf, true));
  map.setExecutionContext(executionContext);
  map.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
  assert mapWork.getAliasToWork().keySet().size() == 1;
  // Add the actual source input
  String alias = mapWork.getAliasToWork().keySet().iterator().next();
  map.addDataSource(alias, dataSource);
  map.addTaskLocalFiles(localResources);
  return map;
}
Example use of org.apache.tez.dag.api.InputDescriptor in the Apache Tez project.
From the class TestRootInputInitializerManager, method testCorrectUgiUsage.
/**
 * Runs a single {@code InputInitializerForUgiTest} initializer through a
 * {@code RootInputInitializerManager} created with a remote user UGI. After the initializer
 * signals that it has run, the test asserts that the UGIs it recorded as {@code ctorUgi} and
 * {@code initializeUgi} both equal that DAG UGI.
 */
@Test(timeout = 5000)
public void testCorrectUgiUsage() throws TezException, InterruptedException {
  // Collaborators: a vertex that hands out a mocked id, and an app context with a shim and handler.
  Vertex mockedVertex = mock(Vertex.class);
  doReturn(mock(TezVertexID.class)).when(mockedVertex).getVertexId();
  AppContext mockedContext = mock(AppContext.class);
  doReturn(new DefaultHadoopShim()).when(mockedContext).getHadoopShim();
  doReturn(mock(EventHandler.class)).when(mockedContext).getEventHandler();

  UserGroupInformation expectedUgi = UserGroupInformation.createRemoteUser("fakeuser");
  RootInputInitializerManager manager = new RootInputInitializerManager(
      mockedVertex, mockedContext, expectedUgi, mock(StateChangeNotifier.class));

  // A real initializer descriptor is required so the manager instantiates the test initializer.
  InputDescriptor inputDescriptor = mock(InputDescriptor.class);
  InputInitializerDescriptor initializerDescriptor =
      InputInitializerDescriptor.create(InputInitializerForUgiTest.class.getName());
  RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input =
      new RootInputLeafOutput<>("InputName", inputDescriptor, initializerDescriptor);

  manager.runInputInitializers(Collections.singletonList(input));
  InputInitializerForUgiTest.awaitInitialize();

  assertEquals(expectedUgi, InputInitializerForUgiTest.ctorUgi);
  assertEquals(expectedUgi, InputInitializerForUgiTest.initializeUgi);
}
Example use of org.apache.tez.dag.api.InputDescriptor in the Apache Tez project.
From the class TestRootInputInitializerManager, method testEventBeforeSuccess.
// Simple test: events are not delivered to the initializer until the source task succeeds.
// Also exercises the path where two attempts of the same task are reported as successful via
// the stateChangeNotifier (primarily a failure scenario, where a task moves back to running
// after success).
// Sequence under test: event1, success1, event2, success2.
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testEventBeforeSuccess() throws Exception {
  InputDescriptor inputDescriptor = mock(InputDescriptor.class);
  InputInitializerDescriptor initializerDescriptor = mock(InputInitializerDescriptor.class);
  RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input =
      new RootInputLeafOutput<>("InputName", inputDescriptor, initializerDescriptor);
  InputInitializer initializer = mock(InputInitializer.class);
  InputInitializerContext context = mock(InputInitializerContext.class);
  Vertex ownerVertex = mock(Vertex.class);
  StateChangeNotifier notifier = mock(StateChangeNotifier.class);
  AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS);
  RootInputInitializerManager.InitializerWrapper wrapper =
      new RootInputInitializerManager.InitializerWrapper(
          input, initializer, context, ownerVertex, notifier, appContext);

  // Source task identity: app 1000/1 -> dag 1 -> vertex 2 -> task 3, reported as RUNNING.
  ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
  TezDAGID dag = TezDAGID.getInstance(applicationId, 1);
  TezVertexID sourceVertexId = TezVertexID.getInstance(dag, 2);
  TezTaskID sourceTaskId = TezTaskID.getInstance(sourceVertexId, 3);
  Vertex sourceVertex = mock(Vertex.class);
  Task sourceTask = mock(Task.class);
  doReturn(TaskState.RUNNING).when(sourceTask).getState();
  doReturn(sourceTask).when(sourceVertex).getTask(sourceTaskId.getId());
  when(appContext.getCurrentDAG().getVertex(any(String.class))).thenReturn(sourceVertex);
  String sourceVertexName = "srcVertexName";
  List<TezEvent> pending = Lists.newLinkedList();

  // Attempt 1 sends an event: nothing may reach the initializer yet.
  TezTaskAttemptID firstAttempt = TezTaskAttemptID.getInstance(sourceTaskId, 1);
  EventMetaData firstAttemptSource = new EventMetaData(
      EventMetaData.EventProducerConsumerType.PROCESSOR, sourceVertexName, null, firstAttempt);
  InputInitializerEvent firstEvent = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
  pending.add(new TezEvent(firstEvent, firstAttemptSource));
  wrapper.handleInputInitializerEvents(pending);
  verify(initializer, never()).handleInputInitializerEvent(any(List.class));
  pending.clear();

  // Attempt 1 succeeds: exactly one delivery, carrying one event.
  wrapper.onTaskSucceeded(sourceVertexName, sourceTaskId, firstAttempt.getId());
  ArgumentCaptor<List> delivered = ArgumentCaptor.forClass(List.class);
  verify(initializer, times(1)).handleInputInitializerEvent(delivered.capture());
  List<InputInitializerEvent> deliveredEvents = delivered.getValue();
  assertEquals(1, deliveredEvents.size());
  reset(initializer);

  // Attempt 2 sends an event: again nothing is delivered.
  TezTaskAttemptID secondAttempt = TezTaskAttemptID.getInstance(sourceTaskId, 2);
  EventMetaData secondAttemptSource = new EventMetaData(
      EventMetaData.EventProducerConsumerType.PROCESSOR, sourceVertexName, null, secondAttempt);
  InputInitializerEvent secondEvent = InputInitializerEvent.create("fakeVertex", "fakeInput", null);
  pending.add(new TezEvent(secondEvent, secondAttemptSource));
  wrapper.handleInputInitializerEvents(pending);
  verify(initializer, never()).handleInputInitializerEvent(any(List.class));
  pending.clear();
  reset(initializer);

  // Attempt 2 succeeds: the initializer must still not be invoked.
  wrapper.onTaskSucceeded(sourceVertexName, sourceTaskId, secondAttempt.getId());
  verify(initializer, never()).handleInputInitializerEvent(delivered.capture());
}
Aggregations