use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.
the class TestFaultTolerance method testCartesianProduct.
/**
 * Runs a 2x2 unpartitioned cartesian product (v1 x v2 feeding v3) in which the 4th destination
 * task (index 3) is configured to fail reading input 0 on its first attempt, and expects the
 * DAG to finish in the SUCCEEDED state.
 *
 * <p>In an unpartitioned cartesian product the failure fraction should be
 * #unique failures / #consumers that depend on the source task. With the limit meant to be
 * 0.25, the fraction for this scenario should therefore be 1/2, not 1/4.
 *
 * <p>NOTE(review): {@code dagConf} is populated with the 0.25 limit but is not handed to the
 * DAG or to the runner anywhere in this method - confirm the limit actually takes effect.
 *
 * @throws Exception if building or running the DAG fails
 */
@Test
public void testCartesianProduct() throws Exception {
  final String consumerName = "v3";

  Configuration dagConf = new Configuration();
  dagConf.setDouble(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, 0.25);

  // One processor descriptor is shared by all three vertices; its verify settings are keyed
  // to the consumer vertex name.
  Configuration processorConf = new Configuration();
  processorConf.setInt(
      TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, consumerName), 3);
  processorConf.setInt(
      TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, consumerName), 5);
  ProcessorDescriptor processor = ProcessorDescriptor.create(TestProcessor.class.getName())
      .setUserPayload(TezUtils.createUserPayloadFromConf(processorConf));

  Vertex v1 = Vertex.create("v1", processor, 2);
  Vertex v2 = Vertex.create("v2", processor, 2);
  // The consumer is created without an explicit task count.
  Vertex v3 = Vertex.create(consumerName, processor);

  // Cartesian product over v1 and v2: a single partition, grouping disabled.
  TezConfiguration tezConf = new TezConfiguration();
  tezConf.setInt(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS, 1);
  tezConf.setBoolean(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING, false);
  CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList("v1", "v2"));
  UserPayload productPayload = productConfig.toUserPayload(tezConf);

  VertexManagerPluginDescriptor vertexManager =
      VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName());
  v3.setVertexManagerPlugin(vertexManager.setUserPayload(productPayload));
  EdgeManagerPluginDescriptor edgeManager =
      EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
          .setUserPayload(productPayload);

  // Failing-input settings for v3: task index 3, task attempt 0, input index 0,
  // failing up to input attempt 0.
  Configuration failingInputConf = new Configuration();
  failingInputConf.setBoolean(
      TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_DO_FAIL, consumerName), true);
  failingInputConf.setInt(
      TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, consumerName), 3);
  failingInputConf.setInt(
      TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, consumerName), 0);
  failingInputConf.setInt(
      TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, consumerName), 0);
  failingInputConf.setInt(
      TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, consumerName), 0);
  UserPayload failingInputPayload = TezUtils.createUserPayloadFromConf(failingInputConf);

  // Both source edges use the same custom edge property.
  EdgeProperty productEdge = EdgeProperty.create(
      edgeManager,
      DataMovementType.CUSTOM,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(null),
      TestInput.getInputDesc(failingInputPayload));

  DAG dag = DAG.create("dag");
  dag.addVertex(v1).addVertex(v2).addVertex(v3);
  dag.addEdge(Edge.create(v1, v3, productEdge)).addEdge(Edge.create(v2, v3, productEdge));

  // run dag
  runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
}
use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.
the class CartesianProduct method createDAG.
/**
 * Builds the "CrossProduct" DAG: two source vertices (VERTEX1, VERTEX2) running
 * {@code TokenProcessor} over a fake data source, both feeding VERTEX3, which runs
 * {@code JoinProcessor} and writes to a fake data sink. VERTEX3 and both incoming edges are
 * managed by the cartesian product vertex/edge managers, configured from {@code tezConf}.
 *
 * @param tezConf configuration used to serialize the cartesian product settings into the
 *     payload shared by the vertex manager and the edge manager
 * @return the assembled DAG with three vertices and two edges
 * @throws IOException declared by the signature; presumably raised while serializing the
 *     cartesian product config - confirm against {@code toUserPayload}
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // Fake source shared by both producer vertices (third argument left null, as before).
  DataSourceDescriptor fakeSource = DataSourceDescriptor.create(
      InputDescriptor.create(FakeInput.class.getName()),
      InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
      null);

  Vertex first = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  first.addDataSource(INPUT, fakeSource);
  Vertex second = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  second.addDataSource(INPUT, fakeSource);

  // Fake sink for the joining vertex (third argument left null, as before).
  DataSinkDescriptor fakeSink = DataSinkDescriptor.create(
      OutputDescriptor.create(FakeOutput.class.getName()),
      OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
      null);

  // The same serialized cartesian product config goes to the vertex manager and edge manager.
  UserPayload productPayload =
      new CartesianProductConfig(Arrays.asList(sourceVertices)).toUserPayload(tezConf);

  Vertex joiner = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  joiner.addDataSink(OUTPUT, fakeSink);
  joiner.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
          .setUserPayload(productPayload));

  EdgeManagerPluginDescriptor productEdgeManager =
      EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
          .setUserPayload(productPayload);
  EdgeProperty productEdge = UnorderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(), RoundRobinPartitioner.class.getName())
      .build()
      .createDefaultCustomEdgeProperty(productEdgeManager);

  return DAG.create("CrossProduct")
      .addVertex(first)
      .addVertex(second)
      .addVertex(joiner)
      .addEdge(Edge.create(first, joiner, productEdge))
      .addEdge(Edge.create(second, joiner, productEdge));
}
use of org.apache.tez.dag.api.EdgeProperty in project hive by apache.
the class CustomPartitionVertex method processAllEvents.
/**
 * Assigns the grouped splits of one input to tasks, records the bucket-to-task routing,
 * registers the custom edge manager descriptor for matching input edges, and publishes the
 * serialized splits as root input events.
 *
 * <p>Side effects on instance state: appends to {@code finalSplits}, fills
 * {@code bucketToTaskMap}, advances {@code taskCount}, updates {@code inputNameInputSpecMap}
 * and {@code emMap}, and clears {@code inputToGroupedSplitMap} once its entries are processed.
 *
 * @param inputName name of the input whose splits are being processed
 * @param bucketToGroupedSplitMap splits keyed by bucket number
 * @param secondLevelGroupingDone when true every split is cast to {@code TezGroupedSplit} and
 *     its sub-splits are serialized individually; otherwise each split is serialized as-is
 * @throws IOException if a sub-split of a grouped split is not itself a
 *     {@code TezGroupedSplit}; the called helpers may presumably raise it as well
 */
private void processAllEvents(String inputName, Multimap<Integer, InputSplit> bucketToGroupedSplitMap, boolean secondLevelGroupingDone) throws IOException {
int totalInputsCount = 0;
// Number of physical splits each task will receive, in task order.
List<Integer> numSplitsForTask = new ArrayList<Integer>();
for (Entry<Integer, Collection<InputSplit>> entry : bucketToGroupedSplitMap.asMap().entrySet()) {
int bucketNum = entry.getKey();
Collection<InputSplit> initialSplits = entry.getValue();
finalSplits.addAll(initialSplits);
// One task per split: remember which bucket each task index serves.
for (InputSplit inputSplit : initialSplits) {
bucketToTaskMap.put(bucketNum, taskCount);
if (secondLevelGroupingDone) {
TezGroupedSplit groupedSplit = (TezGroupedSplit) inputSplit;
numSplitsForTask.add(groupedSplit.getGroupedSplits().size());
totalInputsCount += groupedSplit.getGroupedSplits().size();
} else {
numSplitsForTask.add(1);
totalInputsCount += 1;
}
taskCount++;
}
}
inputNameInputSpecMap.put(inputName, InputSpecUpdate.createPerTaskInputSpecUpdate(numSplitsForTask));
// Construct the EdgeManager descriptor to be used by all edges which need
// the routing table.
// NOTE(review): the descriptor stays null for any other vertex type, yet it is still stored
// in emMap below for matching edges - confirm that is intended.
EdgeManagerPluginDescriptor hiveEdgeManagerDesc = null;
if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES) || (vertexType == VertexType.INITIALIZED_EDGES)) {
hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
UserPayload payload = getBytePayload(bucketToTaskMap);
hiveEdgeManagerDesc.setUserPayload(payload);
}
// Replace the edge manager for all vertices which have routing type custom.
for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) {
if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName().equals(CustomPartitionEdge.class.getName())) {
emMap.put(edgeEntry.getKey(), hiveEdgeManagerDesc);
}
}
LOG.info("Task count is " + taskCount + " for input name: " + inputName);
List<InputDataInformationEvent> taskEvents = Lists.newArrayListWithCapacity(totalInputsCount);
// Re-serialize the splits after grouping.
// count advances once per entry of finalSplits, so all sub-splits of one grouped split share
// the same event index and the same target index.
int count = 0;
for (InputSplit inputSplit : finalSplits) {
if (secondLevelGroupingDone) {
TezGroupedSplit tezGroupedSplit = (TezGroupedSplit) inputSplit;
for (InputSplit subSplit : tezGroupedSplit.getGroupedSplits()) {
// Every member of the outer group must itself be a TezGroupedSplit.
if ((subSplit instanceof TezGroupedSplit) == false) {
throw new IOException("Unexpected split type found: " + subSplit.getClass().getCanonicalName());
}
MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(subSplit);
InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
diEvent.setTargetIndex(count);
taskEvents.add(diEvent);
}
} else {
MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(inputSplit);
InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
diEvent.setTargetIndex(count);
taskEvents.add(diEvent);
}
count++;
}
// Set the actual events for the tasks.
LOG.info("For input name: " + inputName + " task events size is " + taskEvents.size());
context.addRootInputEvents(inputName, taskEvents);
// Process every pending entry of inputToGroupedSplitMap as side events, then set the
// parallelism and clear the map.
if (!inputToGroupedSplitMap.isEmpty()) {
for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
processAllSideEvents(entry.getKey(), entry.getValue());
}
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
inputToGroupedSplitMap.clear();
}
// Only done when it is a bucket map join only no SMB.
if (numInputsAffectingRootInputSpecUpdate == 1) {
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
// Send the bucket IDs associated with the tasks, must happen after parallelism is set.
sendBucketIdsToProcessor();
}
}
use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.
the class TestVertexImpl method testSetCustomEdgeManager.
/**
 * Verifies that reconfiguring vertex5 with a custom {@code EdgeProperty} for its source
 * vertex3 installs a new {@code EdgeManagerForTest} on that edge and that the new edge manager
 * sees the supplied user payload.
 *
 * @throws Exception if vertex initialization or reconfiguration fails
 */
@Test(timeout = 5000)
public void testSetCustomEdgeManager() throws Exception {
  // Vertex5 linked to v3 (v3 src, v5 dest)
  VertexImpl v5 = vertices.get("vertex5");
  v5.vertexReconfigurationPlanned();
  initAllVertices(VertexState.INITED);

  // Before reconfiguration, edge e4's manager is expected to carry the edgePayload field.
  Edge edge = edges.get("e4");
  EdgeManagerPlugin em = edge.getEdgeManager();
  EdgeManagerForTest originalEm = (EdgeManagerForTest) em;
  assertTrue(Arrays.equals(edgePayload, originalEm.getEdgeManagerContext().getUserPayload().deepCopyAsArray()));

  // Encode with an explicit charset so the payload bytes do not depend on the platform
  // default; the fully qualified name avoids touching the file's import block.
  UserPayload userPayload = UserPayload.create(
      ByteBuffer.wrap("foo".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
  EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName());
  edgeManagerDescriptor.setUserPayload(userPayload);
  EdgeProperty edgeProp = EdgeProperty.create(edgeManagerDescriptor, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"));

  // Shrink v5 by one task and swap in the custom edge property for the edge coming from v3.
  Vertex v3 = vertices.get("vertex3");
  Map<String, EdgeProperty> edgeManagerDescriptors = Collections.singletonMap(v3.getName(), edgeProp);
  v5.reconfigureVertex(v5.getTotalTasks() - 1, null, edgeManagerDescriptors);
  v5.doneReconfiguringVertex();

  // v5 is already declared as VertexImpl, so its source-vertex map is read without a cast.
  EdgeManagerPlugin modifiedEdgeManager = v5.sourceVertices.get(v3).getEdgeManager();
  Assert.assertNotNull(modifiedEdgeManager);
  assertTrue(modifiedEdgeManager instanceof EdgeManagerForTest);
  // Ensure initialize() is called with the correct payload
  assertTrue(Arrays.equals(userPayload.deepCopyAsArray(), ((EdgeManagerForTest) modifiedEdgeManager).getUserPayload().deepCopyAsArray()));
}
use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.
the class TestVertexImpl method testVertexSetParallelismDecrease.
/**
 * Verifies that vertex3 can be reconfigured from 2 tasks down to 1 while supplying a custom
 * edge property for its source vertex1, and that the edge from vertex1 ends up with an
 * {@code EdgeManagerForTest} edge manager.
 *
 * @throws Exception if vertex initialization, start-up or reconfiguration fails
 */
@Test(timeout = 5000)
public void testVertexSetParallelismDecrease() throws Exception {
  VertexImpl target = vertices.get("vertex3");
  target.vertexReconfigurationPlanned();
  initAllVertices(VertexState.INITED);

  // Starting point: two tasks, both as a count and as task objects.
  Assert.assertEquals(2, target.getTotalTasks());
  Assert.assertEquals(2, target.getTasks().size());

  // Start vertex2 first, then vertex1, before reconfiguring the target.
  VertexImpl source = vertices.get("vertex1");
  VertexImpl otherVertex = vertices.get("vertex2");
  startVertex(otherVertex);
  startVertex(source);

  EdgeProperty replacementEdge = EdgeProperty.create(
      EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName()),
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"),
      InputDescriptor.create("In"));
  Map<String, EdgeProperty> edgesBySourceName =
      Collections.singletonMap(source.getName(), replacementEdge);

  target.reconfigureVertex(1, null, edgesBySourceName);
  target.doneReconfiguringVertex();

  EdgeManagerPlugin installed = target.sourceVertices.get(source).getEdgeManager();
  assertTrue(installed instanceof EdgeManagerForTest);
  checkTasks(target, 1);
}
Aggregations