Search in sources:

Example 31 with EdgeProperty

use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.

From the class TestFaultTolerance, method testCartesianProduct.

/**
 * Exercises fault tolerance for an unpartitioned cartesian product edge.
 *
 * <p>For this kind of edge the failure fraction is expected to be the number of unique failures
 * divided by the number of consumers that depend on the source task. The DAG built here is a 2x2
 * cartesian product (v1 x v2 feeding v3) in which the 4th destination task (index 3) is
 * configured to report an input failure. With the allowed fraction set to 0.25, the computed
 * fraction should be 1/2 rather than 1/4.
 *
 * <p>NOTE(review): {@code dagConf} is populated below but is not handed to the DAG or to
 * {@code runDAGAndVerify} inside this method - confirm that the 0.25 limit actually takes effect.
 *
 * @throws Exception if building or running the DAG fails
 */
@Test
public void testCartesianProduct() throws Exception {
    final String consumerName = "v3";

    Configuration dagConf = new Configuration();
    dagConf.setDouble(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, 0.25);
    DAG cartesianDag = DAG.create("dag");

    // Processor settings; all three vertices share the same processor descriptor.
    Configuration processorConf = new Configuration();
    processorConf.setInt(
        TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, consumerName), 3);
    processorConf.setInt(
        TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, consumerName), 5);
    UserPayload processorPayload = TezUtils.createUserPayloadFromConf(processorConf);
    ProcessorDescriptor sharedProcessor =
        ProcessorDescriptor.create(TestProcessor.class.getName()).setUserPayload(processorPayload);

    // Two producers with two tasks each; the consumer's parallelism is left unspecified.
    Vertex leftSource = Vertex.create("v1", sharedProcessor, 2);
    Vertex rightSource = Vertex.create("v2", sharedProcessor, 2);
    Vertex consumer = Vertex.create(consumerName, sharedProcessor);

    // Cartesian product over v1 and v2: one partition, grouping disabled.
    CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList("v1", "v2"));
    TezConfiguration productConf = new TezConfiguration();
    productConf.setInt(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS, 1);
    productConf.setBoolean(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING, false);
    UserPayload productPayload = productConfig.toUserPayload(productConf);

    VertexManagerPluginDescriptor vertexManager =
        VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
            .setUserPayload(productPayload);
    consumer.setVertexManagerPlugin(vertexManager);
    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
            .setUserPayload(productPayload);

    // Failure injection on the consumer's input: task index 3, attempt 0, input index 0.
    Configuration failingInputConf = new Configuration();
    failingInputConf.setBoolean(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_DO_FAIL, consumerName), true);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, consumerName), 3);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, consumerName), 0);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, consumerName), 0);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, consumerName), 0);
    UserPayload failingInputPayload = TezUtils.createUserPayloadFromConf(failingInputConf);

    // Both incoming edges use the same custom edge property.
    EdgeProperty productEdge = EdgeProperty.create(
        edgeManager,
        DataMovementType.CUSTOM,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        TestOutput.getOutputDesc(null),
        TestInput.getInputDesc(failingInputPayload));
    Edge leftEdge = Edge.create(leftSource, consumer, productEdge);
    Edge rightEdge = Edge.create(rightSource, consumer, productEdge);

    cartesianDag.addVertex(leftSource).addVertex(rightSource).addVertex(consumer);
    cartesianDag.addEdge(leftEdge).addEdge(rightEdge);

    // run dag
    runDAGAndVerify(cartesianDag, DAGStatus.State.SUCCEEDED);
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) CartesianProductVertexManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager) SixLevelsFailingDAG(org.apache.tez.test.dag.SixLevelsFailingDAG) SimpleReverseVTestDAG(org.apache.tez.test.dag.SimpleReverseVTestDAG) TwoLevelsFailingDAG(org.apache.tez.test.dag.TwoLevelsFailingDAG) ThreeLevelsFailingDAG(org.apache.tez.test.dag.ThreeLevelsFailingDAG) DAG(org.apache.tez.dag.api.DAG) SimpleVTestDAG(org.apache.tez.test.dag.SimpleVTestDAG) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig) Edge(org.apache.tez.dag.api.Edge) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 32 with EdgeProperty

use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.

From the class CartesianProduct, method createDAG.

/**
 * Builds the "CrossProduct" DAG: two token-producing vertices, each reading from a fake data
 * source, joined by a third vertex through cartesian-product custom edges.
 *
 * @param tezConf configuration used to serialize the cartesian product settings into the payload
 *     shared by the vertex manager and the edge manager
 * @return the assembled DAG with three vertices and two edges
 * @throws IOException declared by the signature; presumably raised while serializing the
 *     cartesian product config - confirm against {@code toUserPayload}
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
    // Data source shared by both producer vertices.
    InputDescriptor fakeInput = InputDescriptor.create(FakeInput.class.getName());
    InputInitializerDescriptor fakeInitializer =
        InputInitializerDescriptor.create(FakeInputInitializer.class.getName());
    DataSourceDescriptor source = DataSourceDescriptor.create(fakeInput, fakeInitializer, null);

    Vertex firstProducer =
        Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    firstProducer.addDataSource(INPUT, source);
    Vertex secondProducer =
        Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
    secondProducer.addDataSource(INPUT, source);

    // Data sink for the joining vertex.
    OutputDescriptor fakeOutput = OutputDescriptor.create(FakeOutput.class.getName());
    OutputCommitterDescriptor fakeCommitter =
        OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName());
    DataSinkDescriptor sink = DataSinkDescriptor.create(fakeOutput, fakeCommitter, null);

    // The same serialized cartesian product config goes to the vertex manager and the edge manager.
    CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList(sourceVertices));
    UserPayload productPayload = productConfig.toUserPayload(tezConf);

    Vertex joiner = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
    joiner.addDataSink(OUTPUT, sink);
    VertexManagerPluginDescriptor vertexManager =
        VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
            .setUserPayload(productPayload);
    joiner.setVertexManagerPlugin(vertexManager);

    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
    edgeManager.setUserPayload(productPayload);

    UnorderedPartitionedKVEdgeConfig kvEdgeConfig = UnorderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(), RoundRobinPartitioner.class.getName())
        .build();
    EdgeProperty productEdge = kvEdgeConfig.createDefaultCustomEdgeProperty(edgeManager);

    DAG withVertices = DAG.create("CrossProduct")
        .addVertex(firstProducer)
        .addVertex(secondProducer)
        .addVertex(joiner);
    return withVertices
        .addEdge(Edge.create(firstProducer, joiner, productEdge))
        .addEdge(Edge.create(secondProducer, joiner, productEdge));
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Vertex(org.apache.tez.dag.api.Vertex) UserPayload(org.apache.tez.dag.api.UserPayload) OutputCommitterDescriptor(org.apache.tez.dag.api.OutputCommitterDescriptor) CartesianProductVertexManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager) DataSinkDescriptor(org.apache.tez.dag.api.DataSinkDescriptor) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) CartesianProductEdgeManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) UnorderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor)

Example 33 with EdgeProperty

use of org.apache.tez.dag.api.EdgeProperty in project hive by apache.

From the class CustomPartitionVertex, method processAllEvents.

/**
 * Assigns the grouped splits of one input to tasks and publishes the resulting per-task events.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Walk every bucket's splits, appending them to {@code finalSplits}, mapping the bucket to
 *       the running {@code taskCount}, and recording how many inputs each task receives.</li>
 *   <li>Register a per-task input spec update for {@code inputName}.</li>
 *   <li>For incoming edges that are CUSTOM and use {@code CustomPartitionEdge}, store the edge
 *       manager descriptor in {@code emMap}.</li>
 *   <li>Serialize the splits into {@code InputDataInformationEvent}s and add them to the context
 *       as root input events.</li>
 *   <li>Process any pending side inputs and set vertex parallelism / root input spec.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code finalSplits}, {@code taskCount} and {@code bucketToTaskMap} are instance
 * state that this method appends to and does not reset - confirm how callers expect them to
 * accumulate across invocations.
 *
 * @param inputName name of the input whose splits are being processed
 * @param bucketToGroupedSplitMap bucket number mapped to the splits belonging to that bucket
 * @param secondLevelGroupingDone when {@code true}, every split is cast to {@code TezGroupedSplit}
 *     and its member splits are counted and serialized individually
 * @throws IOException if a member of a grouped split is not itself a {@code TezGroupedSplit};
 *     presumably also propagated from the helpers called here - confirm
 */
private void processAllEvents(String inputName, Multimap<Integer, InputSplit> bucketToGroupedSplitMap, boolean secondLevelGroupingDone) throws IOException {
    // Total number of events that will be generated; used only to presize taskEvents below.
    int totalInputsCount = 0;
    // One entry per task: how many inputs that task will receive.
    List<Integer> numSplitsForTask = new ArrayList<Integer>();
    for (Entry<Integer, Collection<InputSplit>> entry : bucketToGroupedSplitMap.asMap().entrySet()) {
        int bucketNum = entry.getKey();
        Collection<InputSplit> initialSplits = entry.getValue();
        finalSplits.addAll(initialSplits);
        // Each split becomes one task; the bucket may therefore map to several task indices.
        for (InputSplit inputSplit : initialSplits) {
            bucketToTaskMap.put(bucketNum, taskCount);
            if (secondLevelGroupingDone) {
                // A grouped split contributes one input per member split.
                TezGroupedSplit groupedSplit = (TezGroupedSplit) inputSplit;
                numSplitsForTask.add(groupedSplit.getGroupedSplits().size());
                totalInputsCount += groupedSplit.getGroupedSplits().size();
            } else {
                numSplitsForTask.add(1);
                totalInputsCount += 1;
            }
            taskCount++;
        }
    }
    inputNameInputSpecMap.put(inputName, InputSpecUpdate.createPerTaskInputSpecUpdate(numSplitsForTask));
    // Construct the EdgeManager descriptor to be used by all edges which need
    // the routing table.
    // For any other vertex type the descriptor stays null, and null is what gets stored in emMap.
    EdgeManagerPluginDescriptor hiveEdgeManagerDesc = null;
    if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES) || (vertexType == VertexType.INITIALIZED_EDGES)) {
        hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
        // The payload is built from the bucket-to-task mapping populated above.
        UserPayload payload = getBytePayload(bucketToTaskMap);
        hiveEdgeManagerDesc.setUserPayload(payload);
    }
    // Replace the edge manager for all vertices which have routing type custom.
    for (Entry<String, EdgeProperty> edgeEntry : context.getInputVertexEdgeProperties().entrySet()) {
        if (edgeEntry.getValue().getDataMovementType() == DataMovementType.CUSTOM && edgeEntry.getValue().getEdgeManagerDescriptor().getClassName().equals(CustomPartitionEdge.class.getName())) {
            emMap.put(edgeEntry.getKey(), hiveEdgeManagerDesc);
        }
    }
    LOG.info("Task count is " + taskCount + " for input name: " + inputName);
    List<InputDataInformationEvent> taskEvents = Lists.newArrayListWithCapacity(totalInputsCount);
    // Re-serialize the splits after grouping.
    // count is the position of the split within finalSplits; it is used as both the source index
    // and the target index of every event generated for that split.
    int count = 0;
    for (InputSplit inputSplit : finalSplits) {
        if (secondLevelGroupingDone) {
            TezGroupedSplit tezGroupedSplit = (TezGroupedSplit) inputSplit;
            // All member splits of one grouped split share the same count, i.e. the same target.
            for (InputSplit subSplit : tezGroupedSplit.getGroupedSplits()) {
                // Members are required to be grouped splits themselves; anything else is rejected.
                if ((subSplit instanceof TezGroupedSplit) == false) {
                    throw new IOException("Unexpected split type found: " + subSplit.getClass().getCanonicalName());
                }
                MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(subSplit);
                InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
                diEvent.setTargetIndex(count);
                taskEvents.add(diEvent);
            }
        } else {
            MRSplitProto serializedSplit = MRInputHelpers.createSplitProto(inputSplit);
            InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload(count, serializedSplit.toByteString().asReadOnlyByteBuffer());
            diEvent.setTargetIndex(count);
            taskEvents.add(diEvent);
        }
        count++;
    }
    // Set the actual events for the tasks.
    LOG.info("For input name: " + inputName + " task events size is " + taskEvents.size());
    context.addRootInputEvents(inputName, taskEvents);
    // Flush side inputs that were collected earlier, then clear them so they are handled once.
    if (!inputToGroupedSplitMap.isEmpty()) {
        for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
            processAllSideEvents(entry.getKey(), entry.getValue());
        }
        setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
        inputToGroupedSplitMap.clear();
    }
    // Only done when it is a bucket map join only no SMB.
    // NOTE(review): if the branch above also ran, setVertexParallelismAndRootInputSpec is invoked
    // a second time here - confirm that both conditions cannot hold together or that it is safe.
    if (numInputsAffectingRootInputSpecUpdate == 1) {
        setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
        // Send the bucket IDs associated with the tasks, must happen after parallelism is set.
        sendBucketIdsToProcessor();
    }
}
Also used : UserPayload(org.apache.tez.dag.api.UserPayload) TezGroupedSplit(org.apache.hadoop.mapred.split.TezGroupedSplit) ByteString(com.google.protobuf.ByteString) IOException(java.io.IOException) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Multimap(com.google.common.collect.Multimap) HashMultimap(com.google.common.collect.HashMultimap) LinkedListMultimap(com.google.common.collect.LinkedListMultimap) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) InputSplit(org.apache.hadoop.mapred.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)

Example 34 with EdgeProperty

use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.

From the class TestVertexImpl, method testSetCustomEdgeManager.

/**
 * Verifies that reconfiguring a vertex with a new custom {@code EdgeProperty} replaces the edge
 * manager on the incoming edge, and that the replacement carries the newly supplied payload.
 *
 * @throws Exception if vertex initialization or reconfiguration fails
 */
@Test(timeout = 5000)
public void testSetCustomEdgeManager() throws Exception {
    // Vertex5 linked to v3 (v3 src, v5 dest)
    VertexImpl v5 = vertices.get("vertex5");
    v5.vertexReconfigurationPlanned();
    initAllVertices(VertexState.INITED);
    Edge edge = edges.get("e4");
    EdgeManagerPlugin em = edge.getEdgeManager();
    EdgeManagerForTest originalEm = (EdgeManagerForTest) em;
    // assertArrayEquals reports the first differing index on failure, unlike assertTrue(Arrays.equals).
    Assert.assertArrayEquals(edgePayload, originalEm.getEdgeManagerContext().getUserPayload().deepCopyAsArray());
    // Encode with an explicit charset so the payload bytes do not depend on the platform default.
    UserPayload userPayload = UserPayload.create(ByteBuffer.wrap("foo".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName());
    edgeManagerDescriptor.setUserPayload(userPayload);
    EdgeProperty edgeProp = EdgeProperty.create(edgeManagerDescriptor, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out"), InputDescriptor.create("In"));
    Vertex v3 = vertices.get("vertex3");
    Map<String, EdgeProperty> edgeManagerDescriptors = Collections.singletonMap(v3.getName(), edgeProp);
    // Reduce parallelism by one and swap in the new edge manager for the edge coming from v3.
    v5.reconfigureVertex(v5.getTotalTasks() - 1, null, edgeManagerDescriptors);
    v5.doneReconfiguringVertex();
    // v5 is already a VertexImpl, so its sourceVertices map can be read directly without a cast.
    EdgeManagerPlugin modifiedEdgeManager = v5.sourceVertices.get(v3).getEdgeManager();
    Assert.assertNotNull(modifiedEdgeManager);
    assertTrue(modifiedEdgeManager instanceof EdgeManagerForTest);
    // Ensure initialize() is called with the correct payload
    Assert.assertArrayEquals(userPayload.deepCopyAsArray(), ((EdgeManagerForTest) modifiedEdgeManager).getUserPayload().deepCopyAsArray());
}
Also used : Vertex(org.apache.tez.dag.app.dag.Vertex) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) UserPayload(org.apache.tez.dag.api.UserPayload) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) ByteString(com.google.protobuf.ByteString) EdgeManagerPlugin(org.apache.tez.dag.api.EdgeManagerPlugin) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Example 35 with EdgeProperty

use of org.apache.tez.dag.api.EdgeProperty in project tez by apache.

From the class TestVertexImpl, method testVertexSetParallelismDecrease.

/**
 * Reconfigures vertex3 from two tasks down to one while replacing the edge manager on its edge
 * from vertex1, then checks that the new edge manager is installed and one task remains.
 *
 * @throws Exception if vertex initialization, start-up or reconfiguration fails
 */
@Test(timeout = 5000)
public void testVertexSetParallelismDecrease() throws Exception {
    final VertexImpl consumer = vertices.get("vertex3");
    consumer.vertexReconfigurationPlanned();
    initAllVertices(VertexState.INITED);

    // Before reconfiguration the vertex reports two tasks.
    Assert.assertEquals(2, consumer.getTotalTasks());
    Assert.assertEquals(2, consumer.getTasks().size());

    final VertexImpl producer = vertices.get("vertex1");
    final VertexImpl otherVertex = vertices.get("vertex2");
    startVertex(otherVertex);
    startVertex(producer);

    // Edge property for the producer -> consumer edge, backed by the test edge manager.
    EdgeManagerPluginDescriptor testEdgeManager =
        EdgeManagerPluginDescriptor.create(EdgeManagerForTest.class.getName());
    OutputDescriptor out = OutputDescriptor.create("Out");
    InputDescriptor in = InputDescriptor.create("In");
    EdgeProperty replacementEdge = EdgeProperty.create(
        testEdgeManager, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, out, in);
    Map<String, EdgeProperty> edgesBySource =
        Collections.singletonMap(producer.getName(), replacementEdge);

    consumer.reconfigureVertex(1, null, edgesBySource);
    consumer.doneReconfiguringVertex();

    boolean usesTestEdgeManager =
        consumer.sourceVertices.get(producer).getEdgeManager() instanceof EdgeManagerForTest;
    assertTrue(usesTestEdgeManager);
    checkTasks(consumer, 1);
}
Also used : EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) ByteString(com.google.protobuf.ByteString) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest) VertexManagerPluginForTest(org.apache.tez.test.VertexManagerPluginForTest) Test(org.junit.Test) GraceShuffleVertexManagerForTest(org.apache.tez.test.GraceShuffleVertexManagerForTest) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest) EdgeManagerForTest(org.apache.tez.test.EdgeManagerForTest)

Aggregations

EdgeProperty (org.apache.tez.dag.api.EdgeProperty)62 Test (org.junit.Test)31 HashMap (java.util.HashMap)28 ByteString (com.google.protobuf.ByteString)19 VertexStateUpdate (org.apache.tez.dag.api.event.VertexStateUpdate)19 EdgeManagerPluginDescriptor (org.apache.tez.dag.api.EdgeManagerPluginDescriptor)16 VertexManagerPluginContext (org.apache.tez.dag.api.VertexManagerPluginContext)15 VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint)14 Configuration (org.apache.hadoop.conf.Configuration)13 Map (java.util.Map)9 EdgeManagerForTest (org.apache.tez.test.EdgeManagerForTest)7 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)6 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)6 Vertex (org.apache.tez.dag.app.dag.Vertex)6 UserPayload (org.apache.tez.dag.api.UserPayload)5 TaskAttemptIdentifier (org.apache.tez.runtime.api.TaskAttemptIdentifier)5 GraceShuffleVertexManagerForTest (org.apache.tez.test.GraceShuffleVertexManagerForTest)5 VertexManagerPluginForTest (org.apache.tez.test.VertexManagerPluginForTest)5 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)4 Vertex (org.apache.tez.dag.api.Vertex)4