Search in sources :

Example 6 with ProcessorDescriptor

use of org.apache.tez.dag.api.ProcessorDescriptor in project tez by apache.

the class TestMemoryDistributor method testScalingProcessor.

/**
 * Registers four memory requests (two inputs, one output, one processor) with a
 * {@code MemoryDistributor}, triggers the initial allocation, and checks that each
 * callback was assigned its proportionally scaled-down share.
 */
@Test(timeout = 5000)
public void testScalingProcessor() throws TezException {
    MemoryDistributor distributor = new MemoryDistributor(2, 1, conf);
    distributor.setJvmMemory(10000L);

    // Request #1: input asking for 10K
    MemoryUpdateCallbackForTest firstInputCallback = new MemoryUpdateCallbackForTest();
    InputContext firstInputContext = createTestInputContext();
    InputDescriptor firstInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(10000, firstInputCallback, firstInputContext, firstInputDescriptor);

    // Request #2: input asking for 10K
    MemoryUpdateCallbackForTest secondInputCallback = new MemoryUpdateCallbackForTest();
    InputContext secondInputContext = createTestInputContext();
    InputDescriptor secondInputDescriptor = createTestInputDescriptor();
    distributor.requestMemory(10000, secondInputCallback, secondInputContext, secondInputDescriptor);

    // Request #3: output asking for 5K
    MemoryUpdateCallbackForTest outputCallback = new MemoryUpdateCallbackForTest();
    OutputContext outputContext = createTestOutputContext();
    OutputDescriptor outputDescriptor = createTestOutputDescriptor();
    distributor.requestMemory(5000, outputCallback, outputContext, outputDescriptor);

    // Request #4: processor asking for 5K
    MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
    ProcessorContext processorContext = createTestProcessortContext();
    ProcessorDescriptor processorDescriptor = createTestProcessorDescriptor();
    distributor.requestMemory(5000, processorCallback, processorContext, processorDescriptor);

    distributor.makeInitialAllocations();

    // Total available: 70% of 10K = 7000
    // Requested: 10K + 10K + 5K + 5K = 30K
    // Expected after scaling: 2333.33, 2333.33, 1166.66, 1166.66
    // Each check accepts either neighbouring whole number of the expected share.
    assertTrue(2333 <= firstInputCallback.assigned && firstInputCallback.assigned <= 2334);
    assertTrue(2333 <= secondInputCallback.assigned && secondInputCallback.assigned <= 2334);
    assertTrue(1166 <= outputCallback.assigned && outputCallback.assigned <= 1167);
    assertTrue(1166 <= processorCallback.assigned && processorCallback.assigned <= 1167);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Example 7 with ProcessorDescriptor

use of org.apache.tez.dag.api.ProcessorDescriptor in project tez by apache.

the class TestTaskSpec method testSerDe.

/**
 * Serializes a {@code TaskSpec} with {@code write}, reads it back into a fresh instance with
 * {@code readFields}, and compares selected fields of the copy against the original.
 */
@Test(timeout = 5000)
public void testSerDe() throws IOException {
    // The processor payload is built from a null ByteBuffer.
    ByteBuffer nullPayload = null;
    ProcessorDescriptor processor = ProcessorDescriptor.create("proc")
        .setUserPayload(UserPayload.create(nullPayload))
        .setHistoryText("historyText");

    // Exactly one input spec and one output spec.
    List<InputSpec> inputs = new ArrayList<>();
    inputs.add(new InputSpec("src1", InputDescriptor.create("inputClass"), 10));
    List<OutputSpec> outputs = new ArrayList<>();
    outputs.add(new OutputSpec("dest1", OutputDescriptor.create("outputClass"), 999));

    // Group inputs are deliberately absent; the copy is expected to report null for them.
    List<GroupInputSpec> noGroupInputs = null;

    Configuration taskConf = new Configuration(false);
    taskConf.set("foo", "bar");

    TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(
        TezTaskID.getInstance(
            TezVertexID.getInstance(TezDAGID.getInstance("1234", 1, 1), 1),
            1),
        1);
    TaskSpec original = new TaskSpec(attemptId, "dagName", "vName", -1, processor, inputs, outputs, noGroupInputs, taskConf);

    // Write the spec into an in-memory buffer ...
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    original.write(new DataOutputStream(sink));

    // ... and read it back into an empty TaskSpec.
    TaskSpec restored = new TaskSpec();
    restored.readFields(new DataInputStream(new ByteArrayInputStream(sink.toByteArray())));

    Assert.assertEquals(original.getDAGName(), restored.getDAGName());
    Assert.assertEquals(original.getVertexName(), restored.getVertexName());
    Assert.assertEquals(original.getVertexParallelism(), restored.getVertexParallelism());
    Assert.assertEquals(original.getInputs().size(), restored.getInputs().size());
    Assert.assertEquals(original.getOutputs().size(), restored.getOutputs().size());
    Assert.assertNull(restored.getGroupInputs());
    Assert.assertEquals(
        original.getInputs().get(0).getSourceVertexName(),
        restored.getInputs().get(0).getSourceVertexName());
    Assert.assertEquals(
        original.getOutputs().get(0).getDestinationVertexName(),
        restored.getOutputs().get(0).getDestinationVertexName());
    Assert.assertEquals(taskConf.get("foo"), restored.getTaskConf().get("foo"));
}
Also used : DataOutput(java.io.DataOutput) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 8 with ProcessorDescriptor

use of org.apache.tez.dag.api.ProcessorDescriptor in project tez by apache.

the class TaskSpec method readFields.

/**
 * Populates this object from {@code in}. Fields are consumed in this fixed order: task attempt
 * id, DAG name, vertex name, vertex parallelism, processor descriptor, counted input specs,
 * counted output specs, an optional counted list of group input specs, and an optional task
 * configuration. Each optional section is preceded by a boolean presence flag.
 *
 * @param in the stream to read from
 * @throws IOException if any underlying read fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    taskAttemptId = TezTaskAttemptID.readTezTaskAttemptID(in);
    dagName = StringInterner.weakIntern(in.readUTF());
    vertexName = StringInterner.weakIntern(in.readUTF());
    vertexParallelism = in.readInt();

    // TODO TEZ-305 convert this to PB
    processorDescriptor = new ProcessorDescriptor();
    processorDescriptor.readFields(in);

    // Input specs: a count followed by that many entries.
    final int inputCount = in.readInt();
    inputSpecList = new ArrayList<InputSpec>(inputCount);
    for (int remaining = inputCount; remaining > 0; remaining--) {
        InputSpec spec = new InputSpec();
        spec.readFields(in);
        inputSpecList.add(spec);
    }

    // Output specs: same layout as the input specs.
    final int outputCount = in.readInt();
    outputSpecList = new ArrayList<OutputSpec>(outputCount);
    for (int remaining = outputCount; remaining > 0; remaining--) {
        OutputSpec spec = new OutputSpec();
        spec.readFields(in);
        outputSpecList.add(spec);
    }

    // Group input specs are only present when the flag is set.
    // NOTE(review): when the flag is false, groupInputSpecList is left exactly as it was before
    // this call - confirm that is intended if instances are ever reused across reads.
    if (in.readBoolean()) {
        final int groupCount = in.readInt();
        groupInputSpecList = Lists.newArrayListWithCapacity(groupCount);
        for (int remaining = groupCount; remaining > 0; remaining--) {
            GroupInputSpec groupSpec = new GroupInputSpec();
            groupSpec.readFields(in);
            groupInputSpecList.add(groupSpec);
        }
    }

    // The task configuration is likewise optional; taskConf is left untouched when absent.
    if (in.readBoolean()) {
        taskConf = new Configuration(false);
        taskConf.readFields(in);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor)

Example 9 with ProcessorDescriptor

use of org.apache.tez.dag.api.ProcessorDescriptor in project tez by apache.

the class TestFaultTolerance method testCartesianProduct.

/**
 * Fault-tolerance check for an unpartitioned cartesian product.
 * <p>
 * In that setup the failure fraction should be (number of unique failures) / (number of
 * consumers that depend on the source task). The test builds a 2x2 cartesian product and lets
 * the 4th destination task fail. With the failure fraction limit configured as 0.25, the
 * computed fraction is expected to be 1/2 rather than 1/4.
 *
 * @throws Exception propagated from building or running the DAG
 */
@Test
public void testCartesianProduct() throws Exception {
    // NOTE(review): dagConf is populated here but is not referenced again inside this method -
    // confirm how the 0.25 limit is meant to reach the DAG.
    Configuration dagConf = new Configuration();
    dagConf.setDouble(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, 0.25);
    DAG dag = DAG.create("dag");

    // Name of the consumer vertex that all per-vertex settings below are keyed on.
    final String consumerName = "v3";

    // Processor settings: verify value 5 on task index 3 of the consumer vertex.
    Configuration processorConf = new Configuration();
    processorConf.setInt(
        TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, consumerName), 3);
    processorConf.setInt(
        TestProcessor.getVertexConfName(TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, consumerName), 5);
    UserPayload processorPayload = TezUtils.createUserPayloadFromConf(processorConf);
    ProcessorDescriptor sharedProcessor =
        ProcessorDescriptor.create(TestProcessor.class.getName()).setUserPayload(processorPayload);

    // Two source vertices with parallelism 2 each; the consumer is created without a parallelism.
    Vertex v1 = Vertex.create("v1", sharedProcessor, 2);
    Vertex v2 = Vertex.create("v2", sharedProcessor, 2);
    Vertex v3 = Vertex.create("v3", sharedProcessor);

    // Cartesian product over v1 x v2: one partition, grouping disabled.
    CartesianProductConfig productConfig = new CartesianProductConfig(Arrays.asList("v1", "v2"));
    TezConfiguration productTezConf = new TezConfiguration();
    productTezConf.setInt(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS, 1);
    productTezConf.setBoolean(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING, false);
    UserPayload productPayload = productConfig.toUserPayload(productTezConf);
    v3.setVertexManagerPlugin(
        VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
            .setUserPayload(productPayload));
    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
            .setUserPayload(productPayload);

    // Failing-input settings for the consumer: task index 3, attempt 0, input index 0.
    Configuration failingInputConf = new Configuration();
    failingInputConf.setBoolean(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_DO_FAIL, consumerName), true);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, consumerName), 3);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, consumerName), 0);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, consumerName), 0);
    failingInputConf.setInt(
        TestInput.getVertexConfName(TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, consumerName), 0);
    UserPayload failingInputPayload = TezUtils.createUserPayloadFromConf(failingInputConf);

    // Both edges into v3 share the same custom edge property.
    EdgeProperty sharedEdgeProperty = EdgeProperty.create(
        edgeManager,
        DataMovementType.CUSTOM,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        TestOutput.getOutputDesc(null),
        TestInput.getInputDesc(failingInputPayload));
    Edge v1ToV3 = Edge.create(v1, v3, sharedEdgeProperty);
    Edge v2ToV3 = Edge.create(v2, v3, sharedEdgeProperty);

    dag.addVertex(v1).addVertex(v2).addVertex(v3);
    dag.addEdge(v1ToV3).addEdge(v2ToV3);

    // run dag
    runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) UserPayload(org.apache.tez.dag.api.UserPayload) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) CartesianProductVertexManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager) SixLevelsFailingDAG(org.apache.tez.test.dag.SixLevelsFailingDAG) SimpleReverseVTestDAG(org.apache.tez.test.dag.SimpleReverseVTestDAG) TwoLevelsFailingDAG(org.apache.tez.test.dag.TwoLevelsFailingDAG) ThreeLevelsFailingDAG(org.apache.tez.test.dag.ThreeLevelsFailingDAG) DAG(org.apache.tez.dag.api.DAG) SimpleVTestDAG(org.apache.tez.test.dag.SimpleVTestDAG) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig) Edge(org.apache.tez.dag.api.Edge) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 10 with ProcessorDescriptor

use of org.apache.tez.dag.api.ProcessorDescriptor in project hive by apache.

the class Converters method getTaskSpecfromProto.

/**
 * Builds a {@code TaskSpec} from a {@code SignableVertexSpec} proto.
 *
 * @param vectorProto the proto supplying DAG/vertex names, parallelism, the optional processor
 *                    descriptor, and the input, output and grouped-input specs
 * @param fragmentNum passed to {@code createTaskAttemptId} when {@code attemptId} is null
 * @param attemptNum  passed to {@code createTaskAttemptId} when {@code attemptId} is null
 * @param attemptId   task attempt id to use as-is; when null, one is created from the proto's
 *                    query identifier and vertex index plus the two numbers above
 * @return the assembled task spec; its processor descriptor is null when the proto has none
 */
public static TaskSpec getTaskSpecfromProto(SignableVertexSpec vectorProto, int fragmentNum, int attemptNum, TezTaskAttemptID attemptId) {
    // A caller-supplied attempt id wins; otherwise build one.
    final TezTaskAttemptID taskAttemptID;
    if (attemptId == null) {
        taskAttemptID = createTaskAttemptId(vectorProto.getQueryIdentifier(), vectorProto.getVertexIndex(), fragmentNum, attemptNum);
    } else {
        taskAttemptID = attemptId;
    }

    ProcessorDescriptor processorDescriptor = vectorProto.hasProcessorDescriptor()
        ? convertProcessorDescriptorFromProto(vectorProto.getProcessorDescriptor())
        : null;

    // Each list is pre-sized from the proto's count and stays empty (never null) when the count is zero.
    final int inputCount = vectorProto.getInputSpecsCount();
    List<InputSpec> inputs = new ArrayList<InputSpec>(inputCount);
    if (inputCount > 0) {
        for (IOSpecProto proto : vectorProto.getInputSpecsList()) {
            inputs.add(getInputSpecFromProto(proto));
        }
    }

    final int outputCount = vectorProto.getOutputSpecsCount();
    List<OutputSpec> outputs = new ArrayList<OutputSpec>(outputCount);
    if (outputCount > 0) {
        for (IOSpecProto proto : vectorProto.getOutputSpecsList()) {
            outputs.add(getOutputSpecFromProto(proto));
        }
    }

    final int groupCount = vectorProto.getGroupedInputSpecsCount();
    List<GroupInputSpec> groupedInputs = new ArrayList<GroupInputSpec>(groupCount);
    if (groupCount > 0) {
        for (GroupInputSpecProto proto : vectorProto.getGroupedInputSpecsList()) {
            groupedInputs.add(getGroupInputSpecFromProto(proto));
        }
    }

    return new TaskSpec(taskAttemptID, vectorProto.getDagName(), vectorProto.getVertexName(), vectorProto.getVertexParallelism(), processorDescriptor, inputs, outputs, groupedInputs);
}
Also used : IOSpecProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.IOSpecProto) TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec) ArrayList(java.util.ArrayList) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec) GroupInputSpecProto(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.GroupInputSpecProto) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec)

Aggregations

ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor) 22; TaskSpec (org.apache.tez.runtime.api.impl.TaskSpec) 10; Test (org.junit.Test) 10; Configuration (org.apache.hadoop.conf.Configuration) 8; TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID) 8; InputSpec (org.apache.tez.runtime.api.impl.InputSpec) 8; OutputSpec (org.apache.tez.runtime.api.impl.OutputSpec) 8; TezConfiguration (org.apache.tez.dag.api.TezConfiguration) 6; ByteBuffer (java.nio.ByteBuffer) 5; HashMap (java.util.HashMap) 5; TezTaskID (org.apache.tez.dag.records.TezTaskID) 5; DefaultHadoopShim (org.apache.tez.hadoop.shim.DefaultHadoopShim) 5; ArrayList (java.util.ArrayList) 4; Path (org.apache.hadoop.fs.Path) 4; InputDescriptor (org.apache.tez.dag.api.InputDescriptor) 4; OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor) 4; UserPayload (org.apache.tez.dag.api.UserPayload) 4; LogicalIOProcessorRuntimeTask (org.apache.tez.runtime.LogicalIOProcessorRuntimeTask) 4; GroupInputSpec (org.apache.tez.runtime.api.impl.GroupInputSpec) 4; TezSharedExecutor (org.apache.tez.common.TezSharedExecutor) 3