Search in sources :

Example 31 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

In the class TestLogicalIOProcessorRuntimeTask, method createInputSpecList.

/**
 * Builds the input spec list used by the test: a single {@link InputSpec} named
 * {@code "inedge"} whose descriptor points at {@code TestInput}, created with the
 * integer argument {@code 1}.
 *
 * @return a new list (from {@code Lists.newArrayList}) containing exactly that one spec
 */
private List<InputSpec> createInputSpecList() {
    String inputClassName = TestInput.class.getName();
    InputSpec edgeSpec = new InputSpec("inedge", InputDescriptor.create(inputClassName), 1);
    return Lists.newArrayList(edgeSpec);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) InputSpec(org.apache.tez.runtime.api.impl.InputSpec)

Example 32 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

In the class VertexImpl, method setupVertex.

/**
 * Performs the set-up steps for this vertex and reports the resulting state.
 *
 * <p>In order, the method: records the init-request time, collects group input specs,
 * records which root inputs declare an initializer class, rejects the combination of a
 * SCATTER_GATHER source edge with root-input initializers, validates the configured task
 * count, checks task limits, assigns and initializes the vertex manager, and replays any
 * vertex-manager events that were queued before the manager was initialized.
 *
 * @return {@code VertexState.INITED} when every step succeeds; otherwise either
 *         {@code VertexState.FAILED} directly or whatever {@code finished(...)} returns
 *         for the failing step
 */
private VertexState setupVertex() {
    this.initTimeRequested = clock.getTime();
    // Build a GroupInputSpec for every vertex group whose merged inputs are keyed by this
    // vertex's name. groupInputSpecList is only assigned when at least one spec was built.
    if (dagVertexGroups != null && !dagVertexGroups.isEmpty()) {
        List<GroupInputSpec> groupSpecList = Lists.newLinkedList();
        for (VertexGroupInfo groupInfo : dagVertexGroups.values()) {
            if (groupInfo.edgeMergedInputs.containsKey(getName())) {
                InputDescriptor mergedInput = groupInfo.edgeMergedInputs.get(getName());
                groupSpecList.add(new GroupInputSpec(groupInfo.groupName, Lists.newLinkedList(groupInfo.groupMembers), mergedInput));
            }
        }
        if (!groupSpecList.isEmpty()) {
            groupInputSpecList = groupSpecList;
        }
    }
    // Check if any inputs need initializers: an input counts when its controller descriptor
    // and that descriptor's class name are both non-null.
    if (rootInputDescriptors != null) {
        LOG.info("Root Inputs exist for Vertex: " + getName() + " : " + rootInputDescriptors);
        for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> input : rootInputDescriptors.values()) {
            if (input.getControllerDescriptor() != null && input.getControllerDescriptor().getClassName() != null) {
                // The set is created lazily, so it stays null when no input has an initializer.
                if (inputsWithInitializers == null) {
                    inputsWithInitializers = Sets.newHashSet();
                }
                inputsWithInitializers.add(input.getName());
                // NOTE(review): the message says "Starting", but this method only records the
                // input name; the initializer is not launched in the code visible here.
                LOG.info("Starting root input initializer for input: " + input.getName() + ", with class: [" + input.getControllerDescriptor().getClassName() + "]");
            }
        }
    }
    // Detect whether any incoming edge uses SCATTER_GATHER data movement.
    boolean hasBipartite = false;
    if (sourceVertices != null) {
        for (Edge edge : sourceVertices.values()) {
            if (edge.getEdgeProperty().getDataMovementType() == DataMovementType.SCATTER_GATHER) {
                hasBipartite = true;
                break;
            }
        }
    }
    // A SCATTER_GATHER source edge combined with at least one root-input initializer is rejected.
    if (hasBipartite && inputsWithInitializers != null) {
        LOG.error("A vertex with an Initial Input and a Shuffle Input are not supported at the moment");
        return finished(VertexState.FAILED);
    }
    numTasks = getVertexPlan().getTaskConfig().getNumTasks();
    // Accepts -1 and any non-negative count; only values below -1 are treated as invalid.
    if (!(numTasks == -1 || numTasks >= 0)) {
        addDiagnostic("Invalid task count for vertex" + ", numTasks=" + numTasks);
        trySetTerminationCause(VertexTerminationCause.INVALID_NUM_OF_TASKS);
        // NOTE(review): unlike the other failure paths, this one returns FAILED directly
        // instead of going through finished(...) - confirm that this is intentional.
        return VertexState.FAILED;
    }
    checkTaskLimits();
    // NOTE(review): the original comment here was the fragment "reset to -1 after the
    // restore."; the statement it referred to is not visible in this method.
    try {
        assignVertexManager();
    } catch (TezException e1) {
        // The full stack trace is embedded in the message passed to both the log and finished(...).
        String msg = "Fail to create VertexManager, " + ExceptionUtils.getStackTrace(e1);
        LOG.error(msg);
        return finished(VertexState.FAILED, VertexTerminationCause.INIT_FAILURE, msg);
    }
    try {
        vertexManager.initialize();
        vmIsInitialized.set(true);
        // Deliver the vertex-manager events queued in pendingVmEvents, in iteration order,
        // then empty the queue.
        if (!pendingVmEvents.isEmpty()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Processing: " + pendingVmEvents.size() + " pending VMEvents for Vertex: " + logIdentifier);
            }
            for (VertexManagerEvent vmEvent : pendingVmEvents) {
                vertexManager.onVertexManagerEventReceived(vmEvent);
            }
            pendingVmEvents.clear();
        }
    } catch (AMUserCodeException e) {
        String msg = "Exception in " + e.getSource() + ", vertex:" + logIdentifier;
        LOG.error(msg, e);
        // The return value of finished(...) is ignored here; FAILED is returned explicitly.
        finished(VertexState.FAILED, VertexTerminationCause.AM_USERCODE_FAILURE, msg + ", " + e.getMessage() + ", " + ExceptionUtils.getStackTrace(e.getCause()));
        return VertexState.FAILED;
    }
    return VertexState.INITED;
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) TezException(org.apache.tez.dag.api.TezException) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) VertexGroupInfo(org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec)

Example 33 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

In the class RootInputInitializerManager, method runInputInitializers.

/**
 * Creates an initializer for each given root input, registers it, and submits it for
 * execution on {@code executor}.
 *
 * <p>For every input the method builds a context, creates the initializer while a Hadoop
 * caller context is set for the vertex (it is always cleared afterwards), wraps it, applies
 * any vertex-state-update registrations that were pending under the input's name, stores the
 * wrapper in {@code initializerMap}, and finally submits the callable and attaches a callback
 * to the resulting future.
 *
 * @param inputs the root inputs to create and run initializers for
 * @throws TezException declared by the signature; propagated from the calls made here
 */
public void runInputInitializers(List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> inputs) throws TezException {
    for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> rootInput : inputs) {
        InputInitializerContext initializerContext =
            new TezRootInputInitializerContextImpl(rootInput, vertex, appContext, this);

        // Create the initializer with the caller context set; clear it even when creation fails.
        InputInitializer createdInitializer;
        try {
            TezUtilsInternal.setHadoopCallerContext(appContext.getHadoopShim(), vertex.getVertexId());
            createdInitializer = createInitializer(rootInput, initializerContext);
        } finally {
            appContext.getHadoopShim().clearHadoopCallerContext();
        }

        InitializerWrapper wrapper = new InitializerWrapper(
            rootInput, createdInitializer, initializerContext, vertex, entityStateTracker, appContext);

        // Hand over registrations that were queued under this input's name; they are removed
        // from the pending collection in the process.
        List<VertexUpdateRegistrationHolder> queuedRegistrations =
            pendingVertexRegistrations.removeAll(rootInput.getName());
        if (queuedRegistrations != null) {
            for (VertexUpdateRegistrationHolder holder : queuedRegistrations) {
                wrapper.registerForVertexStateUpdates(holder.vertexName, holder.stateSet);
            }
        }

        // The wrapper is made visible in the map before the work is submitted.
        initializerMap.put(rootInput.getName(), wrapper);
        InputInitializerCallable initializerTask = new InputInitializerCallable(wrapper, dagUgi, appContext);
        ListenableFuture<List<Event>> pendingEvents = executor.submit(initializerTask);
        Futures.addCallback(pendingEvents, createInputInitializerCallback(wrapper));
    }
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) InputInitializer(org.apache.tez.runtime.api.InputInitializer) List(java.util.List) LinkedList(java.util.LinkedList) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) TezRootInputInitializerContextImpl(org.apache.tez.dag.app.dag.impl.TezRootInputInitializerContextImpl)

Example 34 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

In the class TestExceptionPropagation, method createDAG.

/**
 * Creates a DAG with two vertices (v1 --> v2). The name of {@code exLocation} is wrapped in a
 * payload that is set on the Input, Output, Processor and initializer descriptors so that
 * they can control where an exception is thrown.
 *
 * <p>When the location name starts with {@code "EM_"} the vertices are connected through a
 * custom edge manager; otherwise v2 gets its own vertex manager plugin and the edge is
 * ONE_TO_ONE.
 *
 * @param exLocation the location whose name is used for the DAG name and the payload
 * @return the assembled DAG, named {@code "dag_" + exLocation.name()}
 * @throws IOException declared by the signature (NOTE(review): which call raises it is not
 *         evident from this method alone)
 */
private DAG createDAG(ExceptionLocation exLocation) throws IOException {
    DAG dag = DAG.create("dag_" + exLocation.name());
    // NOTE(review): getBytes() encodes with the platform default charset.
    byte[] locationBytes = exLocation.name().getBytes();
    UserPayload payload = UserPayload.create(ByteBuffer.wrap(locationBytes));

    // v1: exception-capable processor, data source and vertex manager.
    Vertex v1 = Vertex.create("v1", ProcessorWithException.getProcDesc(payload), 1);
    InputDescriptor v1Input = InputWithException.getInputDesc(payload);
    InputInitializerDescriptor v1Initializer = InputInitializerWithException.getIIDesc(payload);
    v1.addDataSource("input", DataSourceDescriptor.create(v1Input, v1Initializer, null));
    v1.setVertexManagerPlugin(RootInputVertexManagerWithException.getVMDesc(exLocation));

    // v2: no-op processor and input, but an exception-capable initializer.
    Vertex v2 = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 1);
    InputDescriptor v2Input = InputDescriptor.create(NoOpInput.class.getName());
    v2.addDataSource("input2", DataSourceDescriptor.create(v2Input, InputInitializerWithException2.getIIDesc(payload), null));

    dag.addVertex(v1).addVertex(v2);

    EdgeProperty edgeProperty;
    if (exLocation.name().startsWith("EM_")) {
        edgeProperty = EdgeProperty.create(
            EdgeManagerPluginDescriptor.create(CustomEdgeManager.class.getName()).setUserPayload(payload),
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputWithException.getOutputDesc(payload),
            InputWithException.getInputDesc(payload));
    } else {
        // The customized VertexManager is set only here; it cannot be used with a custom edge.
        v2.setVertexManagerPlugin(InputReadyVertexManagerWithException.getVMDesc(exLocation));
        edgeProperty = EdgeProperty.create(
            DataMovementType.ONE_TO_ONE,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            OutputWithException.getOutputDesc(payload),
            InputWithException.getInputDesc(payload));
    }
    dag.addEdge(Edge.create(v1, v2, edgeProperty));
    return dag;
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Vertex(org.apache.tez.dag.api.Vertex) UserPayload(org.apache.tez.dag.api.UserPayload) NoOpInput(org.apache.tez.test.dag.MultiAttemptDAG.NoOpInput) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) DAG(org.apache.tez.dag.api.DAG)

Example 35 with InputDescriptor

use of org.apache.tez.dag.api.InputDescriptor in project tez by apache.

In the class TestWeightedScalingMemoryDistributor, method testWeightedScalingNonConcurrent.

/**
 * Checks the memory assigned to five 10K requests (two inputs, two outputs, one processor)
 * when input/output concurrency is disabled, non-concurrent inputs are enabled, the reserve
 * fraction is 0.2 and the weighted ratios come from
 * {@code generateWeightStrings(0, 0, 1, 2, 3, 1, 1)}.
 */
@Test(timeout = 5000)
public void testWeightedScalingNonConcurrent() throws TezException {
    Configuration scalingConf = new Configuration(this.conf);
    scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_INPUT_OUTPUT_CONCURRENT, false);
    scalingConf.setBoolean(TezConfiguration.TEZ_TASK_SCALE_MEMORY_NON_CONCURRENT_INPUTS_ENABLED, true);
    scalingConf.setDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, 0.2);
    scalingConf.setStrings(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS, WeightedScalingMemoryDistributor.generateWeightStrings(0, 0, 1, 2, 3, 1, 1));
    String configuredRatios = Joiner.on(",").join(
        scalingConf.getStringCollection(TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS));
    System.err.println(configuredRatios);

    MemoryDistributor distributor = new MemoryDistributor(2, 2, scalingConf);
    distributor.setJvmMemory(10000L);

    // Request 1: input described by OrderedGroupedKVInput (scatter-gather shuffle input).
    MemoryUpdateCallbackForTest orderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext orderedInputContext = createTestInputContext();
    InputDescriptor orderedInputDescriptor = createTestInputDescriptor(OrderedGroupedKVInput.class);
    distributor.requestMemory(10000, orderedInputCallback, orderedInputContext, orderedInputDescriptor);

    // Request 2: input described by UnorderedKVInput (broadcast input).
    MemoryUpdateCallbackForTest unorderedInputCallback = new MemoryUpdateCallbackForTest();
    InputContext unorderedInputContext = createTestInputContext();
    InputDescriptor unorderedInputDescriptor = createTestInputDescriptor(UnorderedKVInput.class);
    distributor.requestMemory(10000, unorderedInputCallback, unorderedInputContext, unorderedInputDescriptor);

    // Request 3: output with the default test descriptor (simulates MROutput).
    MemoryUpdateCallbackForTest defaultOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext defaultOutputContext = createTestOutputContext();
    OutputDescriptor defaultOutputDescriptor = createTestOutputDescriptor();
    distributor.requestMemory(10000, defaultOutputCallback, defaultOutputContext, defaultOutputDescriptor);

    // Request 4: output described by OrderedPartitionedKVOutput (sorted output).
    MemoryUpdateCallbackForTest sortedOutputCallback = new MemoryUpdateCallbackForTest();
    OutputContext sortedOutputContext = createTestOutputContext();
    OutputDescriptor sortedOutputDescriptor = createTestOutputDescriptor(OrderedPartitionedKVOutput.class);
    distributor.requestMemory(10000, sortedOutputCallback, sortedOutputContext, sortedOutputDescriptor);

    // Request 5: the processor.
    MemoryUpdateCallbackForTest processorCallback = new MemoryUpdateCallbackForTest();
    ProcessorContext processorContext = createTestProcessortContext();
    ProcessorDescriptor processorDescriptor = createTestProcessorDescriptor();
    distributor.requestMemory(10000, processorCallback, processorContext, processorDescriptor);

    distributor.makeInitialAllocations();

    // Total available: 80% of 10K = 8000.
    // Five 10K requests with weights 3, 1, 1, 2, 1 in request order.
    // Input and output memory overlap, so each group is split across the same share.
    assertEquals(5250, orderedInputCallback.assigned);
    assertEquals(1750, unorderedInputCallback.assigned);
    assertEquals(2333, defaultOutputCallback.assigned);
    assertEquals(4666, sortedOutputCallback.assigned);
    assertEquals(1000, processorCallback.assigned);
}
Also used : InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) InputContext(org.apache.tez.runtime.api.InputContext) WeightedScalingMemoryDistributor(org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor) ProcessorDescriptor(org.apache.tez.dag.api.ProcessorDescriptor) OutputContext(org.apache.tez.runtime.api.OutputContext) ProcessorContext(org.apache.tez.runtime.api.ProcessorContext) Test(org.junit.Test)

Aggregations

InputDescriptor (org.apache.tez.dag.api.InputDescriptor)37 Test (org.junit.Test)18 InputInitializerDescriptor (org.apache.tez.dag.api.InputInitializerDescriptor)11 OutputDescriptor (org.apache.tez.dag.api.OutputDescriptor)10 InputContext (org.apache.tez.runtime.api.InputContext)10 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)9 Configuration (org.apache.hadoop.conf.Configuration)8 OutputContext (org.apache.tez.runtime.api.OutputContext)8 UserPayload (org.apache.tez.dag.api.UserPayload)7 LinkedList (java.util.LinkedList)5 WeightedScalingMemoryDistributor (org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor)5 DataSourceDescriptor (org.apache.tez.dag.api.DataSourceDescriptor)4 ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor)4 TezVertexID (org.apache.tez.dag.records.TezVertexID)4 InputSpec (org.apache.tez.runtime.api.impl.InputSpec)4 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)4 IOException (java.io.IOException)3 List (java.util.List)3 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)3 RootInputLeafOutput (org.apache.tez.dag.api.RootInputLeafOutput)3