Search in sources :

Example 1 with InputSpecUpdate

use of org.apache.tez.runtime.api.InputSpecUpdate in project tez by apache.

the class VertexImpl method setParallelismWrapper.

/**
 * Sets or changes the number of tasks of this vertex, optionally replacing the edge
 * properties of incoming edges and the root input specs. Runs entirely under the vertex
 * write lock.
 *
 * <p>Two cases are handled:
 * <ul>
 *   <li>{@code numTasks == -1}: parallelism has not been decided yet. The vertex must be in
 *       {@code INITIALIZING} state; edges and root input specs are updated, tasks are created
 *       and, if {@code canInitVertex()} holds, a {@code V_READY_TO_INIT} event is sent.</li>
 *   <li>otherwise: an already configured vertex is reconfigured. Root input spec updates are
 *       rejected, incoming edges buffer events while tasks are added/removed and edge
 *       properties are replaced.</li>
 * </ul>
 *
 * @param parallelism          new number of tasks, must be {@code >= 0}
 * @param vertexLocationHint   location hint to apply; discarded when an already configured
 *                             vertex changes its parallelism
 * @param sourceEdgeProperties new edge properties keyed by source vertex name, may be
 *                             {@code null} unless the parallelism of a configured vertex is
 *                             left unchanged
 * @param rootInputSpecUpdates root input spec updates keyed by input name; must be
 *                             {@code null} unless {@code numTasks == -1}
 * @param fromVertexManager    whether the call originates from the vertex manager
 * @throws TezUncheckedException if tasks were already scheduled, if the vertex is not
 *                               initializing while {@code numTasks == -1}, or if an edge
 *                               property could not be replaced
 * @throws AMUserCodeException   as declared; propagated from the calls made in this method
 */
private void setParallelismWrapper(int parallelism, VertexLocationHint vertexLocationHint, Map<String, EdgeProperty> sourceEdgeProperties, Map<String, InputSpecUpdate> rootInputSpecUpdates, boolean fromVertexManager) throws AMUserCodeException {
    Preconditions.checkArgument(parallelism >= 0, "Parallelism must be >=0. Value: " + parallelism + " for vertex: " + logIdentifier);
    writeLock.lock();
    try {
        // Recorded up front: the flag marks that the API was invoked, even if the call fails below.
        this.setParallelismCalledFlag = true;
        // disallow changing things after a vertex has started
        if (!tasksNotYetScheduled) {
            String msg = "setParallelism cannot be called after scheduling tasks. Vertex: " + getLogIdentifier();
            LOG.info(msg);
            throw new TezUncheckedException(msg);
        }
        if (fromVertexManager && canInitVertex()) {
            // vertex is fully defined. setParallelism has been called. VertexManager should have
            // informed us about this. Otherwise we would have notified listeners that we are fully
            // defined before we are actually fully defined
            Preconditions.checkState(vertexToBeReconfiguredByManager, "Vertex is fully configured but still" + " the reconfiguration API has been called. VertexManager must notify the framework using " + " context.vertexReconfigurationPlanned() before re-configuring the vertex." + " vertexId=" + logIdentifier);
        }
        // Input initializer/Vertex Manager/1-1 split expected to set parallelism.
        if (numTasks == -1) {
            if (getState() != VertexState.INITIALIZING) {
                throw new TezUncheckedException("Vertex state is not Initializing. Value: " + getState() + " for vertex: " + logIdentifier);
            }
            applySourceEdgeProperties(sourceEdgeProperties);
            if (rootInputSpecUpdates != null) {
                LOG.info("Got updated RootInputsSpecs: " + rootInputSpecUpdates.toString());
                // Sanity check for correct number of updates.
                for (Entry<String, InputSpecUpdate> rootInputSpecUpdateEntry : rootInputSpecUpdates.entrySet()) {
                    Preconditions.checkState(rootInputSpecUpdateEntry.getValue().isForAllWorkUnits() || (rootInputSpecUpdateEntry.getValue().getAllNumPhysicalInputs() != null && rootInputSpecUpdateEntry.getValue().getAllNumPhysicalInputs().size() == parallelism), "Not enough input spec updates for root input named " + rootInputSpecUpdateEntry.getKey());
                }
                this.rootInputSpecs.putAll(rootInputSpecUpdates);
            }
            int oldNumTasks = numTasks;
            this.numTasks = parallelism;
            stateChangeNotifier.stateChanged(vertexId, new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
            this.createTasks();
            setVertexLocationHint(vertexLocationHint);
            LOG.info("Vertex " + getLogIdentifier() + " parallelism set to " + parallelism);
            if (canInitVertex()) {
                getEventHandler().handle(new VertexEvent(getVertexId(), VertexEventType.V_READY_TO_INIT));
            }
        } else {
            // This is an artificial restriction since there's no way of knowing whether a VertexManager
            // will attempt to update root input specs. When parallelism has not been initialized, the
            // Vertex will not be in started state so it's safe to update the specifications.
            // TODO TEZ-937 - add a mechanism to query vertex managers, or for VMs to indicate readiness
            // for a vertex to start.
            Preconditions.checkState(rootInputSpecUpdates == null, "Root Input specs can only be updated when the vertex is configured with -1 tasks");
            int oldNumTasks = numTasks;
            if (parallelism == numTasks) {
                LOG.info("setParallelism same as current value: " + parallelism + " for vertex: " + logIdentifier);
                // Validate before any edge is switched to buffering, so that a rejected call
                // does not leave the source edges in buffering mode.
                Preconditions.checkArgument(sourceEdgeProperties != null, "Source edge managers or RootInputSpecs must be set when not changing parallelism");
            }
            // start buffering incoming events so that we can re-route existing events
            for (Edge edge : sourceVertices.values()) {
                edge.startEventBuffering();
            }
            if (parallelism != numTasks) {
                LOG.info("Resetting vertex location hints due to change in parallelism for vertex: " + logIdentifier);
                vertexLocationHint = null;
                if (parallelism > numTasks) {
                    addTasks(parallelism);
                } else if (parallelism < numTasks) {
                    removeTasks(parallelism);
                }
            }
            Preconditions.checkState(this.numTasks == parallelism, getLogIdentifier());
            // set new vertex location hints
            setVertexLocationHint(vertexLocationHint);
            LOG.info("Vertex " + getLogIdentifier() + " parallelism set to " + parallelism + " from " + oldNumTasks);
            // notify listeners
            stateChangeNotifier.stateChanged(vertexId, new VertexStateUpdateParallelismUpdated(vertexName, numTasks, oldNumTasks));
            assert tasks.size() == numTasks;
            // set new edge managers
            applySourceEdgeProperties(sourceEdgeProperties);
            // stop buffering events
            for (Edge edge : sourceVertices.values()) {
                edge.stopEventBuffering();
            }
        }
    } finally {
        writeLock.unlock();
    }
}

/**
 * Replaces the edge property of every incoming edge named in {@code sourceEdgeProperties}.
 *
 * @param sourceEdgeProperties new edge properties keyed by source vertex name; {@code null}
 *                             means there is nothing to replace
 * @throws TezUncheckedException if a named source vertex has no edge into this vertex, or if
 *                               setting the edge property fails (the cause is preserved)
 */
private void applySourceEdgeProperties(Map<String, EdgeProperty> sourceEdgeProperties) {
    if (sourceEdgeProperties == null) {
        return;
    }
    for (Map.Entry<String, EdgeProperty> entry : sourceEdgeProperties.entrySet()) {
        LOG.info("Replacing edge manager for source:" + entry.getKey() + " destination: " + getLogIdentifier());
        Vertex sourceVertex = appContext.getCurrentDAG().getVertex(entry.getKey());
        Edge edge = sourceVertices.get(sourceVertex);
        if (edge == null) {
            // Fail with the offending name instead of an anonymous NullPointerException.
            throw new TezUncheckedException("Fail to update EdgeProperty, no edge found from sourceVertex: " + entry.getKey() + " to destinationVertex: " + getLogIdentifier());
        }
        try {
            edge.setEdgeProperty(entry.getValue());
        } catch (Exception e) {
            throw new TezUncheckedException("Fail to update EdgeProperty for Edge, sourceVertex: " + edge.getSourceVertexName() + ", destinationVertex: " + edge.getDestinationVertexName(), e);
        }
    }
}
Also used : VertexStateUpdateParallelismUpdated(org.apache.tez.dag.api.event.VertexStateUpdateParallelismUpdated) VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) InvalidStateTransitonException(org.apache.hadoop.yarn.state.InvalidStateTransitonException) LimitExceededException(org.apache.tez.common.counters.LimitExceededException) TezException(org.apache.tez.dag.api.TezException) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) VertexEvent(org.apache.tez.dag.app.dag.event.VertexEvent) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 2 with InputSpecUpdate

use of org.apache.tez.runtime.api.InputSpecUpdate in project tez by apache.

the class RootInputVertexManager method onRootVertexInitialized.

/**
 * Processes the events produced by the initializer of one root input.
 *
 * <ul>
 *   <li>{@code InputConfigureVertexTasksEvent}: reconfigures the vertex with the number of
 *       tasks, location hint and input spec update carried by the event. Only allowed while
 *       the vertex reports -1 tasks and before any data information event was seen.</li>
 *   <li>{@code InputUpdatePayloadEvent}: replaces the user payload of the input descriptor.
 *       Only allowed before any data information event was seen.</li>
 *   <li>{@code InputDataInformationEvent}: routed 1:1 (target index = source index) and
 *       collected; all collected events are handed to the context at the end.</li>
 * </ul>
 *
 * @param inputName       name of the root input that was initialized
 * @param inputDescriptor descriptor of that input; its user payload may be replaced
 * @param events          events generated by the input initializer
 */
@Override
public void onRootVertexInitialized(String inputName, InputDescriptor inputDescriptor, List<Event> events) {
    List<InputDataInformationEvent> routedEvents = Lists.newLinkedList();
    boolean sawDataInformation = false;
    for (Event current : events) {
        if (current instanceof InputConfigureVertexTasksEvent) {
            // No tasks should have been started yet. Checked by initial state check.
            Preconditions.checkState(!sawDataInformation);
            Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) == -1, "Parallelism for the vertex should be set to -1 if the InputInitializer is setting parallelism" + ", VertexName: " + getContext().getVertexName());
            Preconditions.checkState(configuredInputName == null, "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName: " + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
            configuredInputName = inputName;
            InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) current;
            // Fall back to the default single-physical-input spec when the event carries none.
            InputSpecUpdate specUpdate = configureEvent.getInputSpecUpdate();
            if (specUpdate == null) {
                specUpdate = InputSpecUpdate.getDefaultSinglePhysicalInputSpecUpdate();
            }
            Map<String, InputSpecUpdate> specUpdatesByInput = new HashMap<String, InputSpecUpdate>();
            specUpdatesByInput.put(inputName, specUpdate);
            getContext().reconfigureVertex(specUpdatesByInput, configureEvent.getLocationHint(), configureEvent.getNumTasks());
        }
        if (current instanceof InputUpdatePayloadEvent) {
            // No tasks should have been started yet. Checked by initial state check.
            Preconditions.checkState(!sawDataInformation);
            InputUpdatePayloadEvent payloadEvent = (InputUpdatePayloadEvent) current;
            inputDescriptor.setUserPayload(UserPayload.create(payloadEvent.getUserPayload()));
            continue;
        }
        if (!(current instanceof InputDataInformationEvent)) {
            continue;
        }
        sawDataInformation = true;
        // # Tasks should have been set by this point.
        Preconditions.checkState(getContext().getVertexNumTasks(getContext().getVertexName()) != 0);
        Preconditions.checkState(configuredInputName == null || configuredInputName.equals(inputName), "RootInputVertexManager cannot configure multiple inputs. Use a custom VertexManager" + ", VertexName:" + getContext().getVertexName() + ", ConfiguredInput: " + configuredInputName + ", CurrentInput: " + inputName);
        configuredInputName = inputName;
        InputDataInformationEvent dataEvent = (InputDataInformationEvent) current;
        // 1:1 routing
        dataEvent.setTargetIndex(dataEvent.getSourceIndex());
        routedEvents.add(dataEvent);
    }
    getContext().addRootInputEvents(inputName, routedEvents);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) InputUpdatePayloadEvent(org.apache.tez.runtime.api.events.InputUpdatePayloadEvent) Event(org.apache.tez.runtime.api.Event) VertexManagerEvent(org.apache.tez.runtime.api.events.VertexManagerEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent) InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent)

Example 3 with InputSpecUpdate

use of org.apache.tez.runtime.api.InputSpecUpdate in project tez by apache.

the class TestDAGRecovery method initMockDAGRecoveryDataForTask.

// ///////////////////////////// Task ////////////////////////////////////////////////////////////
/**
 * Stubs {@code dagRecoveryData} for the task recovery tests: an initialized and started DAG
 * whose vertex v1 was initialized, configured and started, with no task recovery data.
 */
private void initMockDAGRecoveryDataForTask() {
    // DAG-level history events
    DAGInitializedEvent dagInited = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null);
    DAGStartedEvent dagStarted = new DAGStartedEvent(dagId, dagStartedTime, "user", "dagName");

    // Vertex-level history events for v1 (no generated events, no root input spec updates)
    List<TezEvent> noGeneratedEvents = new ArrayList<TezEvent>();
    Map<String, InputSpecUpdate> noSpecUpdates = new HashMap<String, InputSpecUpdate>();
    VertexInitializedEvent vertexInited = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, noGeneratedEvents, null);
    VertexConfigurationDoneEvent vertexConfigured = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, noSpecUpdates, true);
    VertexStartedEvent vertexStarted = new VertexStartedEvent(v1Id, 0L, v1StartedTime);

    // Recovery data for v1 with an empty task map
    Map<TezTaskID, TaskRecoveryData> noTaskData = new HashMap<TezTaskID, TaskRecoveryData>();
    VertexRecoveryData vertexData = new VertexRecoveryData(vertexInited, vertexConfigured, vertexStarted, null, noTaskData, false);

    doReturn(vertexData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
    doReturn(dagInited).when(dagRecoveryData).getDAGInitializedEvent();
    doReturn(dagStarted).when(dagRecoveryData).getDAGStartedEvent();
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) TezTaskID(org.apache.tez.dag.records.TezTaskID) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent)

Example 4 with InputSpecUpdate

use of org.apache.tez.runtime.api.InputSpecUpdate in project tez by apache.

the class TestHistoryEventsProtoConversion method testVertexReconfigureDoneEvent.

/**
 * Round-trips a {@code VertexConfigurationDoneEvent} through {@code testProtoConversion} and
 * checks that vertex id, task count, the set-parallelism flag, location hint, root input spec
 * updates (one "all tasks" and one "per task") and source edge properties (one standard and
 * one custom edge) are preserved.
 *
 * @throws Exception if the conversion fails
 */
private void testVertexReconfigureDoneEvent() throws Exception {
    VertexLocationHint vertexLocationHint = VertexLocationHint.create(new ArrayList<TaskLocationHint>());
    InputSpecUpdate rootInputSpecUpdateBulk = InputSpecUpdate.createAllTaskInputSpecUpdate(2);
    InputSpecUpdate rootInputSpecUpdatePerTask = InputSpecUpdate.createPerTaskInputSpecUpdate(Lists.newArrayList(1, 2, 3));
    Map<String, InputSpecUpdate> rootInputSpecUpdates = new HashMap<String, InputSpecUpdate>();
    rootInputSpecUpdates.put("input1", rootInputSpecUpdateBulk);
    rootInputSpecUpdates.put("input2", rootInputSpecUpdatePerTask);
    Map<String, EdgeProperty> sourceEdgeManagers = new HashMap<String, EdgeProperty>();
    // add standard and custom edge
    sourceEdgeManagers.put("foo", EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out1"), InputDescriptor.create("in1")));
    // Encode with an explicit charset so the payload bytes do not depend on the platform default.
    byte[] customEdgePayload = "payload".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    sourceEdgeManagers.put("foo1", EdgeProperty.create(EdgeManagerPluginDescriptor.create("bar1").setUserPayload(UserPayload.create(ByteBuffer.wrap(customEdgePayload), 100)), DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("Out1"), InputDescriptor.create("in1")));
    final long reconfigureDoneTime = 100;
    final int numTasks = 2;
    VertexConfigurationDoneEvent event = new VertexConfigurationDoneEvent(TezVertexID.getInstance(TezDAGID.getInstance(ApplicationId.newInstance(0, 1), 1), 111), reconfigureDoneTime, numTasks, vertexLocationHint, sourceEdgeManagers, rootInputSpecUpdates, true);
    Assert.assertEquals(numTasks, event.getNumTasks());
    Assert.assertEquals(reconfigureDoneTime, event.getReconfigureDoneTime());
    VertexConfigurationDoneEvent deserializedEvent = (VertexConfigurationDoneEvent) testProtoConversion(event);
    Assert.assertEquals(event.getVertexID(), deserializedEvent.getVertexID());
    Assert.assertEquals(event.getNumTasks(), deserializedEvent.getNumTasks());
    Assert.assertEquals(event.isSetParallelismCalled(), deserializedEvent.isSetParallelismCalled());
    // vertexLocationHint
    Assert.assertEquals(event.getVertexLocationHint(), deserializedEvent.getVertexLocationHint());
    // rootInputSpec
    Assert.assertEquals(event.getRootInputSpecUpdates().size(), deserializedEvent.getRootInputSpecUpdates().size());
    InputSpecUpdate deserializedBulk = deserializedEvent.getRootInputSpecUpdates().get("input1");
    InputSpecUpdate deserializedPerTask = deserializedEvent.getRootInputSpecUpdates().get("input2");
    // Fail with an assertion (not a NullPointerException) if an entry was lost in conversion.
    Assert.assertNotNull(deserializedBulk);
    Assert.assertNotNull(deserializedPerTask);
    Assert.assertEquals(rootInputSpecUpdateBulk.isForAllWorkUnits(), deserializedBulk.isForAllWorkUnits());
    Assert.assertEquals(rootInputSpecUpdateBulk.getAllNumPhysicalInputs(), deserializedBulk.getAllNumPhysicalInputs());
    Assert.assertEquals(rootInputSpecUpdatePerTask.isForAllWorkUnits(), deserializedPerTask.isForAllWorkUnits());
    Assert.assertEquals(rootInputSpecUpdatePerTask.getAllNumPhysicalInputs(), deserializedPerTask.getAllNumPhysicalInputs());
    // sourceEdgeManager
    Assert.assertEquals(event.getSourceEdgeProperties().size(), deserializedEvent.getSourceEdgeProperties().size());
    Assert.assertEquals(event.getSourceEdgeProperties().get("foo").getDataMovementType(), deserializedEvent.getSourceEdgeProperties().get("foo").getDataMovementType());
    Assert.assertNull(deserializedEvent.getSourceEdgeProperties().get("foo").getEdgeManagerDescriptor());
    Assert.assertEquals(event.getSourceEdgeProperties().get("foo1").getDataMovementType(), deserializedEvent.getSourceEdgeProperties().get("foo1").getDataMovementType());
    Assert.assertEquals(event.getSourceEdgeProperties().get("foo1").getEdgeManagerDescriptor().getUserPayload().getVersion(), deserializedEvent.getSourceEdgeProperties().get("foo1").getEdgeManagerDescriptor().getUserPayload().getVersion());
    Assert.assertArrayEquals(event.getSourceEdgeProperties().get("foo1").getEdgeManagerDescriptor().getUserPayload().deepCopyAsArray(), deserializedEvent.getSourceEdgeProperties().get("foo1").getEdgeManagerDescriptor().getUserPayload().deepCopyAsArray());
    logEvents(event, deserializedEvent);
}
Also used : TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) HashMap(java.util.HashMap) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) EdgeProperty(org.apache.tez.dag.api.EdgeProperty) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint)

Example 5 with InputSpecUpdate

use of org.apache.tez.runtime.api.InputSpecUpdate in project tez by apache.

the class TestDAGRecovery method initMockDAGRecoveryDataForTaskAttempt.

// ///////////////////////////// TaskAttempt Recovery /////////////////////////////////////////////////////
/**
 * Stubs {@code dagRecoveryData} for the task attempt recovery tests: an initialized and
 * started DAG whose vertex v1 was initialized, configured and started, with recovery data
 * for one started task (t1v1Id).
 */
private void initMockDAGRecoveryDataForTaskAttempt() {
    // DAG-level history events
    DAGInitializedEvent dagInited = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null);
    DAGStartedEvent dagStarted = new DAGStartedEvent(dagId, dagStartedTime, "user", "dagName");

    // Task-level recovery data: a single task that has only been started
    TaskStartedEvent taskStarted = new TaskStartedEvent(t1v1Id, "vertex1", 0L, t1StartedTime);
    Map<TezTaskID, TaskRecoveryData> taskDataById = new HashMap<TezTaskID, TaskRecoveryData>();
    taskDataById.put(t1v1Id, new TaskRecoveryData(taskStarted, null, null));

    // Vertex-level history events for v1 (no generated events, no root input spec updates)
    List<TezEvent> noGeneratedEvents = new ArrayList<TezEvent>();
    Map<String, InputSpecUpdate> noSpecUpdates = new HashMap<String, InputSpecUpdate>();
    VertexInitializedEvent vertexInited = new VertexInitializedEvent(v1Id, "vertex1", 0L, v1InitedTime, v1NumTask, "", null, noGeneratedEvents, null);
    VertexConfigurationDoneEvent vertexConfigured = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, noSpecUpdates, true);
    VertexStartedEvent vertexStarted = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
    VertexRecoveryData vertexData = new VertexRecoveryData(vertexInited, vertexConfigured, vertexStarted, null, taskDataById, false);

    doReturn(vertexData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
    doReturn(dagInited).when(dagRecoveryData).getDAGInitializedEvent();
    doReturn(dagStarted).when(dagRecoveryData).getDAGStartedEvent();
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InputSpecUpdate(org.apache.tez.runtime.api.InputSpecUpdate) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TezTaskID(org.apache.tez.dag.records.TezTaskID) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent)

Aggregations

InputSpecUpdate (org.apache.tez.runtime.api.InputSpecUpdate): 6; HashMap (java.util.HashMap): 5; EdgeProperty (org.apache.tez.dag.api.EdgeProperty): 3; ArrayList (java.util.ArrayList): 2; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2; TaskLocationHint (org.apache.tez.dag.api.TaskLocationHint): 2; VertexLocationHint (org.apache.tez.dag.api.VertexLocationHint): 2; TaskRecoveryData (org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData): 2; VertexRecoveryData (org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData): 2; DAGInitializedEvent (org.apache.tez.dag.history.events.DAGInitializedEvent): 2; DAGStartedEvent (org.apache.tez.dag.history.events.DAGStartedEvent): 2; VertexConfigurationDoneEvent (org.apache.tez.dag.history.events.VertexConfigurationDoneEvent): 2; VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent): 2; VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent): 2; TezTaskID (org.apache.tez.dag.records.TezTaskID): 2; TezEvent (org.apache.tez.runtime.api.impl.TezEvent): 2; IOException (java.io.IOException): 1; LinkedHashMap (java.util.LinkedHashMap): 1; Map (java.util.Map): 1; InvalidStateTransitonException (org.apache.hadoop.yarn.state.InvalidStateTransitonException): 1