Search in sources :

Example 6 with PartitionInfo

use of org.knime.core.node.streamable.PartitionInfo in project knime-core by knime.

the class StreamingTestNodeExecutionJob method performIntermediateIteration.

/**
 * Runs one intermediate iteration on every 'remote' node copy, one chunk after the other.
 *
 * For each chunk a new streamable operator is created from the respective node copy and its
 * {@code runIntermediate}-method is invoked. If a merge operator is available, the given internals are
 * loaded into the operator beforehand and the operator's internals are saved afterwards (both passed
 * through {@code saveAndLoadInternals}).
 *
 * @param remoteNodeContainers the node copies, indexed by chunk
 * @param exec the execution contexts, indexed by chunk
 * @param internals the internals loaded into each operator (only used if a merge operator is available)
 * @param inSpecsNoFlowPort the input specs passed on to {@code NodeModel#createStreamableOperator}
 * @param portInputs port inputs for each chunk and port
 * @param numChunks the number of chunks to process
 * @param mergeOpAvailable true if a mergeOperator is available
 * @return an array of length {@code numChunks} with the internals saved by each operator; the entries
 *         remain {@code null} if no merge operator is available
 * @throws Exception if any of the invoked operator methods fails
 */
private StreamableOperatorInternals[] performIntermediateIteration(final NativeNodeContainer[] remoteNodeContainers, final ExecutionContext[] exec, final StreamableOperatorInternals internals, final PortObjectSpec[] inSpecsNoFlowPort, final PortInput[][] portInputs, final int numChunks, final boolean mergeOpAvailable) throws Exception {
    final StreamableOperatorInternals[] internalsPerChunk = new StreamableOperatorInternals[numChunks];
    int chunk = 0;
    while (chunk < numChunks) {
        // one operator per chunk, created by the node copy responsible for that chunk
        final StreamableOperator operator = remoteNodeContainers[chunk].getNodeModel().createStreamableOperator(new PartitionInfo(chunk, numChunks), inSpecsNoFlowPort);
        if (mergeOpAvailable) {
            LOGGER.info("call: StreamableOperator#loadInternals");
            final StreamableOperatorInternals transferred = saveAndLoadInternals(internals);
            operator.loadInternals(transferred);
        }
        LOGGER.info("call: StreamableOperator#runIntermediate");
        operator.runIntermediate(portInputs[chunk], exec[chunk]);
        if (mergeOpAvailable) {
            LOGGER.info("call: StreamableOperator#saveInternals");
            final StreamableOperatorInternals saved = operator.saveInternals();
            internalsPerChunk[chunk] = saveAndLoadInternals(saved);
        }
        chunk++;
    }
    return internalsPerChunk;
}
Also used : StreamableOperator(org.knime.core.node.streamable.StreamableOperator) PartitionInfo(org.knime.core.node.streamable.PartitionInfo) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals)

Example 7 with PartitionInfo

use of org.knime.core.node.streamable.PartitionInfo in project knime-core by knime.

the class StreamingTestNodeExecutionJob method mainExecute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Test-executes a native node in a simulated streaming and distributed fashion:
 * <ol>
 * <li>rejects non-native nodes and loop start/end nodes,</li>
 * <li>determines the number of chunks from the distributable input ports,</li>
 * <li>creates and configures one 'remote' node copy per chunk,</li>
 * <li>runs the intermediate iterations as long as {@code NodeModel#iterate} returns {@code true},</li>
 * <li>runs {@code StreamableOperator#runFinal} on every chunk and merges the resulting internals,</li>
 * <li>collects the port outputs and loads them as execution result into the original node.</li>
 * </ol>
 * The node copies are removed and the post-execution clean-up is performed on every exit path taken after
 * the copies have been created.
 *
 * @return the execution result on success, {@link NodeContainerExecutionStatus#FAILURE} otherwise
 */
@Override
protected NodeContainerExecutionStatus mainExecute() {
    NodeContainer nodeContainer = getNodeContainer();
    if (!(nodeContainer instanceof NativeNodeContainer)) {
        String message = "Streaming and distributed TEST execution only available for native nodes (i.e. no meta- or subnodes)";
        nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
        LOGGER.error(message);
        return NodeContainerExecutionStatus.FAILURE;
    }
    // TODO should actually not be used for execution itself, but is currently!
    NativeNodeContainer localNodeContainer = (NativeNodeContainer) nodeContainer;
    if (localNodeContainer.getNodeModel() instanceof LoopStartNode || localNodeContainer.getNodeModel() instanceof LoopEndNode) {
        String message = "Streaming and distributed TEST execution doesn't work for Loop Start and End nodes.";
        nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
        LOGGER.error(message);
        return NodeContainerExecutionStatus.FAILURE;
    }
    // collect all warnings issued by the node model; they are attached to the result at the very end
    localNodeContainer.getNodeModel().addWarningListener(w -> {
        if (w != null) {
            m_warningMessages.add(w);
        }
    });
    // get the input objects (includes the flow variable port object at index 0!)
    PortObject[] inPortObjects = getPortObjects();
    // get the input object specs; index 0 (flow variable port) is intentionally left null
    PortObjectSpec[] inPortObjectSpecs = new PortObjectSpec[inPortObjects.length];
    for (int i = 1; i < inPortObjectSpecs.length; i++) {
        // optional in-ports may have no port object
        if (inPortObjects[i] != null) {
            inPortObjectSpecs[i] = inPortObjects[i].getSpec();
        }
    }
    // get input port roles
    LOGGER.info("call local: NodeModel#getInputPortRoles");
    InputPortRole[] inputPortRoles = localNodeContainer.getNodeModel().getInputPortRoles();
    // TODO: flow variables are currently not collected for the node copies
    // (and why would that be necessary only for non-streamable ports?)
    // check for distributable ports
    boolean isDistributable = false;
    for (int i = 0; i < inputPortRoles.length; i++) {
        if (inputPortRoles[i].isDistributable()) {
            isDistributable = true;
            break;
        }
    }
    /* ---- create node copies and configure ----*/
    // adjust the number of chunks if one of the distributable input table contains less rows than chunks
    int numChunks = isDistributable ? m_numChunks : 1;
    for (int i = 1; i < inPortObjects.length; i++) {
        // without the flow variable port; optional in-ports may have no port object
        if (inputPortRoles[i - 1].isDistributable() && inPortObjects[i] != null) {
            // compare as long: narrowing the row count to int first would overflow for very large tables
            // and wrongly reduce the number of chunks
            long rowCount = ((BufferedDataTable) inPortObjects[i]).size();
            if (rowCount < numChunks) {
                // safe narrowing, rowCount is smaller than the (int) number of chunks here
                numChunks = (int) Math.max(1, rowCount);
            }
        }
    }
    // create the 'remote' node containers used for the execution itself
    NativeNodeContainer[] remoteNodeContainers = createNodeCopies(localNodeContainer, numChunks);
    // exactly one execution context per 'remote' node
    ExecutionContext[] remoteExec = createExecutionContexts(remoteNodeContainers);
    // execution context for the original node
    // - mainly for the creation of the input and output tables (to be fed into the 'remote' node copies)
    // - created tables are tracked in m_tableChunksToBeDisposed to be disposed at the end
    // - should actually not be used for the actual execution but is currently! (TODO)
    ExecutionContext localExec = remoteExec[0];
    // from here on the node copies exist -> the clean-up in the finally-block must run on EVERY exit path
    // (including a failed configuration and exceptions thrown before the actual streaming execution)
    try {
        // configure the node copies
        for (int i = 0; i < remoteNodeContainers.length; i++) {
            try (WorkflowLock lock = localNodeContainer.getParent().lock()) {
                LOGGER.info("call remote: NodeModel#configure");
                boolean isConfigureOK = remoteNodeContainers[i].callNodeConfigure(inPortObjectSpecs, true);
                if (!isConfigureOK) {
                    String message = "Configuration failed";
                    nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
                    LOGGER.error(message);
                    return NodeContainerExecutionStatus.FAILURE;
                }
            }
        }
        // Overriding 'createInitialStreamableOperatorInternals' is only meaningful together with 'iterate'.
        if (checkForOverriddenMethod(localNodeContainer, "createInitialStreamableOperatorInternals") && !checkForOverriddenMethod(localNodeContainer, "iterate", StreamableOperatorInternals.class)) {
            m_warningMessages.add("Implementation warning: Overriding the 'createInitialStreamableOperatorInternals'-method without overriding the 'iterate'-method doesn't make sense.");
        }
        // create initial streamable operator internals for the first call of the iterate-method
        LOGGER.info("call local: NodeModel#createInitialStreamableOperatorInternals");
        StreamableOperatorInternals operatorInternals = localNodeContainer.getNodeModel().createInitialStreamableOperatorInternals();
        LOGGER.info("call local: NodeModel#createMergeOperator");
        // can be null
        MergeOperator localMergeOperator = localNodeContainer.getNodeModel().createMergeOperator();
        StreamableOperatorInternals[] newInternals = new StreamableOperatorInternals[numChunks];
        final PortObjectSpec[] inSpecsNoFlowPort = ArrayUtils.remove(inPortObjectSpecs, 0);
        LOGGER.info("call local: NodeModel#iterate");
        // Port types for determining whether a port must be copied or not in createPortInputs(...)
        PortType[] portTypes = new PortType[inPortObjects.length];
        // Skipping the variable port
        for (int i = 1; i < inPortObjects.length; i++) {
            portTypes[i - 1] = localNodeContainer.getInPort(i).getPortType();
        }
        try {
            // create port inputs for the streamable execution
            PortInput[][] portInputs = createPortInputs(inputPortRoles, inPortObjects, portTypes, numChunks, localExec);
            while (localNodeContainer.getNodeModel().iterate(operatorInternals)) {
                newInternals = performIntermediateIteration(remoteNodeContainers, remoteExec, operatorInternals, inSpecsNoFlowPort, portInputs, numChunks, localMergeOperator != null);
                if (localMergeOperator != null) {
                    LOGGER.info("call local: MergeOperator#mergeIntermediate");
                    operatorInternals = localMergeOperator.mergeIntermediate(newInternals);
                }
                // re-create port inputs since they were already iterated above
                portInputs = createPortInputs(inputPortRoles, inPortObjects, portTypes, numChunks, localExec);
            }
            // create the out specs (after all intermediate iterations have been
            // performed!)
            LOGGER.info("call local: NodeModel#computeFinalOutputSpecs");
            PortObjectSpec[] outSpecsNoFlowPort = localNodeContainer.getNodeModel().computeFinalOutputSpecs(operatorInternals, inSpecsNoFlowPort);
            /* ---- take care about the output ---- */
            LOGGER.info("call local: NodeModel#getOutputPortRoles");
            OutputPortRole[] outputPortRoles = localNodeContainer.getNodeModel().getOutputPortRoles();
            // TODO: one single output table (for distributed ports) for all distributed nodes ... should be ok?
            // create the portOutputs for the StreamableOperator#runFinal-method
            // -> if node is run distributed, only distributed ports have to be set (i.e. RowOutputs), otherwise all
            PortOutput[] portOutputs = createPortOutputs(localNodeContainer.getNode(), outputPortRoles, outSpecsNoFlowPort, isDistributable, true, localExec);
            for (int i = 0; i < numChunks; i++) {
                LOGGER.info("call remote: NodeModel#createStreamableOperator");
                StreamableOperator streamableOperator = remoteNodeContainers[i].getNodeModel().createStreamableOperator(new PartitionInfo(i, numChunks), inSpecsNoFlowPort);
                // simulates transfer of the internals from the local node to the remote ones
                operatorInternals = saveAndLoadInternals(operatorInternals);
                if (localMergeOperator != null) {
                    LOGGER.info("call: StreamableOperator#loadInternals");
                    streamableOperator.loadInternals(operatorInternals);
                }
                LOGGER.info("call: StreamableOperator#runFinal");
                try {
                    // shallow copy to detect replaced array elements after the call
                    PortOutput[] tmpPortOutputs = portOutputs.clone();
                    streamableOperator.runFinal(portInputs[i], portOutputs, remoteExec[i]);
                    // make sure that the portOutputs-object hasn't been manipulated directly (only its containing objects)
                    if (IntStream.range(0, portOutputs.length).anyMatch(j -> {
                        return tmpPortOutputs[j] != portOutputs[j];
                    })) {
                        throw new IllegalStateException("Output array must not be manipulated.");
                    }
                } catch (ClassCastException e) {
                    // enrich the message with a hint but keep the original exception (and its stack trace) as cause
                    ClassCastException cce = new ClassCastException(e.getMessage() + ". Likely reason: port-role is not set as streamable -> overwrite get[Input|Output]PortRoles()-methods in NodeModel.");
                    cce.initCause(e);
                    throw cce;
                }
                checkClosedPortOutputs(portOutputs);
                if (localMergeOperator != null) {
                    LOGGER.info("call: StreamableOperator#saveInternals");
                    newInternals[i] = saveAndLoadInternals(streamableOperator.saveInternals());
                }
            }
            if (localMergeOperator != null) {
                LOGGER.info("call: MergeOperator#mergeFinals");
                operatorInternals = localMergeOperator.mergeFinal(newInternals);
            } else if (numChunks == 1) {
                operatorInternals = newInternals[0];
            }
            if (localMergeOperator != null) {
                LOGGER.info("call local: NodeModel#finishStreamableExecution");
                // create the port outputs for the NodeModel#finishStreamableExecution-method -> only non-distributed ports have to be provided here
                PortOutput[] nonDistrPortOutputs;
                if (isDistributable) {
                    nonDistrPortOutputs = createPortOutputs(localNodeContainer.getNode(), outputPortRoles, outSpecsNoFlowPort, isDistributable, false, localExec);
                } else {
                    // if the node is not distributable we assume that all port-outputs have already been set in the runFinal-Method
                    // and don't pass any port outputs here -> the finishStreamableExecution method is then only used
                    // to set warning messages etc.
                    nonDistrPortOutputs = new PortOutput[outputPortRoles.length];
                }
                PortOutput[] tmpPortOutputs = nonDistrPortOutputs.clone();
                localNodeContainer.getNodeModel().finishStreamableExecution(operatorInternals, localExec, nonDistrPortOutputs);
                // make sure that the nonDistrPortOutputs-object hasn't been manipulated directly, only its containing objects
                // (iterate over the array that is actually compared)
                if (IntStream.range(0, nonDistrPortOutputs.length).anyMatch(j -> {
                    return tmpPortOutputs[j] != nonDistrPortOutputs[j];
                })) {
                    throw new IllegalStateException("Output array must not be manipulated.");
                }
                // merge the portOutputs and the nonDistrPortOutputs
                for (int i = 0; i < nonDistrPortOutputs.length; i++) {
                    if (nonDistrPortOutputs[i] != null) {
                        portOutputs[i] = nonDistrPortOutputs[i];
                    }
                }
            } else {
                // check whether the current node model overrides the #finishStreamableExecution-method
                if (checkForOverriddenMethod(localNodeContainer, "finishStreamableExecution", StreamableOperatorInternals.class, ExecutionContext.class, PortOutput[].class)) {
                    // method has been overridden -> createMergeOperator-method actually needs to be implemented as well!
                    throw new IllegalStateException("The 'NodeModel#finishStreamableExecution'-method is overridden but no merge operator provided. Please override the 'NodeModel#createMergeOperator'-method as well.");
                }
            }
            PortObject[] outPortObjects = new PortObject[localNodeContainer.getNrOutPorts()];
            PortObjectSpec[] outPortObjectSpecs = new PortObjectSpec[localNodeContainer.getNrOutPorts()];
            // set variable out port
            outPortObjects[0] = FlowVariablePortObject.INSTANCE;
            // set variable out port spec
            outPortObjectSpecs[0] = FlowVariablePortObjectSpec.INSTANCE;
            for (int i = 1; i < outPortObjects.length; i++) {
                // retrieve the out port objects
                if (portOutputs[i - 1] instanceof BufferedDataContainerRowOutput) {
                    BufferedDataTable table = ((BufferedDataContainerRowOutput) portOutputs[i - 1]).getDataTable();
                    outPortObjects[i] = table;
                    // check if table is empty and set appropriate warning message
                    if (table.size() == 0) {
                        m_warningMessages.add("Node created an empty data table.");
                    }
                } else {
                    outPortObjects[i] = ((PortObjectOutput) portOutputs[i - 1]).getPortObject();
                }
                // retrieve the out port object specs
                if (outSpecsNoFlowPort != null && outSpecsNoFlowPort[i - 1] != null) {
                    // get out port specs as return by the configure-method (happen to be null in some cases, i.e. the Transpose-node)
                    outPortObjectSpecs[i] = outSpecsNoFlowPort[i - 1];
                } else if (outPortObjects[i] != null) {
                    // port objects can be null (mainly in loop iterations)
                    // get outport specs as given by the result port objects
                    outPortObjectSpecs[i] = outPortObjects[i].getSpec();
                }
            }
            NativeNodeContainerExecutionResult execResult = localNodeContainer.createExecutionResult(localExec);
            NodeExecutionResult nodeExecResult = execResult.getNodeExecutionResult();
            nodeExecResult.setInternalHeldPortObjects(null);
            nodeExecResult.setNodeInternDir(null);
            nodeExecResult.setPortObjects(outPortObjects);
            nodeExecResult.setPortObjectSpecs(outPortObjectSpecs);
            WorkflowPersistor.LoadResult loadResult = new WorkflowPersistor.LoadResult("streaming test exec result");
            execResult.setSuccess(true);
            // TODO: since some port objects are null if in an iteration of a loop end node, the execution result cannot be loaded every time
            // possible workaround: check for all port objects to be non-null and only load execution result if that's the case
            localNodeContainer.loadExecutionResult(execResult, localExec, loadResult);
            if (!m_warningMessages.isEmpty()) {
                String joinedMessages = m_warningMessages.stream().collect(Collectors.joining("\n"));
                NodeMessage nm = new NodeMessage(Type.WARNING, joinedMessages);
                localNodeContainer.setNodeMessage(nm);
                execResult.setMessage(nm);
            }
            return execResult;
        } catch (Exception e) {
            // copied from Node.java
            boolean isCanceled = e instanceof CanceledExecutionException;
            isCanceled = isCanceled || e instanceof InterruptedException;
            // TODO this can all be shortened to exec.isCanceled()?
            // isCanceled = isCanceled || localExec.isCanceled(); //not visible
            // writing to a buffer is done asynchronously -- if this thread
            // is interrupted while waiting for the IO thread to flush we take
            // it as a graceful exit
            isCanceled = isCanceled || (e instanceof DataContainerException && e.getCause() instanceof InterruptedException);
            if (isCanceled) {
                localNodeContainer.setNodeMessage(NodeMessage.newWarning("Execution canceled"));
                return NodeContainerExecutionStatus.FAILURE;
            }
            localNodeContainer.getNode().createErrorMessageAndNotify("Execute failed: " + e.getMessage(), e);
            return NodeContainerExecutionStatus.FAILURE;
        }
    } finally {
        // remove virtual nodes from workflow
        removeNodeCopies(remoteNodeContainers);
        // other things to be done in post execution
        postExecution(remoteExec, remoteNodeContainers);
        // clear/dispose all newly created table chunks if there are any (created via creatTableChunks)
        m_tableChunksToBeDisposed.forEach(c -> c.dispose());
        m_tableChunksToBeDisposed.clear();
    }
}
Also used : StreamableOperator(org.knime.core.node.streamable.StreamableOperator) WorkflowPersistor(org.knime.core.node.workflow.WorkflowPersistor) NodeContainer(org.knime.core.node.workflow.NodeContainer) NativeNodeContainer(org.knime.core.node.workflow.NativeNodeContainer) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) LoopEndNode(org.knime.core.node.workflow.LoopEndNode) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) FlowVariablePortObjectSpec(org.knime.core.node.port.flowvariable.FlowVariablePortObjectSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) NativeNodeContainerExecutionResult(org.knime.core.node.workflow.execresult.NativeNodeContainerExecutionResult) PartitionInfo(org.knime.core.node.streamable.PartitionInfo) LoopStartNode(org.knime.core.node.workflow.LoopStartNode) PortObject(org.knime.core.node.port.PortObject) FlowVariablePortObject(org.knime.core.node.port.flowvariable.FlowVariablePortObject) WorkflowLock(org.knime.core.node.workflow.WorkflowLock) NodeExecutionResult(org.knime.core.node.workflow.execresult.NodeExecutionResult) DataContainerException(org.knime.core.data.container.DataContainerException) PortOutput(org.knime.core.node.streamable.PortOutput) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataContainerException(org.knime.core.data.container.DataContainerException) OutputPortRole(org.knime.core.node.streamable.OutputPortRole) ExecutionContext(org.knime.core.node.ExecutionContext) InputPortRole(org.knime.core.node.streamable.InputPortRole) NodeMessage(org.knime.core.node.workflow.NodeMessage) MergeOperator(org.knime.core.node.streamable.MergeOperator) NativeNodeContainer(org.knime.core.node.workflow.NativeNodeContainer) PortType(org.knime.core.node.port.PortType)

Aggregations

PartitionInfo (org.knime.core.node.streamable.PartitionInfo)7 StreamableOperator (org.knime.core.node.streamable.StreamableOperator)7 BufferedDataTable (org.knime.core.node.BufferedDataTable)6 PortOutput (org.knime.core.node.streamable.PortOutput)6 IOException (java.io.IOException)5 DataRow (org.knime.core.data.DataRow)5 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)5 ExecutionContext (org.knime.core.node.ExecutionContext)5 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)5 InputPortRole (org.knime.core.node.streamable.InputPortRole)5 OutputPortRole (org.knime.core.node.streamable.OutputPortRole)5 PortInput (org.knime.core.node.streamable.PortInput)5 File (java.io.File)4 Arrays (java.util.Arrays)4 ReplacedColumnsDataRow (org.knime.base.data.replace.ReplacedColumnsDataRow)4 DataCell (org.knime.core.data.DataCell)4 DataColumnSpec (org.knime.core.data.DataColumnSpec)4 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)4 DataTableSpec (org.knime.core.data.DataTableSpec)4 AppendedColumnRow (org.knime.core.data.append.AppendedColumnRow)4