Use of org.knime.core.node.streamable.PortOutput in the project knime-core by knime.
Class StreamingTestNodeExecutionJob, method createPortOutputs.
/**
 * Builds the array of port outputs (one slot per output port, flow variable port excluded).
 *
 * <p>
 * Which slots are populated depends on the flags:
 * <ul>
 * <li>Node is NOT distributable: every slot is filled, regardless of the individual output roles; the
 * {@code createForRunFinalMethod} flag has no effect in this case.</li>
 * <li>Node is distributable and the outputs are meant for {@code StreamableOperator#runFinal}: only slots whose
 * output role is distributable are filled, all others stay {@code null}.</li>
 * <li>Node is distributable and the outputs are meant for {@code NodeModel#finishStreamableExecution}: only slots
 * whose output role is NOT distributable are filled, all others stay {@code null}.</li>
 * </ul>
 * A filled slot holds a row output for data table ports and a {@code PortObjectOutput} for every other port type.
 *
 * @param node used to determine the number and the types of the out ports
 * @param outRoles the output roles (distributed or not), indexed without the flow variable port
 * @param outSpecsNoFlowPort the out specs used to create the data containers behind the row outputs; may be
 *            {@code null} or contain {@code null} entries (e.g. if the NodeModel#configure-method returns null,
 *            as for the Transpose-node), in which case a row output without a data container is created
 * @param isDistributable whether the whole node can be run in a distributed manner (i.e. there is at least one
 *            distributed in port)
 * @param createForRunFinalMethod {@code true} if the outputs are passed to StreamableOperator#runFinal (only
 *            distributed outputs are set), {@code false} if they are passed to
 *            NodeModel#finishStreamableExecution (only non-distributed outputs are set)
 * @param exec the execution context used to create the buffered data containers
 * @return the port outputs, with some slots possibly left {@code null}
 */
private PortOutput[] createPortOutputs(final Node node, final OutputPortRole[] outRoles, final PortObjectSpec[] outSpecsNoFlowPort, final boolean isDistributable, final boolean createForRunFinalMethod, final ExecutionContext exec) {
    // one slot per out port, the flow variable port (node port index 0) is left out
    final PortOutput[] result = new PortOutput[node.getNrOutPorts() - 1];
    for (int slot = 0; slot < result.length; slot++) {
        // non-distributable node -> fill everything;
        // distributable node -> fill only the slots whose role matches the requested kind of outputs
        final boolean fillSlot = !isDistributable || outRoles[slot].isDistributable() == createForRunFinalMethod;
        if (!fillSlot) {
            // slot remains null
            continue;
        }

        // +1 skips the flow variable port of the node
        final int nodePortIndex = slot + 1;
        final boolean isTablePort = node.getOutputType(nodePortIndex).equals(BufferedDataTable.TYPE)
            || node.getOutputType(nodePortIndex).equals(BufferedDataTable.TYPE_OPTIONAL);
        if (!isTablePort) {
            // any non-table port is served by a plain port object output
            result[slot] = new PortObjectOutput();
            continue;
        }

        // table port -> row output wrapping a BufferedDataTable
        final boolean specMissing = outSpecsNoFlowPort == null || outSpecsNoFlowPort[slot] == null;
        if (specMissing) {
            // no spec available (e.g. configure returned null): use the row output variant that may only be
            // filled via 'setFully'
            result[slot] = new BufferedDataContainerRowOutput();
        } else {
            // the output data containers are intentionally NOT registered in m_tableChunksToBeDisposed:
            // successors need them and they are persisted
            result[slot] = new BufferedDataContainerRowOutput(exec.createDataContainer((DataTableSpec) outSpecsNoFlowPort[slot], true));
        }
    }
    return result;
}
Use of org.knime.core.node.streamable.PortOutput in the project knime-core by knime.
Class StreamingTestNodeExecutionJob, method mainExecute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Simulates a streamed and (if at least one input port role is distributable) distributed execution of a native
 * node: the node is copied once per chunk, the copies are configured, the intermediate iterations are performed,
 * {@code StreamableOperator#runFinal} is called on every copy and, if a merge operator is provided, the results
 * are merged and {@code NodeModel#finishStreamableExecution} is called. The resulting port objects are loaded
 * into the original node container as its execution result.
 *
 * @return the execution result on success, {@link NodeContainerExecutionStatus#FAILURE} if the node is not a
 *         native node, is a loop start/end node, cannot be configured, is canceled or fails during execution
 */
@Override
protected NodeContainerExecutionStatus mainExecute() {
    NodeContainer nodeContainer = getNodeContainer();
    if (!(nodeContainer instanceof NativeNodeContainer)) {
        String message = "Streaming and distributed TEST execution only available for native nodes (i.e. no meta- or subnodes)";
        nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
        LOGGER.error(message);
        return NodeContainerExecutionStatus.FAILURE;
    }
    // TODO should actually not be used for execution itself, but is currently!
    NativeNodeContainer localNodeContainer = (NativeNodeContainer) nodeContainer;
    if (localNodeContainer.getNodeModel() instanceof LoopStartNode || localNodeContainer.getNodeModel() instanceof LoopEndNode) {
        String message = "Streaming and distributed TEST execution doesn't work for Loop Start and End nodes.";
        nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
        LOGGER.error(message);
        return NodeContainerExecutionStatus.FAILURE;
    }
    // collect all warnings issued by the node model; they are attached to the execution result at the end
    localNodeContainer.getNodeModel().addWarningListener(w -> {
        if (w != null) {
            m_warningMessages.add(w);
        }
    });

    // get the input objects - includes the flow variable port object (index 0)!
    PortObject[] inPortObjects = getPortObjects();
    // derive the input object specs; index 0 (flow variable port) is deliberately left null
    PortObjectSpec[] inPortObjectSpecs = new PortObjectSpec[inPortObjects.length];
    for (int i = 1; i < inPortObjectSpecs.length; i++) {
        // check if it's not an optional in-port
        if (inPortObjects[i] != null) {
            inPortObjectSpecs[i] = inPortObjects[i].getSpec();
        }
    }

    // get input port roles
    LOGGER.info("call local: NodeModel#getInputPortRoles");
    InputPortRole[] inputPortRoles = localNodeContainer.getNodeModel().getInputPortRoles();

    // get flow variables for all non-streamable ports
    // TODO: why only for non-streamable ports?
    // WorkflowManager wfm = localNodeContainer.getParent();
    // ArrayList<FlowObjectStack> flowObjectStacks = new
    // ArrayList<FlowObjectStack>(inPortObjects.length);
    // for (int i = 0; i < inPortObjects.length; i++) {
    // ConnectionContainer con =
    // wfm.getIncomingConnectionFor(localNodeContainer.getID(), i);
    // if ((con != null && i == 0) || (con != null && inputPortRoles[i -
    // 1].isStreamable())) {
    // flowObjectStacks.add(((SingleNodeContainer)wfm.getNodeContainer(con.getSource())).getFlowObjectStack());
    // }
    // }

    // check for distributable ports: the node is distributable if at least one input port is
    boolean isDistributable = false;
    for (int i = 0; i < inputPortRoles.length; i++) {
        if (inputPortRoles[i].isDistributable()) {
            isDistributable = true;
        }
    }

    /* ---- create node copies and configure ----*/
    // adjust the number of chunks if one of the distributable input tables contains less rows than chunks
    int numChunks = isDistributable ? m_numChunks : 1;
    for (int i = 1; i < inPortObjects.length; i++) {
        // inputPortRoles is indexed without the flow variable port, hence (i - 1);
        // unconnected optional in-ports are null and cannot limit the number of chunks
        if (inputPortRoles[i - 1].isDistributable() && inPortObjects[i] != null) {
            // keep the row count as long: narrowing it to int would overflow for very large tables
            long rowCount = ((BufferedDataTable) inPortObjects[i]).size();
            if (rowCount < numChunks) {
                // safe narrowing: the value is smaller than the current (int) number of chunks
                numChunks = (int) Math.max(1L, rowCount);
            }
        }
    }

    // create the 'remote' node containers used for the execution itself
    NativeNodeContainer[] remoteNodeContainers = createNodeCopies(localNodeContainer, numChunks);
    // exactly one execution context per 'remote' node
    ExecutionContext[] remoteExec = createExecutionContexts(remoteNodeContainers);
    // execution context for the original node
    // - mainly for the creation of the input and output tables (to be fed into the 'remote' node copies)
    // - created tables are tracked in m_tableChunksToBeDisposed to be disposed at the end
    // - should actually not be used for the actual execution but is currently! (TODO)
    ExecutionContext localExec = remoteExec[0];

    // configure the node copies
    for (int i = 0; i < remoteNodeContainers.length; i++) {
        try (WorkflowLock lock = localNodeContainer.getParent().lock()) {
            // wfm.createAndSetFlowObjectStackFor(localNodeContainer,
            // flowObjectStacks.toArray(new
            // FlowObjectStack[flowObjectStacks.size()]));
            LOGGER.info("call remote: NodeModel#configure");
            boolean isConfigureOK = remoteNodeContainers[i].callNodeConfigure(inPortObjectSpecs, true);
            if (!isConfigureOK) {
                String message = "Configuration failed";
                nodeContainer.setNodeMessage(new NodeMessage(Type.ERROR, message));
                LOGGER.error(message);
                // NOTE(review): this return happens before the try/finally below, so the node copies created
                // above are not passed to removeNodeCopies/postExecution on this path - confirm this is intended
                return NodeContainerExecutionStatus.FAILURE;
            }
        }
    }

    // Overriding 'createInitialStreamableOperatorInternals' is only meaningful together with 'iterate'.
    // Otherwise it doesn't make sense.
    if (checkForOverriddenMethod(localNodeContainer, "createInitialStreamableOperatorInternals") && !checkForOverriddenMethod(localNodeContainer, "iterate", StreamableOperatorInternals.class)) {
        m_warningMessages.add("Implementation warning: Overriding the 'createInitialStreamableOperatorInternals'-method without overriding the 'iterate'-method doesn't make sense.");
    }

    // create initial streamable operator internals for the first call of the iterate-method
    LOGGER.info("call local: NodeModel#createInitialStreamableOperatorInternals");
    StreamableOperatorInternals operatorInternals = localNodeContainer.getNodeModel().createInitialStreamableOperatorInternals();
    LOGGER.info("call local: NodeModel#createMergeOperator");
    // can be null
    MergeOperator localMergeOperator = localNodeContainer.getNodeModel().createMergeOperator();
    StreamableOperatorInternals[] newInternals = new StreamableOperatorInternals[numChunks];
    // specs without the (null) flow variable port entry at index 0
    final PortObjectSpec[] inSpecsNoFlowPort = ArrayUtils.remove(inPortObjectSpecs, 0);
    LOGGER.info("call local: NodeModel#iterate");

    // Port types for determining whether a port must be copied or not in createPortInputs(...)
    PortType[] portTypes = new PortType[inPortObjects.length];
    // Skipping the variable port (the last array element therefore stays null)
    for (int i = 1; i < inPortObjects.length; i++) {
        portTypes[i - 1] = localNodeContainer.getInPort(i).getPortType();
    }

    try {
        // create port inputs for the streamable execution
        PortInput[][] portInputs = createPortInputs(inputPortRoles, inPortObjects, portTypes, numChunks, localExec);
        while (localNodeContainer.getNodeModel().iterate(operatorInternals)) {
            newInternals = performIntermediateIteration(remoteNodeContainers, remoteExec, operatorInternals, inSpecsNoFlowPort, portInputs, numChunks, localMergeOperator != null);
            if (localMergeOperator != null) {
                LOGGER.info("call local: MergeOperator#mergeIntermediate");
                operatorInternals = localMergeOperator.mergeIntermediate(newInternals);
            }
            // re-create port inputs since they were already iterated above
            portInputs = createPortInputs(inputPortRoles, inPortObjects, portTypes, numChunks, localExec);
        }

        // create the out specs (after all intermediate iterations have been performed!)
        LOGGER.info("call local: NodeModel#computeFinalOutputSpecs");
        PortObjectSpec[] outSpecsNoFlowPort = localNodeContainer.getNodeModel().computeFinalOutputSpecs(operatorInternals, inSpecsNoFlowPort);

        /* ---- take care about the output ---- */
        LOGGER.info("call local: NodeModel#getOutputPortRoles");
        OutputPortRole[] outputPortRoles = localNodeContainer.getNodeModel().getOutputPortRoles();
        // TODO: one single output table (for distributed ports) for all distributed nodes ... should be ok?
        // create the portOutputs for the StreamableOperator#runFinal-method
        // -> if node is run distributed, only distributed ports have to be set (i.e. RowOutputs), otherwise all
        PortOutput[] portOutputs = createPortOutputs(localNodeContainer.getNode(), outputPortRoles, outSpecsNoFlowPort, isDistributable, true, localExec);

        for (int i = 0; i < numChunks; i++) {
            LOGGER.info("call remote: NodeModel#createStreamableOperator");
            StreamableOperator streamableOperator = remoteNodeContainers[i].getNodeModel().createStreamableOperator(new PartitionInfo(i, numChunks), inSpecsNoFlowPort);
            // simulates transfer of the internals from the local node to the remote ones
            operatorInternals = saveAndLoadInternals(operatorInternals);
            if (localMergeOperator != null) {
                LOGGER.info("call: StreamableOperator#loadInternals");
                streamableOperator.loadInternals(operatorInternals);
            }
            LOGGER.info("call: StreamableOperator#runFinal");
            try {
                // snapshot of the slots to detect a replacement of array elements by the operator
                PortOutput[] portOutputsSnapshot = portOutputs.clone();
                streamableOperator.runFinal(portInputs[i], portOutputs, remoteExec[i]);
                // make sure that the portOutputs-object hasn't been manipulated directly (only its containing objects)
                if (IntStream.range(0, portOutputs.length).anyMatch(j -> portOutputsSnapshot[j] != portOutputs[j])) {
                    throw new IllegalStateException("Output array must not be manipulated.");
                }
            } catch (ClassCastException e) {
                // enrich the message with a hint but keep the original exception (and its stack trace) as cause
                ClassCastException enriched = new ClassCastException(e.getMessage() + ". Likely reason: port-role is not set as streamable -> overwrite get[Input|Ouptut]PortRoles()-methods in NodeModel.");
                enriched.initCause(e);
                throw enriched;
            }
            checkClosedPortOutputs(portOutputs);
            if (localMergeOperator != null) {
                LOGGER.info("call: StreamableOperator#saveInternals");
                newInternals[i] = saveAndLoadInternals(streamableOperator.saveInternals());
            }
        }

        if (localMergeOperator != null) {
            LOGGER.info("call: MergeOperator#mergeFinals");
            operatorInternals = localMergeOperator.mergeFinal(newInternals);
        } else if (numChunks == 1) {
            operatorInternals = newInternals[0];
        }

        if (localMergeOperator != null) {
            LOGGER.info("call local: NodeModel#finishStreamableExecution");
            // create the port outputs for the NodeModel#finishStreamableExecution-method -> only non-distributed ports have to be provided here
            PortOutput[] nonDistrPortOutputs;
            if (isDistributable) {
                nonDistrPortOutputs = createPortOutputs(localNodeContainer.getNode(), outputPortRoles, outSpecsNoFlowPort, isDistributable, false, localExec);
            } else {
                // if the node is not distributable we assume that all port-outputs have already been set in the runFinal-Method
                // and don't pass any port outputs here -> the finishStreamableExecution method is then only used
                // to set warning messages etc.
                nonDistrPortOutputs = new PortOutput[outputPortRoles.length];
            }
            PortOutput[] nonDistrSnapshot = nonDistrPortOutputs.clone();
            localNodeContainer.getNodeModel().finishStreamableExecution(operatorInternals, localExec, nonDistrPortOutputs);
            // make sure that the nonDistrPortOutputs-object hasn't been manipulated directly, only its containing objects;
            // iterate over the length of the checked array itself (not of portOutputs) so that every element is
            // compared and no index can run out of bounds
            if (IntStream.range(0, nonDistrPortOutputs.length).anyMatch(j -> nonDistrSnapshot[j] != nonDistrPortOutputs[j])) {
                throw new IllegalStateException("Output array must not be manipulated.");
            }
            // merge the portOutputs and the nonDistrPortOutputs
            for (int i = 0; i < nonDistrPortOutputs.length; i++) {
                if (nonDistrPortOutputs[i] != null) {
                    portOutputs[i] = nonDistrPortOutputs[i];
                }
            }
        } else {
            // check whether the current node model overrides the #finishStreamableExecution-method
            if (checkForOverriddenMethod(localNodeContainer, "finishStreamableExecution", StreamableOperatorInternals.class, ExecutionContext.class, PortOutput[].class)) {
                // method has been overridden -> createMergeOperator-method actually needs to be implemented as well!
                throw new IllegalStateException("The 'NodeModel#finishStreamExecution'-method is overridden but no merge operator provided. Please override the 'NodeModel#createMergeOperator'-method as well.");
            }
        }

        PortObject[] outPortObjects = new PortObject[localNodeContainer.getNrOutPorts()];
        PortObjectSpec[] outPortObjectSpecs = new PortObjectSpec[localNodeContainer.getNrOutPorts()];
        // set variable out port (object and spec)
        outPortObjects[0] = FlowVariablePortObject.INSTANCE;
        outPortObjectSpecs[0] = FlowVariablePortObjectSpec.INSTANCE;
        for (int i = 1; i < outPortObjects.length; i++) {
            // retrieve the out port objects; portOutputs is indexed without the flow variable port
            if (portOutputs[i - 1] instanceof BufferedDataContainerRowOutput) {
                BufferedDataTable table = ((BufferedDataContainerRowOutput) portOutputs[i - 1]).getDataTable();
                outPortObjects[i] = table;
                // check if table is empty and set appropriate warning message
                if (table.size() == 0) {
                    m_warningMessages.add("Node created an empty data table.");
                }
            } else {
                outPortObjects[i] = ((PortObjectOutput) portOutputs[i - 1]).getPortObject();
            }
            // retrieve the out port object specs
            if (outSpecsNoFlowPort != null && outSpecsNoFlowPort[i - 1] != null) {
                // get out port specs as computed above (happen to be null in some cases, e.g. the Transpose-node)
                outPortObjectSpecs[i] = outSpecsNoFlowPort[i - 1];
            } else if (outPortObjects[i] != null) {
                // port objects can be null (mainly in loop iterations)
                // get outport specs as given by the result port objects
                outPortObjectSpecs[i] = outPortObjects[i].getSpec();
            }
        }

        NativeNodeContainerExecutionResult execResult = localNodeContainer.createExecutionResult(localExec);
        NodeExecutionResult nodeExecResult = execResult.getNodeExecutionResult();
        nodeExecResult.setInternalHeldPortObjects(null);
        nodeExecResult.setNodeInternDir(null);
        nodeExecResult.setPortObjects(outPortObjects);
        nodeExecResult.setPortObjectSpecs(outPortObjectSpecs);
        WorkflowPersistor.LoadResult loadResult = new WorkflowPersistor.LoadResult("streaming test exec result");
        execResult.setSuccess(true);
        // TODO: since some port objects are null if in an iteration of a loop end node, the execution result cannot be loaded every time
        // possible workaround: check for all port objects to be non-null and only load execution result if that's the case
        // if (Arrays.stream(outPortObjects).noneMatch(p -> p == null)) {
        localNodeContainer.loadExecutionResult(execResult, localExec, loadResult);
        // }
        if (!m_warningMessages.isEmpty()) {
            String joinedMessages = m_warningMessages.stream().collect(Collectors.joining("\n"));
            NodeMessage nm = new NodeMessage(Type.WARNING, joinedMessages);
            localNodeContainer.setNodeMessage(nm);
            execResult.setMessage(nm);
        }
        return execResult;
    } catch (Exception e) {
        // copied from Node.java
        boolean isCanceled = e instanceof CanceledExecutionException;
        isCanceled = isCanceled || e instanceof InterruptedException;
        // TODO this can all be shortened to exec.isCanceled()?
        // isCanceled = isCanceled || localExec.isCanceled(); //not visible
        // writing to a buffer is done asynchronously -- if this thread
        // is interrupted while waiting for the IO thread to flush we take
        // it as a graceful exit
        isCanceled = isCanceled || (e instanceof DataContainerException && e.getCause() instanceof InterruptedException);
        if (isCanceled) {
            localNodeContainer.setNodeMessage(NodeMessage.newWarning("Execution canceled"));
            return NodeContainerExecutionStatus.FAILURE;
        }
        localNodeContainer.getNode().createErrorMessageAndNotify("Execute failed: " + e.getMessage(), e);
        return NodeContainerExecutionStatus.FAILURE;
    } finally {
        // remove virtual nodes from workflow
        removeNodeCopies(remoteNodeContainers);
        // other things to be done in post execution
        postExecution(remoteExec, remoteNodeContainers);
        // clear/dispose all newly created table chunks if there are any (created via creatTableChunks)
        m_tableChunksToBeDisposed.forEach(c -> c.dispose());
        m_tableChunksToBeDisposed.clear();
    }
}
Aggregations