Use of org.knime.core.node.ExecutionContext in project knime-core by knime.
Class CategoryToNumberNodeModel, method createStreamableOperator.
/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            ColumnRearranger cr = createRearranger((DataTableSpec) inSpecs[0]);
            cr.createStreamableFunction(0, 0).runFinal(inputs, outputs, exec);
            // the optional PMML in port (can be null)
            PMMLPortObject inPMMLPort = null;
            if (m_pmmlInEnabled && inputs[1] != null) {
                inPMMLPort = (PMMLPortObject) ((PortObjectInput) inputs[1]).getPortObject();
            }
            PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(inPMMLPort, cr.createSpec());
            PMMLPortObject outPMMLPort = new PMMLPortObject(creator.createSpec(), inPMMLPort);
            for (CategoryToNumberCellFactory factory : m_factories) {
                PMMLMapValuesTranslator trans = new PMMLMapValuesTranslator(factory.getConfig(), new DerivedFieldMapper(inPMMLPort));
                outPMMLPort.addGlobalTransformations(trans.exportToTransDict());
            }
            PortObjectOutput portObjectOutput = (PortObjectOutput) outputs[1];
            portObjectOutput.setPortObject(outPMMLPort);
        }
    };
}
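The per-row work above is delegated to the ColumnRearranger's StreamableFunction; the anonymous operator only adds the PMML output handling afterwards. For a node with a single data input and output and no model port to fill, a minimal sketch (assuming the same createRearranger helper exists) could return the function directly, since a StreamableFunction is itself a StreamableOperator:

@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo,
        final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // sketch only: no extra ports to populate, so the rearranger's streamable
    // function (data port 0 in, data port 0 out) can serve as the operator itself
    return createRearranger((DataTableSpec) inSpecs[0]).createStreamableFunction(0, 0);
}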
Use of org.knime.core.node.ExecutionContext in project knime-core by knime.
Class ColumnAppenderNodeModel, method createStreamableOperator.
// ////////////// STREAMING FUNCTIONS ////////////////
/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            RowInput in1 = (RowInput) inputs[0];
            RowInput in2 = (RowInput) inputs[1];
            RowOutput out = (RowOutput) outputs[0];
            CustomRowIterator tableIt1 = new CustomRowIteratorImpl2(in1);
            CustomRowIterator tableIt2 = new CustomRowIteratorImpl2(in2);
            compute(tableIt1, tableIt2, in1.getDataTableSpec().getNumColumns() + in2.getDataTableSpec().getNumColumns(), row -> {
                out.push(row);
            }, exec, -1, -1);
            // poll all the remaining rows if there are any but don't do anything with them
            while (tableIt1.hasNext()) {
                tableIt1.next();
            }
            while (tableIt2.hasNext()) {
                tableIt2.next();
            }
            in1.close();
            in2.close();
            out.close();
        }
    };
}
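Here the two CustomRowIteratorImpl2 instances wrap the streaming RowInputs so that the shared compute(...) method can be reused by both the streaming and the non-streaming path. Without that indirection, a plain streaming loop over a single input would look roughly like the following sketch (not the ColumnAppender logic):

DataRow row;
while ((row = in1.poll()) != null) { // poll() returns null once the input stream is exhausted
    out.push(row);                   // forward (or transform) the row downstream
}
out.close();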
Use of org.knime.core.node.ExecutionContext in project knime-core by knime.
Class CAIMDiscretizationNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // measure the time
    long startTime = System.currentTimeMillis();
    // empty model
    if (m_includedColumnNames.getIncludeList() == null || m_includedColumnNames.getIncludeList().size() == 0) {
        return new PortObject[] { inData[0], new DiscretizationModel() };
    }
    LOGGER.debug("Start discretizing.");
    // as the algorithm is designed for binary class problems only
    // (positive, negative), it is performed once for each class value,
    // labeling that value as the positive class and all others as negative
    exec.setProgress(0.0, "Preparing...");
    // check input data
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // get class column index
    m_classifyColumnIndex = data.getDataTableSpec().findColumnIndex(m_classColumnName.getStringValue());
    assert m_classifyColumnIndex > -1;
    // create the class-to-index mapping
    createClassFromToIndexMaps(data.getDataTableSpec());
    // create the array with the result discretization schemes for
    // each included column
    DiscretizationScheme[] resultSchemes = new DiscretizationScheme[m_includedColumnNames.getIncludeList().size()];
    // for all included columns do the discretization
    int currentColumn = 0;
    for (String includedColumnName : m_includedColumnNames.getIncludeList()) {
        LOGGER.debug("Process column: " + includedColumnName);
        exec.setProgress("Discretizing column '" + includedColumnName + "'");
        ExecutionContext subExecPerColumn = exec.createSubExecutionContext(1.0D / m_includedColumnNames.getIncludeList().size());
        subExecPerColumn.checkCanceled();
        // never discretize the class column itself (should never happen)
        if (m_classColumnName.getStringValue().equals(includedColumnName)) {
            continue;
        }
        // determine the column index of the current column
        int columnIndex = data.getDataTableSpec().findColumnIndex(includedColumnName);
        DataColumnDomain domain = data.getDataTableSpec().getColumnSpec(columnIndex).getDomain();
        double minValue = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
        double maxValue = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
        // find all distinct values of the column and create
        // a table with all possible interval boundaries (midpoint value of
        // adjacent values)
        subExecPerColumn.setProgress("Find possible boundaries.");
        BoundaryScheme boundaryScheme = null;
        // create subExec for sorting
        ExecutionContext subExecSort = subExecPerColumn.createSubExecutionContext(0.1);
        // long t1 = System.currentTimeMillis();
        if (m_classOptimizedVersion) {
            boundaryScheme = createAllIntervalBoundaries(data, columnIndex, subExecSort);
        } else {
            boundaryScheme = createAllIntervalBoundaries2(data, columnIndex, subExecSort);
        }
        subExecSort.setProgress(1.0D);
        // long t2 = System.currentTimeMillis() - t1;
        // LOGGER.error("Create boundaries time: " + (t2 / 1000.0)
        // + " optimized: " + m_classOptimizedVersion);
        // LOGGER.error("Boundaries: " + boundaryScheme.getHead());
        LinkedDouble allIntervalBoundaries = boundaryScheme.getHead();
        // create the initial discretization scheme
        DiscretizationScheme discretizationScheme = new DiscretizationScheme(new Interval(minValue, maxValue, true, true));
        double globalCAIM = 0;
        // perform the iterative search for the best intervals
        int numInsertedBounds = 0;
        double currentCAIM = 0;
        // create subExec for inserted bounds
        ExecutionContext subExecBounds = subExecPerColumn.createSubExecutionContext(0.9);
        while (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length - 1) {
            subExecPerColumn.checkCanceled();
            // create subExec for counting
            ExecutionContext subExecCount = subExecBounds.createSubExecutionContext(1.0D / m_classValues.length);
            // LOGGER.debug("Inserted bounds: " + numInsertedBounds);
            // LOGGER.debug("intervall boundaries: " +
            // allIntervalBoundaries);
            // for all possible interval boundaries
            // insert each one, calculate the caim value and add
            // the one with the biggest caim
            LinkedDouble intervalBoundary = allIntervalBoundaries.m_next;
            currentCAIM = 0;
            LinkedDouble bestBoundary = null;
            long currentCountedBoundaries = 0;
            while (intervalBoundary != null) {
                subExecPerColumn.checkCanceled();
                // set progress
                currentCountedBoundaries++;
                subExecCount.setProgress((double) currentCountedBoundaries / (double) boundaryScheme.getNumBoundaries(), "Count for possible boundary " + currentCountedBoundaries + " of " + boundaryScheme.getNumBoundaries());
                // LOGGER.debug("current caim: " + currentCAIM);
                DiscretizationScheme tentativeDS = new DiscretizationScheme(discretizationScheme);
                tentativeDS.insertBound(intervalBoundary.m_value);
                // create the quanta matrix
                QuantaMatrix2D quantaMatrix = new QuantaMatrix2D(tentativeDS, m_classValueToIndexMap);
                // pass the data for filling the matrix
                quantaMatrix.countData(data, columnIndex, m_classifyColumnIndex);
                // calculate the caim
                double caim = quantaMatrix.calculateCaim();
                if (caim > currentCAIM) {
                    currentCAIM = caim;
                    bestBoundary = intervalBoundary;
                }
                intervalBoundary = intervalBoundary.m_next;
            }
            // if there is no best boundary, break the first while loop
            if (bestBoundary == null) {
                break;
            }
            // in this case accept the best discretization scheme
            if (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length) {
                int numIntervals = discretizationScheme.getNumIntervals();
                discretizationScheme.insertBound(bestBoundary.m_value);
                // remove the linked list element from the list
                bestBoundary.remove();
                globalCAIM = currentCAIM;
                if (numIntervals < discretizationScheme.getNumIntervals()) {
                    numInsertedBounds++;
                    subExecPerColumn.setProgress("Inserted bound " + numInsertedBounds);
                    // LOGGER.debug("Inserted boundary: "
                    // + bestBoundary.m_value);
                } else {
                    throw new IllegalStateException("Only useful bounds should be inserted: " + bestBoundary.m_value);
                }
            }
            subExecCount.setProgress(1.0D);
        }
        resultSchemes[currentColumn] = discretizationScheme;
        subExecBounds.setProgress(1.0D);
        // ensure the full progress is set for this iteration
        subExecPerColumn.setProgress(1.0D);
        currentColumn++;
    }
    // set the model
    DataTableSpec modelSpec = createModelSpec(m_includedColumnNames, data.getDataTableSpec());
    m_discretizationModel = new DiscretizationModel(resultSchemes, modelSpec);
    // create an output table that replaces the included columns by
    // interval values
    BufferedDataTable resultTable = createResultTable(exec, data, m_discretizationModel);
    // log the runtime of the execute method
    long runtime = System.currentTimeMillis() - startTime;
    LOGGER.debug("Binning runtime: " + (runtime / 1000.0) + " sec.");
    return new PortObject[] { resultTable, m_discretizationModel };
}
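The method splits progress reporting across the included columns with createSubExecutionContext, and further splits each column's share between finding boundaries (0.1) and inserting bounds (0.9). The general pattern, sketched with an assumed list of column names:

for (String col : includedColumns) {                        // includedColumns: an assumed List<String>
    ExecutionContext sub = exec.createSubExecutionContext(1.0 / includedColumns.size());
    sub.checkCanceled();                                     // propagate a user cancel as early as possible
    // ... per-column work, reporting progress on 'sub' ...
    sub.setProgress(1.0);                                    // this column's share of the overall progress is done
}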
Use of org.knime.core.node.ExecutionContext in project knime-core by knime.
Class RowFilterNodeModel, method createStreamableOperator.
/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public StreamableOperatorInternals saveInternals() {
            return null;
        }

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext ctx) throws Exception {
            RowInput in = (RowInput) inputs[0];
            RowOutput out = (RowOutput) outputs[0];
            RowFilterNodeModel.this.execute(in, out, ctx);
        }
    };
}
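The operator simply forwards to an execute(RowInput, RowOutput, ExecutionContext) method of the enclosing node model, so the same row-by-row filtering code serves both the streaming and the classic execution path. A hedged sketch of what such a helper can look like (matches(...) stands in for the node's configured filter and is not part of the actual API):

private void execute(final RowInput in, final RowOutput out, final ExecutionContext exec) throws Exception {
    DataRow row;
    while ((row = in.poll()) != null) {
        exec.checkCanceled();
        if (matches(row)) {    // hypothetical predicate for the configured row filter
            out.push(row);
        }
    }
    in.close();
    out.close();
}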
Use of org.knime.core.node.ExecutionContext in project knime-core by knime.
Class BigGroupByTable, method createGroupByTable.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk-wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are not equal, we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores these members for each chunk. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other,
    // but in rare occasions data cells that are NOT equal also return 0
    // when compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain a separate entry for each
    // group of data cells that are pairwise equal in the chunk.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot move this initialization into the constructor, as the super() constructor directly calls the current method
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether we are still in the same chunk of group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // log the cell classes that cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occurred in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
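The aggregated rows are collected in a BufferedDataContainer created from the ExecutionContext; the container must be closed before its table can be retrieved. The life cycle in isolation (the row construction below is illustrative only):

BufferedDataContainer dc = exec.createDataContainer(resultSpec);
dc.addRowToTable(new DefaultRow(new RowKey("Row0"), cells)); // 'cells' stands in for the aggregated values of one group
dc.close();                                                  // no more rows may be added after this
BufferedDataTable result = dc.getTable();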