use of org.knime.core.data.append.AppendedColumnRow in project knime-core by knime.
the class OldToNewTimeNodeModel method createStreamableOperator.
/**
 * Builds the streaming operator of this node. The operator has two phases:
 * an intermediate phase that inspects the first input row and records the
 * resulting output column types and names in the operator internals, and a
 * final phase that converts every row and pushes it to the output.
 *
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        SimpleStreamableOperatorInternals m_internals = new SimpleStreamableOperatorInternals();

        /**
         * Only partition 0 does any work here. It polls a single row and
         * stores, per output column, a "type&lt;i&gt;" and "colname&lt;i&gt;" entry plus
         * "hasIterated" and "sizeRow" in the internals config. An empty
         * input only yields "sizeRow" = 0.
         *
         * {@inheritDoc}
         */
        @Override
        public void runIntermediate(final PortInput[] inputs, final ExecutionContext exec) throws Exception {
            if (partitionInfo.getPartitionIndex() != 0) {
                return;
            }
            final RowInput rowInput = (RowInput) inputs[0];
            final DataRow firstRow = rowInput.poll();
            if (firstRow == null) {
                m_internals.getConfig().addInt("sizeRow", 0);
                return;
            }

            final boolean replace = m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE);
            final DataTableSpec tableSpec = rowInput.getDataTableSpec();
            final DataColumnSpec[] convertedSpecs = getNewIncludedColumnSpecs(tableSpec, firstRow);
            final int[] includedIdx = Arrays.stream(m_colSelect.applyTo(tableSpec).getIncludes())
                .mapToInt(colName -> tableSpec.findColumnIndex(colName)).toArray();
            final int numInputCols = tableSpec.getNumColumns();

            final DataColumnSpec[] outSpecs;
            // number of columns that get a type/name entry in the config
            final int describedCols;
            if (replace) {
                // converted columns take the place of their source columns
                outSpecs = new DataColumnSpec[firstRow.getNumCells()];
                for (int col = 0; col < numInputCols; col++) {
                    final int pos = Arrays.binarySearch(includedIdx, col);
                    outSpecs[col] = pos >= 0 ? convertedSpecs[pos] : tableSpec.getColumnSpec(col);
                }
                describedCols = numInputCols;
            } else {
                // all input columns first, then one uniquely named column per converted column
                outSpecs = new DataColumnSpec[firstRow.getNumCells() + includedIdx.length];
                for (int col = 0; col < numInputCols; col++) {
                    outSpecs[col] = tableSpec.getColumnSpec(col);
                }
                for (int k = 0; k < convertedSpecs.length; k++) {
                    final DataColumnSpec converted = convertedSpecs[k];
                    outSpecs[numInputCols + k] = new UniqueNameGenerator(tableSpec)
                        .newColumn(converted.getName() + m_suffix.getStringValue(), converted.getType());
                }
                describedCols = outSpecs.length;
            }

            final Config config = m_internals.getConfig();
            config.addBoolean("hasIterated", false);
            for (int col = 0; col < describedCols; col++) {
                config.addDataType("type" + col, outSpecs[col].getType());
                config.addString("colname" + col, outSpecs[col].getName());
            }
            config.addInt("sizeRow", outSpecs.length);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public StreamableOperatorInternals saveInternals() {
            return m_internals;
        }

        /**
         * Converts the included columns of every input row via
         * {@code ConvertTimeCellFactory} and pushes either a row with the
         * cells replaced or a row with the cells appended, depending on the
         * replace/append setting. Input and output are closed afterwards.
         */
        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            final RowInput in = (RowInput) inputs[0];
            final RowOutput out = (RowOutput) outputs[0];
            final DataTableSpec inSpec = in.getDataTableSpec();
            final int[] includeIndexes = Arrays.stream(m_colSelect.applyTo(inSpec).getIncludes())
                .mapToInt(colName -> inSpec.findColumnIndex(colName)).toArray();

            for (DataRow row = in.poll(); row != null; row = in.poll()) {
                exec.checkCanceled();
                final DataColumnSpec[] convertedSpecs = getNewIncludedColumnSpecs(inSpec, row);
                final boolean replace = m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE);
                final DataCell[] convertedCells = new DataCell[includeIndexes.length];
                for (int i = 0; i < includeIndexes.length; i++) {
                    final DataColumnSpec targetSpec;
                    if (replace) {
                        targetSpec = convertedSpecs[i];
                    } else {
                        targetSpec = new UniqueNameGenerator(inSpec)
                            .newColumn(convertedSpecs[i].getName() + m_suffix.getStringValue(), convertedSpecs[i].getType());
                    }
                    final ConvertTimeCellFactory cellFac = new ConvertTimeCellFactory(targetSpec, i, includeIndexes[i]);
                    convertedCells[i] = cellFac.getCells(row)[0];
                }
                if (replace) {
                    out.push(new ReplacedColumnsDataRow(row, convertedCells, includeIndexes));
                } else {
                    out.push(new AppendedColumnRow(row, convertedCells));
                }
            }
            in.close();
            out.close();
        }
    };
}
use of org.knime.core.data.append.AppendedColumnRow in project knime-core by knime.
the class ClusterNodeModel method execute.
/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. Cluster centers are updated in batch: for every iteration the
 * values of all rows are summed per winning cluster and handed to
 * {@code updateClusterCenters} together with the per-cluster row counts.
 * Afterwards every input row is labeled with its closest cluster, the PMML
 * model is created and, if enabled, a table with the cluster centers.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    final BufferedDataTable inData = (BufferedDataTable) data[0];
    final DataTableSpec spec = inData.getDataTableSpec();
    // dimension of the feature space = number of input columns
    m_dimension = spec.getNumColumns();
    final HashMap<RowKey, Set<RowKey>> mapping = new HashMap<>();
    addExcludeColumnsToIgnoreList(spec);
    final double[][] clusters = initializeClusters(inData);

    final int nrClusters = m_nrOfClusters.getIntValue();
    final int maxIterations = m_nrMaxIterations.getIntValue();
    final int featureCount = m_dimension - m_nrIgnoredColumns;
    // number of patterns falling into each cluster
    final int[] clusterCoverage = new int[nrClusters];
    // --------- create clusters --------------
    // per-cluster sums used for the batch update of the centers
    final double[][] delta = new double[nrClusters][featureCount];

    // main loop - until clusters stop changing or maxNrIterations reached
    boolean converged = false;
    for (int iteration = 0; !converged && iteration < maxIterations; iteration++) {
        exec.checkCanceled();
        exec.setProgress((double) iteration / (double) maxIterations, "Iteration " + iteration);
        // reset counts and sums; freshly allocated arrays are already all 0.0
        for (int c = 0; c < nrClusters; c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[featureCount];
        }
        for (final DataRow currentRow : inData) {
            final int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner < 0) {
                // let's report this during
                assert (winner >= 0);
                // otherwise just don't reproduce result
                throw new IllegalStateException("No winner found: " + winner);
            }
            // accumulate the row into the sums of the winning cluster
            int featurePos = 0;
            for (int col = 0; col < m_dimension; col++) {
                final DataCell cell = currentRow.getCell(col);
                if (m_ignoreColumn[col]) {
                    continue;
                }
                if (cell.isMissing()) {
                    throw new Exception("Missing Values not (yet) allowed in k-Means.");
                }
                delta[winner][featurePos] += ((DoubleValue) cell).getDoubleValue();
                featurePos++;
            }
            clusterCoverage[winner]++;
        }
        // update cluster centers; the result tells whether the loop is done
        converged = updateClusterCenters(clusterCoverage, clusters, delta);
    }

    // collect the names of all not-ignored columns (front-packed into the array)
    final String[] featureNames = new String[m_dimension];
    int nameCount = 0;
    int colIdx = 0;
    do {
        if (!m_ignoreColumn[colIdx]) {
            featureNames[nameCount++] = spec.getColumnSpec(colIdx).getName();
        }
        colIdx++;
    } while (colIdx < m_dimension);

    // create output container and also mapping for HiLiteing
    final boolean hiliteEnabled = m_enableHilite.getBooleanValue();
    final BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (final DataRow row : inData) {
        final String clusterName = CLUSTER + findClosestPrototypeFor(row, clusters);
        labeledInput.addRowToTable(new AppendedColumnRow(row, new StringCell(clusterName)));
        if (hiliteEnabled) {
            mapping.computeIfAbsent(new RowKey(clusterName), unused -> new HashSet<RowKey>()).add(row.getKey());
        }
    }
    labeledInput.close();
    if (hiliteEnabled) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    final BufferedDataTable outData = labeledInput.getTable();

    // handle the optional PMML input
    final PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    final PMMLPortObjectSpec inPMMLSpec = inPMMLPort != null ? inPMMLPort.getSpec() : null;
    final PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    final PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    final Set<String> learningColumns = new LinkedHashSet<>();
    for (final String field : pmmlOutSpec.getLearningFields()) {
        learningColumns.add(field);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, nrClusters, clusters, clusterCoverage, learningColumns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, featureCount, featureNames);

    if (!m_outputCenters) {
        return new PortObject[] { outData, outPMMLPort };
    }
    // additionally emit one row per cluster center
    final BufferedDataContainer centers = exec.createDataContainer(createClusterCentersSpec(spec));
    for (int c = 0; c < clusters.length; c++) {
        final List<DataCell> cells = new ArrayList<>();
        for (final double coordinate : clusters[c]) {
            cells.add(new DoubleCell(coordinate));
        }
        centers.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + c), cells));
    }
    centers.close();
    return new PortObject[] { outData, centers.getTable(), outPMMLPort };
}
use of org.knime.core.data.append.AppendedColumnRow in project knime-core by knime.
the class DBReaderImpl method loopTable.
/**
 * Executes the configured query once per input row, using the values of the
 * given columns as parameters of a prepared statement, and collects the
 * result rows in one output table.
 *
 * <p>The output spec is created from the meta data of the first successful
 * query. If no query succeeded, an empty output with the input spec is
 * returned. Input rows whose query failed are added, together with the
 * exception message, to the error container when {@code failIfException}
 * is {@code false}.</p>
 *
 * @param exec used for cancellation checks, progress and container creation
 * @param cp credentials provider handed to the connection
 * @param data the input rows; closed when the loop ends
 * @param rowCount number of input rows; if &gt; 0 it is used to report fractional progress
 * @param failIfException if {@code true} any {@link SQLException} of a query is rethrown;
 *            otherwise the failing input row is recorded in the error container (a failure
 *            before the output has been created is still rethrown)
 * @param appendInputColumns if {@code true} each result row is joined with the input columns
 * @param includeEmptyResults if {@code true} (and input columns are appended) an input row
 *            without any result row yields one output row with missing cells for the
 *            database columns
 * @param retainAllColumns if {@code true} all input columns are appended, otherwise only
 *            the parameter columns
 * @param columns names of the input columns bound, in order, to the statement parameters
 * @return the closed row output holding the collected rows
 * @throws Exception if the execution is canceled or the database access fails
 * @since 3.2
 */
@SuppressWarnings("resource")
@Override
public BufferedDataTableRowOutput loopTable(final ExecutionContext exec, final CredentialsProvider cp, final RowInput data, final long rowCount, final boolean failIfException, final boolean appendInputColumns, final boolean includeEmptyResults, final boolean retainAllColumns, final String... columns) throws Exception {
    if (m_blobFactory == null) {
        m_blobFactory = new BinaryObjectCellFactory();
    }
    final DatabaseQueryConnectionSettings dbConn = getQueryConnection();
    return getQueryConnection().execute(cp, conn -> {
        /* Get the selected timezone */
        final TimeZone timezone = dbConn.getTimeZone();
        /* Get the input table spec */
        final DataTableSpec inSpec = data.getDataTableSpec();
        /* Create PreparedStatement */
        final String query = dbConn.getQuery();
        LOGGER.debug("Executing SQL preparedStatement as execute: " + query);
        /* Initialize the error table: input columns plus one uniquely named error message column */
        final UniqueNameGenerator errorGenerator = new UniqueNameGenerator(inSpec);
        final DataColumnSpec errorColSpec = errorGenerator.newColumn(DEF_ERROR_COL_NAME, StringCell.TYPE);
        final DataTableSpec errorSpec = new DataTableSpec(inSpec, new DataTableSpec(errorColSpec));
        m_errorContainer = exec.createDataContainer(errorSpec);
        DataTableSpec dbSpec = new DataTableSpec();
        BufferedDataTableRowOutput output = null;
        exec.setMessage("Start reading rows from database...");
        try (final PreparedStatement stmt = conn.prepareStatement(query)) {
            /* 1-based ordinal of the input row currently processed (progress only) */
            long inDataCounter = 1;
            /* running index handed to the DB row iterator and used for row keys of empty results */
            long rowIdCounter = 0;
            DataRow row;
            while ((row = data.poll()) != null) {
                exec.checkCanceled();
                if (rowCount > 0) {
                    exec.setProgress(1.0 * inDataCounter / rowCount, "Row " + "#" + inDataCounter + " of " + rowCount);
                } else {
                    exec.setProgress("Writing Row " + "#" + inDataCounter);
                }
                /* Bind the parameter columns of the current row to the statement (JDBC indices are 1-based) */
                final DataCell[] inCells = new DataCell[columns.length];
                for (int i = 0; i < columns.length; i++) {
                    final int dbIdx = i + 1;
                    final int colIdx = inSpec.findColumnIndex(columns[i]);
                    final DataColumnSpec colSpec = inSpec.getColumnSpec(colIdx);
                    inCells[i] = row.getCell(colIdx);
                    fillStatement(stmt, dbIdx, colSpec, inCells[i], timezone, null);
                }
                try (final ResultSet result = stmt.executeQuery()) {
                    /* In the first iteration, create the out DataTableSpec and BufferedDataTableRowOutput */
                    if (output == null) {
                        dbSpec = createTableSpec(result.getMetaData());
                        if (appendInputColumns) {
                            // Create out DataTableSpec for input table
                            final DataTableSpec newInSpec;
                            if (retainAllColumns) {
                                newInSpec = inSpec;
                            } else {
                                final DataColumnSpec[] inColSpecs = new DataColumnSpec[columns.length];
                                for (int i = 0; i < inColSpecs.length; i++) {
                                    inColSpecs[i] = inSpec.getColumnSpec(columns[i]);
                                }
                                newInSpec = new DataTableSpec(inColSpecs);
                            }
                            // Create DataTableSpec for database columns, rename if necessary
                            final UniqueNameGenerator generator = new UniqueNameGenerator(newInSpec);
                            final DataColumnSpec[] dbColSpecs = new DataColumnSpec[dbSpec.getNumColumns()];
                            for (int i = 0; i < dbColSpecs.length; i++) {
                                final DataColumnSpec colSpec = dbSpec.getColumnSpec(i);
                                dbColSpecs[i] = generator.newColumn(colSpec.getName(), colSpec.getType());
                            }
                            dbSpec = new DataTableSpec(dbColSpecs);
                            m_spec = new DataTableSpec(newInSpec, dbSpec);
                        } else {
                            m_spec = dbSpec;
                        }
                        output = new BufferedDataTableRowOutput(exec.createDataContainer(m_spec));
                    }
                    /* Iterate over the result of the database query and put it into the output table*/
                    final RowIterator dbRowIterator = createDBRowIterator(dbSpec, dbConn, m_blobFactory, false, result, rowIdCounter);
                    boolean hasDbRow = false;
                    while (dbRowIterator.hasNext()) {
                        hasDbRow = true;
                        final DataRow dbRow = dbRowIterator.next();
                        if (appendInputColumns) {
                            /* the input part is re-keyed with the key of the database row before joining */
                            final DataRow inRow;
                            if (retainAllColumns) {
                                inRow = new DefaultRow(dbRow.getKey(), row);
                            } else {
                                inRow = new DefaultRow(dbRow.getKey(), inCells);
                            }
                            final JoinedRow joinedRow = new JoinedRow(inRow, dbRow);
                            output.push(joinedRow);
                        } else {
                            output.push(dbRow);
                        }
                        rowIdCounter++;
                    }
                    /* Append columns using MissingCell if no result is returned */
                    if (!hasDbRow && appendInputColumns && includeEmptyResults) {
                        final DataCell[] cells = new DataCell[dbSpec.getNumColumns()];
                        Arrays.fill(cells, DataType.getMissingCell());
                        final RowKey rowKey = RowKey.createRowKey(rowIdCounter);
                        final DataRow emptyDbRows = new DefaultRow(rowKey, cells);
                        final DataRow inRow;
                        if (retainAllColumns) {
                            inRow = new DefaultRow(rowKey, row);
                        } else {
                            inRow = new DefaultRow(rowKey, inCells);
                        }
                        final JoinedRow joinedRow = new JoinedRow(inRow, emptyDbRows);
                        output.push(joinedRow);
                        rowIdCounter++;
                    }
                } catch (SQLException ex) {
                    LOGGER.debug("SQLException: " + ex.getMessage());
                    if (!failIfException) {
                        /* without an output there is no spec yet, so the failure cannot be tolerated */
                        if (output == null) {
                            throw new SQLException(ex);
                        }
                        final AppendedColumnRow appendedRow = new AppendedColumnRow(row, new StringCell(ex.getMessage()));
                        m_errorContainer.addRowToTable(appendedRow);
                    } else {
                        throw new SQLException(ex);
                    }
                }
                /*
                 * Advance the progress counter for every consumed input row, including rows
                 * whose query failed and was only recorded in the error container; otherwise
                 * all following progress messages lag behind.
                 */
                inDataCounter++;
            }
        } finally {
            data.close();
            /* no query succeeded: return an empty output with the input spec */
            if (output == null) {
                output = new BufferedDataTableRowOutput(exec.createDataContainer(inSpec));
            }
            output.close();
            if (m_errorContainer != null) {
                m_errorContainer.close();
            }
        }
        return output;
    });
}
Aggregations