Use of org.knime.core.node.property.hilite.DefaultHiLiteMapper in project knime-core by knime.
The class EntropyNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTable reference = inData[INPORT_REFERENCE];
    DataTable clustering = inData[INPORT_CLUSTERING];
    int referenceColIndex = reference.getDataTableSpec().findColumnIndex(m_referenceCol);
    int clusteringColIndex = clustering.getDataTableSpec().findColumnIndex(m_clusteringCol);
    m_calculator = new EntropyCalculator(reference, clustering, referenceColIndex, clusteringColIndex, exec);
    Map<RowKey, Set<RowKey>> map = m_calculator.getClusteringMap();
    m_translator.setMapper(new DefaultHiLiteMapper(map));
    if (getNrOutPorts() > 0) {
        BufferedDataTable out = exec.createBufferedDataTable(m_calculator.getScoreTable(), exec);
        return new BufferedDataTable[] { out };
    }
    return new BufferedDataTable[0];
}
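For context, DefaultHiLiteMapper is a plain map-backed implementation of the HiLiteMapper interface: it wraps the given Map<RowKey, Set<RowKey>> and answers getDestSet and keySet queries against it. Below is a minimal stand-alone sketch of that behavior; the class name and row keys are invented for illustration.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.knime.core.data.RowKey;
import org.knime.core.node.property.hilite.DefaultHiLiteMapper;

public final class HiLiteMapperSketch {
    public static void main(final String[] args) {
        // one aggregate key (e.g. a cluster) pointing at the rows it covers
        Map<RowKey, Set<RowKey>> map = new HashMap<>();
        Set<RowKey> members = new HashSet<>();
        members.add(new RowKey("Row1"));
        members.add(new RowKey("Row7"));
        map.put(new RowKey("cluster_0"), members);

        DefaultHiLiteMapper mapper = new DefaultHiLiteMapper(map);
        // hiliting "cluster_0" translates to hiliting Row1 and Row7
        System.out.println(mapper.getDestSet(new RowKey("cluster_0")));
    }
}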
Use of org.knime.core.node.property.hilite.DefaultHiLiteMapper in project knime-core by knime.
The class AppendedRowsNodeModel, method run.
void run(final RowInput[] inputs, final RowOutput output, final ExecutionMonitor exec, final long totalRowCount) throws Exception {
    RowInput[] corrected;
    if (m_isIntersection) {
        final RowInput[] noNullArray = noNullArray(inputs);
        corrected = new RowInput[noNullArray.length];
        DataTableSpec[] inSpecs = new DataTableSpec[noNullArray.length];
        for (int i = 0; i < noNullArray.length; i++) {
            inSpecs[i] = noNullArray[i].getDataTableSpec();
        }
        String[] intersection = getIntersection(inSpecs);
        for (int i = 0; i < noNullArray.length; i++) {
            corrected[i] = new FilterColumnRowInput(noNullArray[i], intersection);
        }
    } else {
        corrected = inputs;
    }
    AppendedRowsTable.DuplicatePolicy duplPolicy;
    if (m_isFailOnDuplicate) {
        duplPolicy = AppendedRowsTable.DuplicatePolicy.Fail;
    } else if (m_isAppendSuffix) {
        duplPolicy = AppendedRowsTable.DuplicatePolicy.AppendSuffix;
    } else {
        duplPolicy = AppendedRowsTable.DuplicatePolicy.Skip;
    }
    AppendedRowsRowInput appendedInput = AppendedRowsRowInput.create(corrected, duplPolicy, m_suffix, exec, totalRowCount);
    try {
        DataRow next;
        // note: this iterator throws runtime exceptions when canceled
        while ((next = appendedInput.poll()) != null) {
            // may throw an exception; also sets progress
            output.push(next);
        }
    } catch (AppendedRowsIterator.RuntimeCanceledExecutionException rcee) {
        throw rcee.getCause();
    } finally {
        output.close();
    }
    if (appendedInput.getNrRowsSkipped() > 0) {
        setWarningMessage("Filtered out " + appendedInput.getNrRowsSkipped() + " duplicate row(s).");
    }
    if (m_enableHiliting) {
        Map<RowKey, Set<RowKey>> map = createHiliteTranslationMap(appendedInput.getDuplicateNameMap());
        m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(map));
    }
}
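The try/finally around poll and push above is the general streaming contract of RowInput/RowOutput: poll rows until null, push each one, and close the output in every case. A hedged sketch of that pattern in isolation follows; StreamCopy.copy is a hypothetical helper, not part of AppendedRowsNodeModel.

import org.knime.core.data.DataRow;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.streamable.RowInput;
import org.knime.core.node.streamable.RowOutput;

final class StreamCopy {
    private StreamCopy() { }

    /** Copies all rows from in to out, checking for cancelation per row. */
    static void copy(final RowInput in, final RowOutput out, final ExecutionMonitor exec)
            throws Exception {
        try {
            DataRow row;
            while ((row = in.poll()) != null) {
                exec.checkCanceled();
                out.push(row);
            }
        } finally {
            out.close(); // must happen even on cancelation or error
        }
    }
}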
Use of org.knime.core.node.property.hilite.DefaultHiLiteMapper in project knime-core by knime.
The class AppendedRowsNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] rawInData, final ExecutionContext exec) throws Exception {
    // remove all null tables first (optional input data)
    BufferedDataTable[] noNullArray = noNullArray(rawInData);
    DataTableSpec[] noNullSpecs = new DataTableSpec[noNullArray.length];
    for (int i = 0; i < noNullArray.length; i++) {
        noNullSpecs[i] = noNullArray[i].getDataTableSpec();
    }
    // the tables can only be wrapped if a suffix is to be appended or the node fails on duplicate row IDs
    if (m_isAppendSuffix || m_isFailOnDuplicate) {
        // just wrap the tables virtually instead of traversing them and copying the rows (no traversal necessary)
        Optional<String> suffix = m_isAppendSuffix ? Optional.of(m_suffix) : Optional.empty();
        BufferedDataTable concatTable = exec.createConcatenateTable(exec, suffix, m_isFailOnDuplicate, noNullArray);
        if (m_isIntersection) {
            // wrap the table and filter the non-intersecting columns
            DataTableSpec actualOutSpec = getOutputSpec(noNullSpecs);
            DataTableSpec currentOutSpec = concatTable.getDataTableSpec();
            String[] intersectCols = getIntersection(actualOutSpec, currentOutSpec);
            ColumnRearranger cr = new ColumnRearranger(currentOutSpec);
            cr.keepOnly(intersectCols);
            concatTable = exec.createColumnRearrangeTable(concatTable, cr, exec);
        }
        if (m_enableHiliting) {
            AppendedRowsTable tmp = new AppendedRowsTable(DuplicatePolicy.Fail, null, noNullArray);
            Map<RowKey, Set<RowKey>> map = createHiliteTranslationMap(createDuplicateMap(tmp, exec, m_suffix == null ? "" : m_suffix));
            m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(map));
        }
        return new BufferedDataTable[] { concatTable };
    } else {
        // traverse the tables and copy the rows
        long totalRowCount = 0L;
        RowInput[] inputs = new RowInput[noNullArray.length];
        for (int i = 0; i < noNullArray.length; i++) {
            totalRowCount += noNullArray[i].size();
            inputs[i] = new DataTableRowInput(noNullArray[i]);
        }
        DataTableSpec outputSpec = getOutputSpec(noNullSpecs);
        BufferedDataTableRowOutput output = new BufferedDataTableRowOutput(exec.createDataContainer(outputSpec));
        run(inputs, output, exec, totalRowCount);
        return new BufferedDataTable[] { output.getDataTable() };
    }
}
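The noNullArray helper referenced in both methods is not shown on this page. A plausible reconstruction, assuming it merely drops optional input ports that are not connected (the actual implementation in knime-core may differ):

import java.util.Arrays;
import java.util.Objects;
import org.knime.core.node.BufferedDataTable;

final class TableArrays {
    private TableArrays() { }

    /** Drops null entries, i.e. optional input ports that are not connected. */
    static BufferedDataTable[] noNullArray(final BufferedDataTable[] tables) {
        return Arrays.stream(tables)
            .filter(Objects::nonNull)
            .toArray(BufferedDataTable[]::new);
    }
}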
Use of org.knime.core.node.property.hilite.DefaultHiLiteMapper in project knime-core by knime.
The class ClusterNodeModel, method saveInternals.
/**
* {@inheritDoc}
*/
@Override
protected void saveInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    NodeSettings internalSettings = new NodeSettings("kMeans");
    internalSettings.addInt(CFG_DIMENSION, m_dimension);
    internalSettings.addInt(CFG_IGNORED_COLS, m_nrIgnoredColumns);
    internalSettings.addIntArray(CFG_COVERAGE, m_viewData.clusterCoverage());
    for (int i = 0; i < m_nrOfClusters.getIntValue(); i++) {
        internalSettings.addDoubleArray(CFG_CLUSTER + i, m_viewData.clusters()[i]);
    }
    internalSettings.addStringArray(CFG_FEATURE_NAMES, m_viewData.featureNames());
    if (m_enableHilite.getBooleanValue()) {
        NodeSettingsWO mapSet = internalSettings.addNodeSettings(CFG_HILITEMAPPING);
        ((DefaultHiLiteMapper) m_translator.getMapper()).save(mapSet);
    }
    File f = new File(internDir, SETTINGS_FILE_NAME);
    FileOutputStream out = new FileOutputStream(f);
    internalSettings.saveToXML(out);
}
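Since save(NodeSettingsWO) above is paired with a static DefaultHiLiteMapper.load, the matching loadInternals can restore the mapper from the same sub-settings. A hedged sketch of that counterpart, reusing the field and constant names from the snippet above and assuming the usual imports (java.io.FileInputStream, org.knime.core.node.NodeSettings, NodeSettingsRO, InvalidSettingsException); the real ClusterNodeModel.loadInternals may differ in detail:

@Override
protected void loadInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    File f = new File(internDir, SETTINGS_FILE_NAME);
    try (FileInputStream in = new FileInputStream(f)) {
        NodeSettingsRO internalSettings = NodeSettings.loadFromXML(in);
        if (m_enableHilite.getBooleanValue()) {
            NodeSettingsRO mapSet = internalSettings.getNodeSettings(CFG_HILITEMAPPING);
            m_translator.setMapper(DefaultHiLiteMapper.load(mapSet));
        }
    } catch (InvalidSettingsException ise) {
        throw new IOException("Could not restore hilite mapping", ise);
    }
}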
Use of org.knime.core.node.property.hilite.DefaultHiLiteMapper in project knime-core by knime.
The class ClusterNodeModel, method execute.
/**
 * Generates a new clustering based on the input data table and the specified
 * number of clusters. Currently the objective function only looks for cluster
 * centers that are extremely similar to the first n patterns...
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    BufferedDataTable inData = (BufferedDataTable) data[0];
    DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = inData.getDataTableSpec().getNumColumns();
    HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    double[][] clusters = initializeClusters(inData);
    // also keep counts of how many patterns fall in a specific cluster
    int[] clusterCoverage = new int[m_nrOfClusters.getIntValue()];
    // --------- create clusters --------------
    // reserve space for cluster center updates (do batch update!)
    double[][] delta = new double[m_nrOfClusters.getIntValue()][];
    for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
        delta[c] = new double[m_dimension - m_nrIgnoredColumns];
    }
    // main loop - until clusters stop changing or maxNrIterations is reached
    int currentIteration = 0;
    boolean finished = false;
    while ((!finished) && (currentIteration < m_nrMaxIterations.getIntValue())) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_nrMaxIterations.getIntValue(), "Iteration " + currentIteration);
        // initialize counts and cluster-deltas
        for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[m_dimension - m_nrIgnoredColumns];
            int deltaPos = 0;
            for (int i = 0; i < m_dimension; i++) {
                if (!m_ignoreColumn[i]) {
                    delta[c][deltaPos++] = 0.0;
                }
            }
        }
        // assume that we are done (i.e. clusters have stopped changing)
        finished = true;
        // process each training example
        RowIterator rowIt = inData.iterator();
        while (rowIt.hasNext()) {
            DataRow currentRow = rowIt.next();
            int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner >= 0) {
                // update the winning cluster center's delta
                int deltaPos = 0;
                for (int i = 0; i < m_dimension; i++) {
                    DataCell currentCell = currentRow.getCell(i);
                    if (!m_ignoreColumn[i]) {
                        if (!currentCell.isMissing()) {
                            delta[winner][deltaPos] += ((DoubleValue) (currentCell)).getDoubleValue();
                        } else {
                            throw new Exception("Missing values not (yet) allowed in k-Means.");
                        }
                        deltaPos++;
                    }
                }
                clusterCoverage[winner]++;
            } else {
                // should never happen: report it during debugging...
                assert (winner >= 0);
                // ...and fail at runtime rather than silently produce a wrong result
                throw new IllegalStateException("No winner found: " + winner);
            }
        }
        // update cluster centers
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    } // while (!finished && currentIteration < maxNrIterations)
    // create the list of feature names
    int k = 0;  // index of not-ignored columns
    int j = 0;  // index of column
    String[] featureNames = new String[m_dimension];
    do {
        if (!m_ignoreColumn[j]) {
            featureNames[k] = spec.getColumnSpec(j).getName();
            k++;
        }
        j++;
    } while (j < m_dimension);
    // create the output container and the mapping for hiliting
    BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        int winner = findClosestPrototypeFor(row, clusters);
        DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (m_enableHilite.getBooleanValue()) {
            RowKey key = new RowKey(CLUSTER + winner);
            if (mapping.get(key) == null) {
                Set<RowKey> set = new HashSet<RowKey>();
                set.add(row.getKey());
                mapping.put(key, set);
            } else {
                mapping.get(key).add(row.getKey());
            }
        }
    }
    labeledInput.close();
    if (m_enableHilite.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    Set<String> columns = new LinkedHashSet<String>();
    for (String s : pmmlOutSpec.getLearningFields()) {
        columns.add(s);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrOfClusters.getIntValue(), clusters, clusterCoverage, columns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, m_dimension - m_nrIgnoredColumns, featureNames);
    if (m_outputCenters) {
        // a BufferedDataContainer makes the cast of getTable() to BufferedDataTable unnecessary
        BufferedDataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
        int i = 0;
        for (double[] cluster : clusters) {
            List<DataCell> cells = new ArrayList<>();
            for (double d : cluster) {
                cells.add(new DoubleCell(d));
            }
            clusterCenterContainer.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
        }
        clusterCenterContainer.close();
        return new PortObject[] { outData, clusterCenterContainer.getTable(), outPMMLPort };
    } else {
        return new PortObject[] { outData, outPMMLPort };
    }
}
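A side note on the hilite bookkeeping above: the get/put branch that grows the mapping is the classic multimap idiom and collapses into a single computeIfAbsent call with identical behavior. A small self-contained sketch; HiliteMapping is a hypothetical helper, not part of knime-core.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.knime.core.data.RowKey;

final class HiliteMapping {
    private HiliteMapping() { }

    /** Adds rowKey to the bucket for clusterKey, creating the bucket on first use. */
    static void add(final Map<RowKey, Set<RowKey>> mapping, final RowKey clusterKey, final RowKey rowKey) {
        // equivalent to the get/put branch in the labeling loop above
        mapping.computeIfAbsent(clusterKey, k -> new HashSet<>()).add(rowKey);
    }
}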