use of org.knime.core.data.RowIterator in project knime-core by knime.
the class RPropNodeModel method execute.
/**
 * Trains a multi-layer perceptron with RProp on the input table and returns the
 * trained network as a PMML port object.
 * <p>
 * The execution consists of three steps:
 * <ol>
 * <li>A neural network is built with the inputs and outputs according to
 * the input datatable, number of hidden layers as specified.</li>
 * <li>Input DataTables are converted into double-arrays so they can be
 * attached to the neural net.</li>
 * <li>The neural net is trained.</li>
 * </ol>
 * Rows with missing values are skipped when the "ignore missing values"
 * setting is enabled; otherwise they make the execution fail.
 *
 * {@inheritDoc}
 *
 * @param inData the input port objects; the training table is read from index
 *            {@code INDATA} and, if the optional PMML input is enabled, the
 *            incoming PMML from index {@code INMODEL}
 * @param exec used for progress reporting and cancellation checks
 * @return a one-element array holding the PMML port object with the trained network
 * @throws Exception if the regression target domain is outside [0,1], a nominal
 *             target has no domain values, an input column is not double
 *             compatible, a missing value is found while missing values are not
 *             ignored, a class value is not part of the column domain, or the
 *             execution is canceled
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // If class column is not set, it is the last column.
    final DataTableSpec posSpec = (DataTableSpec) inData[INDATA].getSpec();
    if (m_classcol.getStringValue() == null) {
        m_classcol.setStringValue(posSpec.getColumnSpec(posSpec.getNumColumns() - 1).getName());
    }
    final String classColName = m_classcol.getStringValue();

    final List<String> learningCols = new LinkedList<String>();
    final List<String> targetCols = new LinkedList<String>();
    // Determine the number of inputs and the number of outputs. Make also
    // sure that the inputs are double values.
    int nrInputs = 0;
    int nrOutputs = 0;
    final HashMap<String, Integer> inputmap = new HashMap<String, Integer>();
    final HashMap<DataCell, Integer> classMap = new HashMap<DataCell, Integer>();
    for (DataColumnSpec colspec : posSpec) {
        final String colName = colspec.getName();
        if (colName.equals(classColName)) {
            // this is the class (target) column
            targetCols.add(colName);
            final DataColumnDomain domain = colspec.getDomain();
            if (colspec.getType().isCompatible(DoubleValue.class)) {
                // numeric target -> regression; check if the values are in range [0,1]
                if (domain.hasBounds()) {
                    double lower = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
                    double upper = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
                    if (lower < 0 || upper > 1) {
                        throw new InvalidSettingsException(
                            "Domain range for regression in column " + colName + " not in range [0,1]");
                    }
                }
                nrOutputs = 1;
                // single output neuron, keyed by the column name
                classMap.put(new StringCell(colName), 0);
                m_regression = true;
            } else {
                // nominal target -> classification with one output neuron per domain value
                m_regression = false;
                if (!domain.hasValues()) {
                    throw new Exception("Could not find domain values in nominal column " + colName);
                }
                final Set<DataCell> allvalues = domain.getValues();
                int outputneuron = 0;
                for (DataCell value : allvalues) {
                    classMap.put(value, outputneuron);
                    outputneuron++;
                }
                nrOutputs = allvalues.size();
            }
        } else {
            if (!colspec.getType().isCompatible(DoubleValue.class)) {
                throw new Exception("Only double columns for input");
            }
            inputmap.put(colName, nrInputs);
            learningCols.add(colName);
            nrInputs++;
        }
    }
    assert targetCols.size() == 1 : "Only one class column allowed.";

    m_architecture.setNrInputNeurons(nrInputs);
    m_architecture.setNrHiddenLayers(m_nrHiddenLayers.getIntValue());
    m_architecture.setNrHiddenNeurons(m_nrHiddenNeuronsperLayer.getIntValue());
    m_architecture.setNrOutputNeurons(nrOutputs);
    final Random random = new Random();
    if (m_useRandomSeed.getBooleanValue()) {
        random.setSeed(m_randomSeed.getIntValue());
    }
    m_mlp = new MultiLayerPerceptron(m_architecture, random);
    if (m_regression) {
        m_mlp.setMode(MultiLayerPerceptron.REGRESSION_MODE);
    } else {
        m_mlp.setMode(MultiLayerPerceptron.CLASSIFICATION_MODE);
    }

    // Convert inputs to double arrays. Values from the class column are
    // encoded as bitvectors.
    final int classColNr = posSpec.findColumnIndex(classColName);
    final boolean ignoreMissing = m_ignoreMV.getBooleanValue();
    final List<Double[]> samples = new ArrayList<Double[]>();
    final List<Double[]> outputs = new ArrayList<Double[]>();
    final RowIterator rowIt = ((BufferedDataTable) inData[INDATA]).iterator();
    while (rowIt.hasNext()) {
        final DataRow row = rowIt.next();
        final Double[] sample = new Double[nrInputs];
        final Double[] output = new Double[nrOutputs];
        // becomes false as soon as a missing value disqualifies the row
        boolean add = true;
        final int nrCells = row.getNumCells();
        int index = 0;
        for (int i = 0; i < nrCells; i++) {
            final DataCell cell = row.getCell(i);
            if (i != classColNr) {
                if (cell.isMissing()) {
                    if (!ignoreMissing) {
                        throw new Exception("Missing values in input datatable");
                    }
                    add = false;
                    break;
                }
                sample[index] = ((DoubleValue) cell).getDoubleValue();
                index++;
            } else {
                if (cell.isMissing()) {
                    if (!ignoreMissing) {
                        throw new Exception("Missing value in class column");
                    }
                    add = false;
                    break;
                }
                if (m_regression) {
                    output[0] = ((DoubleValue) cell).getDoubleValue();
                } else {
                    // Look the class up once and fail with a clear message instead of a
                    // NullPointerException from unboxing when the value is not in the domain.
                    final Integer classIndex = classMap.get(cell);
                    if (classIndex == null) {
                        throw new IllegalStateException("Class value \"" + cell
                            + "\" is not contained in the domain of column " + classColName);
                    }
                    for (int j = 0; j < nrOutputs; j++) {
                        output[j] = Double.valueOf(j == classIndex.intValue() ? 1.0 : 0.0);
                    }
                }
            }
        }
        if (add) {
            samples.add(sample);
            outputs.add(output);
        }
    }
    // One row per accepted sample; the row arrays have nrInputs resp. nrOutputs entries.
    final Double[][] samplesarr = samples.toArray(new Double[samples.size()][]);
    final Double[][] outputsarr = outputs.toArray(new Double[outputs.size()][]);

    // Now finally train the network.
    m_mlp.setClassMapping(classMap);
    m_mlp.setInputMapping(inputmap);
    final RProp myrprop = new RProp();
    final int nrIterations = m_nrIterations.getIntValue();
    m_errors = new double[nrIterations];
    for (int iteration = 0; iteration < nrIterations; iteration++) {
        exec.setProgress((double) iteration / (double) nrIterations, "Iteration " + iteration);
        myrprop.train(m_mlp, samplesarr, outputsarr);
        // sum of squared errors over all training samples after this iteration
        double error = 0;
        for (int j = 0; j < outputsarr.length; j++) {
            final double[] myoutput = m_mlp.output(samplesarr[j]);
            for (int o = 0; o < nrOutputs; o++) {
                final double diff = myoutput[o] - outputsarr[j][o];
                error += diff * diff;
            }
        }
        m_errors[iteration] = error;
        exec.checkCanceled();
    }

    // handle the optional PMML input
    final PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[INMODEL] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    final PMMLPortObjectSpec outPortSpec = createPMMLPortObjectSpec(inPMMLSpec, posSpec, learningCols, targetCols);
    final PMMLPortObject outPMMLPort = new PMMLPortObject(outPortSpec, inPMMLPort, posSpec);
    outPMMLPort.addModelTranslater(new PMMLNeuralNetworkTranslator(m_mlp));
    return new PortObject[] { outPMMLPort };
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class ClusterNodeModel method initializeClusters.
/**
 * Creates the initial cluster prototypes from the first rows of the table.
 * <p>
 * One prototype is allocated per configured cluster, each with
 * {@code m_dimension - m_nrIgnoredColumns} entries. The i-th prototype is
 * filled from the i-th row, skipping ignored columns and using zero for
 * missing cells. If the table has fewer rows than clusters, the remaining
 * prototypes stay all-zero.
 *
 * @param input table providing the rows used as initial prototypes
 * @return the prototype matrix (number of clusters x number of used columns)
 */
private double[][] initializeClusters(final DataTable input) {
    final int clusterCount = m_nrOfClusters.getIntValue();
    // matrix of doubles (nr clusters * input dimension)
    final double[][] prototypes = new double[clusterCount][];
    for (int idx = 0; idx < clusterCount; idx++) {
        prototypes[idx] = new double[m_dimension - m_nrIgnoredColumns];
    }
    // take the values of the first rows in the table as cluster centers
    final RowIterator rows = input.iterator();
    for (int filled = 0; rows.hasNext() && filled < clusterCount; filled++) {
        final DataRow row = rows.next();
        final double[] center = prototypes[filled];
        int target = 0;
        for (int col = 0; col < row.getNumCells(); col++) {
            if (m_ignoreColumn[col]) {
                continue;
            }
            if (row.getCell(col).isMissing()) {
                // missing value: replace with zero
                center[target] = 0;
            } else {
                assert row.getCell(col).getType().isCompatible(DoubleValue.class);
                center[target] = ((DoubleValue) row.getCell(col)).getDoubleValue();
            }
            target++;
        }
    }
    return prototypes;
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class ClusterNodeModel method execute.
/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. Currently the objective function only looks for cluster centers
 * that are extremely similar to the first n patterns...
 * <p>
 * The cluster centers are refined in batch mode until
 * {@code updateClusterCenters} reports that nothing changed or the maximum
 * number of iterations is reached. Afterwards every input row is labeled with
 * its closest cluster, and the clustering is exported as PMML (plus a table of
 * cluster centers if that output is enabled).
 *
 * {@inheritDoc}
 *
 * @param data the input port objects; the table to cluster is at index 0 and,
 *            if the optional PMML input is enabled, the incoming PMML at index 1
 * @param exec used for progress reporting, cancellation checks and container creation
 * @return the labeled input table, optionally the cluster center table, and the PMML port object
 * @throws Exception if a used column contains a missing value or the execution is canceled
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    final BufferedDataTable inData = (BufferedDataTable) data[0];
    final DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = spec.getNumColumns();
    final HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    final double[][] clusters = initializeClusters(inData);
    final int nrOfClusters = m_nrOfClusters.getIntValue();
    final int nrUsedColumns = m_dimension - m_nrIgnoredColumns;
    // also keep counts of how many patterns fall in a specific cluster
    final int[] clusterCoverage = new int[nrOfClusters];
    // --------- create clusters --------------
    // space for cluster center updates (do batch update!); rows are (re)allocated per iteration
    final double[][] delta = new double[nrOfClusters][];
    // main loop - until clusters stop changing or maxNrIterations reached
    final int maxIterations = m_nrMaxIterations.getIntValue();
    int currentIteration = 0;
    boolean finished = false;
    while (!finished && currentIteration < maxIterations) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) maxIterations, "Iteration " + currentIteration);
        // reset counts and cluster-deltas; freshly allocated arrays are already all-zero
        for (int c = 0; c < nrOfClusters; c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[nrUsedColumns];
        }
        // accumulate every row into the delta of its closest prototype
        for (DataRow currentRow : inData) {
            final int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner < 0) {
                // let's report this during debugging (assertions enabled) ...
                assert winner >= 0 : "No winner found: " + winner;
                // ... otherwise just don't produce a result
                throw new IllegalStateException("No winner found: " + winner);
            }
            // update winning cluster centers delta
            int deltaPos = 0;
            for (int i = 0; i < m_dimension; i++) {
                if (m_ignoreColumn[i]) {
                    continue;
                }
                final DataCell currentCell = currentRow.getCell(i);
                if (currentCell.isMissing()) {
                    throw new Exception("Missing Values not (yet) allowed in k-Means.");
                }
                delta[winner][deltaPos] += ((DoubleValue) currentCell).getDoubleValue();
                deltaPos++;
            }
            clusterCoverage[winner]++;
        }
        // update cluster centers; the result tells whether the clusters have stopped changing
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    }
    // create list of feature names; only the leading entries (one per used column) are filled
    final String[] featureNames = new String[m_dimension];
    int usedIndex = 0;
    // a plain for loop so that a table without columns does not index into an empty array
    for (int col = 0; col < m_dimension; col++) {
        if (!m_ignoreColumn[col]) {
            featureNames[usedIndex] = spec.getColumnSpec(col).getName();
            usedIndex++;
        }
    }
    // create output container and also mapping for HiLiteing
    final boolean hiliteEnabled = m_enableHilite.getBooleanValue();
    final BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        final int winner = findClosestPrototypeFor(row, clusters);
        final DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (hiliteEnabled) {
            final RowKey key = new RowKey(CLUSTER + winner);
            Set<RowKey> members = mapping.get(key);
            if (members == null) {
                members = new HashSet<RowKey>();
                mapping.put(key, members);
            }
            members.add(row.getKey());
        }
    }
    labeledInput.close();
    if (hiliteEnabled) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    final BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    final PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    final PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    final PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    final Set<String> columns = new LinkedHashSet<String>();
    for (String s : pmmlOutSpec.getLearningFields()) {
        columns.add(s);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, nrOfClusters, clusters, clusterCoverage, columns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, nrUsedColumns, featureNames);
    if (!m_outputCenters) {
        return new PortObject[] { outData, outPMMLPort };
    }
    // additionally output one row per cluster center
    final BufferedDataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
    int i = 0;
    for (double[] cluster : clusters) {
        final List<DataCell> cells = new ArrayList<>();
        for (double d : cluster) {
            cells.add(new DoubleCell(d));
        }
        clusterCenterContainer.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
    }
    clusterCenterContainer.close();
    return new PortObject[] { outData, clusterCenterContainer.getTable(), outPMMLPort };
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class FCMAlgorithm method updateClusterCenters.
/**
 * The update method for the cluster centers.
 * <p>
 * Every cluster center (except the noise cluster, if noise clustering is
 * enabled) is set to the mean of all rows, each row weighted with its
 * membership raised to the power of the fuzzifier. The absolute change of every
 * coordinate is added to {@code m_totalChange}. If the delta value is to be
 * calculated automatically, {@code m_delta} is updated from the summed
 * distances between the (old) cluster centers and the rows.
 *
 * @param inData the table whose rows are clustered; rows are expected in the
 *            same order as the rows of {@code m_weightMatrix}
 * @param exec used to check for cancellation once per row
 * @throws CanceledExecutionException if the execution was canceled
 */
private void updateClusterCenters(final DataTable inData, final ExecutionContext exec) throws CanceledExecutionException {
    final double[] sumNumerator = new double[m_dimension];
    double sumupdate = 0;
    // stop updating at the noise cluster position (the last cluster) if noise clustering is on
    final int nrUpdatedClusters = m_noise ? m_nrClusters - 1 : m_nrClusters;
    // for each cluster center
    for (int c = 0; c < nrUpdatedClusters; c++) {
        for (int j = 0; j < m_dimension; j++) {
            sumNumerator[j] = 0;
        }
        double sumDenominator = 0;
        final RowIterator ri = inData.iterator();
        int i = 0;
        while (ri.hasNext()) {
            exec.checkCanceled();
            final DataRow dRow = ri.next();
            // membership of row i in cluster c raised to the fuzzifier; computed once per row
            final double weight = Math.pow(m_weightMatrix[i][c], m_fuzzifier);
            // for all attributes in X
            for (int j = 0; j < m_dimension; j++) {
                final DataCell dc = dRow.getCell(j);
                if (!dc.isMissing() && dc instanceof DoubleValue) {
                    sumNumerator[j] += weight * ((DoubleValue) dc).getDoubleValue();
                }
            }
            sumDenominator += weight;
            i++;
            if (m_noise && m_calculateDelta) {
                sumupdate += m_distance.compute(m_clusters[c], dRow);
            }
        }
        // end while for all datarows sum up
        if (sumDenominator == 0) {
            // No row contributes to this cluster (empty table or all memberships zero):
            // keep the current center instead of writing NaN into it and into m_totalChange.
            continue;
        }
        for (int j = 0; j < m_dimension; j++) {
            final double newValue = sumNumerator[j] / sumDenominator;
            m_totalChange += Math.abs(m_clusters[c][j] - newValue);
            m_clusters[c][j] = newValue;
        }
    }
    /*
     * Update the delta-value automatically if chosen.
     */
    if (m_noise && m_calculateDelta) {
        m_delta = Math.sqrt(m_lambda * (sumupdate / (m_nrRows * (m_clusters.length - 1))));
    }
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class FCMAlgorithm method updateWeightMatrix.
/**
 * The update method for the weight matrix.
 * <p>
 * For each of the first {@code m_nrRows} rows the memberships to all clusters
 * are recomputed and stored in {@code m_weightMatrix}. A row that lies exactly
 * on a (non-noise) cluster center gets weight 1 for that cluster and 0 for all
 * others. For every other row the fuzzy membership to each cluster is derived
 * from the ratios of its distances to all clusters; for the noise cluster (the
 * last one, if noise clustering is enabled) the squared delta value is used as
 * distance.
 *
 * @param inData the table whose rows are clustered; must provide at least
 *            {@code m_nrRows} rows
 * @param exec used to check for cancellation once per row
 * @throws CanceledExecutionException if the execution was canceled
 */
private void updateWeightMatrix(final DataTable inData, final ExecutionContext exec) throws CanceledExecutionException {
    final RowIterator ri = inData.iterator();
    final int nrAllClusters = m_clusters.length;
    // clusters that have a real center, i.e. all but the noise cluster
    final int nrCenterClusters = m_noise ? nrAllClusters - 1 : nrAllClusters;
    final double exponent = 1.0 / (m_fuzzifier - 1.0);
    // distances of the current row to every cluster; reused for each row
    final double[] distances = new double[nrAllClusters];
    for (int currentRow = 0; currentRow < m_nrRows; currentRow++) {
        final DataRow dRow = ri.next();
        exec.checkCanceled();
        // first check if the actual row is equal to a cluster center
        int sameCluster = -1;
        for (int i = 0; sameCluster < 0 && i < nrCenterClusters; i++) {
            for (int j = 0; j < dRow.getNumCells(); j++) {
                final DataCell currentCell = dRow.getCell(j);
                // missing and non-numeric cells are not compared
                if (currentCell.isMissing() || !(currentCell instanceof DoubleValue)) {
                    continue;
                }
                if (((DoubleValue) currentCell).getDoubleValue() == m_clusters[i][j]) {
                    sameCluster = i;
                } else {
                    sameCluster = -1;
                    break;
                }
            }
        }
        /*
         * The weight of a data point is 1 if it is exactly on the position
         * of the cluster, in this case 0 for the others
         */
        if (sameCluster >= 0) {
            for (int i = 0; i < m_weightMatrix[0].length; i++) {
                m_weightMatrix[currentRow][i] = (i == sameCluster) ? 1 : 0;
            }
            continue;
        }
        // Compute the distance to every cluster once per row instead of once per
        // cluster pair (assumes getDistance depends only on its arguments).
        for (int k = 0; k < nrAllClusters; k++) {
            if (m_noise && k == nrAllClusters - 1) {
                distances[k] = Math.pow(m_delta, 2.0);
            } else {
                distances[k] = getDistance(m_clusters[k], dRow);
            }
        }
        // calculate the fuzzy membership to each cluster
        for (int j = 0; j < nrAllClusters; j++) {
            double sum = 0;
            for (int k = 0; k < nrAllClusters; k++) {
                sum += Math.pow(distances[j] / distances[k], exponent);
            }
            m_weightMatrix[currentRow][j] = 1 / sum;
        }
    }
}
Aggregations