Use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.
Class ClusterAssignerNodeModel, method createColumnRearranger:
private ColumnRearranger createColumnRearranger(final PMMLPortObject port, final DataTableSpec inSpec) throws InvalidSettingsException {
    // make sure the PMML document actually contains a clustering model
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.ClusteringModel);
    if (models.isEmpty()) {
        String msg = "No Clustering Model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    // read the comparison measure, cluster labels and prototypes from the PMML model
    PMMLClusterTranslator trans = new PMMLClusterTranslator();
    port.initializeModelTranslator(trans);
    ComparisonMeasure measure = trans.getComparisonMeasure();
    List<Prototype> prototypes = new ArrayList<Prototype>();
    String[] labels = trans.getLabels();
    double[][] protos = trans.getPrototypes();
    for (int i = 0; i < protos.length; i++) {
        double[] prototype = protos[i];
        prototypes.add(new Prototype(prototype, new StringCell(labels[i])));
    }
    // append a column that assigns each row to its closest prototype
    ColumnRearranger colre = new ColumnRearranger(inSpec);
    colre.append(new ClusterAssignFactory(measure, prototypes, createNewOutSpec(inSpec),
        findLearnedColumnIndices(inSpec, trans.getUsedColumns())));
    return colre;
}
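For orientation, the appended ClusterAssignFactory conceptually assigns each row to the prototype with the smallest distance under the model's comparison measure. The following is a minimal sketch of that idea for the squared-Euclidean case; the method name assignCluster is hypothetical, and this is not the actual ClusterAssignFactory code.

// Hypothetical sketch: assign a numeric row to its nearest prototype (squared Euclidean).
static int assignCluster(final double[] row, final double[][] prototypes) {
    int winner = -1;
    double best = Double.MAX_VALUE;
    for (int c = 0; c < prototypes.length; c++) {
        double dist = 0.0;
        for (int d = 0; d < row.length; d++) {
            double diff = row[d] - prototypes[c][d];
            dist += diff * diff;
        }
        if (dist < best) {
            best = dist;
            winner = c;
        }
    }
    return winner; // index into the labels array, e.g. labels[winner]
}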
Use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.
Class FuzzyClusterNodeModel, method execute:
/**
 * Generates a new clustering based on the input data table and the specified number of
 * clusters. The output table contains the original data rows together with
 * supplementary information about their degree of membership to each cluster center.
 * OUTPORT = original data rows with cluster membership information
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable indata = (BufferedDataTable) inData[0];
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;
    if (m_noise) {
        if (m_calculateDelta) {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            }
        } else {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            }
        }
    } else {
        if (m_memory) {
            m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier);
        } else {
            m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier);
        }
    }
    int nrRows = indata.getRowCount();
    DataTableSpec spec = indata.getDataTableSpec();
    int nrCols = spec.getNumColumns();
    List<String> learningCols = new LinkedList<String>();
    List<String> ignoreCols = new LinkedList<String>();
    // counter for included columns
    int z = 0;
    final int[] columns = new int[m_list.size()];
    for (int i = 0; i < nrCols; i++) {
        // if include does contain current column name
        String colname = spec.getColumnSpec(i).getName();
        if (m_list.contains(colname)) {
            columns[z] = i;
            z++;
            learningCols.add(colname);
        } else {
            ignoreCols.add(colname);
        }
    }
    ColumnRearranger colre = new ColumnRearranger(spec);
    colre.keepOnly(columns);
    BufferedDataTable filteredtable = exec.createColumnRearrangeTable(indata, colre, exec);
    // get dimension of feature space
    int dimension = filteredtable.getDataTableSpec().getNumColumns();
    Random random = new Random();
    if (m_useRandomSeed) {
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(nrRows, dimension, filteredtable, random);
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    double totalchange = Double.MAX_VALUE;
    while ((totalchange > 1e-7) && (currentIteration < m_maxNrIterations)) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_maxNrIterations,
            "Iteration " + currentIteration + " Total change of prototypes: " + totalchange);
        totalchange = m_fcmAlgo.doOneIteration(exec);
        currentIteration++;
    }
    if (m_measures) {
        double[][] data = null;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            data = new double[nrRows][m_fcmAlgo.getDimension()];
            int curRow = 0;
            for (DataRow dRow : filteredtable) {
                for (int j = 0; j < dRow.getNumCells(); j++) {
                    if (!(dRow.getCell(j).isMissing())) {
                        DoubleValue dv = (DoubleValue) dRow.getCell(j);
                        data[curRow][j] = dv.getDoubleValue();
                    } else {
                        data[curRow][j] = 0;
                    }
                }
                curRow++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(), data, m_fuzzifier);
    }
    ColumnRearranger colRearranger = new ColumnRearranger(spec);
    CellFactory membershipFac = new ClusterMembershipFactory(m_fcmAlgo);
    colRearranger.append(membershipFac);
    BufferedDataTable result = exec.createColumnRearrangeTable(indata, colRearranger, exec);
    // don't write out the noise cluster!
    double[][] clustercentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        double[][] cleaned = new double[clustercentres.length - 1][];
        for (int i = 0; i < cleaned.length; i++) {
            cleaned[i] = new double[clustercentres[i].length];
            System.arraycopy(clustercentres[i], 0, cleaned[i], 0, clustercentres[i].length);
        }
        clustercentres = cleaned;
    }
    exec.setMessage("Creating PMML cluster model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, spec, learningCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters, clustercentres,
        null, new LinkedHashSet<String>(pmmlOutSpec.getLearningFields())));
    return new PortObject[] { result, outPMMLPort };
}
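For context, each call to doOneIteration recomputes fuzzy memberships and cluster centres. Under fuzzifier m, the membership of a point x to centre c is 1 / Σ_k (d(x, v_c) / d(x, v_k))^(2/(m-1)). The sketch below illustrates that update for a single row using squared Euclidean distances; it is a simplified, hypothetical illustration (no noise cluster) and not the FCMAlgorithm implementation.

// Hypothetical sketch of the fuzzy c-means membership update for one data point;
// the actual logic lives in FCMAlgorithm#doOneIteration.
static double[] memberships(final double[] x, final double[][] centres, final double fuzzifier) {
    double[] u = new double[centres.length];
    for (int c = 0; c < centres.length; c++) {
        double distC = squaredDistance(x, centres[c]);
        double sum = 0.0;
        for (int k = 0; k < centres.length; k++) {
            // with squared distances the exponent becomes 1/(m-1)
            sum += Math.pow(distC / squaredDistance(x, centres[k]), 1.0 / (fuzzifier - 1.0));
        }
        u[c] = 1.0 / sum; // assumes x does not coincide exactly with any centre
    }
    return u;
}

static double squaredDistance(final double[] a, final double[] b) {
    double d = 0.0;
    for (int i = 0; i < a.length; i++) {
        d += (a[i] - b[i]) * (a[i] - b[i]);
    }
    return d;
}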
Use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.
Class ClusterNodeModel, method execute:
/**
 * Generates a new clustering based on the input data table and the specified number of
 * clusters. Currently the objective function only looks for cluster centers
 * that are extremely similar to the first n patterns...
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    BufferedDataTable inData = (BufferedDataTable) data[0];
    DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = inData.getDataTableSpec().getNumColumns();
    HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    double[][] clusters = initializeClusters(inData);
    // also keep counts of how many patterns fall in a specific cluster
    int[] clusterCoverage = new int[m_nrOfClusters.getIntValue()];
    // --------- create clusters --------------
    // reserve space for cluster center updates (do batch update!)
    double[][] delta = new double[m_nrOfClusters.getIntValue()][];
    for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
        delta[c] = new double[m_dimension - m_nrIgnoredColumns];
    }
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    boolean finished = false;
    while ((!finished) && (currentIteration < m_nrMaxIterations.getIntValue())) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_nrMaxIterations.getIntValue(),
            "Iteration " + currentIteration);
        // initialize counts and cluster-deltas
        for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[m_dimension - m_nrIgnoredColumns];
            int deltaPos = 0;
            for (int i = 0; i < m_dimension; i++) {
                if (!m_ignoreColumn[i]) {
                    delta[c][deltaPos++] = 0.0;
                }
            }
        }
        // assume that we are done (i.e. clusters have stopped changing)
        finished = true;
        // first training example
        RowIterator rowIt = inData.iterator();
        while (rowIt.hasNext()) {
            DataRow currentRow = rowIt.next();
            int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner >= 0) {
                // update winning cluster centers delta
                int deltaPos = 0;
                for (int i = 0; i < m_dimension; i++) {
                    DataCell currentCell = currentRow.getCell(i);
                    if (!m_ignoreColumn[i]) {
                        if (!currentCell.isMissing()) {
                            delta[winner][deltaPos] += ((DoubleValue) (currentCell)).getDoubleValue();
                        } else {
                            throw new Exception("Missing Values not (yet) allowed in k-Means.");
                        }
                        deltaPos++;
                    }
                }
                clusterCoverage[winner]++;
            } else {
                // this should never happen; report it during debugging
                assert (winner >= 0);
                // otherwise just don't produce a result
                throw new IllegalStateException("No winner found: " + winner);
            }
        }
        // update cluster centers
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    }
    // while(!finished & nrIt<maxNrIt)
    // create list of feature names
    // index of not-ignored columns
    int k = 0;
    // index of column
    int j = 0;
    String[] featureNames = new String[m_dimension];
    do {
        if (!m_ignoreColumn[j]) {
            featureNames[k] = spec.getColumnSpec(j).getName();
            k++;
        }
        j++;
    } while (j < m_dimension);
    // create output container and also mapping for HiLiteing
    BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        int winner = findClosestPrototypeFor(row, clusters);
        DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (m_enableHilite.getBooleanValue()) {
            RowKey key = new RowKey(CLUSTER + winner);
            if (mapping.get(key) == null) {
                Set<RowKey> set = new HashSet<RowKey>();
                set.add(row.getKey());
                mapping.put(key, set);
            } else {
                mapping.get(key).add(row.getKey());
            }
        }
    }
    labeledInput.close();
    if (m_enableHilite.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    Set<String> columns = new LinkedHashSet<String>();
    for (String s : pmmlOutSpec.getLearningFields()) {
        columns.add(s);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean,
        m_nrOfClusters.getIntValue(), clusters, clusterCoverage, columns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, m_dimension - m_nrIgnoredColumns, featureNames);
    if (m_outputCenters) {
        DataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
        int i = 0;
        for (double[] cluster : clusters) {
            List<DataCell> cells = new ArrayList<>();
            for (double d : cluster) {
                cells.add(new DoubleCell(d));
            }
            clusterCenterContainer.addRowToTable(
                new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
        }
        clusterCenterContainer.close();
        return new PortObject[] { outData, (BufferedDataTable) clusterCenterContainer.getTable(), outPMMLPort };
    } else {
        return new PortObject[] { outData, outPMMLPort };
    }
}
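For context, the batch update step referenced above (updateClusterCenters) amounts to replacing each centre with the mean of the rows assigned to it and reporting whether any centre moved. The following is a hypothetical sketch of that idea, not the actual ClusterNodeModel helper.

// Hypothetical sketch of a batch k-means centre update; clusters that cover no rows keep their old centre.
static boolean updateCentres(final int[] coverage, final double[][] centres, final double[][] delta) {
    boolean finished = true;
    for (int c = 0; c < centres.length; c++) {
        if (coverage[c] == 0) {
            continue; // empty cluster: leave the centre where it is
        }
        for (int d = 0; d < centres[c].length; d++) {
            double newValue = delta[c][d] / coverage[c]; // mean of the assigned rows' values
            if (Math.abs(newValue - centres[c][d]) > 1e-10) {
                finished = false; // a centre moved, so another iteration is needed
            }
            centres[c][d] = newValue;
        }
    }
    return finished;
}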