Search in sources :

Example 1 with PMMLClusterTranslator

use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.

In class ClusterAssignerNodeModel, method createColumnRearranger:

/**
 * Builds the column rearranger that appends the output of a
 * {@code ClusterAssignFactory} to the given input table spec.
 *
 * The clustering model is read from the PMML port through a
 * {@link PMMLClusterTranslator}; each prototype vector is paired with the
 * label found at the same index in the translator's label array.
 *
 * @param port PMML port object expected to contain a clustering model
 * @param inSpec spec of the table the new column is appended to
 * @return rearranger over {@code inSpec} with the assignment factory appended
 * @throws InvalidSettingsException declared by the signature; presumably
 *             raised by one of the helper calls, which are not visible here
 * @throws RuntimeException if the port contains no clustering model
 */
private ColumnRearranger createColumnRearranger(final PMMLPortObject port, final DataTableSpec inSpec) throws InvalidSettingsException {
    // Refuse to continue when the PMML document carries no clustering model.
    final boolean hasClusteringModel = !port.getPMMLValue().getModels(PMMLModelType.ClusteringModel).isEmpty();
    if (!hasClusteringModel) {
        final String message = "No Clustering Model found.";
        LOGGER.error(message);
        throw new RuntimeException(message);
    }

    final PMMLClusterTranslator translator = new PMMLClusterTranslator();
    port.initializeModelTranslator(translator);
    final ComparisonMeasure measure = translator.getComparisonMeasure();

    // Pair every prototype vector with the label stored at the same position.
    final String[] clusterLabels = translator.getLabels();
    final double[][] centers = translator.getPrototypes();
    final List<Prototype> prototypes = new ArrayList<Prototype>();
    int labelIndex = 0;
    for (final double[] center : centers) {
        prototypes.add(new Prototype(center, new StringCell(clusterLabels[labelIndex])));
        labelIndex++;
    }

    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final ClusterAssignFactory assignFactory = new ClusterAssignFactory(measure, prototypes, createNewOutSpec(inSpec), findLearnedColumnIndices(inSpec, translator.getUsedColumns()));
    rearranger.append(assignFactory);
    return rearranger;
}
Also used : Node(org.w3c.dom.Node) ArrayList(java.util.ArrayList) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) StringCell(org.knime.core.data.def.StringCell) ComparisonMeasure(org.knime.base.node.mine.cluster.PMMLClusterTranslator.ComparisonMeasure)

Example 2 with PMMLClusterTranslator

use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.

In class FuzzyClusterNodeModel, method execute:

/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. In the output table, you will find the datarow with
 * supplementary information about the membership to each cluster center.
 * OUTPORT = original datarows with cluster membership information
 *
 * The method proceeds in these steps:
 * <ol>
 * <li>pick the algorithm implementation from the noise / delta / memory flags,</li>
 * <li>restrict the input table to the columns named in the include list,</li>
 * <li>iterate until the total prototype change drops to 1e-7 or below, or
 * the maximum number of iterations is reached,</li>
 * <li>optionally compute the quality measures,</li>
 * <li>append the membership columns to the original table and publish the
 * cluster centres as a PMML clustering model.</li>
 * </ol>
 *
 * @param inData port objects; index 0 is the data table, index 1 is read as
 *            the PMML input only when the PMML input is enabled
 * @param exec execution context used for progress, cancellation and table
 *            creation
 * @return the table with appended membership information followed by the
 *         PMML port object
 * @throws Exception if execution is cancelled or any called helper fails
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable indata = (BufferedDataTable) inData[0];
    // reset the fields describing a previous run
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;
    // choose the algorithm variant; with noise enabled the fourth constructor
    // argument is m_lambda when delta is calculated, otherwise m_delta
    if (m_noise) {
        if (m_calculateDelta) {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            }
        } else {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            }
        }
    } else {
        if (m_memory) {
            m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier);
        } else {
            m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier);
        }
    }
    int nrRows = indata.getRowCount();
    DataTableSpec spec = indata.getDataTableSpec();
    int nrCols = spec.getNumColumns();
    List<String> learningCols = new LinkedList<String>();
    // scratch buffer for the indices of included columns; at most every
    // input column can match, so nrCols slots always suffice
    final int[] matched = new int[nrCols];
    // counter for included columns
    int z = 0;
    for (int i = 0; i < nrCols; i++) {
        // if include does contain current column name
        String colname = spec.getColumnSpec(i).getName();
        if (m_list.contains(colname)) {
            matched[z] = i;
            z++;
            learningCols.add(colname);
        }
    }
    // Trim to the indices actually found. Sizing the array by the include
    // list would leave trailing zeros whenever the list names a column that
    // is absent from the table, and those zeros would select column 0.
    final int[] columns = new int[z];
    System.arraycopy(matched, 0, columns, 0, z);
    ColumnRearranger colre = new ColumnRearranger(spec);
    colre.keepOnly(columns);
    BufferedDataTable filteredtable = exec.createColumnRearrangeTable(indata, colre, exec);
    // get dimension of feature space
    int dimension = filteredtable.getDataTableSpec().getNumColumns();
    Random random = new Random();
    if (m_useRandomSeed) {
        // fixed seed requested
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(nrRows, dimension, filteredtable, random);
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    double totalchange = Double.MAX_VALUE;
    while ((totalchange > 1e-7) && (currentIteration < m_maxNrIterations)) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_maxNrIterations, "Iteration " + currentIteration + " Total change of prototypes: " + totalchange);
        totalchange = m_fcmAlgo.doOneIteration(exec);
        currentIteration++;
    }
    if (m_measures) {
        double[][] data = null;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            // the in-memory variant already holds the data as doubles
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            // otherwise convert the filtered table; missing cells become 0
            data = new double[nrRows][m_fcmAlgo.getDimension()];
            int curRow = 0;
            for (DataRow dRow : filteredtable) {
                for (int j = 0; j < dRow.getNumCells(); j++) {
                    if (!(dRow.getCell(j).isMissing())) {
                        DoubleValue dv = (DoubleValue) dRow.getCell(j);
                        data[curRow][j] = dv.getDoubleValue();
                    } else {
                        data[curRow][j] = 0;
                    }
                }
                curRow++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(), data, m_fuzzifier);
    }
    // append the membership columns to the unfiltered input table
    ColumnRearranger colRearranger = new ColumnRearranger(spec);
    CellFactory membershipFac = new ClusterMembershipFactory(m_fcmAlgo);
    colRearranger.append(membershipFac);
    BufferedDataTable result = exec.createColumnRearrangeTable(indata, colRearranger, exec);
    // don't write out the noise cluster!
    double[][] clustercentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        // copy every centre except the last one (presumably the noise cluster)
        double[][] cleaned = new double[clustercentres.length - 1][];
        for (int i = 0; i < cleaned.length; i++) {
            cleaned[i] = new double[clustercentres[i].length];
            System.arraycopy(clustercentres[i], 0, cleaned[i], 0, clustercentres[i].length);
        }
        clustercentres = cleaned;
    }
    exec.setMessage("Creating PMML cluster model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, spec, learningCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters, clustercentres, null, new LinkedHashSet<String>(pmmlOutSpec.getLearningFields())));
    return new PortObject[] { result, outPMMLPort };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) Random(java.util.Random) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) CellFactory(org.knime.core.data.container.CellFactory) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 3 with PMMLClusterTranslator

use of org.knime.base.node.mine.cluster.PMMLClusterTranslator in project knime-core by knime.

In class ClusterNodeModel, method execute:

/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. Currently the objective function only looks for cluster centers
 * that are extremely similar to the first n patterns...
 *
 * Each iteration assigns every row to its closest prototype, accumulates the
 * row values per winning cluster and then lets
 * {@code updateClusterCenters} move the centres; the loop ends when that
 * call reports completion or the maximum number of iterations is reached.
 * Afterwards every row is labelled with its cluster, the optional HiLite
 * mapping is filled, and the centres are published as a PMML clustering
 * model (plus, when enabled, as an extra table of cluster centres).
 *
 * @param data port objects; index 0 is the data table, index 1 is read as
 *            the PMML input only when the PMML input is enabled
 * @param exec execution context used for progress, cancellation and
 *            container creation
 * @return the labelled table, optionally the cluster-centre table, and the
 *         PMML port object
 * @throws Exception if a non-ignored cell is missing, execution is
 *             cancelled, or any called helper fails
 * @throws IllegalStateException if no closest prototype is found for a row
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    BufferedDataTable inData = (BufferedDataTable) data[0];
    DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = spec.getNumColumns();
    HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    double[][] clusters = initializeClusters(inData);
    // also keep counts of how many patterns fall in a specific cluster
    int[] clusterCoverage = new int[m_nrOfClusters.getIntValue()];
    // --------- create clusters --------------
    // reserve space for cluster center updates (do batch update!); the rows
    // are (re)allocated at the start of every iteration below
    double[][] delta = new double[m_nrOfClusters.getIntValue()][];
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    boolean finished = false;
    while ((!finished) && (currentIteration < m_nrMaxIterations.getIntValue())) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_nrMaxIterations.getIntValue(), "Iteration " + currentIteration);
        // initialize counts and cluster-deltas; a freshly allocated double
        // array is already all zeros, so no explicit fill is needed
        for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[m_dimension - m_nrIgnoredColumns];
        }
        for (DataRow currentRow : inData) {
            int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner < 0) {
                // let's report this during development (assertions enabled)
                assert (winner >= 0);
                // otherwise just don't reproduce result
                throw new IllegalStateException("No winner found: " + winner);
            }
            // update winning cluster centers delta
            int deltaPos = 0;
            for (int i = 0; i < m_dimension; i++) {
                if (!m_ignoreColumn[i]) {
                    // only cells of learned columns are read
                    DataCell currentCell = currentRow.getCell(i);
                    if (currentCell.isMissing()) {
                        throw new Exception("Missing Values not (yet) allowed in k-Means.");
                    }
                    delta[winner][deltaPos] += ((DoubleValue) (currentCell)).getDoubleValue();
                    deltaPos++;
                }
            }
            clusterCoverage[winner]++;
        }
        // update cluster centers; the result decides whether we are done
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    }
    // create list of feature names: k indexes the not-ignored columns, j all
    // columns. A plain for loop (instead of do/while) avoids touching
    // m_ignoreColumn[0] when the table has no columns at all.
    String[] featureNames = new String[m_dimension];
    int k = 0;
    for (int j = 0; j < m_dimension; j++) {
        if (!m_ignoreColumn[j]) {
            featureNames[k] = spec.getColumnSpec(j).getName();
            k++;
        }
    }
    // create output container and also mapping for HiLiteing
    BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        int winner = findClosestPrototypeFor(row, clusters);
        DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (m_enableHilite.getBooleanValue()) {
            // group the row keys by the key of their cluster
            RowKey key = new RowKey(CLUSTER + winner);
            Set<RowKey> members = mapping.get(key);
            if (members == null) {
                members = new HashSet<RowKey>();
                mapping.put(key, members);
            }
            members.add(row.getKey());
        }
    }
    labeledInput.close();
    if (m_enableHilite.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    // insertion-ordered copy of the learning fields
    Set<String> columns = new LinkedHashSet<String>(pmmlOutSpec.getLearningFields());
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrOfClusters.getIntValue(), clusters, clusterCoverage, columns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, m_dimension - m_nrIgnoredColumns, featureNames);
    if (m_outputCenters) {
        // additionally emit one row per cluster centre
        DataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
        int i = 0;
        for (double[] cluster : clusters) {
            List<DataCell> cells = new ArrayList<>();
            for (double d : cluster) {
                cells.add(new DoubleCell(d));
            }
            clusterCenterContainer.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
        }
        clusterCenterContainer.close();
        return new PortObject[] { outData, (BufferedDataTable) clusterCenterContainer.getTable(), outPMMLPort };
    } else {
        return new PortObject[] { outData, outPMMLPort };
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) AppendedColumnRow(org.knime.core.data.append.AppendedColumnRow) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) StringCell(org.knime.core.data.def.StringCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

PMMLClusterTranslator (org.knime.base.node.mine.cluster.PMMLClusterTranslator)3 ArrayList (java.util.ArrayList)2 LinkedHashSet (java.util.LinkedHashSet)2 DataRow (org.knime.core.data.DataRow)2 DataTableSpec (org.knime.core.data.DataTableSpec)2 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)2 StringCell (org.knime.core.data.def.StringCell)2 BufferedDataTable (org.knime.core.node.BufferedDataTable)2 PortObject (org.knime.core.node.port.PortObject)2 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)2 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 Random (java.util.Random)1 Set (java.util.Set)1 ComparisonMeasure (org.knime.base.node.mine.cluster.PMMLClusterTranslator.ComparisonMeasure)1 DataCell (org.knime.core.data.DataCell)1 DoubleValue (org.knime.core.data.DoubleValue)1