Search in sources :

Example 21 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

the class BoxPlotNodeModel method execute.

/**
 * Computes the box plot statistics (minimum, lower whisker, lower quartile,
 * median, upper quartile, upper whisker, maximum) for every column of the
 * first input table that is compatible with {@link DoubleValue}, and
 * collects mild and extreme outliers per column.
 *
 * <p>Each numeric column is sorted ascending on its own; the statistics are
 * then picked up in one pass over the sorted rows. The results are stored in
 * {@code m_statistics}, {@code m_mildOutliers} and {@code m_extremeOutliers},
 * and additionally written to the returned table.</p>
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    if (inData[0] == null) {
        return new BufferedDataTable[] {};
    }
    BufferedDataTable table = inData[0];
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    int colIdx = 0;
    List<DataColumnSpec> outputColSpecs = new ArrayList<DataColumnSpec>();
    // share of the overall progress that one numeric column accounts for
    double subProgress = 1.0 / getNumNumericColumns(table.getDataTableSpec());
    // the row count does not change while executing, so read it only once
    final long totalRows = table.size();
    for (DataColumnSpec colSpec : table.getDataTableSpec()) {
        exec.checkCanceled();
        if (colSpec.getType().isCompatible(DoubleValue.class)) {
            // only numeric columns get a progress share, since subProgress
            // is derived from the number of numeric columns
            ExecutionContext colExec = exec.createSubExecutionContext(subProgress);
            double[] statistic = new double[SIZE];
            outputColSpecs.add(colSpec);
            List<String> col = new ArrayList<String>();
            col.add(colSpec.getName());
            ExecutionContext sortExec = colExec.createSubExecutionContext(0.75);
            ExecutionContext findExec = colExec.createSubExecutionContext(0.25);
            SortedTable sorted = new SortedTable(table, col, new boolean[] { true }, sortExec);
            final String progressMessage = "determining statistics for: " + colSpec.getName();
            // index of the current row including rows with missing values
            long currRowAbsolute = 0;
            // 1-based index of the current row among the non-missing rows
            long currCountingRow = 1;
            double lastValue = 1;
            // number of non-missing rows; decremented for each missing value
            long nrOfRows = totalRows;
            boolean first = true;
            for (DataRow row : sorted) {
                exec.checkCanceled();
                double rowProgress = currRowAbsolute / (double) totalRows;
                findExec.setProgress(rowProgress, progressMessage);
                if (row.getCell(colIdx).isMissing()) {
                    // asserts that the missing values are sorted at
                    // the top of the table
                    currRowAbsolute++;
                    nrOfRows--;
                    continue;
                }
                final double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                // get the first value = actually observed minimum
                if (first) {
                    statistic[MIN] = value;
                    // initialize the statistics with first value
                    // if the table is large enough it will be overridden
                    // this is just for the case of tables with < 5 rows
                    statistic[MEDIAN] = value;
                    statistic[LOWER_QUARTILE] = value;
                    statistic[UPPER_QUARTILE] = value;
                    first = false;
                }
                // get the last value = actually observed maximum
                if (currRowAbsolute == totalRows - 1) {
                    statistic[MAX] = value;
                }
                // positions are computed in double precision: float cannot
                // represent large row counts exactly, which would shift the
                // quantile rows for very large tables
                double medianPos = nrOfRows * 0.5;
                double lowerQuartilePos = nrOfRows * 0.25;
                double upperQuartilePos = nrOfRows * 0.75;
                if (currCountingRow == (long) Math.floor(lowerQuartilePos) + 1) {
                    // fractional position: take the row's value; integral
                    // position: mean between this row and the previous one
                    statistic[LOWER_QUARTILE] = (lowerQuartilePos % 1 != 0) ? value : (value + lastValue) / 2;
                }
                if (currCountingRow == (long) Math.floor(medianPos) + 1) {
                    statistic[MEDIAN] = (medianPos % 1 != 0) ? value : (value + lastValue) / 2;
                }
                if (currCountingRow == (long) Math.floor(upperQuartilePos) + 1) {
                    statistic[UPPER_QUARTILE] = (upperQuartilePos % 1 != 0) ? value : (value + lastValue) / 2;
                }
                lastValue = value;
                currRowAbsolute++;
                currCountingRow++;
            }
            double iqr = statistic[UPPER_QUARTILE] - statistic[LOWER_QUARTILE];
            Map<Double, Set<RowKey>> mild = new LinkedHashMap<Double, Set<RowKey>>();
            Map<Double, Set<RowKey>> extreme = new LinkedHashMap<Double, Set<RowKey>>();
            // per default the whiskers are at min and max
            double[] whiskers = new double[] { statistic[MIN], statistic[MAX] };
            // outliers only need to be searched if min or max lies beyond
            // 1.5 * IQR from the respective quartile
            if (statistic[MIN] < (statistic[LOWER_QUARTILE] - (1.5 * iqr)) || statistic[MAX] > statistic[UPPER_QUARTILE] + (1.5 * iqr)) {
                detectOutliers(sorted, iqr, new double[] { statistic[LOWER_QUARTILE], statistic[UPPER_QUARTILE] }, mild, extreme, whiskers, colIdx);
            }
            statistic[LOWER_WHISKER] = whiskers[0];
            statistic[UPPER_WHISKER] = whiskers[1];
            m_mildOutliers.put(colSpec.getName(), mild);
            m_extremeOutliers.put(colSpec.getName(), extreme);
            m_statistics.put(colSpec, statistic);
        }
        colIdx++;
    }
    DataContainer container = createOutputTable(exec, outputColSpecs);
    // return a data array with just one row but with the data table spec
    // for the column selection panel
    m_array = new DefaultDataArray(table, 1, 2);
    return new BufferedDataTable[] { exec.createBufferedDataTable(container.getTable(), exec) };
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 22 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

the class BoxPlotNodeModel method createOutputTable.

/**
 * Writes the previously computed statistics into a new, already closed
 * container: one row per statistic (labelled "Minimum", "Smallest", ...)
 * and one double column per entry of {@code outputColSpecs}.
 *
 * @param exec the execution context used to create the container
 * @param outputColSpecs the columns whose statistics are looked up in
 *            {@code m_statistics}
 * @return the closed container holding the statistics rows
 */
private DataContainer createOutputTable(final ExecutionContext exec, final List<DataColumnSpec> outputColSpecs) {
    final DataTableSpec resultSpec = createOutputSpec(outputColSpecs);
    final DataContainer result = exec.createDataContainer(resultSpec);

    // row label for each statistic, addressed by the statistic's index
    final String[] labels = new String[SIZE];
    labels[MIN] = "Minimum";
    labels[LOWER_WHISKER] = "Smallest";
    labels[LOWER_QUARTILE] = "Lower Quartile";
    labels[MEDIAN] = "Median";
    labels[UPPER_QUARTILE] = "Upper Quartile";
    labels[UPPER_WHISKER] = "Largest";
    labels[MAX] = "Maximum";

    final int numCols = outputColSpecs.size();
    int statIdx = 0;
    while (statIdx < SIZE) {
        final DataCell[] rowCells = new DataCell[numCols];
        for (int c = 0; c < numCols; c++) {
            rowCells[c] = new DoubleCell(m_statistics.get(outputColSpecs.get(c))[statIdx]);
        }
        result.addRowToTable(new DefaultRow(labels[statIdx], rowCells));
        statIdx++;
    }
    result.close();
    return result;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataContainer(org.knime.core.data.container.DataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow)

Example 23 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

the class BoxplotCalculator method calculateMultipleConditional.

/**
 * Calculates statistics for a conditional box plot.
 *
 * <p>The rows are split by the value of the category column (plus one extra
 * class for missing category values); for every numeric column and every
 * class the minimum, maximum, quartiles, median, whiskers and outliers are
 * determined. As a side effect {@code m_ignoredMissVals} (count of missing
 * numeric values per column and class) and {@code excludedClasses} (classes
 * without any value per column) are replaced.</p>
 *
 * @param table the data table
 * @param catCol the column with the category values
 * @param numCol the numeric columns
 * @param exec an execution context
 * @return A linked hash map that maps each numeric column name to a map
 *         with the BoxplotStatistics for each category
 * @throws CanceledExecutionException when the user cancels the execution
 * @throws InvalidSettingsException when the category column has no domain values
 */
public LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> calculateMultipleConditional(final BufferedDataTable table, final String catCol, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    DataTableSpec spec = table.getSpec();
    int catColIdx = spec.findColumnIndex(catCol);
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    }
    Set<DataCell> valuesSet = spec.getColumnSpec(catColIdx).getDomain().getValues();
    if (valuesSet == null) {
        throw new InvalidSettingsException("Selected category column has no domain values");
    }
    // order the classes by their string representation
    ArrayList<DataCell> vals = new ArrayList<>(valuesSet);
    Collections.sort(vals, new Comparator<DataCell>() {

        @Override
        public int compare(final DataCell o1, final DataCell o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    // add Missing values class as it is never in specification
    vals.add(new MissingCell(null));
    // one single-column container per numeric column and class, plus a
    // missing value counter that starts at zero
    LinkedHashMap<String, LinkedHashMap<String, DataContainer>> containers = new LinkedHashMap<>();
    m_ignoredMissVals = new LinkedHashMap<>();
    for (int i = 0; i < numCol.length; i++) {
        LinkedHashMap<String, DataContainer> map = new LinkedHashMap<>();
        LinkedHashMap<String, Long> missValMap = new LinkedHashMap<>();
        for (DataCell c : vals) {
            // we need to have clear names, otherwise Missing values class will be taken as "?"
            String name = c.isMissing() ? MISSING_VALUES_CLASS : c.toString();
            map.put(name, exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
            missValMap.put(name, 0L);
        }
        containers.put(numCol[i], map);
        m_ignoredMissVals.put(numCol[i], missValMap);
    }
    ExecutionContext subExec = exec.createSubExecutionContext(0.7);
    long count = 0;
    final long numOfRows = table.size();
    // distribute the numeric values of every row to the container of the row's class
    for (DataRow row : table) {
        exec.checkCanceled();
        subExec.setProgress(count++ / (double) numOfRows);
        DataCell catCell = row.getCell(catColIdx);
        String catName = catCell.isMissing() ? MISSING_VALUES_CLASS : catCell.toString();
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            // NOTE(review): a category value that is not listed in the column
            // domain has no container/counter here and leads to a
            // NullPointerException - confirm that the domain is always complete
            if (!cell.isMissing()) {
                containers.get(numCol[i]).get(catName).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                // increment missing values
                LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(numCol[i]);
                missValMap.replace(catName, missValMap.get(catName) + 1);
            }
        }
    }
    LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> statsMap = new LinkedHashMap<>();
    excludedClasses = new LinkedHashMap<>();
    ExecutionContext subExec2 = exec.createSubExecutionContext(1.0);
    int count2 = 0;
    for (Entry<String, LinkedHashMap<String, DataContainer>> entry : containers.entrySet()) {
        exec.checkCanceled();
        subExec2.setProgress(count2++ / (double) containers.size());
        LinkedHashMap<String, DataContainer> containers2 = entry.getValue();
        LinkedHashMap<String, BoxplotStatistics> colStats = new LinkedHashMap<String, BoxplotStatistics>();
        String colName = entry.getKey();
        List<String> excludedColClassesList = new ArrayList<>();
        for (Entry<String, DataContainer> entry2 : containers2.entrySet()) {
            Set<Outlier> extremeOutliers = new HashSet<Outlier>();
            Set<Outlier> mildOutliers = new HashSet<Outlier>();
            entry2.getValue().close();
            String catName = entry2.getKey();
            BufferedDataTable catTable = (BufferedDataTable) entry2.getValue().getTable();
            LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(colName);
            if (catTable.size() == 0) {
                if (!(catName.equals(MISSING_VALUES_CLASS) && missValMap.get(catName) == 0)) {
                    // we should add missing values to this list, only if they were there
                    excludedColClassesList.add(catName);
                }
                missValMap.remove(catName);
                continue;
            } else {
                // keep only counters of classes that actually ignored values
                if (missValMap.get(catName) == 0) {
                    missValMap.remove(catName);
                }
            }
            SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

                @Override
                public int compare(final DataRow o1, final DataRow o2) {
                    double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                    double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                    // Double.compare is a total order (NaN sorts last); the
                    // plain '<' / '==' comparison is not consistent for NaN
                    return Double.compare(d1, d2);
                }
            }, false, exec);
            double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
            // a "d" flag is true when the quantile falls between two rows,
            // in which case the mean of both neighbours is used
            boolean dq1 = catTable.size() % 4 == 0;
            long q1Idx = catTable.size() / 4;
            boolean dq3 = 3 * catTable.size() % 4 == 0;
            long q3Idx = 3 * catTable.size() / 4;
            boolean dMedian = catTable.size() % 2 == 0;
            long medianIdx = catTable.size() / 2;
            int counter = 0;
            // first pass over the sorted values: min, max, quartiles and median
            for (DataRow row : st) {
                double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
                if (counter == 0) {
                    min = val;
                }
                if (counter == catTable.size() - 1) {
                    max = val;
                }
                if (counter == q1Idx - 1 && dq1) {
                    q1 = val;
                }
                if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                    if (dq1) {
                        q1 = (q1 + val) / 2.0;
                    } else {
                        q1 = val;
                    }
                }
                if (counter == medianIdx - 1 && dMedian) {
                    median = val;
                }
                if (counter == medianIdx) {
                    if (dMedian) {
                        median = (median + val) / 2;
                    } else {
                        median = val;
                    }
                }
                if (counter == q3Idx - 1 && dq3) {
                    q3 = val;
                }
                if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                    if (dq3) {
                        q3 = (q3 + val) / 2.0;
                    } else {
                        q3 = val;
                    }
                }
                counter++;
            }
            double iqr = q3 - q1;
            double lowerWhisker = min;
            double upperWhisker = max;
            // values beyond 1.5 * IQR are mild, beyond 3 * IQR extreme outliers
            double upperWhiskerFence = q3 + (1.5 * iqr);
            double lowerWhiskerFence = q1 - (1.5 * iqr);
            double lowerFence = q1 - (3 * iqr);
            double upperFence = q3 + (3 * iqr);
            // second pass: classify outliers and move the whiskers to the
            // outermost values that are still inside the whisker fences
            for (DataRow row : st) {
                double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
                String rowKey = row.getKey().getString();
                if (value < lowerFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value < lowerWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                    lowerWhisker = value;
                } else if (value <= upperWhiskerFence) {
                    upperWhisker = value;
                } else if (value > upperFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value > upperWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                }
            }
            colStats.put(catName, new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
        }
        statsMap.put(colName, colStats);
        // missing values part
        String[] excludedColClasses = excludedColClassesList.toArray(new String[excludedColClassesList.size()]);
        excludedClasses.put(colName, excludedColClasses);
    }
    return statsMap;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) MissingCell(org.knime.core.data.MissingCell) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 24 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

the class ColorExtractNodeModel method extractColorTable.

/**
 * Builds a two-row table ("min" and "max") from a color range. Each row
 * holds the range's value followed by the red, green, blue, alpha and
 * combined RGB integer of the associated color.
 *
 * @param range the color range to read the bounds and their colors from
 * @return the table with one row for the minimum and one for the maximum
 */
private DataTable extractColorTable(final ColorModelRange range) {
    final DataContainer container = new DataContainer(createSpec(DoubleCell.TYPE));
    final RowKey[] rowIds = { new RowKey("min"), new RowKey("max") };
    final Color[] colors = { range.getMinColor(), range.getMaxColor() };
    final double[] bounds = { range.getMinValue(), range.getMaxValue() };
    for (int idx = 0; idx < rowIds.length; idx++) {
        final Color color = colors[idx];
        container.addRowToTable(new DefaultRow(
            rowIds[idx],
            new DoubleCell(bounds[idx]),
            new IntCell(color.getRed()),
            new IntCell(color.getGreen()),
            new IntCell(color.getBlue()),
            new IntCell(color.getAlpha()),
            new IntCell(color.getRGB())));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataContainer(org.knime.core.data.container.DataContainer) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) Color(java.awt.Color) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell)

Example 25 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

the class ClusterNodeModel method execute.

/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. Currently the objective function only looks for cluster centers
 * that are extremely similar to the first n patterns...
 *
 * <p>The cluster centers are updated in batch mode until they stop changing
 * or the maximum number of iterations is reached. Afterwards every input row
 * is labeled with its closest cluster. The result contains the labeled
 * table, optionally a table with the cluster centers, and the PMML model.</p>
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    BufferedDataTable inData = (BufferedDataTable) data[0];
    DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = spec.getNumColumns();
    HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    double[][] clusters = initializeClusters(inData);
    // also keep counts of how many patterns fall in a specific cluster
    int[] clusterCoverage = new int[m_nrOfClusters.getIntValue()];
    // --------- create clusters --------------
    // reserve space for cluster center updates (do batch update!)
    double[][] delta = new double[m_nrOfClusters.getIntValue()][];
    for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
        delta[c] = new double[m_dimension - m_nrIgnoredColumns];
    }
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    boolean finished = false;
    while ((!finished) && (currentIteration < m_nrMaxIterations.getIntValue())) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_nrMaxIterations.getIntValue(), "Iteration " + currentIteration);
        // initialize counts and cluster-deltas; a freshly allocated double
        // array is already filled with 0.0, no explicit reset is needed
        for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[m_dimension - m_nrIgnoredColumns];
        }
        // assume that we are done (i.e. clusters have stopped changing)
        finished = true;
        // first training example
        RowIterator rowIt = inData.iterator();
        while (rowIt.hasNext()) {
            DataRow currentRow = rowIt.next();
            int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner >= 0) {
                // update winning cluster centers delta
                int deltaPos = 0;
                for (int i = 0; i < m_dimension; i++) {
                    DataCell currentCell = currentRow.getCell(i);
                    if (!m_ignoreColumn[i]) {
                        if (!currentCell.isMissing()) {
                            delta[winner][deltaPos] += ((DoubleValue) (currentCell)).getDoubleValue();
                        } else {
                            throw new Exception("Missing Values not (yet) allowed in k-Means.");
                        }
                        deltaPos++;
                    }
                }
                clusterCoverage[winner]++;
            } else {
                // let's report this during
                assert (winner >= 0);
                // otherwise just don't reproduce result
                throw new IllegalStateException("No winner found: " + winner);
            }
        }
        // update cluster centers
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    }
    // while(!finished & nrIt<maxNrIt)
    // create list of feature names; k is the index among the not-ignored
    // columns, j the index of the column. A plain for loop (instead of
    // do-while) also copes with a table without any columns.
    int k = 0;
    String[] featureNames = new String[m_dimension];
    for (int j = 0; j < m_dimension; j++) {
        if (!m_ignoreColumn[j]) {
            featureNames[k] = spec.getColumnSpec(j).getName();
            k++;
        }
    }
    // create output container and also mapping for HiLiteing
    BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        int winner = findClosestPrototypeFor(row, clusters);
        DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (m_enableHilite.getBooleanValue()) {
            // collect the keys of all rows that belong to the same cluster
            RowKey key = new RowKey(CLUSTER + winner);
            Set<RowKey> members = mapping.get(key);
            if (members == null) {
                members = new HashSet<RowKey>();
                mapping.put(key, members);
            }
            members.add(row.getKey());
        }
    }
    labeledInput.close();
    if (m_enableHilite.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    Set<String> columns = new LinkedHashSet<String>();
    for (String s : pmmlOutSpec.getLearningFields()) {
        columns.add(s);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrOfClusters.getIntValue(), clusters, clusterCoverage, columns));
    m_viewData = new ClusterViewData(clusters, clusterCoverage, m_dimension - m_nrIgnoredColumns, featureNames);
    if (m_outputCenters) {
        // additional output table with one row per cluster center
        DataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
        int i = 0;
        for (double[] cluster : clusters) {
            List<DataCell> cells = new ArrayList<>();
            for (double d : cluster) {
                cells.add(new DoubleCell(d));
            }
            clusterCenterContainer.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
        }
        clusterCenterContainer.close();
        return new PortObject[] { outData, (BufferedDataTable) clusterCenterContainer.getTable(), outPMMLPort };
    } else {
        return new PortObject[] { outData, outPMMLPort };
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) AppendedColumnRow(org.knime.core.data.append.AppendedColumnRow) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) StringCell(org.knime.core.data.def.StringCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DataContainer (org.knime.core.data.container.DataContainer)35 DataTableSpec (org.knime.core.data.DataTableSpec)25 DefaultRow (org.knime.core.data.def.DefaultRow)21 DataRow (org.knime.core.data.DataRow)19 DataCell (org.knime.core.data.DataCell)17 BufferedDataTable (org.knime.core.node.BufferedDataTable)15 RowKey (org.knime.core.data.RowKey)10 ArrayList (java.util.ArrayList)9 DoubleCell (org.knime.core.data.def.DoubleCell)9 IntCell (org.knime.core.data.def.IntCell)8 LinkedHashMap (java.util.LinkedHashMap)7 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)7 HashSet (java.util.HashSet)6 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 RowIterator (org.knime.core.data.RowIterator)6 StringCell (org.knime.core.data.def.StringCell)6 Map (java.util.Map)5 Set (java.util.Set)5 SortedTable (org.knime.base.data.sort.SortedTable)5 DataTable (org.knime.core.data.DataTable)5