Search in sources :

Example 6 with SortedTable

Use of SortedTable in project knime-core by knime.

the class AutoBinner method execute.

 * Determine bins.
 * @param data the input data
 * @param exec the execution context
 * @return the operation with the discretisation information
 * @throws Exception
public PMMLPreprocDiscretize execute(final BufferedDataTable data, final ExecutionContext exec) throws Exception {
    // Builds one array of bin edges per target column and passes the
    // column-name -> edges map to createDisretizeOp, whose result is returned.
    // NOTE(review): closing braces and the body of the first `if` are not
    // visible in this excerpt; the comments cover only the statements shown.
    // Auto configuration when target is not set
    final DataTableSpec spec = data.getDataTableSpec();
    if (null == m_settings.getTargetColumn() || m_settings.getIncludeAll()) {
    // determine intervals
    if (m_settings.getMethod().equals(Method.fixedNumber)) {
        // The domain bounds are read from inData's spec below, so they are
        // (re)computed first when needed, in a sub-context created with 0.9.
        BufferedDataTable inData = calcDomainBoundsIfNeccessary(data, exec.createSubExecutionContext(0.9), Arrays.asList(m_settings.getTargetColumn()));
        Map<String, double[]> edgesMap = new HashMap<String, double[]>();
        for (String target : m_settings.getTargetColumn()) {
            DataTableSpec inSpec = inData.getDataTableSpec();
            DataColumnSpec targetCol = inSpec.getColumnSpec(target);
            // bounds of the domain
            // The casts assume both bounds are DoubleValue cells; a null bound
            // would fail here -- TODO confirm bounds are always set at this point.
            double min = ((DoubleValue) targetCol.getDomain().getLowerBound()).getDoubleValue();
            double max = ((DoubleValue) targetCol.getDomain().getUpperBound()).getDoubleValue();
            // the edges of the bins
            // binCount bins need binCount + 1 edges: the two outer edges are
            // pinned to min and max, the inner ones are spaced evenly between.
            double[] edges = new double[m_settings.getBinCount() + 1];
            edges[0] = min;
            edges[edges.length - 1] = max;
            for (int i = 1; i < edges.length - 1; i++) {
                edges[i] = min + i / (double) m_settings.getBinCount() * (max - min);
            edgesMap.put(target, edges);
        return createDisretizeOp(edgesMap);
    } else if (m_settings.getMethod().equals(Method.sampleQuantiles)) {
        // LinkedHashMap keeps the entries in the order the targets are processed.
        Map<String, double[]> edgesMap = new LinkedHashMap<String, double[]>();
        final int colCount = m_settings.getTargetColumn().length;
        // contains all numeric columns if include all is set!
        for (String target : m_settings.getTargetColumn()) {
            exec.setMessage("Calculating quantiles (column \"" + target + "\")");
            // Two sub-contexts per column, created with 0.7 / colCount (used for
            // rearranging and sorting) and 0.3 / colCount (used for the edges).
            ExecutionContext colSortContext = exec.createSubExecutionContext(0.7 / colCount);
            ExecutionContext colCalcContext = exec.createSubExecutionContext(0.3 / colCount);
            // NOTE(review): no column selection on singleRearranger is visible
            // here, although the variable names suggest a single-column table.
            ColumnRearranger singleRearranger = new ColumnRearranger(spec);
            BufferedDataTable singleColSorted = colSortContext.createColumnRearrangeTable(data, singleRearranger, colSortContext);
            // Sorted by the target column only; the single `true` flag is
            // presumably "ascending" -- confirm against SortedTable.
            SortedTable sorted = new SortedTable(singleColSorted, Collections.singletonList(target), new boolean[] { true }, colSortContext);
            double[] edges = createEdgesFromQuantiles(sorted.getBufferedDataTable(), colCalcContext, m_settings.getSampleQuantiles());
            edgesMap.put(target, edges);
        return createDisretizeOp(edgesMap);
    } else {
        // Neither fixedNumber nor sampleQuantiles was configured.
        throw new IllegalStateException("Unknown binning method.");
Also used : DataTableSpec( HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec( ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger( DoubleValue( SortedTable( BufferedDataTable(org.knime.core.node.BufferedDataTable) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 7 with SortedTable

Use of SortedTable in project knime-core by knime.

the class AutoBinner method execute.

 * Determine bins.
 * @param data the input data
 * @param exec the execution context
 * @return the operation with the discretisation information
 * @throws Exception ...
public PMMLPreprocDiscretize execute(final BufferedDataTable data, final ExecutionContext exec) throws Exception {
    // Builds one array of bin edges per target column and passes the
    // column-name -> edges map to createDisretizeOp, whose result is returned.
    // Three strategies are visible: fixed number of equal-width bins, fixed
    // number of equal-count bins, and edges derived from sample quantiles.
    // NOTE(review): closing braces and the body of the first `if` are not
    // visible in this excerpt; the comments cover only the statements shown.
    // Auto configuration when target is not set
    final DataTableSpec spec = data.getDataTableSpec();
    if (null == m_settings.getTargetColumn() || m_settings.getIncludeAll()) {
    // determine intervals
    if (m_settings.getMethod().equals(Method.fixedNumber)) {
        if (m_settings.getEqualityMethod().equals(EqualityMethod.width)) {
            // The domain bounds are read from inData's spec below, so they are
            // (re)computed first when needed, in a sub-context created with 0.9.
            BufferedDataTable inData = calcDomainBoundsIfNeccessary(data, exec.createSubExecutionContext(0.9), Arrays.asList(m_settings.getTargetColumn()));
            Map<String, double[]> edgesMap = new HashMap<String, double[]>();
            for (String target : m_settings.getTargetColumn()) {
                DataTableSpec inSpec = inData.getDataTableSpec();
                DataColumnSpec targetCol = inSpec.getColumnSpec(target);
                // bounds of the domain
                // The casts assume both bounds are DoubleValue cells; a null bound
                // would fail here -- TODO confirm bounds are always set at this point.
                double min = ((DoubleValue) targetCol.getDomain().getLowerBound()).getDoubleValue();
                double max = ((DoubleValue) targetCol.getDomain().getUpperBound()).getDoubleValue();
                // the edges of the bins
                // binCount bins need binCount + 1 edges: the two outer edges are
                // pinned to min and max, the inner ones are spaced evenly between.
                double[] edges = new double[m_settings.getBinCount() + 1];
                edges[0] = min;
                edges[edges.length - 1] = max;
                for (int i = 1; i < edges.length - 1; i++) {
                    edges[i] = min + i / (double) m_settings.getBinCount() * (max - min);
                // Optional post-processing of the edges via toIntegerBounds.
                if (m_settings.getIntegerBounds()) {
                    edges = toIntegerBounds(edges);
                edgesMap.put(target, edges);
            return createDisretizeOp(edgesMap);
        } else {
            // EqualityMethod.equalCount
            Map<String, double[]> edgesMap = new HashMap<String, double[]>();
            for (String target : m_settings.getTargetColumn()) {
                int colIndex = data.getDataTableSpec().findColumnIndex(target);
                // Collects every non-missing value of the column as a double;
                // the table is iterated once per target column.
                List<Double> values = new ArrayList<Double>();
                for (DataRow row : data) {
                    if (!row.getCell(colIndex).isMissing()) {
                        values.add(((DoubleValue) row.getCell(colIndex)).getDoubleValue());
                // The edges come from findEdgesForEqualCount; toIntegerBounds is
                // not applied in the visible code of this branch.
                edgesMap.put(target, findEdgesForEqualCount(values, m_settings.getBinCount()));
            return createDisretizeOp(edgesMap);
    } else if (m_settings.getMethod().equals(Method.sampleQuantiles)) {
        // LinkedHashMap keeps the entries in the order the targets are processed.
        Map<String, double[]> edgesMap = new LinkedHashMap<String, double[]>();
        final int colCount = m_settings.getTargetColumn().length;
        // contains all numeric columns if include all is set!
        for (String target : m_settings.getTargetColumn()) {
            exec.setMessage("Calculating quantiles (column \"" + target + "\")");
            // Two sub-contexts per column, created with 0.7 / colCount (used for
            // rearranging and sorting) and 0.3 / colCount (used for the edges).
            ExecutionContext colSortContext = exec.createSubExecutionContext(0.7 / colCount);
            ExecutionContext colCalcContext = exec.createSubExecutionContext(0.3 / colCount);
            // NOTE(review): no column selection on singleRearranger is visible
            // here, although the variable names suggest a single-column table.
            ColumnRearranger singleRearranger = new ColumnRearranger(spec);
            BufferedDataTable singleColSorted = colSortContext.createColumnRearrangeTable(data, singleRearranger, colSortContext);
            // Sorted by the target column only; the single `true` flag is
            // presumably "ascending" -- confirm against SortedTable.
            SortedTable sorted = new SortedTable(singleColSorted, Collections.singletonList(target), new boolean[] { true }, colSortContext);
            double[] edges = createEdgesFromQuantiles(sorted.getBufferedDataTable(), colCalcContext, m_settings.getSampleQuantiles());
            // Optional post-processing of the edges via toIntegerBounds.
            if (m_settings.getIntegerBounds()) {
                edges = toIntegerBounds(edges);
            edgesMap.put(target, edges);
        return createDisretizeOp(edgesMap);
    } else {
        // Neither fixedNumber nor sampleQuantiles was configured.
        throw new IllegalStateException("Unknown binning method.");
Also used : DataTableSpec( HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) DataRow( DataColumnSpec( ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger( DoubleValue( SortedTable( BufferedDataTable(org.knime.core.node.BufferedDataTable) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 8 with SortedTable

Use of SortedTable in project knime-core by knime.

the class ColumnToGridNodeModel method execute.

 * {@inheritDoc}
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Lays the cells of the included columns of consecutive input rows out
    // side by side: each output row holds up to gridCount input rows. When a
    // group column is configured the input is sorted by it first and its
    // value is stored in the last cell of each output row.
    // NOTE(review): this excerpt has lost closing braces and parts of some
    // statements (e.g. `DataRow row =;`); the comments cover only what is shown.
    String[] includes = m_configuration.getIncludes();
    String groupColumn = m_configuration.getGroupColumn();
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (groupColumn != null) {
        exec.setMessage("Sorting input table");
        BufferedDataTable in = inData[0];
        ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        ColumnRearranger sortFilterRearranger = new ColumnRearranger(in.getDataTableSpec());
        // relevantCols = all included columns followed by the group column.
        // NOTE(review): the array is not used in the visible code; a filter
        // call on sortFilterRearranger was presumably dropped from the excerpt.
        String[] relevantCols = new String[includes.length + 1];
        System.arraycopy(includes, 0, relevantCols, 0, includes.length);
        relevantCols[relevantCols.length - 1] = groupColumn;
        BufferedDataTable toBeSortedTable = exec.createColumnRearrangeTable(in, sortFilterRearranger, exec.createSubProgress(0.0));
        // Sorted by the group column only; the single `true` flag is
        // presumably "ascending" -- confirm against SortedTable.
        SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        // No grouping: the input is used as is and progress goes to exec directly.
        inputTable = inData[0];
        mainExec = exec;
    exec.setMessage("Assembling output");
    DataTableSpec spec = inputTable.getDataTableSpec();
    DataTableSpec outSpec = createOutputSpec(spec);
    BufferedDataContainer cont = exec.createDataContainer(outSpec);
    // Resolve the included column names to indices in the (possibly sorted) table.
    int[] includeIndices = new int[includes.length];
    for (int i = 0; i < includes.length; i++) {
        int index = spec.findColumnIndex(includes[i]);
        includeIndices[i] = index;
    int gridCount = m_configuration.getColCount();
    final int cellCount;
    final int groupColIndex;
    // One cell per included column per grid slot, plus one trailing cell for
    // the group value when grouping is enabled.
    if (groupColumn != null) {
        cellCount = includeIndices.length * gridCount + 1;
        groupColIndex = spec.findColumnIndex(groupColumn);
    } else {
        cellCount = includeIndices.length * gridCount;
        groupColIndex = -1;
    // The same array instance is refilled for every output row.
    final DataCell[] cells = new DataCell[cellCount];
    PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    long currentRow = 0;
    long totalRows = inputTable.size();
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        // Slots that receive no input row stay missing.
        Arrays.fill(cells, DataType.getMissingCell());
        // assign group column (if enabled)
        if (groupColIndex >= 0) {
            // NOTE(review): the right-hand side of this assignment is missing
            // in the excerpt.
            DataRow row =;
            curGroupValue = row.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
        for (int grid = 0; grid < gridCount; grid++) {
            if (!it.hasNext()) {
            // NOTE(review): the right-hand side of this assignment is missing
            // in the excerpt.
            DataRow inRow =;
            DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            // Without grouping both values are null; whether areEqual treats
            // two nulls as equal is not visible here -- TODO confirm.
            if (ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
                currentRow += 1;
                // Slot-major layout: the cells of grid slot `grid` occupy
                // indices [grid * includeCount, (grid + 1) * includeCount).
                for (int i = 0; i < includeIndices.length; i++) {
                    cells[grid * includeIndices.length + i] = inRow.getCell(includeIndices[i]);
            } else {
                // start new group, i.e. new row
        // Output rows are keyed by a running counter.
        RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    return new BufferedDataTable[] { cont.getTable() };
Also used : DataTableSpec( BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey( SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow( ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger( SortedTable( BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell( ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(

Example 9 with SortedTable

Use of SortedTable in project knime-core by knime.

the class ROCCalculator method calculateCurveData.

 * Calculates the ROC curve.
 * @param table the table with the data
 * @param exec the execution context to use for reporting progress
 * @throws CanceledExecutionException when the user cancels the execution
public void calculateCurveData(final BufferedDataTable table, final ExecutionContext exec) throws CanceledExecutionException {
    // For every column name in m_curves: sorts the table by that column,
    // builds the x/y point arrays, normalises them, computes the area under
    // the curve and stores both the curve and an output row holding the area.
    // Results are written to m_outCurves and m_outTable.
    // NOTE(review): this excerpt has lost closing braces and several
    // statements (counter increments, loop control); the comments cover only
    // the statements shown.
    m_warningMessage = null;
    List<ROCCurve> curves = new ArrayList<ROCCurve>();
    int classIndex = table.getDataTableSpec().findColumnIndex(m_classCol);
    int curvesSize = m_curves.size();
    int size = table.getRowCount();
    if (size == 0) {
        m_warningMessage = "Input table contains no rows";
    BufferedDataContainer outCont = exec.createDataContainer(OUT_SPEC);
    for (int i = 0; i < curvesSize; i++) {
        String c = m_curves.get(i);
        // Each curve gets a sub-context created with an equal share, 1 / curvesSize.
        ExecutionContext subExec = exec.createSubExecutionContext(1.0 / curvesSize);
        // Sorted by the score column `c`; the single `false` flag is
        // presumably "descending" -- confirm against SortedTable.
        SortedTable sortedTable = new SortedTable(table, Collections.singletonList(c), new boolean[] { false }, subExec);
        int tp = 0, fp = 0;
        // these contain the coordinates for the plot
        // Sized size + 1; trimmed to k + 1 entries after the row loop.
        double[] xValues = new double[size + 1];
        double[] yValues = new double[size + 1];
        int k = 0;
        final int scoreColIndex = sortedTable.getDataTableSpec().findColumnIndex(c);
        DataCell lastScore = null;
        for (DataRow row : sortedTable) {
            DataCell realClass = row.getCell(classIndex);
            // Rows with a missing class or missing score are special-cased; the
            // warning is set only when missing values are not ignored.
            if (realClass.isMissing() || row.getCell(scoreColIndex).isMissing()) {
                if (m_ignoreMissingValues) {
                } else {
                    m_warningMessage = "Table contains missing values.";
            // The class is compared via its string form against m_posClass.
            // NOTE(review): the bodies of both branches are missing in the
            // excerpt; presumably they update tp and fp.
            if (realClass.toString().equals(m_posClass)) {
            } else {
            // around ... the following lines circumvent this.
            // Tracks the last seen score so that a change in score is detected.
            if (!row.getCell(scoreColIndex).equals(lastScore)) {
                lastScore = row.getCell(scoreColIndex);
            // Raw counts are stored here and scaled after the loop.
            xValues[k] = fp;
            yValues[k] = tp;
        xValues = Arrays.copyOf(xValues, k + 1);
        yValues = Arrays.copyOf(yValues, k + 1);
        // Scale the counts by the final totals.
        // NOTE(review): if fp or tp is 0 here, the floating-point division
        // yields NaN or Infinity instead of throwing.
        for (int j = 0; j <= k; j++) {
            xValues[j] /= fp;
            yValues[j] /= tp;
        // The last point is forced to (1, 1) regardless of the division result.
        xValues[xValues.length - 1] = 1;
        yValues[yValues.length - 1] = 1;
        double area = 0;
        // Sums the trapezoid under each segment whose x strictly increases.
        for (k = 1; k < xValues.length; k++) {
            if (xValues[k - 1] < xValues[k]) {
                // magical math: the rectangle + the triangle under
                // the segment xValues[k] to xValues[k - 1]
                area += 0.5 * (xValues[k] - xValues[k - 1]) * (yValues[k] + yValues[k - 1]);
        curves.add(new ROCCurve(c, xValues, yValues, area, m_maxPoints));
        // One output row per curve: row key is the column name, single cell is the area.
        outCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
    m_outCurves = curves;
    m_outTable = outCont.getTable();
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey( DoubleCell( ArrayList(java.util.ArrayList) DataRow( ExecutionContext(org.knime.core.node.ExecutionContext) SortedTable( DataCell( DefaultRow(

Example 10 with SortedTable

Use of SortedTable in project knime-core by knime.

the class TargetShufflingNodeModel method execute.

 * {@inheritDoc}
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Builds a helper table with an appended column of random longs, sorts it,
    // and then replaces the configured column of the input table using a cell
    // factory that holds an iterator over the sorted helper table.
    // NOTE(review): this excerpt has lost closing braces and the bodies of
    // some blocks; the comments cover only the statements shown.
    final int colIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.columnName());
    final String colName = inData[0].getDataTableSpec().getColumnSpec(colIndex).getName();
    // create a new column rearranger from the input table
    ColumnRearranger colRe = new ColumnRearranger(inData[0].getDataTableSpec());
    for (DataColumnSpec c : inData[0].getDataTableSpec()) {
        if (!c.getName().equals(colName)) {
            // remove all columns except the selected one
            // NOTE(review): the removal call itself is not visible in the excerpt.
    // append a new column with a random number for each cell
    // The name is made unique against the input spec, starting from "random_col".
    String uniqueColumnName = DataTableSpec.getUniqueColumnName(inData[0].getDataTableSpec(), "random_col");
    colRe.append(new SingleCellFactory(new DataColumnSpecCreator(uniqueColumnName, LongCell.TYPE).createSpec()) {

        // Ignores the row content; every call draws a fresh long from m_random.
        public DataCell getCell(final DataRow row) {
            return new LongCell(m_random.nextLong());
    BufferedDataTable toSort = exec.createColumnRearrangeTable(exec.createBufferedDataTable(inData[0], exec), colRe, exec.createSilentSubProgress(.2));
    // sort the random numbers ---> shuffles the sorted column
    // NOTE(review): nothing is added to `include` in the visible code, while
    // the flag array has one entry; a line adding the random column's name
    // was presumably dropped from the excerpt.
    List<String> include = new ArrayList<String>();
    SortedTable sort = new SortedTable(toSort, include, new boolean[] { true }, exec.createSubExecutionContext(.6));
    final BufferedDataTable sorted = sort.getBufferedDataTable();
    // replace the selected column with the shuffled one
    final DataColumnSpec colSpec = inData[0].getDataTableSpec().getColumnSpec(colIndex);
    ColumnRearranger crea = new ColumnRearranger(inData[0].getDataTableSpec());
    crea.replace(new SingleCellFactory(colSpec) {

        // Iterator over the sorted helper table, created with the factory.
        private final CloseableRowIterator m_iterator = sorted.iterator();

        // NOTE(review): the method body is missing in the excerpt; presumably
        // it returns a cell taken from the next row of m_iterator.
        public DataCell getCell(final DataRow row) {
    }, colName);
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(inData[0], crea, exec.createSubProgress(0.2)) };
Also used : DataColumnSpecCreator( ArrayList(java.util.ArrayList) CloseableRowIterator( DataRow( ColumnRearranger( DataColumnSpec( LongCell( SortedTable( BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell( SingleCellFactory(


SortedTable ( DataRow ( BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataTableSpec ( ArrayList (java.util.ArrayList)11 DataCell ( ExecutionContext (org.knime.core.node.ExecutionContext)10 DefaultRow ( DataColumnSpec ( DoubleValue ( RowKey ( LinkedHashMap (java.util.LinkedHashMap)7 ColumnRearranger ( DataContainer ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)5 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5 Map (java.util.Map)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 DataColumnSpecCreator (