Search in sources :

Example 11 with DataContainer

use of DataContainer in project knime-core by knime.

the class ConditionalBoxPlotNodeModel method execute.

 * {@inheritDoc}
// Groups the values of the numeric column by nominal class, computes box-plot statistics for each
// class column and returns the table built by createOutputTable.
// NOTE(review): this listing contains no closing braces and some statements seem to be missing
// (e.g. whatever followed the missing-value checks); the comments describe only what is visible.
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Start from empty result maps on every execution.
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // Held as a double so that the progress ratio below is a floating-point division.
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    // nominal class name -> (numeric value -> keys of the rows carrying that value)
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    // Registers empty outlier maps for every value listed in the nominal column's domain.
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    // First pass: sort every row into the map of its nominal class.
    for (DataRow r : inData[0]) {
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // missing cell in nominal values is unwanted?
        // The class key is the cell's string form after replaceSpaces.
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        // Lazily create the per-class map and the per-value row-key set.
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        // NOTE(review): no statement adding the row key to `set` is visible here — confirm against the original.
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    // Missing values were requested, but no class key equals the missing cell's string form.
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    DataColumnSpec[] colSpecs = createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    int dataSetNr = 0;
    // for (String d : keys) {
    // Second pass: one statistics entry per class column; the column name doubles as the class key.
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        // calculateStatistic receives the two outlier maps as arguments; presumably it fills them — verify.
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // Re-create the column spec with a domain spanning the computed minimum and maximum.
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        // NOTE(review): no increment of dataSetNr is visible in this listing — confirm against the original.
    // An empty container over the class columns backs the data array kept in m_dataArray.
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    // NOTE(review): no cont.close() is visible before getTable() — confirm against the original.
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    if (ignoreMissingValues) {
        // Insert a spec named after the missing cell into colSpecs, positioned by name comparison.
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // Copy the specs whose names compare lower than the missing-value spec's name.
        while (missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
        temp[i++] = missing;
        // Copy the remaining specs, shifted by one position.
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
        colSpecs = temp;
    /* Save inSpec of the numeric column to provide the view a way to
         * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
Also used : DataTableSpec( HashSet(java.util.HashSet) Set(java.util.Set) DataColumnSpecCreator( RowKey( DoubleCell( DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow( LinkedHashMap(java.util.LinkedHashMap) DataContainer( BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataColumnSpec( BufferedDataTable(org.knime.core.node.BufferedDataTable) StringValue( DataColumnDomainCreator( DoubleValue( DataCell( HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 12 with DataContainer

use of DataContainer in project knime-core by knime.

the class LiftCalculator method calculateLiftTables.

 * Calculates the tables necessary for displaying a lift chart.
 * @param table the data table
 * @param exec the execution context to report progress to
 * @return warning messages or null
 * @throws CanceledExecutionException when the user cancels the execution
// Builds two tables from the sorted input and stores them in m_lift and m_response:
// one with columns "Lift"/"Baseline"/"Cumulative Lift" and one with columns "Actual"/"Baseline".
// Returns the warning text, or null when no warning was set.
// NOTE(review): this listing contains no closing braces and several loop statements seem to be missing;
// the comments describe only what is visible.
public String calculateLiftTables(final BufferedDataTable table, final ExecutionContext exec) throws CanceledExecutionException {
    int predColIndex = table.getDataTableSpec().findColumnIndex(m_responseColumn);
    String warning = null;
    // NOTE(review): nothing is added to inclList in this listing before it is passed to SortedTable.
    List<String> inclList = new LinkedList<String>();
    int probColInd = table.getDataTableSpec().findColumnIndex(m_probabilityColumn);
    // presumably `false` requests descending order — verify against the SortedTable constructor.
    boolean[] order = new boolean[] { false };
    m_sorted = new SortedTable(table, inclList, order, exec);
    long totalResponses = 0;
    // m_intervalWidth is treated as a percentage: the number of parts is ceil(100 / width).
    double partWidth = m_intervalWidth;
    int nrParts = (int) Math.ceil(100.0 / partWidth);
    List<Integer> positiveResponses = new LinkedList<Integer>();
    int rowIndex = 0;
    for (DataRow row : m_sorted) {
        if (row.getCell(predColIndex).isMissing() || row.getCell(probColInd).isMissing()) {
            if (row.getCell(predColIndex).isMissing()) {
                // miss. values in class column we always ignore
            if (m_ignoreMissingValues) {
            } else {
                warning = "Table contains missing values.";
        // The response label is compared after trimming and without regard to case.
        String response = ((StringValue) row.getCell(predColIndex)).getStringValue().trim();
        if (response.equalsIgnoreCase(m_responseLabel)) {
        // NOTE(review): no update of rowIndex, totalResponses or positiveResponses is visible in this loop.
    // One counter slot per part; partWidthAbsolute is the part width measured in rows.
    int[] counter = new int[nrParts];
    int partWidthAbsolute = (int) Math.ceil(rowIndex / (double) nrParts);
    double avgResponse = (double) positiveResponses.size() / rowIndex;
    for (int rIndex : positiveResponses) {
        // Index of the part that the positive row falls into.
        int index = rIndex / partWidthAbsolute;
    // Spec and container of the lift table.
    DataColumnSpec[] colSpec = new DataColumnSpec[3];
    colSpec[0] = new DataColumnSpecCreator("Lift", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    colSpec[2] = new DataColumnSpecCreator("Cumulative Lift", DoubleCell.TYPE).createSpec();
    DataTableSpec tableSpec = new DataTableSpec(colSpec);
    // new DataContainer(tableSpec);
    DataContainer cont = exec.createDataContainer(tableSpec);
    // Spec and container of the response table.
    colSpec = new DataColumnSpec[2];
    colSpec[0] = new DataColumnSpecCreator("Actual", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    tableSpec = new DataTableSpec(colSpec);
    // new DataContainer(tableSpec);
    DataContainer responseCont = exec.createDataContainer(tableSpec);
    long cumulativeCounter = 0;
    // The response table starts with a row keyed "0" holding 0.0 in both columns.
    responseCont.addRowToTable(new DefaultRow(new RowKey("0"), 0.0, 0.0));
    for (int i = 0; i < counter.length; i++) {
        cumulativeCounter += counter[i];
        double responseRate = (double) counter[i] / partWidthAbsolute;
        double lift = responseRate / avgResponse;
        double cumResponseRate = (double) cumulativeCounter / totalResponses;
        // Number of rows covered up to and including part i, capped at rowIndex below.
        long number = partWidthAbsolute * (i + 1);
        // well.. rounding problems
        if (number > rowIndex) {
            number = rowIndex;
        double cumulativeLift = // (double)cumulativeCounter / (partWidthAbsolute * (i + 1));
        (double) cumulativeCounter / number;
        cumulativeLift /= avgResponse;
        // cumulativeLift = lifts / (i+1);
        // The row key is the upper percentage bound of part i, capped at 100.
        double rowKey = ((i + 1) * partWidth);
        if (rowKey > 100) {
            rowKey = 100;
        // The "Baseline" cell of the lift table is the constant 1.0.
        cont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), lift, 1.0, cumulativeLift));
        double cumBaseline = (i + 1) * partWidth;
        if (cumBaseline > 100) {
            cumBaseline = 100;
        responseCont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), cumResponseRate * 100, cumBaseline));
    // NOTE(review): no close() on either container is visible before getTable() — confirm against the original.
    m_lift = (BufferedDataTable) cont.getTable();
    m_response = (BufferedDataTable) responseCont.getTable();
    return warning;
Also used : DataTableSpec( DataColumnSpecCreator( RowKey( DataRow( LinkedList(java.util.LinkedList) DataContainer( DataColumnSpec( SortedTable( DefaultRow(

Example 13 with DataContainer

use of DataContainer in project knime-core by knime.

the class BoxplotCalculator method calculateMultiple.

 * Calculates the necessary statistics for a non-conditional boxplot.
 * @param table the input data
 * @param numCol array of names of numeric columns to plot
 * @param exec Execution context to report progress to
 * @return LinkedHashMap with the column name as key and statistics as value
 * @throws CanceledExecutionException when the user cancels the execution
// Computes, for each requested numeric column, min/max, quartiles, median, whiskers and outlier sets,
// and returns them keyed by column name in the order of numCol.
// Also assigns the fields m_excludedDataCols and m_numMissValPerCol.
// NOTE(review): this listing contains no closing braces and some statements seem to be missing
// (e.g. the bodies of the empty-table check and of the missing-cell branch);
// the comments describe only what is visible.
public LinkedHashMap<String, BoxplotStatistics> calculateMultiple(final BufferedDataTable table, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException {
    DataTableSpec spec = table.getSpec();
    // Resolve each column name to its index in the table spec.
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    // One container per column; each holds a single double column named "col".
    LinkedHashMap<String, DataContainer> containers = new LinkedHashMap<String, DataContainer>();
    for (int i = 0; i < numCol.length; i++) {
        containers.put(numCol[i], exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
    ExecutionContext subExec = exec.createSilentSubExecutionContext(0.7);
    long[] numMissValPerCol = new long[numCol.length];
    int count = 0;
    // First pass: copy every non-missing cell into its column's container, keeping the row key.
    for (DataRow row : table) {
        subExec.setProgress((double) count++ / table.size());
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            if (!cell.isMissing()) {
                containers.get(numCol[i]).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                // NOTE(review): the else body is not visible; presumably it counts into numMissValPerCol — verify.
    LinkedHashMap<String, BoxplotStatistics> statsMap = new LinkedHashMap<>();
    ExecutionContext subExec2 = exec.createSilentSubExecutionContext(1.0);
    count = 0;
    List<String> excludedDataColList = new ArrayList<String>();
    // Second pass: compute the statistics column by column.
    for (Entry<String, DataContainer> entry : containers.entrySet()) {
        subExec2.setProgress((double) count++ / containers.size());
        Set<Outlier> extremeOutliers = new HashSet<Outlier>();
        Set<Outlier> mildOutliers = new HashSet<Outlier>();
        // NOTE(review): no close() on the container is visible before getTable() — confirm against the original.
        BufferedDataTable catTable = (BufferedDataTable) entry.getValue().getTable();
        if (catTable.size() == 0) {
        // Sort the column's rows by their double value using the comparator below.
        SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

            public int compare(final DataRow o1, final DataRow o2) {
                DataCell c1 = o1.getCell(0);
                DataCell c2 = o2.getCell(0);
                double d1 = ((DoubleValue) c1).getDoubleValue();
                double d2 = ((DoubleValue) c2).getDoubleValue();
                if (d1 == d2) {
                    return 0;
                } else {
                    return d1 < d2 ? -1 : 1;
        }, false, exec);
        double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
        // dq1/dq3/dMedian are true when the index computation divides evenly; the loop below then
        // averages the value at the index with its predecessor, otherwise it takes the value at the index.
        boolean dq1 = catTable.size() % 4 == 0;
        long q1Idx = catTable.size() / 4;
        boolean dq3 = 3 * catTable.size() % 4 == 0;
        long q3Idx = 3 * catTable.size() / 4;
        boolean dMedian = catTable.size() % 2 == 0;
        long medianIdx = catTable.size() / 2;
        int counter = 0;
        // Walk the sorted rows and pick out min, max, q1, median and q3 by position.
        // NOTE(review): no increment of counter is visible in this listing — confirm against the original.
        for (DataRow row : st) {
            double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
            if (counter == 0) {
                min = val;
            if (counter == catTable.size() - 1) {
                max = val;
            // Remember the predecessor's value so that it can be averaged at q1Idx.
            if (counter == q1Idx - 1 && dq1) {
                q1 = val;
            // For tables of at most three rows, the first row is also used for q1.
            if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                if (dq1) {
                    q1 = (q1 + val) / 2.0;
                } else {
                    q1 = val;
            if (counter == medianIdx - 1 && dMedian) {
                median = val;
            if (counter == medianIdx) {
                if (dMedian) {
                    median = (median + val) / 2;
                } else {
                    median = val;
            if (counter == q3Idx - 1 && dq3) {
                q3 = val;
            // For tables of at most three rows, the last row is also used for q3.
            if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                if (dq3) {
                    q3 = (q3 + val) / 2.0;
                } else {
                    q3 = val;
        double iqr = q3 - q1;
        // Whiskers start at min/max and are adjusted in the loop below.
        double lowerWhisker = min;
        double upperWhisker = max;
        // Whisker fences lie 1.5 * IQR beyond the quartiles; the outer fences lie 3 * IQR beyond them.
        double upperWhiskerFence = q3 + (1.5 * iqr);
        double lowerWhiskerFence = q1 - (1.5 * iqr);
        double lowerFence = q1 - (3 * iqr);
        double upperFence = q3 + (3 * iqr);
        // Classify each value: beyond an outer fence -> extreme outlier, between a whisker fence and
        // an outer fence -> mild outlier, otherwise it may move a whisker.
        for (DataRow row : st) {
            double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
            String rowKey = row.getKey().getString();
            if (value < lowerFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value < lowerWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
            } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                // The whisker is still below the fence: move it to this value.
                lowerWhisker = value;
            } else if (value <= upperWhiskerFence) {
                // Overwritten by each value that reaches this branch.
                upperWhisker = value;
            } else if (value > upperFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value > upperWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
        statsMap.put(entry.getKey(), new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
    // missing values part
    m_excludedDataCols = excludedDataColList.toArray(new String[excludedDataColList.size()]);
    // Record the missing-value count of each column that has one and is not in the excluded list.
    m_numMissValPerCol = new LinkedHashMap<String, Long>();
    for (int i = 0; i < numCol.length; i++) {
        if (numMissValPerCol[i] > 0 && !excludedDataColList.contains(numCol[i])) {
            m_numMissValPerCol.put(numCol[i], numMissValPerCol[i]);
    return statsMap;
Also used : DataTableSpec( ArrayList(java.util.ArrayList) DataRow( LinkedHashMap(java.util.LinkedHashMap) DataContainer( BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue( SortedTable( DataCell( DefaultRow(

Example 14 with DataContainer

use of DataContainer in project knime-core by knime.

the class MissingValueHandlerNodeModel method execute.

 * {@inheritDoc}
// Wraps the input table in a MissingCellReplacingDataTable, iterates over it while reporting progress,
// and returns the container's table together with a PMML port object.
// NOTE(review): this listing contains no closing braces and some statements seem to be missing
// (e.g. adding rows to the container and handling the warnings);
// the comments describe only what is visible.
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    MissingCellReplacingDataTable mvTable = new MissingCellReplacingDataTable(inSpec, m_settings);
    // Calculate the statistics
    exec.setMessage("Calculating statistics");
    // Initialisation runs on a sub-context created with 0.5.
    mvTable.init(inTable, exec.createSubExecutionContext(0.5));
    long rowCounter = 0;
    final long numOfRows = inTable.size();
    DataContainer container = exec.createDataContainer(mvTable.getDataTableSpec());
    // Row processing reports to a sub-context created with 0.4.
    ExecutionContext tableSubExec = exec.createSubExecutionContext(0.4);
    exec.setMessage("Replacing missing values");
    for (DataRow row : mvTable) {
        // The progress message includes the row key only when the row is non-null.
        if (row != null) {
            tableSubExec.setProgress(++rowCounter / (double) numOfRows, "Processed row " + rowCounter + "/" + numOfRows + " (\"" + row.getKey() + "\")");
        } else {
            tableSubExec.setProgress(++rowCounter / (double) numOfRows, "Processed row " + rowCounter + "/" + numOfRows);
    // Collect warning messages
    String warnings = mvTable.finish();
    // Handle the warnings
    if (warnings.length() > 0) {
    exec.setMessage("Generating PMML");
    // Init PMML output port
    PMMLPortObject pmmlPort = new PMMLPortObject(new PMMLPortObjectSpecCreator(inSpec).createSpec());
    // NOTE(review): no container.close() is visible before getTable() — confirm against the original.
    return new PortObject[] { (BufferedDataTable) container.getTable(), pmmlPort };
Also used : DataTableSpec( DataContainer( MissingCellReplacingDataTable(org.knime.base.node.preproc.pmml.missingval.MissingCellReplacingDataTable) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataRow( PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 15 with DataContainer

use of DataContainer in project knime-core by knime.

the class MappingTableInterpolationStatistic method init.

 * {@inheritDoc}
// Initialises the fields m_index, m_nextCells and m_previous from the given table spec.
// The amountOfColumns parameter is not used in the visible statements.
// NOTE(review): the closing brace of this method is missing from this listing.
protected void init(final DataTableSpec spec, final int amountOfColumns) {
    // Index of the column named m_columnName within the given spec.
    m_index = spec.findColumnIndex(m_columnName);
    // A new container with a single column "value" that has the same type as the column at m_index.
    m_nextCells = new DataContainer(new DataTableSpec(new DataColumnSpecCreator("value", spec.getColumnSpec(m_index).getType()).createSpec()));
    // The previous cell starts out as the missing cell.
    m_previous = DataType.getMissingCell();
Also used : DataContainer( DataTableSpec( DataColumnSpecCreator(


DataContainer ( DataTableSpec ( DefaultRow ( DataRow ( DataCell ( BufferedDataTable (org.knime.core.node.BufferedDataTable)15 RowKey ( ArrayList (java.util.ArrayList)9 DoubleCell ( IntCell ( LinkedHashMap (java.util.LinkedHashMap)7 DataColumnSpecCreator ( HashSet (java.util.HashSet)6 DataColumnSpec ( RowIterator ( StringCell ( Map (java.util.Map)5 Set (java.util.Set)5 SortedTable ( DataTable (