Search in sources :

Example 11 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class BoxplotCalculator method calculateMultipleConditional.

/**
 * Calculates statistics for a conditional box plot.
 *
 * <p>For every numeric column in {@code numCol} and every category of {@code catCol} the rows are
 * collected into a separate single-column container, sorted, and reduced to a
 * {@code BoxplotStatistics} (minimum, maximum, quartiles, median, whiskers, mild and extreme
 * outliers). The categories are the domain values of the category column, ordered by their string
 * representation, followed by an additional class for rows whose category cell is missing
 * ({@code MISSING_VALUES_CLASS}).
 *
 * <p>Side effects: {@code m_ignoredMissVals} is replaced by a map (numeric column -> category ->
 * number of rows whose numeric cell was missing; entries with a count of zero and entries of
 * categories without any value are removed again), and {@code excludedClasses} is replaced by a
 * map (numeric column -> names of categories that ended up without any non-missing value).
 *
 * @param table the data table
 * @param catCol the column with the category values
 * @param numCol the numeric columns
 * @param exec an execution context
 * @return a linked hash map from numeric column name to a linked hash map from category name to
 *         its BoxplotStatistics; categories without any non-missing value have no entry
 * @throws CanceledExecutionException when the user cancels the execution
 * @throws InvalidSettingsException when the category column has no domain values
 */
public LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> calculateMultipleConditional(final BufferedDataTable table, final String catCol, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    DataTableSpec spec = table.getSpec();
    int catColIdx = spec.findColumnIndex(catCol);
    // resolve the table index of every numeric column once, up front
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    }
    Set<DataCell> valuesSet = spec.getColumnSpec(catColIdx).getDomain().getValues();
    if (valuesSet == null) {
        throw new InvalidSettingsException("Selected category column has no domain values");
    }
    // order the categories by their string representation
    ArrayList<DataCell> vals = new ArrayList<>(valuesSet);
    Collections.sort(vals, new Comparator<DataCell>() {

        @Override
        public int compare(final DataCell o1, final DataCell o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    // add Missing values class as it is never in specification
    vals.add(new MissingCell(null));
    // we need to have clear names, otherwise Missing values class will be taken as "?"
    // NOTE(review): catNames is filled here but never read in the visible code; the same naming
    // rule is re-applied inline below.
    ArrayList<String> catNames = new ArrayList<>(vals.size());
    for (DataCell cell : vals) {
        catNames.add(cell.isMissing() ? MISSING_VALUES_CLASS : cell.toString());
    }
    // one single-column container per (numeric column, category) pair, plus a counter for the
    // rows that are ignored because their numeric cell is missing
    LinkedHashMap<String, LinkedHashMap<String, DataContainer>> containers = new LinkedHashMap<>();
    m_ignoredMissVals = new LinkedHashMap<>();
    for (int i = 0; i < numCol.length; i++) {
        LinkedHashMap<String, DataContainer> map = new LinkedHashMap<>();
        LinkedHashMap<String, Long> missValMap = new LinkedHashMap<>();
        for (DataCell c : vals) {
            String name = c.isMissing() ? MISSING_VALUES_CLASS : c.toString();
            map.put(name, exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
            missValMap.put(name, 0L);
        }
        containers.put(numCol[i], map);
        m_ignoredMissVals.put(numCol[i], missValMap);
    }
    // first pass over the input: distribute the rows to the containers
    ExecutionContext subExec = exec.createSubExecutionContext(0.7);
    // long[][] ignoredMissVals = new long[numCol.length][vals.size()];  // count missing values per data col per class
    long count = 0;
    final long numOfRows = table.size();
    for (DataRow row : table) {
        exec.checkCanceled();
        subExec.setProgress(count++ / (double) numOfRows);
        DataCell catCell = row.getCell(catColIdx);
        String catName = catCell.isMissing() ? MISSING_VALUES_CLASS : catCell.toString();
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            // NOTE(review): both lookups by catName below return null if the row carries a
            // category that is not among the domain values, which would end in a
            // NullPointerException - confirm that the domain is always complete here.
            if (!cell.isMissing()) {
                containers.get(numCol[i]).get(catName).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                // increment missing values
                LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(numCol[i]);
                missValMap.replace(catName, missValMap.get(catName) + 1);
            }
        }
    }
    // second pass: turn every filled container into box plot statistics
    LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> statsMap = new LinkedHashMap<>();
    excludedClasses = new LinkedHashMap<>();
    // NOTE(review): colList is not used anywhere in the visible code
    List<String> colList = Arrays.asList(numCol);
    ExecutionContext subExec2 = exec.createSubExecutionContext(1.0);
    int count2 = 0;
    for (Entry<String, LinkedHashMap<String, DataContainer>> entry : containers.entrySet()) {
        exec.checkCanceled();
        subExec2.setProgress(count2++ / (double) containers.size());
        LinkedHashMap<String, DataContainer> containers2 = entry.getValue();
        LinkedHashMap<String, BoxplotStatistics> colStats = new LinkedHashMap<String, BoxplotStatistics>();
        String colName = entry.getKey();
        List<String> excludedColClassesList = new ArrayList<>();
        // NOTE(review): ignoredColMissVals is not used anywhere in the visible code
        LinkedHashMap<String, Long> ignoredColMissVals = new LinkedHashMap<>();
        for (Entry<String, DataContainer> entry2 : containers2.entrySet()) {
            Set<Outlier> extremeOutliers = new HashSet<Outlier>();
            Set<Outlier> mildOutliers = new HashSet<Outlier>();
            // the container has to be closed before its table is requested
            entry2.getValue().close();
            String catName = entry2.getKey();
            BufferedDataTable catTable = (BufferedDataTable) entry2.getValue().getTable();
            LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(colName);
            if (catTable.size() == 0) {
                // no value at all for this category: no statistics are produced for it
                if (!(catName.equals(MISSING_VALUES_CLASS) && missValMap.get(catName) == 0)) {
                    // we should add missing values to this list, only if they were there
                    excludedColClassesList.add(catName);
                }
                missValMap.remove(catName);
                continue;
            } else {
                // keep only the counters of categories that actually ignored some rows
                if (missValMap.get(catName) == 0) {
                    missValMap.remove(catName);
                }
            }
            // order the values of this category numerically; the loops below treat the first
            // row of the sorted table as the minimum and the last row as the maximum
            SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

                @Override
                public int compare(final DataRow o1, final DataRow o2) {
                    double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                    double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                    if (d1 == d2) {
                        return 0;
                    } else {
                        return d1 < d2 ? -1 : 1;
                    }
                }
            }, false, exec);
            double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
            // a d* flag is set when the respective position falls exactly between two rows; the
            // statistic is then the mean of the rows at index - 1 and index
            boolean dq1 = catTable.size() % 4 == 0;
            long q1Idx = catTable.size() / 4;
            boolean dq3 = 3 * catTable.size() % 4 == 0;
            long q3Idx = 3 * catTable.size() / 4;
            boolean dMedian = catTable.size() % 2 == 0;
            long medianIdx = catTable.size() / 2;
            int counter = 0;
            // single scan over the sorted values picking up min, max, quartiles and median
            for (DataRow row : st) {
                double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
                if (counter == 0) {
                    min = val;
                }
                if (counter == catTable.size() - 1) {
                    max = val;
                }
                // remember the row just before the q1 position when two rows are averaged
                if (counter == q1Idx - 1 && dq1) {
                    q1 = val;
                }
                if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                    if (dq1) {
                        q1 = (q1 + val) / 2.0;
                    } else {
                        q1 = val;
                    }
                }
                // same scheme for the median ...
                if (counter == medianIdx - 1 && dMedian) {
                    median = val;
                }
                if (counter == medianIdx) {
                    if (dMedian) {
                        median = (median + val) / 2;
                    } else {
                        median = val;
                    }
                }
                // ... and for the third quartile
                if (counter == q3Idx - 1 && dq3) {
                    q3 = val;
                }
                if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                    if (dq3) {
                        q3 = (q3 + val) / 2.0;
                    } else {
                        q3 = val;
                    }
                }
                counter++;
            }
            double iqr = q3 - q1;
            double lowerWhisker = min;
            double upperWhisker = max;
            // values beyond 1.5 * IQR from the quartiles are mild outliers,
            // values beyond 3 * IQR are extreme outliers
            double upperWhiskerFence = q3 + (1.5 * iqr);
            double lowerWhiskerFence = q1 - (1.5 * iqr);
            double lowerFence = q1 - (3 * iqr);
            double upperFence = q3 + (3 * iqr);
            // second scan: classify the outliers and pull the whiskers in to the outermost
            // values that still lie within the whisker fences
            for (DataRow row : st) {
                double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
                String rowKey = row.getKey().getString();
                if (value < lowerFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value < lowerWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                    // the minimum is an outlier: the first value inside the fence becomes the whisker
                    lowerWhisker = value;
                } else if (value <= upperWhiskerFence) {
                    // overwritten on every row inside the fence, so the last such row wins
                    upperWhisker = value;
                } else if (value > upperFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value > upperWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                }
            }
            colStats.put(catName, new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
        }
        statsMap.put(colName, colStats);
        // missing values part
        String[] excludedColClasses = excludedColClassesList.toArray(new String[excludedColClassesList.size()]);
        excludedClasses.put(colName, excludedColClasses);
    }
    return statsMap;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) MissingCell(org.knime.core.data.MissingCell) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 12 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class DecisionTreeNode method getWinnerAndClasscounts.

/**
 * Classifies a pattern given as a row of values and reports both the predicted class and the
 * class distribution of the node the prediction is based on.
 *
 * <ul>
 * <li>No winner node could be determined: a missing cell carrying an error text together with
 * an empty map.</li>
 * <li>The winner node is a split node: the class with the largest strictly positive weight of
 * that node (first one wins on ties, {@code null} if there is none) together with the node's
 * class weights.</li>
 * <li>The winner node is a leaf: its majority class together with its class counts.</li>
 * </ul>
 *
 * @param row input pattern
 * @param spec the corresponding table spec
 * @return pair of the predicted class and the class counts belonging to it
 * @throws Exception if something went wrong (unknown attribute for example)
 */
public final Pair<DataCell, LinkedHashMap<DataCell, Double>> getWinnerAndClasscounts(final DataRow row, final DataTableSpec spec) throws Exception {
    final DecisionTreeNode node = getWinnerNode(row, spec);

    if (node == null) {
        // no node to base a prediction on: answer with a missing cell and no counts
        final LinkedHashMap<DataCell, Double> noCounts = new LinkedHashMap<DataCell, Double>();
        final DataCell error = new MissingCell("Error in decision tree prediction");
        return new Pair<DataCell, LinkedHashMap<DataCell, Double>>(error, noCounts);
    }

    if (!(node instanceof DecisionTreeNodeSplit)) {
        // a leaf was reached: it already knows its majority class and counts
        final DecisionTreeNodeLeaf leafNode = (DecisionTreeNodeLeaf) node;
        final DataCell majority = leafNode.getMajorityClass();
        final LinkedHashMap<DataCell, Double> leafCounts = leafNode.getClassCounts();
        return new Pair<DataCell, LinkedHashMap<DataCell, Double>>(majority, leafCounts);
    }

    // the descent stopped at an inner node (eg due to a missing value):
    // pick the class with the largest weight recorded at that node
    final LinkedHashMap<DataCell, Double> weights = ((DecisionTreeNodeSplit) node).getNodeClassWeights();
    DataCell best = null;
    double bestWeight = 0;
    for (DataCell candidate : weights.keySet()) {
        final Double weight = weights.get(candidate);
        if (weight == null || !(weight > bestWeight)) {
            continue;
        }
        bestWeight = weight;
        best = candidate;
    }
    return new Pair<DataCell, LinkedHashMap<DataCell, Double>>(best, weights);
}
Also used : MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) Pair(org.knime.core.util.Pair)

Example 13 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class DBRowIteratorImpl method next.

/**
 * {@inheritDoc}
 *
 * <p>Creates one cell per column of {@code m_spec}. The reader used for a column is selected by
 * the KNIME type of the column first and by the SQL type reported by the result set meta data
 * second. A column whose value cannot be read is reported through {@code handlerException} and
 * represented by a missing cell carrying the exception message. The row key is created from the
 * row number of the result set if that is available and requested, and from the internal row
 * counter otherwise.
 */
@Override
public DataRow next() {
    final DataCell[] rowCells = new DataCell[m_spec.getNumColumns()];
    for (int col = 0; col < rowCells.length; col++) {
        final DataType colType = m_spec.getColumnSpec(col).getType();
        // stays Types.NULL in the error message if the meta data lookup itself fails
        int sqlType = Types.NULL;
        try {
            // JDBC column indices are 1-based
            sqlType = m_result.getMetaData().getColumnType(col + 1);
            final DataCell value;
            if (colType.isCompatible(BooleanValue.class)) {
                // BIT, BOOLEAN and every other SQL type are read as boolean
                value = readBoolean(col);
            } else if (colType.isCompatible(IntValue.class)) {
                // all types that can be interpreted as integer
                if (sqlType == Types.TINYINT) {
                    value = readByte(col);
                } else if (sqlType == Types.SMALLINT) {
                    value = readShort(col);
                } else {
                    // INTEGER and everything else
                    value = readInt(col);
                }
            } else if (colType.isCompatible(LongValue.class)) {
                // BIGINT and every other SQL type are read as long
                value = readLong(col);
            } else if (colType.isCompatible(DoubleValue.class)) {
                // all types that can be interpreted as double
                if (sqlType == Types.REAL) {
                    value = readFloat(col);
                } else {
                    value = readDouble(col);
                }
            } else if (colType.isCompatible(DateAndTimeValue.class)) {
                if (sqlType == Types.DATE) {
                    value = readDate(col);
                } else if (sqlType == Types.TIME) {
                    value = readTime(col);
                } else if (sqlType == Types.TIMESTAMP) {
                    value = readTimestamp(col);
                } else {
                    value = readString(col);
                }
            } else if (colType.isCompatible(BinaryObjectDataValue.class)) {
                switch (sqlType) {
                    case Types.BLOB: {
                        DataCell blob;
                        try {
                            blob = readBlob(col);
                        } catch (SQLException ex) {
                            // probably not supported (e.g. SQLite), therefore try another method
                            blob = readBytesAsBLOB(col);
                        }
                        value = blob;
                        break;
                    }
                    case Types.LONGVARCHAR:
                    case Types.LONGNVARCHAR:
                        value = readAsciiStream(col);
                        break;
                    case Types.BINARY:
                    case Types.LONGVARBINARY:
                    case Types.VARBINARY:
                        value = readBinaryStream(col);
                        break;
                    default:
                        value = readString(col);
                        break;
                }
            } else {
                switch (sqlType) {
                    case Types.CHAR:
                    case Types.VARCHAR:
                    case Types.LONGVARCHAR:
                        value = readString(col);
                        break;
                    case Types.NCHAR:
                    case Types.NVARCHAR:
                    case Types.LONGNVARCHAR:
                        value = readNString(col);
                        break;
                    case Types.CLOB:
                        value = readClob(col);
                        break;
                    case Types.NCLOB:
                        value = readNClob(col);
                        break;
                    case Types.ARRAY:
                        value = readArray(col);
                        break;
                    case Types.VARBINARY:
                        value = readBytesAsString(col);
                        break;
                    case Types.REF:
                        value = readRef(col);
                        break;
                    case Types.DATALINK:
                        value = readURL(col);
                        break;
                    default:
                        // STRUCT, JAVA_OBJECT and everything not listed above
                        value = readObject(col);
                        break;
                }
            }
            // finally set the new cell into the array of cells
            rowCells[col] = value;
        } catch (SQLException sqle) {
            handlerException("SQL Exception reading Object of type \"" + sqlType + "\": ", sqle);
            rowCells[col] = new MissingCell(sqle.getMessage());
        } catch (IOException ioe) {
            handlerException("I/O Exception reading Object of type \"" + sqlType + "\": ", ioe);
            rowCells[col] = new MissingCell(ioe.getMessage());
        }
    }

    // the row counter is the default id; the database row number replaces it only if it is
    // available and wanted
    long rowId = m_rowCounter;
    try {
        final long dbRow = m_result.getRow();
        // Bug 2729: ResultSet#getRow return 0 if there is no row id
        if (dbRow > 0 && m_useDbRowId) {
            // first row in SQL always is 1, KNIME starts with 0
            rowId = m_rowIdsStartWithZero ? dbRow - 1 : dbRow;
        }
    } catch (SQLException sqle) {
        // ignored: use m_rowCounter
    }
    m_rowCounter++;
    return new DefaultRow(RowKey.createRowKey(rowId), rowCells);
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) SQLException(java.sql.SQLException) MissingCell(org.knime.core.data.MissingCell) BinaryObjectDataValue(org.knime.core.data.blob.BinaryObjectDataValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) IOException(java.io.IOException) DefaultRow(org.knime.core.data.def.DefaultRow) IntValue(org.knime.core.data.IntValue)

Example 14 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class DoubleMovingAverageMissingCellHandler method getCell.

/**
 * {@inheritDoc}
 *
 * <p>Returns the mean of all non-missing cells of the window, from {@code m_lookbehindSize}
 * positions before the current one up to and including {@code m_lookaheadSize} positions after
 * it. If the window holds no such cell, a missing cell with an explanatory message is returned.
 */
@Override
public DataCell getCell(final RowKey key, final DataColumnWindow window) {
    double total = 0.0;
    int usable = 0;
    int offset = -m_lookbehindSize.getIntValue();
    while (offset < m_lookaheadSize.getIntValue() + 1) {
        final DataCell neighbour = window.getNthCell(offset);
        offset++;
        // positions without a cell and missing cells do not contribute to the average
        if (neighbour == null || neighbour.isMissing()) {
            continue;
        }
        total += ((DoubleValue) neighbour).getDoubleValue();
        usable++;
    }
    if (usable == 0) {
        return new MissingCell("No cells for average calculation available");
    }
    return new DoubleCell(total / usable);
}
Also used : MissingCell(org.knime.core.data.MissingCell) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell)

Example 15 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class TestDataGenerator method createNumericAttributeColumnData.

/**
 * Builds the column data of a numeric attribute from plain double values.
 *
 * <p>The column gets a double typed spec with the given name. Every value is added under a row
 * key created from its array index; {@code NaN} entries are added as missing cells. Finally the
 * given attribute index is stored in the meta data of the created column.
 *
 * @param values the attribute values, {@code NaN} standing for a missing value
 * @param name the name of the column
 * @param attributeIndex the attribute index to store in the column meta data
 * @return the created column data
 */
public TreeOrdinaryNumericColumnData createNumericAttributeColumnData(final double[] values, final String name, final int attributeIndex) {
    final DataColumnSpec columnSpec = new DataColumnSpecCreator(name, DoubleCell.TYPE).createSpec();
    final TreeOrdinaryNumericColumnDataCreator creator = new TreeOrdinaryNumericColumnDataCreator(columnSpec);
    final int rowCount = values.length;
    for (int rowIdx = 0; rowIdx < rowCount; rowIdx++) {
        final double value = values[rowIdx];
        final RowKey rowKey = RowKey.createRowKey((long) rowIdx);
        if (!Double.isNaN(value)) {
            creator.add(rowKey, new DoubleCell(value));
        } else {
            // NaN marks a missing value in the raw input
            creator.add(rowKey, new MissingCell(null));
        }
    }
    final TreeOrdinaryNumericColumnData columnData = creator.createColumnData(0, m_config);
    columnData.getMetaData().setAttributeIndex(attributeIndex);
    return columnData;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) MissingCell(org.knime.core.data.MissingCell) DoubleCell(org.knime.core.data.def.DoubleCell)

Aggregations

MissingCell (org.knime.core.data.MissingCell) 18, DataCell (org.knime.core.data.DataCell) 13, DataColumnSpec (org.knime.core.data.DataColumnSpec) 6, DoubleCell (org.knime.core.data.def.DoubleCell) 6, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator) 5, DefaultRow (org.knime.core.data.def.DefaultRow) 5, StringCell (org.knime.core.data.def.StringCell) 5, ArrayList (java.util.ArrayList) 4, DataRow (org.knime.core.data.DataRow) 4, RowKey (org.knime.core.data.RowKey) 4, IntCell (org.knime.core.data.def.IntCell) 4, LinkedHashMap (java.util.LinkedHashMap) 3, DataType (org.knime.core.data.DataType) 3, DoubleValue (org.knime.core.data.DoubleValue) 3, InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 3, IOException (java.io.IOException) 2, Test (org.junit.Test) 2, BinaryObjectDataCell (org.knime.core.data.blob.BinaryObjectDataCell) 2, ListCell (org.knime.core.data.collection.ListCell) 2, ColumnRearranger (org.knime.core.data.container.ColumnRearranger) 2