Search in sources :

Example 1 with StatisticsTable

use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.

The class Normalizer, method doDecimalScaling.

/**
 * Performs a decimal scaling normalization. For every selected column the
 * scale factor is <code>1 / 10^k</code>, where <code>k</code> is the number
 * of times the column's largest absolute value (taken from the minimum and
 * maximum statistics) has to be divided by ten until it is no longer
 * greater than 1. No offset is applied.
 *
 * @param exec an object to check for user cancellations. Can be
 *            <code>null</code>.
 * @throws CanceledExecutionException if user canceled
 * @return the normalized DataTable
 */
public AffineTransTable doDecimalScaling(final ExecutionMonitor exec) throws CanceledExecutionException {
    // Reuse the statistics when the input table already carries them.
    final StatisticsTable stats = (m_table instanceof StatisticsTable)
        ? (StatisticsTable) m_table
        : new StatisticsTable(m_table, exec);
    checkForMissVals(stats);
    final String[] columnNames = getNames();
    final double[] columnMax = stats.getdoubleMax();
    final double[] columnMin = stats.getdoubleMin();
    final int columnCount = m_colindices.length;
    final double[] scales = new double[columnCount];
    final double[] transforms = new double[columnCount];
    final double[] mins = new double[columnCount];
    final double[] maxs = new double[columnCount];
    for (int col = 0; col < columnCount; col++) {
        // m_colindices maps the position in the output arrays to the
        // column position inside the statistics arrays.
        final int tableIndex = m_colindices[col];
        final double upper = Math.abs(columnMax[tableIndex]);
        final double lower = Math.abs(columnMin[tableIndex]);
        double magnitude;
        if (upper > lower) {
            magnitude = upper;
        } else {
            magnitude = lower;
        }
        // Count the decimal shifts needed to bring the magnitude down to 1.
        // NOTE(review): an infinite magnitude never drops to 1 under repeated
        // division, so this loop would not terminate for such a column -
        // confirm that infinite values cannot reach this point.
        int shifts = 0;
        while (magnitude > 1) {
            magnitude /= 10;
            shifts++;
        }
        scales[col] = 1.0 / Math.pow(10, shifts);
        transforms[col] = 0.0;
        mins[col] = -1.0;
        maxs[col] = 1.0;
    }
    final String summary = "Decimal Scaling normalization on " + columnNames.length + " column(s)";
    return new AffineTransTable(m_table,
        new AffineTransConfiguration(columnNames, scales, transforms, mins, maxs, summary));
}
Also used : StatisticsTable(org.knime.base.data.statistics.StatisticsTable)

Example 2 with StatisticsTable

use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.

The class MissingValueHandlingTable, method createMissingValueHandlingTable.

// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies the missing value handling described by the column settings to
 * the given table, copies the result row by row into a new buffered table
 * and reports progress while doing so.
 *
 * <p>If any column uses the most-frequent, max, min or mean method and the
 * input is not already a {@link StatisticsTable}, the statistics are
 * computed first; in that case the progress is split evenly between
 * computing the statistics and writing the rows.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        // Fall back to "no handling" for every column of the table.
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        final DataTableSpec spec = table.getDataTableSpec();
        final int columnCount = spec.getNumColumns();
        effectiveSettings = new ColSetting[columnCount];
        for (int col = 0; col < columnCount; col++) {
            final ColSetting untouched = new ColSetting(spec.getColumnSpec(col));
            untouched.setMethod(ColSetting.METHOD_NO_HANDLING);
            effectiveSettings[col] = untouched;
        }
    }

    // First pass: find out whether statistics are needed at all and how
    // many columns ask for the most frequent value.
    boolean statisticsRequired = false;
    int mostFrequentTotal = 0;
    for (final ColSetting setting : effectiveSettings) {
        switch (setting.getMethod()) {
            case ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentTotal++;
                // intentional fall-through: this method needs statistics, too
            case ColSetting.METHOD_MAX:
            case ColSetting.METHOD_MIN:
            case ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
        }
    }

    // Second pass: remember the positions of the most-frequent columns.
    final int[] mostFrequentCols = new int[mostFrequentTotal];
    if (mostFrequentTotal > 0) {
        int slot = 0;
        for (int col = 0; col < effectiveSettings.length; col++) {
            switch (effectiveSettings[col].getMethod()) {
                case ColSetting.METHOD_MOST_FREQUENT:
                    mostFrequentCols[slot++] = col;
                    break;
                default:
            }
        }
    }

    final DataTable source;
    final ExecutionMonitor rowProgress;
    if (!statisticsRequired || table instanceof StatisticsTable) {
        source = table;
        rowProgress = exec;
    } else {
        // first half of the progress: creating the statistics table
        final ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        final MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        // second half of the progress: iterating the rows
        rowProgress = exec.createSubProgress(0.5);
        source = statistics;
    }

    final MissingValueHandlingTable handledTable = new MissingValueHandlingTable(source, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(handledTable.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    try {
        final MissingValueHandlingTableIterator rows = new MissingValueHandlingTableIterator(handledTable, rowProgress);
        for (int rowNumber = 1; rows.hasNext(); rowNumber++) {
            final DataRow row = rows.next();
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandlingTableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancellation; rethrow its cause
        throw rcee.getCause();
    } finally {
        // the container is closed on success and on failure
        container.close();
    }
    return container.getTable();
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 3 with StatisticsTable

use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.

The class LowVarFilterNodeModel, method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final StatisticsTable statistics = new StatisticsTable(input, exec);
    final DataTableSpec spec = input.getDataTableSpec();
    final int columnCount = spec.getNumColumns();
    final double threshold = m_varianceThreshold;
    final HashSet<String> candidateNames = new HashSet<String>(Arrays.asList(m_includedColumns));
    final ArrayList<String> keptColumns = new ArrayList<String>();
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec columnSpec = spec.getColumnSpec(col);
        final String columnName = columnSpec.getName();
        // Only selected, double-compatible columns are subject to the
        // variance check; every other column is always kept.
        final boolean eligible = candidateNames.contains(columnName)
            && columnSpec.getType().isCompatible(DoubleValue.class);
        if (!eligible || statistics.getVariance(col) > threshold) {
            keptColumns.add(columnName);
        }
    }
    final int removedCount = columnCount - keptColumns.size();
    LOGGER.info("Filtered out " + removedCount + " column(s)");
    if (removedCount == 0) {
        setWarningMessage("No columns were filtered out.");
    }
    final ColumnRearranger rearranger = new ColumnRearranger(spec);
    rearranger.keepOnly(keptColumns.toArray(new String[keptColumns.size()]));
    final BufferedDataTable filtered = exec.createColumnRearrangeTable(input, rearranger, exec);
    return new BufferedDataTable[] { filtered };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) ArrayList(java.util.ArrayList) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 4 with StatisticsTable

use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.

The class StatisticsNodeModel, method execute.

/**
 * Computes the statistics (minimum, maximum, mean, standard deviation and
 * variance) for the DataTable at the inport and stores them together with
 * the column names in this model. Use the view on this node to see them.
 * The returned array is empty.
 *
 * @see org.knime.core.node.NodeModel#execute(BufferedDataTable[], ExecutionContext)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final StatisticsTable statistics = new StatisticsTable(input, exec);
    m_min = statistics.getdoubleMin();
    m_max = statistics.getdoubleMax();
    m_mean = statistics.getMean();
    m_stddev = statistics.getStandardDeviation();
    m_variance = statistics.getVariance();

    // Collect the column names in the iteration order of the table spec.
    final DataTableSpec spec = input.getDataTableSpec();
    final String[] names = new String[spec.getNumColumns()];
    int next = 0;
    for (final DataColumnSpec column : spec) {
        names[next++] = column.getName();
    }
    m_columnNames = names;
    return new BufferedDataTable[0];
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 5 with StatisticsTable

use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.

The class MissingValueHandling2Table, method createMissingValueHandlingTable.

// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies the missing value handling described by the column settings to
 * the given table, copies the result row by row into a new buffered table
 * and reports progress while doing so.
 *
 * <p>If any column uses the most-frequent, max, min or mean method and the
 * input is not already a {@link StatisticsTable}, the statistics are
 * computed first; in that case the progress is split evenly between
 * computing the statistics and writing the rows.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        // Fall back to "no handling" for every column of the table.
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        final DataTableSpec spec = table.getDataTableSpec();
        final int columnCount = spec.getNumColumns();
        effectiveSettings = new MissingValueHandling2ColSetting[columnCount];
        for (int col = 0; col < columnCount; col++) {
            final MissingValueHandling2ColSetting untouched = new MissingValueHandling2ColSetting(spec.getColumnSpec(col));
            untouched.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
            effectiveSettings[col] = untouched;
        }
    }

    // First pass: find out whether statistics are needed at all and how
    // many columns ask for the most frequent value.
    boolean statisticsRequired = false;
    int mostFrequentTotal = 0;
    for (final MissingValueHandling2ColSetting setting : effectiveSettings) {
        switch (setting.getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentTotal++;
                // intentional fall-through: this method needs statistics, too
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
        }
    }

    // Second pass: remember the positions of the most-frequent columns.
    final int[] mostFrequentCols = new int[mostFrequentTotal];
    if (mostFrequentTotal > 0) {
        int slot = 0;
        for (int col = 0; col < effectiveSettings.length; col++) {
            switch (effectiveSettings[col].getMethod()) {
                case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                    mostFrequentCols[slot++] = col;
                    break;
                default:
            }
        }
    }

    final DataTable source;
    final ExecutionMonitor rowProgress;
    if (!statisticsRequired || table instanceof StatisticsTable) {
        source = table;
        rowProgress = exec;
    } else {
        // first half of the progress: creating the statistics table
        final ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        final MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        // second half of the progress: iterating the rows
        rowProgress = exec.createSubProgress(0.5);
        source = statistics;
    }

    final MissingValueHandling2Table handledTable = new MissingValueHandling2Table(source, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(handledTable.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    try {
        final MissingValueHandling2TableIterator rows = new MissingValueHandling2TableIterator(handledTable, rowProgress);
        for (int rowNumber = 1; rows.hasNext(); rowNumber++) {
            final DataRow row = rows.next();
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandling2TableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancellation; rethrow its cause
        throw rcee.getCause();
    } finally {
        // the container is closed on success and on failure
        container.close();
    }
    return container.getTable();
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Aggregations

StatisticsTable (org.knime.base.data.statistics.StatisticsTable): 8, DataTableSpec (org.knime.core.data.DataTableSpec): 6, BufferedDataTable (org.knime.core.node.BufferedDataTable): 5, ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 5, DataColumnSpec (org.knime.core.data.DataColumnSpec): 4, DataRow (org.knime.core.data.DataRow): 3, DataTable (org.knime.core.data.DataTable): 3, DoubleValue (org.knime.core.data.DoubleValue): 3, LinkedHashSet (java.util.LinkedHashSet): 2, DataCell (org.knime.core.data.DataCell): 2, BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 2, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 2, ArrayList (java.util.ArrayList): 1, HashSet (java.util.HashSet): 1, LinkedHashMap (java.util.LinkedHashMap): 1, Map (java.util.Map): 1, FilterColumnTable (org.knime.base.data.filter.column.FilterColumnTable): 1, Normalizer (org.knime.base.data.normalize.Normalizer): 1, HalfDoubleMatrix (org.knime.base.util.HalfDoubleMatrix): 1, DataType (org.knime.core.data.DataType): 1