Search in sources :

Example 1 with TrueRowFilter

Use of org.knime.base.node.preproc.filter.row.rowfilter.TrueRowFilter in the project knime-core by knime.

In the class Sampler, method createRandomNumberRowFilter.

/**
 * Creates a row filter that randomly samples {@code count} rows from a table
 * with {@code allCount} rows overall.
 *
 * <p>Depending on the sizes, one of three filters is returned:
 * <ul>
 * <li>{@code allCount <= count}: a {@code TrueRowFilter}, since there is
 * nothing to sample.</li>
 * <li>{@code allCount <= Integer.MAX_VALUE}: a {@code RandomNumberRowFilter}
 * backed by a {@link BitSet} in which {@code count} randomly drawn indices
 * out of {@code 0 .. allCount - 1} are set.</li>
 * <li>otherwise: an anonymous filter that decides row by row (Fan's
 * selection rejection algorithm) without materializing all indices. This
 * filter does not support saving, loading or configuring.</li>
 * </ul>
 *
 * @param count number of rows to sample
 * @param allCount overall number of rows in the table
 * @param rand source of randomness; if {@code null}, a new {@link Random} is created
 * @return the filter performing the sampling
 */
private static final IRowFilter createRandomNumberRowFilter(final long count, final long allCount, final Random rand) {
    Random random = rand != null ? rand : new Random();
    if (allCount <= count) {
        // requested at least as many rows as exist
        return new TrueRowFilter();
    }
    if (allCount <= Integer.MAX_VALUE) {
        // NOTE(review): an int[] of length close to Integer.MAX_VALUE may exceed
        // the VM's maximum array size - confirm whether such inputs can occur.
        BitSet bitset = new BitSet((int) allCount);
        // hm, I'm sure there is a better way to draw arbitrary bits
        int[] vals = new int[(int) allCount];
        for (int i = 0; i < vals.length; i++) {
            vals[i] = i;
        }
        // Shuffle the index array by swapping each position, from the last one
        // down, with a random position at or before it. The full shuffle is
        // kept as is so that a seeded Random keeps selecting the same rows.
        for (int i = vals.length; --i >= 0; ) {
            int swapIndex = random.nextInt(i + 1);
            int swap = vals[swapIndex];
            vals[swapIndex] = vals[i];
            vals[i] = swap;
        }
        // the first 'count' shuffled indices form the sample
        for (int i = 0; i < count; i++) {
            bitset.set(vals[i]);
        }
        return new RandomNumberRowFilter(bitset);
    } else {
        // Sampling based on Fan's selection rejection algorithm (1962)
        return new AbstractRowFilter() {

            /** Number of rows accepted so far. */
            private long m_i;

            @Override
            public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
                // rows still needed divided by rows still to come
                // NOTE(review): the "+ 1" is only correct if rowIndex is 1-based;
                // verify against the caller's row numbering.
                double p = (count - m_i) / (double) (allCount - rowIndex + 1);
                // nextDouble() lies in [0.0, 1.0): the strict comparison accepts with
                // probability exactly p, never accepts once p <= 0 (sample complete)
                // and always accepts when p >= 1.
                if (random.nextDouble() < p) {
                    m_i++;
                    return true;
                }
                return false;
            }

            @Override
            protected void saveSettings(final NodeSettingsWO cfg) {
                throw new UnsupportedOperationException();
            }

            @Override
            public void loadSettingsFrom(final NodeSettingsRO cfg) throws InvalidSettingsException {
                throw new UnsupportedOperationException();
            }

            @Override
            public DataTableSpec configure(final DataTableSpec inSpec) throws InvalidSettingsException {
                throw new UnsupportedOperationException();
            }
        };
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) BitSet(java.util.BitSet) DataRow(org.knime.core.data.DataRow) Random(java.util.Random) AbstractRowFilter(org.knime.base.node.preproc.filter.row.rowfilter.AbstractRowFilter) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) TrueRowFilter(org.knime.base.node.preproc.filter.row.rowfilter.TrueRowFilter)

Aggregations

BitSet (java.util.BitSet)1 Random (java.util.Random)1 AbstractRowFilter (org.knime.base.node.preproc.filter.row.rowfilter.AbstractRowFilter)1 TrueRowFilter (org.knime.base.node.preproc.filter.row.rowfilter.TrueRowFilter)1 DataRow (org.knime.core.data.DataRow)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 NodeSettingsRO (org.knime.core.node.NodeSettingsRO)1 NodeSettingsWO (org.knime.core.node.NodeSettingsWO)1