Search in sources :

Example 1 with FrequentItemSet

use of org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet in project knime-core by knime.

the class SubgroupMinerModel2 method createAssociationRulesOutput.

/**
 * Writes the association rules delivered by {@code apriori} into a new table.
 *
 * <p>Every emitted row consists of six cells: support, confidence, lift, the consequent item, the
 * literal separator {@code "<---"} and a set cell holding the antecedent items. Item ids are
 * rendered through {@code nameMapping}; an id that is not smaller than the size of the mapping is
 * rendered as {@code "Item" + id}. Rules with an empty antecedent do not produce a row and do not
 * consume a row key.
 *
 * @param inputSpec spec handed to {@code createAssociationRulesSpec} to derive the output spec
 * @param exec context used to create the output container
 * @param apriori source of the association rules
 * @param nameMapping display cells indexed by item id; must not be {@code null}
 * @return the table containing one row per rule with a non-empty antecedent
 */
private BufferedDataTable createAssociationRulesOutput(final DataTableSpec inputSpec, final ExecutionContext exec, final AprioriAlgorithm apriori, final List<DataCell> nameMapping) {
    final DataTableSpec ruleSpec = createAssociationRulesSpec(inputSpec);
    final BufferedDataContainer container = exec.createDataContainer(ruleSpec);
    assert nameMapping != null;
    int nextRuleIndex = 0;
    for (AssociationRule rule : apriori.getAssociationRules(m_confidence.getDoubleValue())) {
        final DataCell[] cells = new DataCell[6];
        cells[0] = new DoubleCell(rule.getSupport());
        cells[1] = new DoubleCell(rule.getConfidence());
        cells[2] = new DoubleCell(rule.getLift());
        final FrequentItemSet antecedent = rule.getAntecedent();
        final FrequentItemSet consequent = rule.getConsequent();
        // only the first consequent item is written to the table
        cells[3] = lookupItemCell(nameMapping, consequent.getItems().get(0));
        cells[4] = new StringCell("<---");
        final List<Integer> antecedentIds = antecedent.getItems();
        final Set<DataCell> antecedentCells = new HashSet<DataCell>();
        // at most (max item set length + 5) antecedent items are taken over
        for (int pos = 0; pos < antecedentIds.size() && pos < m_maxItemSetLength.getIntValue() + 5; pos++) {
            antecedentCells.add(lookupItemCell(nameMapping, antecedentIds.get(pos)));
        }
        cells[5] = CollectionCellFactory.createSetCell(antecedentCells);
        if (antecedentIds.isEmpty()) {
            // no antecedent, no row
            continue;
        }
        container.addRowToTable(new DefaultRow("rule" + nextRuleIndex, cells));
        nextRuleIndex++;
    }
    container.close();
    return container.getTable();
}

/**
 * Returns the cell to display for an item id: the mapped cell if the id is smaller than the size
 * of the mapping, otherwise a string cell {@code "Item" + id}.
 */
private static DataCell lookupItemCell(final List<DataCell> nameMapping, final int itemId) {
    if (nameMapping.size() > itemId) {
        return nameMapping.get(itemId);
    }
    return new StringCell("Item" + itemId);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) FrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet) DataRow(org.knime.core.data.DataRow) AssociationRule(org.knime.base.node.mine.subgroupminer.freqitemset.AssociationRule) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashSet(java.util.HashSet)

Example 2 with FrequentItemSet

use of org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet in project knime-core by knime.

the class ArrayApriori method filterClosedItemsets.

/**
 * Marks every item set of {@code completeList} as closed or not closed and returns the closed ones.
 *
 * <p>The passed list is sorted in place, ascending by support and, for equal support, ascending by
 * number of items. An item set is flagged as not closed if a later entry with the same support
 * exists for which {@code isSubsetOf} returns {@code true}; otherwise it is flagged as closed.
 *
 * @param completeList the item sets to filter; reordered by this call
 * @return a new list with the closed item sets, in sorted order
 */
private List<FrequentItemSet> filterClosedItemsets(final List<FrequentItemSet> completeList) {
    Collections.sort(completeList, new Comparator<FrequentItemSet>() {

        @Override
        public int compare(final FrequentItemSet s1, final FrequentItemSet s2) {
            if (s1.getSupport() == s2.getSupport()) {
                // sizes are never negative, so the difference cannot overflow
                return s1.getItems().size() - s2.getItems().size();
            }
            return Double.compare(s1.getSupport(), s2.getSupport());
        }
    });
    FrequentItemSet[] array = new FrequentItemSet[completeList.size()];
    completeList.toArray(array);
    List<FrequentItemSet> closedList = new LinkedList<FrequentItemSet>();
    for (int outer = 0; outer < array.length; outer++) {
        FrequentItemSet underSuspicion = array[outer];
        double support = underSuspicion.getSupport();
        boolean closed = true;
        for (int inner = outer + 1; inner < array.length; inner++) {
            FrequentItemSet next = array[inner];
            if (next.getSupport() != support) {
                // the array is sorted by support, so entries with equal support are adjacent:
                // once the support differs no later entry can match any more
                break;
            }
            if (underSuspicion.isSubsetOf(next)) {
                closed = false;
                break;
            }
        }
        underSuspicion.setClosed(closed);
        if (closed) {
            closedList.add(underSuspicion);
        }
    }
    return closedList;
}
Also used : FrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet) LinkedList(java.util.LinkedList)

Example 3 with FrequentItemSet

use of org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet in project knime-core by knime.

the class ArrayApriori method getAssociationRules.

/*
 * Approach for mining association rules: start from the closed frequent
 * item sets. For each item i of a set s build s' = s without i, look up
 * the support of s', and compute the confidence as
 * support(s) / support(s'). If the confidence is large enough, create the
 * association rule s' -> i and store it.
 */
/**
 * {@inheritDoc}
 */
@Override
public List<AssociationRule> getAssociationRules(final double confidence) {
    List<FrequentItemSet> frequentItemSets = getFrequentItemSets(FrequentItemSet.Type.CLOSED);
    List<AssociationRule> associationRules = new ArrayList<AssociationRule>();
    /*
     * Handle always frequent items separately: go once through the list
     * and create an association rule for every item x, like
     * {alwaysFrequentItems\x} -> x. These rules are created with support,
     * confidence and lift all set to 1.0.
     */
    for (Integer i : m_alwaysFrequentItems) {
        List<Integer> withoutI = new ArrayList<Integer>(m_alwaysFrequentItems);
        // i is an Integer object, so this removes by value and not by position
        withoutI.remove(i);
        List<Integer> iList = new ArrayList<Integer>(1);
        iList.add(i);
        AssociationRule rule = new AssociationRule(new FrequentItemSet(Integer.toString(m_idCounter++), withoutI, 1.0), new FrequentItemSet(Integer.toString(m_idCounter++), iList, 1.0), 1.0, 1.0, 1.0);
        associationRules.add(rule);
    }
    // for each item set s in the frequent item sets
    for (FrequentItemSet s : frequentItemSets) {
        if (s.getItems().size() <= 1) {
            // a rule needs at least one antecedent item and one consequent item
            continue;
        }
        double supportS = s.getSupport();
        for (Iterator<Integer> iterator = s.iterator(); iterator.hasNext(); ) {
            Integer i = iterator.next();
            List<Integer> sWithoutI = new ArrayList<Integer>(s.getItems());
            // removes by value, see above
            sWithoutI.remove(i);
            double newSupport = getSupportFor(sWithoutI);
            double c = supportS / newSupport;
            if (c >= confidence) {
                List<Integer> iList = new ArrayList<Integer>();
                iList.add(i);
                // look the support of the single item up once; it is needed both for the
                // consequent item set and for the lift
                double supportI = getSupportFor(iList);
                // the antecedent is created first so that it receives the lower id
                FrequentItemSet antecedent = new FrequentItemSet(Integer.toString(m_idCounter++), sWithoutI, newSupport);
                FrequentItemSet consequent = new FrequentItemSet(Integer.toString(m_idCounter++), iList, supportI);
                associationRules.add(new AssociationRule(antecedent, consequent, supportS, c, c / supportI));
            }
        }
    }
    return associationRules;
}
Also used : AssociationRule(org.knime.base.node.mine.subgroupminer.freqitemset.AssociationRule) ArrayList(java.util.ArrayList) FrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet)

Example 4 with FrequentItemSet

use of org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet in project knime-core by knime.

the class TIDApriori method getAssociationRules.

/**
 * {@inheritDoc}
 *
 * @throws IllegalStateException if an item of a frequent item set is not contained in
 *             {@code m_frequentItems}
 */
@Override
public List<AssociationRule> getAssociationRules(final double confidence) {
    List<FrequentItemSet> frequentItemSets = getFrequentItemSets(FrequentItemSet.Type.CLOSED);
    List<AssociationRule> associationRules = new ArrayList<AssociationRule>();
    // handle always frequent items separately
    List<Integer> alwaysFrequentIds = new ArrayList<Integer>();
    for (TIDItem item : m_alwaysFrequentItems) {
        alwaysFrequentIds.add(item.getId());
    }
    for (TIDItem item : m_alwaysFrequentItems) {
        // create for each item an association rule with all other always
        // frequent items in the antecedent; support, confidence and lift
        // are all set to 1.0
        List<Integer> rest = new ArrayList<Integer>(alwaysFrequentIds);
        // we want to remove the element with this value, not the element at
        // this position, so the argument needs to be an object
        rest.remove(Integer.valueOf(item.getId()));
        List<Integer> itemList = new ArrayList<Integer>();
        itemList.add(item.getId());
        AssociationRule rule = new AssociationRule(new FrequentItemSet(Integer.toString(m_idCounter++), rest, 1.0), new FrequentItemSet(Integer.toString(m_idCounter++), itemList, 1.0), 1.0, 1.0, 1.0);
        associationRules.add(rule);
    }
    // for each item set
    for (FrequentItemSet s : frequentItemSets) {
        if (s.getItems().size() > 1) {
            // for each item
            for (Integer i : s.getItems()) {
                // create the set without the item (removal is by value, i is an object)
                List<Integer> sWithoutI = new ArrayList<Integer>(s.getItems());
                sWithoutI.remove(i);
                // rebuild the reduced set from the frequent items to obtain its support
                TIDItemSet itemSet = TIDItemSet.createEmptyTIDItemSet("" + m_idCounter, m_dbsize);
                for (Integer item : sWithoutI) {
                    itemSet.addItem(lookupFrequentItem(item));
                }
                double newSupport = itemSet.getSupport();
                double oldSupport = s.getSupport();
                double c = oldSupport / newSupport;
                if (c >= confidence) {
                    List<Integer> iList = new ArrayList<Integer>();
                    iList.add(i);
                    TIDItem tidItem = lookupFrequentItem(i);
                    // the antecedent is created first so that it receives the lower id
                    FrequentItemSet antecedent = new FrequentItemSet(Integer.toString(m_idCounter++), sWithoutI, newSupport);
                    // TODO: support of single item
                    FrequentItemSet consequent = new FrequentItemSet(Integer.toString(m_idCounter++), iList, tidItem.getSupport());
                    AssociationRule rule = new AssociationRule(antecedent, consequent, s.getSupport(), c, c / tidItem.getSupport() * m_dbsize);
                    associationRules.add(rule);
                }
            }
        }
    }
    return associationRules;
}

/**
 * Returns the entry of {@code m_frequentItems} that equals a {@code TIDItem} created for the given
 * id.
 *
 * @param id the item id to look up
 * @return the matching entry of {@code m_frequentItems}
 * @throws IllegalStateException if {@code m_frequentItems} contains no such entry
 */
private TIDItem lookupFrequentItem(final Integer id) {
    int index = m_frequentItems.indexOf(new TIDItem(id));
    if (index < 0) {
        // indexOf signals "not found" with -1; fail with a readable message instead of an
        // index error from get(-1)
        throw new IllegalStateException("Item " + id + " is part of a frequent item set but not contained in the frequent items");
    }
    return m_frequentItems.get(index);
}
Also used : AssociationRule(org.knime.base.node.mine.subgroupminer.freqitemset.AssociationRule) ArrayList(java.util.ArrayList) FrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet) TIDFrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.TIDFrequentItemSet)

Example 5 with FrequentItemSet

use of org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet in project knime-core by knime.

the class TIDApriori method getFrequentItemSets.

/**
 * {@inheritDoc}
 */
@Override
public List<FrequentItemSet> getFrequentItemSets(final FrequentItemSet.Type type) {
    // the ids 0 .. m_dbsize - 1; this one list is handed to every always frequent item set
    final List<Integer> allTransactionIds = new ArrayList<Integer>();
    int transactionId = 0;
    while (transactionId < m_dbsize) {
        allTransactionIds.add(transactionId);
        transactionId++;
    }
    final List<FrequentItemSet> result = new ArrayList<FrequentItemSet>();
    // each always frequent item becomes a one-element item set with support 1.0
    for (TIDItem alwaysFrequent : m_alwaysFrequentItems) {
        final List<Integer> singleId = new ArrayList<Integer>();
        singleId.add(alwaysFrequent.getId());
        result.add(new TIDFrequentItemSet(Integer.toString(m_idCounter++), singleId, 1.0, allTransactionIds));
    }
    // append the item sets of the requested type; any other type adds nothing
    if (type.equals(FrequentItemSet.Type.FREE)) {
        getFrequentItemSets(m_prefixTree, result);
        return result;
    }
    if (type.equals(FrequentItemSet.Type.CLOSED)) {
        result.addAll(getClosedItemSets());
        return result;
    }
    if (type.equals(FrequentItemSet.Type.MAXIMAL)) {
        result.addAll(getMaximalItemSets());
    }
    return result;
}
Also used : TIDFrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.TIDFrequentItemSet) ArrayList(java.util.ArrayList) FrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet) TIDFrequentItemSet(org.knime.base.node.mine.subgroupminer.freqitemset.TIDFrequentItemSet)

Aggregations

FrequentItemSet (org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSet)14 ArrayList (java.util.ArrayList)7 AssociationRule (org.knime.base.node.mine.subgroupminer.freqitemset.AssociationRule)5 TIDFrequentItemSet (org.knime.base.node.mine.subgroupminer.freqitemset.TIDFrequentItemSet)4 DataTableSpec (org.knime.core.data.DataTableSpec)4 DataCell (org.knime.core.data.DataCell)3 DataRow (org.knime.core.data.DataRow)3 DefaultRow (org.knime.core.data.def.DefaultRow)3 DoubleCell (org.knime.core.data.def.DoubleCell)3 StringCell (org.knime.core.data.def.StringCell)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3 HashSet (java.util.HashSet)2 LinkedList (java.util.LinkedList)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 FrequentItemSetRow (org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSetRow)1 FrequentItemSetTable (org.knime.base.node.mine.subgroupminer.freqitemset.FrequentItemSetTable)1 RowKey (org.knime.core.data.RowKey)1 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)1 SAXException (org.xml.sax.SAXException)1 AttributesImpl (org.xml.sax.helpers.AttributesImpl)1