Search in sources :

Example 1 with Itemset

use of org.dmg.pmml.association.Itemset in project jpmml-sparkml by jpmml.

the class FPGrowthModelConverter method ensureItemset.

/**
 * Returns the {@link Itemset} registered under {@code values}, creating and registering it on first use.
 *
 * <p>A newly created itemset gets the id {@code itemsets.size() + 1}. Every value that has no entry in
 * {@code items} yet is registered as a new {@link Item} with the id {@code items.size() + 1} and the
 * field name of {@code feature}. The item references of a new itemset are ordered by numeric item id.
 *
 * @param feature the feature whose name is set as the field of newly created items
 * @param values the item values that make up the itemset; also used as the lookup key in {@code itemsets}
 * @param itemsets registry of itemsets created so far, keyed by their values; updated in place
 * @param items registry of items created so far, keyed by value; updated in place
 * @return the existing or newly created itemset for {@code values}
 */
private static Itemset ensureItemset(Feature feature, List<String> values, Map<List<String>, Itemset> itemsets, Map<String, Item> items) {
    Itemset existing = itemsets.get(values);
    if (existing != null) {
        return existing;
    }

    Itemset created = new Itemset(String.valueOf(itemsets.size() + 1));
    for (String value : values) {
        Item item = items.get(value);
        if (item == null) {
            // Allocate the next sequential id before the new item is put into the registry.
            String itemId = String.valueOf(items.size() + 1);
            item = new Item(itemId, value).setField(feature.getName());
            items.put(value, item);
        }
        created.addItemRefs(new ItemRef(item.getId()));
    }

    // Item ids are decimal strings, so they are compared as integers rather than lexicographically.
    List<ItemRef> refs = created.getItemRefs();
    if (refs.size() >= 2) {
        Collections.sort(refs, new Comparator<ItemRef>() {

            @Override
            public int compare(ItemRef a, ItemRef b) {
                return Integer.compare(Integer.parseInt(a.getItemRef()), Integer.parseInt(b.getItemRef()));
            }
        });
    }

    itemsets.put(values, created);
    return created;
}
Also used : Item(org.dmg.pmml.association.Item) Itemset(org.dmg.pmml.association.Itemset) ItemRef(org.dmg.pmml.association.ItemRef) Comparator(java.util.Comparator)

Example 2 with Itemset

use of org.dmg.pmml.association.Itemset in project jpmml-sparkml by jpmml.

the class FPGrowthModelConverter method encodeModel.

/**
 * Builds an {@link AssociationModel} from the association rules of the wrapped {@link FPGrowthModel}.
 *
 * <p>The schema must contain exactly one feature. Each collected rule row is read positionally:
 * column 0 as the antecedent values, column 1 as the consequent values and column 2 as the confidence.
 * Items and itemsets are registered in encounter order through {@code ensureItemset}.
 *
 * <p>Lift, support and the number of transactions are written as {@code 0} placeholders.
 *
 * @param schema the schema that supplies the single feature for the item field
 * @return the populated association model
 */
@Override
public AssociationModel encodeModel(Schema schema) {
    FPGrowthModel fpGrowthModel = getTransformer();

    List<? extends Feature> features = schema.getFeatures();
    SchemaUtil.checkSize(1, features);
    Feature itemFeature = features.get(0);

    // Insertion-ordered registries, so that items and itemsets are emitted in the order they were first seen.
    Map<String, Item> itemRegistry = new LinkedHashMap<>();
    Map<List<String>, Itemset> itemsetRegistry = new LinkedHashMap<>();
    List<AssociationRule> rules = new ArrayList<>();

    List<Row> rows = (fpGrowthModel.associationRules()).collectAsList();
    for (Row row : rows) {
        List<String> antecedent = JavaConversions.seqAsJavaList((Seq) row.apply(0));
        List<String> consequent = JavaConversions.seqAsJavaList((Seq) row.apply(1));
        Double confidence = (Double) row.apply(2);

        // Placeholder values; not read from the rule row.
        Double lift = 0d;
        Double support = 0d;

        // The antecedent is registered first, so it receives its ids before the consequent does.
        Itemset antecedentItemset = ensureItemset(itemFeature, antecedent, itemsetRegistry, itemRegistry);
        Itemset consequentItemset = ensureItemset(itemFeature, consequent, itemsetRegistry, itemRegistry);

        AssociationRule rule = new AssociationRule()
            .setAntecedent(antecedentItemset.getId())
            .setConsequent(consequentItemset.getId())
            .setConfidence(confidence)
            .setLift(lift)
            .setSupport(support);
        rules.add(rule);
    }

    // Placeholder value; the transaction count is not computed here.
    int numberOfTransactions = 0;
    int numberOfItems = itemRegistry.size();
    int numberOfItemsets = itemsetRegistry.size();
    int numberOfRules = rules.size();

    MiningField transactionMiningField = ModelUtil.createMiningField(FieldName.create("transaction"), MiningField.UsageType.GROUP);
    MiningSchema miningSchema = new MiningSchema().addMiningFields(transactionMiningField);

    AssociationModel result = new AssociationModel(MiningFunction.ASSOCIATION_RULES, numberOfTransactions, fpGrowthModel.getMinSupport(), fpGrowthModel.getMinConfidence(), numberOfItems, numberOfItemsets, numberOfRules, miningSchema);
    result.getItems().addAll(itemRegistry.values());
    result.getItemsets().addAll(itemsetRegistry.values());
    result.getAssociationRules().addAll(rules);
    return result;
}
Also used : MiningField(org.dmg.pmml.MiningField) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ItemSetFeature(org.jpmml.sparkml.ItemSetFeature) LinkedHashMap(java.util.LinkedHashMap) Item(org.dmg.pmml.association.Item) AssociationRule(org.dmg.pmml.association.AssociationRule) Itemset(org.dmg.pmml.association.Itemset) MiningSchema(org.dmg.pmml.MiningSchema) FPGrowthModel(org.apache.spark.ml.fpm.FPGrowthModel) ArrayList(java.util.ArrayList) List(java.util.List) Row(org.apache.spark.sql.Row) AssociationModel(org.dmg.pmml.association.AssociationModel)

Aggregations

Item (org.dmg.pmml.association.Item)2 Itemset (org.dmg.pmml.association.Itemset)2 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 FPGrowthModel (org.apache.spark.ml.fpm.FPGrowthModel)1 Row (org.apache.spark.sql.Row)1 MiningField (org.dmg.pmml.MiningField)1 MiningSchema (org.dmg.pmml.MiningSchema)1 AssociationModel (org.dmg.pmml.association.AssociationModel)1 AssociationRule (org.dmg.pmml.association.AssociationRule)1 ItemRef (org.dmg.pmml.association.ItemRef)1 Feature (org.jpmml.converter.Feature)1 ItemSetFeature (org.jpmml.sparkml.ItemSetFeature)1