Search in sources :

Example 26 with MiningField

use of org.dmg.pmml.MiningField in project drools by kiegroup.

the class KiePMMLMiningFieldInstanceFactoryTest method getKiePMMLMiningField.

/**
 * Converts a random <code>MiningField</code> (built from a random <code>DataField</code>)
 * through <code>KiePMMLMiningFieldInstanceFactory</code> and checks the result with the
 * shared <code>commonVerifyKiePMMLMiningField</code> verification.
 */
@Test
public void getKiePMMLMiningField() {
    final DataField sourceDataField = getRandomDataField();
    final MiningField sourceMiningField = getRandomMiningField(sourceDataField);
    // The conversion result is handed straight to the common verification helper.
    commonVerifyKiePMMLMiningField(
            KiePMMLMiningFieldInstanceFactory.getKiePMMLMiningField(sourceMiningField, sourceDataField),
            sourceMiningField,
            sourceDataField);
}
Also used : InstanceFactoriesTestCommon.commonVerifyKiePMMLMiningField(org.kie.pmml.compiler.commons.factories.InstanceFactoriesTestCommon.commonVerifyKiePMMLMiningField) KiePMMLMiningField(org.kie.pmml.commons.model.KiePMMLMiningField) PMMLModelTestUtils.getRandomMiningField(org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomMiningField) MiningField(org.dmg.pmml.MiningField) DataField(org.dmg.pmml.DataField) PMMLModelTestUtils.getRandomDataField(org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomDataField) InstanceFactoriesTestCommon.commonVerifyKiePMMLMiningField(org.kie.pmml.compiler.commons.factories.InstanceFactoriesTestCommon.commonVerifyKiePMMLMiningField) KiePMMLMiningField(org.kie.pmml.commons.model.KiePMMLMiningField) Test(org.junit.Test)

Example 27 with MiningField

use of org.dmg.pmml.MiningField in project drools by kiegroup.

the class KiePMMLUtil method populateMissingMiningTargetField.

/**
 * Adds an ad-hoc created target <code>MiningField</code> to the model's <code>MiningSchema</code>
 * when the schema does not already contain any target field.
 * The <code>DataField</code> backing the new target is also appended to the given
 * <code>List&lt;DataField&gt;</code>.
 * Nothing is changed if target fields already exist or if no target <code>DataField</code>
 * is available for the model.
 * This method has to be called <b>after</b> the model name has been set
 * @param model the model whose <code>MiningSchema</code> (and <code>Targets</code>) may be updated
 * @param dataFields the list that receives the target <code>DataField</code>, when one is created
 */
static void populateMissingMiningTargetField(final Model model, final List<DataField> dataFields) {
    // Nothing to do when the schema already declares at least one target field
    if (!getMiningTargetFields(model.getMiningSchema().getMiningFields()).isEmpty()) {
        return;
    }
    getTargetDataField(model).ifPresent(targetDataField -> {
        dataFields.add(targetDataField);
        final MiningField createdTarget = getTargetMiningField(targetDataField);
        model.getMiningSchema().addMiningFields(createdTarget);
        // Align the model's Targets with the newly created mining field
        correctTargetFields(createdTarget, model.getTargets());
    });
}
Also used : MiningField(org.dmg.pmml.MiningField) DataField(org.dmg.pmml.DataField)

Example 28 with MiningField

use of org.dmg.pmml.MiningField in project jpmml-sparkml by jpmml.

the class ConverterUtil method toPMML.

/**
 * Converts a Spark ML pipeline model into a PMML document.
 * <p>
 * Each transformer of the pipeline is mapped to a converter: feature converters register their
 * features with the encoder, model converters register a model with the encoder and contribute it
 * to the list of models. A single model is used as the root model directly; several models are
 * chained through <code>MiningModelUtil.createModelChain</code>, and the resulting mining model gets
 * a <code>MiningSchema</code> made of the PREDICTED and TARGET mining fields of all collected models.
 *
 * @param schema the schema used to create the <code>SparkMLEncoder</code>
 * @param pipelineModel the pipeline model whose transformers are converted
 * @return the PMML document encoded from the root model
 * @throws IllegalArgumentException if a converter is neither a <code>FeatureConverter</code> nor a
 * <code>ModelConverter</code>, or if the pipeline yields no model at all
 */
public static PMML toPMML(StructType schema, PipelineModel pipelineModel) {
    checkVersion();
    final SparkMLEncoder encoder = new SparkMLEncoder(schema);
    final List<org.dmg.pmml.Model> models = new ArrayList<>();
    for (Transformer transformer : getTransformers(pipelineModel)) {
        final TransformerConverter<?> converter = ConverterUtil.createConverter(transformer);
        if (converter instanceof FeatureConverter) {
            ((FeatureConverter<?>) converter).registerFeatures(encoder);
            continue;
        }
        if (!(converter instanceof ModelConverter)) {
            throw new IllegalArgumentException("Expected a " + FeatureConverter.class.getName() + " or " + ModelConverter.class.getName() + " instance, got " + converter);
        }
        models.add(((ModelConverter<?>) converter).registerModel(encoder));
    }
    if (models.isEmpty()) {
        throw new IllegalArgumentException("Expected a pipeline with one or more models, got a pipeline with zero models");
    }
    final org.dmg.pmml.Model rootModel;
    if (models.size() == 1) {
        rootModel = Iterables.getOnlyElement(models);
    } else {
        // Several models: gather the target-like mining fields of every model for the chain's schema
        final List<MiningField> chainTargetFields = new ArrayList<>();
        for (org.dmg.pmml.Model model : models) {
            for (MiningField miningField : model.getMiningSchema().getMiningFields()) {
                switch (miningField.getUsageType()) {
                    case PREDICTED:
                    case TARGET:
                        chainTargetFields.add(miningField);
                        break;
                    default:
                        break;
                }
            }
        }
        final MiningSchema chainSchema = new MiningSchema(chainTargetFields);
        final Schema emptySchema = new Schema(null, Collections.<Feature>emptyList());
        rootModel = MiningModelUtil.createModelChain(models, emptySchema).setMiningSchema(chainSchema);
    }
    return encoder.encodePMML(rootModel);
}
Also used : MiningField(org.dmg.pmml.MiningField) Transformer(org.apache.spark.ml.Transformer) MiningSchema(org.dmg.pmml.MiningSchema) Schema(org.jpmml.converter.Schema) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) MiningSchema(org.dmg.pmml.MiningSchema) MiningModel(org.dmg.pmml.mining.MiningModel) MiningModel(org.dmg.pmml.mining.MiningModel) PipelineModel(org.apache.spark.ml.PipelineModel) TrainValidationSplitModel(org.apache.spark.ml.tuning.TrainValidationSplitModel) CrossValidatorModel(org.apache.spark.ml.tuning.CrossValidatorModel) PMML(org.dmg.pmml.PMML) ArrayList(java.util.ArrayList) List(java.util.List)

Example 29 with MiningField

use of org.dmg.pmml.MiningField in project drools by kiegroup.

the class KiePMMLUtil method populateMissingOutputFieldDataType.

/**
 * Fills in the <b>dataType</b> property of every <code>OutputField</code> that lacks it.
 * Such property was optional until 4.4.1 spec
 * <p>
 * The data type is copied from the <code>DataField</code> named like the referenced target
 * <code>MiningField</code>; a "probability" output without a referenced target is set to
 * <code>DataType.DOUBLE</code>. Output fields for which no referenced field can be determined
 * are left untouched.
 * @param toPopulate the output fields to inspect; the ones without data type may be modified
 * @param miningFields the mining fields from which the target fields are extracted
 * @param dataFields the data fields searched for the referenced target's data type
 * @throws KiePMMLException if an output field names a target field that is not among the target
 * mining fields, or if no <code>DataField</code> matches the referenced mining field
 */
static void populateMissingOutputFieldDataType(List<OutputField> toPopulate, List<MiningField> miningFields, List<DataField> dataFields) {
    // partial implementation to fix missing "dataType" inside OutputField; "dataType" became mandatory only in 4.4.1 version
    final List<MiningField> targetFields = getMiningTargetFields(miningFields);
    for (OutputField outputField : toPopulate) {
        if (outputField.getDataType() != null) {
            // already populated
            continue;
        }
        MiningField referencedField = null;
        if (outputField.getTargetField() != null) {
            // explicit target reference: it must match one of the target mining fields
            referencedField = targetFields.stream()
                    .filter(candidate -> outputField.getTargetField().equals(candidate.getName()))
                    .findFirst()
                    .orElseThrow(() -> new KiePMMLException("Failed to find a target field for OutputField " + outputField.getName().getValue()));
        }
        if (referencedField == null) {
            if (outputField.getResultFeature() == null || outputField.getResultFeature().equals(ResultFeature.PREDICTED_VALUE)) {
                // default predictedValue: fall back on the first target field, if any.
                // It is allowed to not have any "target" field inside MiningSchema
                referencedField = targetFields.stream().findFirst().orElse(null);
            }
        }
        if (referencedField == null) {
            if (ResultFeature.PROBABILITY.equals(outputField.getResultFeature())) {
                // a "probability" output is given the "double" data type
                outputField.setDataType(DataType.DOUBLE);
            }
            // no referenced field: nothing else can be derived for this output field
            continue;
        }
        final FieldName targetFieldName = referencedField.getName();
        final DataField matchingDataField = dataFields.stream()
                .filter(candidate -> candidate.getName().equals(targetFieldName))
                .findFirst()
                .orElseThrow(() -> new KiePMMLException("Failed to find a DataField field for " + "MiningField " + targetFieldName.toString()));
        outputField.setDataType(matchingDataField.getDataType());
    }
}
Also used : PMML(org.dmg.pmml.PMML) Model(org.dmg.pmml.Model) OutputField(org.dmg.pmml.OutputField) Targets(org.dmg.pmml.Targets) DataType(org.dmg.pmml.DataType) ResultFeature(org.dmg.pmml.ResultFeature) MiningSchema(org.dmg.pmml.MiningSchema) Collectors(java.util.stream.Collectors) JAXBException(javax.xml.bind.JAXBException) Target(org.dmg.pmml.Target) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) OpType(org.dmg.pmml.OpType) List(java.util.List) Segment(org.dmg.pmml.mining.Segment) ByteArrayInputStream(java.io.ByteArrayInputStream) SAXException(org.xml.sax.SAXException) Optional(java.util.Optional) MiningFunction(org.dmg.pmml.MiningFunction) MiningField(org.dmg.pmml.MiningField) KiePMMLException(org.kie.pmml.api.exceptions.KiePMMLException) MathContext(org.dmg.pmml.MathContext) InputStream(java.io.InputStream) MiningModel(org.dmg.pmml.mining.MiningModel) MiningField(org.dmg.pmml.MiningField) DataField(org.dmg.pmml.DataField) KiePMMLException(org.kie.pmml.api.exceptions.KiePMMLException) FieldName(org.dmg.pmml.FieldName)

Example 30 with MiningField

use of org.dmg.pmml.MiningField in project drools by kiegroup.

the class KiePMMLUtil method getTargetMiningField.

/**
 * Creates a new <code>MiningField</code> flagged as <b>target</b> for the given <code>DataField</code>.
 * @param dataField the data field whose name is given to the created mining field
 * @return a new <code>MiningField</code> with usage type <code>TARGET</code> and the name of <code>dataField</code>
 */
static MiningField getTargetMiningField(final DataField dataField) {
    final MiningField targetField = new MiningField();
    targetField.setUsageType(MiningField.UsageType.TARGET);
    targetField.setName(dataField.getName());
    return targetField;
}
Also used : MiningField(org.dmg.pmml.MiningField)

Aggregations

MiningField (org.dmg.pmml.MiningField)59 DataField (org.dmg.pmml.DataField)40 Test (org.junit.Test)39 MiningSchema (org.dmg.pmml.MiningSchema)33 DataDictionary (org.dmg.pmml.DataDictionary)25 RegressionModel (org.dmg.pmml.regression.RegressionModel)24 Model (org.dmg.pmml.Model)22 PMMLModelTestUtils.getRandomDataField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomDataField)20 PMML (org.dmg.pmml.PMML)18 PMMLModelTestUtils.getRandomMiningField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomMiningField)18 PMMLModelTestUtils.getMiningField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getMiningField)17 PMMLModelTestUtils.getDataField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getDataField)16 OutputField (org.dmg.pmml.OutputField)15 CommonTestingUtils.getFieldsFromDataDictionary (org.kie.pmml.compiler.api.CommonTestingUtils.getFieldsFromDataDictionary)15 FieldName (org.dmg.pmml.FieldName)12 Target (org.dmg.pmml.Target)11 Targets (org.dmg.pmml.Targets)11 OP_TYPE (org.kie.pmml.api.enums.OP_TYPE)11 HashMap (java.util.HashMap)10 List (java.util.List)10