Use of org.dmg.pmml.MiningSchemaDocument.MiningSchema in project knime-core by knime.
The class PMMLMiningSchemaTranslator, method initializeFrom.
/**
* Initializes the mining schema translator based on a PMML document.
* See {@link PMMLTranslator#initializeFrom(PMMLDocument)}
* @param pmmlDoc the PMML document
*/
public void initializeFrom(final PMMLDocument pmmlDoc) {
    Map<PMMLModelType, Integer> models = PMMLUtils.getNumberOfModels(pmmlDoc);
    if (models.isEmpty()) {
        LOGGER.warn("The PMML document contains no model. Hence no mining schema could be found.");
        return;
    }
    // retrieve the mining schema of the first model
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(pmmlDoc, models.keySet().iterator().next().getXmlBeansType());
    for (MiningField miningField : miningSchema.getMiningFieldArray()) {
        if (miningField.isSetMissingValueReplacement()) {
            LOGGER.warn("\"missingValueReplacement\" is not supported and will be ignored. Skipping it");
        }
        if (miningField.isSetMissingValueTreatment()) {
            LOGGER.warn("\"missingValueTreatment\" is not supported and will be ignored. Skipping it");
        }
        if (miningField.isSetOutliers()) {
            LOGGER.warn("\"outliers\" is not supported and will be ignored. Skipping it");
        }
        checkInvalidValueTreatment(pmmlDoc, miningField);
        String name = miningField.getName();
        FIELDUSAGETYPE.Enum usageType = miningField.getUsageType();
        if (FIELDUSAGETYPE.ACTIVE == usageType) {
            m_learningFields.add(name);
        } else if (FIELDUSAGETYPE.PREDICTED == usageType || FIELDUSAGETYPE.TARGET == usageType) {
            m_targetFields.add(name);
        }
    }
}
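For orientation, here is a minimal usage sketch of this translator. It is an assumption-laden illustration, not verified API: both the public no-arg constructor and the accessors getLearningFields()/getTargetFields() are assumed names that may differ in the actual class.

import java.io.File;

import org.dmg.pmml.PMMLDocument;

// Sketch only: parse a PMML file with XMLBeans and initialize the translator
// from it. Constructor and getter names below are assumptions.
public final class MiningSchemaReadExample {
    public static void main(final String[] args) throws Exception {
        PMMLDocument pmmlDoc = PMMLDocument.Factory.parse(new File("model.pmml"));
        PMMLMiningSchemaTranslator translator = new PMMLMiningSchemaTranslator();
        translator.initializeFrom(pmmlDoc);
        System.out.println("Learning fields: " + translator.getLearningFields()); // assumed accessor
        System.out.println("Target fields: " + translator.getTargetFields());     // assumed accessor
    }
}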
Use of org.dmg.pmml.MiningSchemaDocument.MiningSchema in project knime-core by knime.
The class PMMLPortObject, method addModelTranslater.
/**
 * Adds the model of the content translator to the PMML document.
 * @param modelTranslator the model translator containing the model to be added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);
    /* Remove mining fields from the mining schema that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the mining schema. But in PMML this
     * results in duplicate entries. Those columns should only appear once,
     * as a derived field in the transformation dictionary or local
     * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    List<MiningField> miningFields = new ArrayList<MiningField>(Arrays.asList(miningFieldArray));
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
            miningFields.remove(miningField);
        } else {
            miningFieldNames.add(miningFieldName);
        }
    }
    /* According to the PMML spec, DerivedFields must ultimately refer back
     * to active MiningFields of the model's MiningSchema. Therefore we
     * have to add all referenced DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field"
        + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);
    Set<String> referencedFields = new LinkedHashSet<String>();
    // collect all referenced field names
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        referencedFields.add(xmlCursor.getTextValue());
        xmlCursor.dispose();
    }
    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
             * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
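The interesting part of this method is the XPath query over LocalTransformations and the XmlCursor used to read the matched @field attributes. Below is a self-contained sketch of that pattern; the namespace declaration is an assumption standing in for the NAMESPACE_DECLARATION constant of PMMLPortObject, and the PMML 4.2 namespace URI may differ for other PMML versions.

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;

// Sketch: collect the values of all @field attributes referenced from
// DerivedField children of an arbitrary XmlObject (e.g. LocalTransformations).
final class FieldReferenceCollector {

    // Assumed prefix binding; the real constant lives in PMMLPortObject.
    private static final String NAMESPACE_DECLARATION =
        "declare namespace pmml='http://www.dmg.org/PMML-4_2'; ";

    static Set<String> collectReferencedFields(final XmlObject localTransformations) {
        String path = NAMESPACE_DECLARATION
            + "$this/pmml:DerivedField/*/@field"
            + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
        Set<String> referenced = new LinkedHashSet<String>();
        for (XmlObject match : localTransformations.selectPath(path)) {
            XmlCursor cursor = match.newCursor();
            referenced.add(cursor.getTextValue()); // the attribute's value
            cursor.dispose();
        }
        return referenced;
    }
}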
Use of org.dmg.pmml.MiningSchemaDocument.MiningSchema in project knime-core by knime.
The class PMMLMiningSchemaTranslator, method writeMiningSchema.
/**
 * Writes the MiningSchema based upon the fields of the passed
 * {@link PMMLPortObjectSpec}.
 *
 * @param portSpec the port object spec upon which the mining schema is based
 * @param model the PMML model element to write the mining schema to
 */
public static void writeMiningSchema(final PMMLPortObjectSpec portSpec, final XmlObject model) {
    MiningSchema miningSchema = MiningSchema.Factory.newInstance();
    // avoid duplicate entries
    Set<String> learningNames = new HashSet<String>(portSpec.getLearningFields());
    Set<String> targetNames = new HashSet<String>(portSpec.getTargetFields());
    for (String colName : portSpec.getLearningFields()) {
        if (!targetNames.contains(colName)) {
            MiningField miningField = miningSchema.addNewMiningField();
            miningField.setName(colName);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            // don't write usageType = active (it is the default)
        }
    }
    // add all fields referenced in local transformations
    for (String colName : portSpec.getPreprocessingFields()) {
        if (!learningNames.contains(colName) && !targetNames.contains(colName)) {
            MiningField miningField = miningSchema.addNewMiningField();
            miningField.setName(colName);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            // don't write usageType = active (it is the default)
        }
    }
    // target columns get usageType = target
    for (String colName : portSpec.getTargetFields()) {
        MiningField miningField = miningSchema.addNewMiningField();
        miningField.setName(colName);
        miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
        miningField.setUsageType(FIELDUSAGETYPE.TARGET);
    }
    /* Unfortunately the PMML models have no common base class. Therefore
     * a cast to the specific type is necessary for being able to add the
     * mining schema. */
    SchemaType type = model.schemaType();
    if (AssociationModel.type.equals(type)) {
        ((AssociationModel) model).setMiningSchema(miningSchema);
    } else if (ClusteringModel.type.equals(type)) {
        ((ClusteringModel) model).setMiningSchema(miningSchema);
    } else if (GeneralRegressionModel.type.equals(type)) {
        ((GeneralRegressionModel) model).setMiningSchema(miningSchema);
    } else if (MiningModel.type.equals(type)) {
        ((MiningModel) model).setMiningSchema(miningSchema);
    } else if (NaiveBayesModel.type.equals(type)) {
        ((NaiveBayesModel) model).setMiningSchema(miningSchema);
    } else if (NeuralNetwork.type.equals(type)) {
        ((NeuralNetwork) model).setMiningSchema(miningSchema);
    } else if (RegressionModel.type.equals(type)) {
        ((RegressionModel) model).setMiningSchema(miningSchema);
    } else if (RuleSetModel.type.equals(type)) {
        ((RuleSetModel) model).setMiningSchema(miningSchema);
    } else if (SequenceModel.type.equals(type)) {
        ((SequenceModel) model).setMiningSchema(miningSchema);
    } else if (SupportVectorMachineModel.type.equals(type)) {
        ((SupportVectorMachineModel) model).setMiningSchema(miningSchema);
    } else if (TextModel.type.equals(type)) {
        ((TextModel) model).setMiningSchema(miningSchema);
    } else if (TimeSeriesModel.type.equals(type)) {
        ((TimeSeriesModel) model).setMiningSchema(miningSchema);
    } else if (TreeModel.type.equals(type)) {
        ((TreeModel) model).setMiningSchema(miningSchema);
    } else if (NearestNeighborModel.type.equals(type)) {
        ((NearestNeighborModel) model).setMiningSchema(miningSchema);
    }
}
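A brief usage sketch follows, assuming the XMLBeans-generated PMML types shown above and KNIME classes in the org.knime.core.node.port.pmml package (package paths are assumptions). It creates an empty document with a TreeModel element and lets the translator attach the mining schema; the TreeModel branch of the cast chain handles it.

import org.dmg.pmml.PMMLDocument;
import org.dmg.pmml.PMMLDocument.PMML;
import org.dmg.pmml.TreeModelDocument.TreeModel;
import org.knime.core.node.port.pmml.PMMLMiningSchemaTranslator;
import org.knime.core.node.port.pmml.PMMLPortObjectSpec;

// Sketch only: write a mining schema into a freshly created TreeModel element.
// portSpec is assumed to come from elsewhere, e.g. a PMMLPortObject input.
final class WriteMiningSchemaExample {
    static PMMLDocument newDocumentWithTreeModel(final PMMLPortObjectSpec portSpec) {
        PMMLDocument pmmlDoc = PMMLDocument.Factory.newInstance();
        PMML pmml = pmmlDoc.addNewPMML();
        TreeModel treeModel = pmml.addNewTreeModel();
        PMMLMiningSchemaTranslator.writeMiningSchema(portSpec, treeModel);
        return pmmlDoc;
    }
}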
Use of org.dmg.pmml.MiningSchemaDocument.MiningSchema in project knime-core by knime.
The class DataColumnSpecFilterPMMLNodeModel, method createPMMLOut.
private PMMLPortObject createPMMLOut(final PMMLPortObject pmmlIn, final DataTableSpec outSpec, final FilterResult res) throws XmlException {
    StringBuffer warningBuffer = new StringBuffer();
    if (pmmlIn == null) {
        return new PMMLPortObject(createPMMLSpec(null, outSpec, res));
    } else {
        PMMLDocument pmmldoc;
        try (LockedSupplier<Document> supplier = pmmlIn.getPMMLValue().getDocumentSupplier()) {
            pmmldoc = PMMLDocument.Factory.parse(supplier.get());
        }
        // Inspect the models to check whether they use any excluded columns
        List<PMMLModelWrapper> models = PMMLModelWrapper.getModelListFromPMMLDocument(pmmldoc);
        for (PMMLModelWrapper model : models) {
            MiningSchema ms = model.getMiningSchema();
            for (MiningField mf : ms.getMiningFieldList()) {
                if (isExcluded(mf.getName(), res)) {
                    if (warningBuffer.length() != 0) {
                        warningBuffer.append("\n");
                    }
                    warningBuffer.append(model.getModelType().name() + " uses excluded column " + mf.getName());
                }
            }
        }
        ArrayList<String> warningFields = new ArrayList<String>();
        PMML pmml = pmmldoc.getPMML();
        // Now check the transformations, if they exist
        if (pmml.getTransformationDictionary() != null) {
            for (DerivedField df : pmml.getTransformationDictionary().getDerivedFieldList()) {
                FieldRef fr = df.getFieldRef();
                if (fr != null && isExcluded(fr.getField(), res)) {
                    warningFields.add(fr.getField());
                }
                Aggregate a = df.getAggregate();
                if (a != null && isExcluded(a.getField(), res)) {
                    warningFields.add(a.getField());
                }
                Apply ap = df.getApply();
                if (ap != null) {
                    for (FieldRef fieldRef : ap.getFieldRefList()) {
                        if (isExcluded(fieldRef.getField(), res)) {
                            warningFields.add(fieldRef.getField());
                            break;
                        }
                    }
                }
                Discretize d = df.getDiscretize();
                if (d != null && isExcluded(d.getField(), res)) {
                    warningFields.add(d.getField());
                }
                MapValues mv = df.getMapValues();
                if (mv != null) {
                    for (FieldColumnPair fcp : mv.getFieldColumnPairList()) {
                        if (isExcluded(fcp.getField(), res)) {
                            warningFields.add(fcp.getField());
                        }
                    }
                }
                NormContinuous nc = df.getNormContinuous();
                if (nc != null && isExcluded(nc.getField(), res)) {
                    warningFields.add(nc.getField());
                }
                NormDiscrete nd = df.getNormDiscrete();
                if (nd != null && isExcluded(nd.getField(), res)) {
                    warningFields.add(nd.getField());
                }
            }
        }
        DataDictionary dict = pmml.getDataDictionary();
        List<DataField> fields = dict.getDataFieldList();
        // Apply the filter to the data dictionary
        int numFields = 0;
        for (int i = fields.size() - 1; i >= 0; i--) {
            if (isExcluded(fields.get(i).getName(), res)) {
                dict.removeDataField(i);
            } else {
                numFields++;
            }
        }
        dict.setNumberOfFields(new BigInteger(Integer.toString(numFields)));
        pmml.setDataDictionary(dict);
        pmmldoc.setPMML(pmml);
        // generate warnings and set them as the warning message
        for (String s : warningFields) {
            if (warningBuffer.length() != 0) {
                warningBuffer.append("\n");
            }
            warningBuffer.append("Transformation dictionary uses excluded column " + s);
        }
        if (warningBuffer.length() > 0) {
            setWarningMessage(warningBuffer.toString().trim());
        }
        PMMLPortObject outport = null;
        try {
            outport = new PMMLPortObject(createPMMLSpec(pmmlIn.getSpec(), outSpec, res), pmmldoc);
        } catch (IllegalArgumentException e) {
            if (res.getIncludes().length == 0) {
                throw new IllegalArgumentException("Excluding all columns produces invalid PMML", e);
            } else {
                throw e;
            }
        }
        return outport;
    }
}
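isExcluded(String, FilterResult) is a private helper of the node model and is not part of this snippet. A plausible minimal form, assuming it only tests membership in the filter's exclude list and that FilterResult is KNIME's NameFilterConfiguration.FilterResult, might look like this; the real implementation may differ.

import java.util.Arrays;

import org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult;

final class ExclusionCheckSketch {
    // Hypothetical stand-in for the node model's private isExcluded(...).
    static boolean isExcluded(final String name, final FilterResult res) {
        return Arrays.asList(res.getExcludes()).contains(name);
    }
}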
Use of org.dmg.pmml.MiningSchemaDocument.MiningSchema in project knime-core by knime.
The class ClassificationGBTModelExporter, method addAggregationMiningScheme.
private void addAggregationMiningScheme(final RegressionModel regression) {
    MiningSchema miningSchema = regression.addNewMiningSchema();
    // add target field
    MiningField targetField = miningSchema.addNewMiningField();
    targetField.setName(getGBTModel().getMetaData().getTargetMetaData().getAttributeName());
    targetField.setUsageType(FIELDUSAGETYPE.TARGET);
    // add class logits
    for (int i = 0; i < getGBTModel().getNrClasses(); i++) {
        MiningField logit = miningSchema.addNewMiningField();
        logit.setName(logitName(i));
    }
}
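logitName(int) and getGBTModel() are private members of the exporter and are not shown in this snippet. A standalone sketch of the same construction, with an illustrative naming scheme in place of logitName, could look like this:

import org.dmg.pmml.FIELDUSAGETYPE;
import org.dmg.pmml.MiningFieldDocument.MiningField;
import org.dmg.pmml.MiningSchemaDocument.MiningSchema;
import org.dmg.pmml.RegressionModelDocument.RegressionModel;

// Sketch: build the aggregation mining schema without the exporter's private
// state. The per-class field name is illustrative, not the exporter's scheme.
final class AggregationSchemaSketch {
    static MiningSchema buildSchema(final RegressionModel regression,
            final String targetName, final int nrClasses) {
        MiningSchema miningSchema = regression.addNewMiningSchema();
        MiningField targetField = miningSchema.addNewMiningField();
        targetField.setName(targetName);
        targetField.setUsageType(FIELDUSAGETYPE.TARGET);
        for (int i = 0; i < nrClasses; i++) {
            MiningField logit = miningSchema.addNewMiningField();
            logit.setName("logitOfClass_" + i); // illustrative name only
        }
        return miningSchema;
    }
}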