use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLMissingValueReplacementTranslator method exportTo.
/**
 * {@inheritDoc}
 *
 * Adds the derived field of every missing cell handler that provides one to the
 * transformation dictionary of the document; the dictionary is created first if
 * the document has none. This implementation always returns {@code null}.
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    TransformationDictionary dictionary = pmmlDoc.getPMML().getTransformationDictionary();
    if (dictionary == null) {
        dictionary = pmmlDoc.getPMML().addNewTransformationDictionary();
    }
    final DerivedFieldMapper nameMapper = new DerivedFieldMapper(pmmlDoc);
    for (final MissingCellHandler cellHandler : m_handlers) {
        final DerivedField field = cellHandler.getPMMLDerivedField();
        if (field == null) {
            // the handler supplied no derived field, nothing to export for it
            continue;
        }
        // keep the current name as display name before the field gets its mapped name
        final String currentName = field.getName();
        field.setDisplayName(currentName);
        field.setName(nameMapper.createDerivedFieldName(currentName));
        dictionary.getDerivedFieldList().add(field);
    }
    return null;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class DBAutoBinner method intoBinnerMaps.
/**
 * This method translates a {@link PMMLPortObject} into a {@link DBBinnerMaps} object which holds several Maps
 * needed to create a binner statement in {@link StatementManipulator}.
 * <p>
 * For every derived field of the port object four entries are created, all keyed by the field referenced in the
 * derived field's discretize element: the bin boundaries (a missing margin becomes negative/positive infinity),
 * the open/closed flags of each boundary, the bin names, and the name of the column to append ({@code null} if
 * the unique column name contains the original column name followed by {@code '*'}).
 *
 * @param pmmlPortObject A {@link PMMLPortObject} containing all necessary information about binning operation
 * @param dataTableSpec Incoming {@link DataTableSpec}
 * @return a {@link DBBinnerMaps} object containing required parameters for {@link StatementManipulator}
 */
public static DBBinnerMaps intoBinnerMaps(final PMMLPortObject pmmlPortObject, final DataTableSpec dataTableSpec) {
    Map<String, List<Pair<Double, Double>>> boundariesMap = new LinkedHashMap<>();
    Map<String, List<Pair<Boolean, Boolean>>> boundariesOpenMap = new LinkedHashMap<>();
    Map<String, List<String>> namingMap = new LinkedHashMap<>();
    Map<String, String> appendMap = new LinkedHashMap<>();
    for (DerivedField derivedField : pmmlPortObject.getDerivedFields()) {
        // each column has its own derived field
        // NOTE(review): assumes every derived field carries a Discretize element - confirm against callers
        List<Pair<Double, Double>> boundaries = new ArrayList<>();
        List<String> names = new ArrayList<>();
        List<Pair<Boolean, Boolean>> boundariesOpen = new ArrayList<>();
        List<DiscretizeBin> discretizeBinList = derivedField.getDiscretize().getDiscretizeBinList();
        String replacedColumnName = DataTableSpec.getUniqueColumnName(dataTableSpec, derivedField.getName());
        String originalColumnName = derivedField.getDiscretize().getField();
        for (DiscretizeBin discBin : discretizeBinList) {
            Interval interval = discBin.getInterval();
            // an unset margin means the interval is unbounded on that side
            double left = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double right = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            boundaries.add(new Pair<>(left, right));
            names.add(discBin.getBinValue());
            boolean leftOpen;
            boolean rightOpen;
            int closure = interval.xgetClosure().enumValue().intValue();
            /*
             * static final int INT_OPEN_CLOSED = 1;
             * static final int INT_OPEN_OPEN = 2;
             * static final int INT_CLOSED_OPEN = 3;
             * static final int INT_CLOSED_CLOSED = 4;
             */
            switch (closure) {
                case 2:
                    leftOpen = true;
                    rightOpen = true;
                    break;
                case 3:
                    leftOpen = false;
                    rightOpen = true;
                    break;
                case 4:
                    leftOpen = false;
                    rightOpen = false;
                    break;
                case 1:
                default:
                    // unknown closure values are treated like open-closed
                    leftOpen = true;
                    rightOpen = false;
                    break;
            }
            boundariesOpen.add(new Pair<>(leftOpen, rightOpen));
        }
        boundariesMap.put(originalColumnName, boundaries);
        namingMap.put(originalColumnName, names);
        boundariesOpenMap.put(originalColumnName, boundariesOpen);
        // Literal substring test: the column name must not be interpreted as a regular expression, otherwise
        // names containing characters such as '(' or '+' fail with a PatternSyntaxException or match wrongly.
        boolean containsStarredOriginal = replacedColumnName.contains(originalColumnName + "*");
        appendMap.put(originalColumnName, containsStarredOriginal ? null : replacedColumnName);
    }
    return new DBBinnerMaps(boundariesMap, boundariesOpenMap, namingMap, appendMap);
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLNormalizeTranslator method createDerivedFields.
/**
 * Creates one continuous, double-typed derived field per name of the affine transformation.
 * Each field holds a NormContinuous element with two LinearNorm entries: orig 0.0 mapped to the
 * translation, and orig 1.0 mapped to scale + translation.
 *
 * @return the created derived fields, in the order of the transformation's names
 */
private DerivedField[] createDerivedFields() {
    final String[] columnNames = m_affineTrans.getNames();
    final DerivedField[] result = new DerivedField[columnNames.length];
    for (int idx = 0; idx < result.length; idx++) {
        final String columnName = columnNames[idx];
        final DerivedField derived = DerivedField.Factory.newInstance();
        derived.setExtensionArray(createSummaryExtension());
        derived.setDisplayName(columnName);
        /* Look up the currently mapped field name first: creating the new
         * derived name below replaces the mapping held by the mapper. */
        final String sourceField = m_mapper.getDerivedFieldName(columnName);
        derived.setName(m_mapper.createDerivedFieldName(columnName));
        derived.setOptype(OPTYPE.CONTINUOUS);
        derived.setDataType(DATATYPE.DOUBLE);

        final NormContinuous normalization = derived.addNewNormContinuous();
        normalization.setField(sourceField);
        final double translation = m_affineTrans.getTranslations()[idx];
        final double scaling = m_affineTrans.getScales()[idx];

        // first support point: orig 0 -> translation
        final LinearNorm atZero = normalization.addNewLinearNorm();
        atZero.setOrig(0.0);
        atZero.setNorm(translation);

        // second support point: orig 1 -> scale + translation
        final LinearNorm atOne = normalization.addNewLinearNorm();
        atOne.setOrig(1.0);
        atOne.setNorm(scaling + translation);

        result[idx] = derived;
    }
    return result;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLPortObject method addModelTranslater.
/**
 * Adds the model of the content translater to the PMML document.
 * <p>
 * After the export, mining fields whose name equals the name of a derived field are removed from
 * the first mining schema found for the exported model, and every field referenced by a derived
 * field of the local transformations is added to that mining schema unless it is already a
 * mining field or a derived field. If no mining schema is found the method only logs and returns.
 *
 * @param modelTranslator the model translator containing the model to be
 *            added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);
    /* Remove mining fields from mining schema that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the mining schema. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    // work on a copy so fields can be removed while iterating over the array
    List<MiningField> miningFields = new ArrayList<MiningField>(Arrays.asList(miningFieldArray));
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
            miningFields.remove(miningField);
        } else {
            miningFieldNames.add(miningFieldName);
        }
    }
    /* According to the PMML Spec DerivedFields must ultimately refer back
     * to active MiningFields of the model's MiningSchema. Therefore we
     * have to add all referred DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field" + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);
    // insertion-ordered so that added mining fields appear in the order they were referenced
    Set<String> referencedFields = new LinkedHashSet<String>();
    // collect all referred field names
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        try {
            referencedFields.add(xmlCursor.getTextValue());
        } finally {
            // always release the cursor, even if reading the value fails
            xmlCursor.dispose();
        }
    }
    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
             * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in " + "LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLPortObject method moveGlobalTransformationsToModel.
/**
 * Moves the derived fields of the transformation dictionary into the local
 * transformations of the first model found in the document. Model kinds are
 * checked in this order: tree, clustering, neural network, support vector
 * machine, regression, general regression, rule set. Does nothing if the
 * dictionary has no derived fields or if no such model exists.
 */
public void moveGlobalTransformationsToModel() {
    final PMML pmml = m_pmmlDoc.getPMML();
    final TransformationDictionary dictionary = pmml.getTransformationDictionary();
    if (dictionary == null) {
        // nothing to be moved
        return;
    }
    final DerivedField[] globalFields = dictionary.getDerivedFieldArray();
    if (globalFields == null || globalFields.length == 0) {
        // nothing to be moved
        return;
    }

    // Locate the local transformations of the first model, creating them on demand.
    final LocalTransformations target;
    if (pmml.getTreeModelArray().length > 0) {
        final TreeModel tree = pmml.getTreeModelArray(0);
        final LocalTransformations existing = tree.getLocalTransformations();
        target = existing != null ? existing : tree.addNewLocalTransformations();
    } else if (pmml.getClusteringModelArray().length > 0) {
        final ClusteringModel clustering = pmml.getClusteringModelArray(0);
        final LocalTransformations existing = clustering.getLocalTransformations();
        target = existing != null ? existing : clustering.addNewLocalTransformations();
    } else if (pmml.getNeuralNetworkArray().length > 0) {
        final NeuralNetwork network = pmml.getNeuralNetworkArray(0);
        final LocalTransformations existing = network.getLocalTransformations();
        target = existing != null ? existing : network.addNewLocalTransformations();
    } else if (pmml.getSupportVectorMachineModelArray().length > 0) {
        final SupportVectorMachineModel svm = pmml.getSupportVectorMachineModelArray(0);
        final LocalTransformations existing = svm.getLocalTransformations();
        target = existing != null ? existing : svm.addNewLocalTransformations();
    } else if (pmml.getRegressionModelArray().length > 0) {
        final RegressionModel regression = pmml.getRegressionModelArray(0);
        final LocalTransformations existing = regression.getLocalTransformations();
        target = existing != null ? existing : regression.addNewLocalTransformations();
    } else if (pmml.getGeneralRegressionModelArray().length > 0) {
        final GeneralRegressionModel generalRegression = pmml.getGeneralRegressionModelArray(0);
        final LocalTransformations existing = generalRegression.getLocalTransformations();
        target = existing != null ? existing : generalRegression.addNewLocalTransformations();
    } else if (pmml.sizeOfRuleSetModelArray() > 0) {
        final RuleSetModel ruleSet = pmml.getRuleSetModelArray(0);
        final LocalTransformations existing = ruleSet.getLocalTransformations();
        target = existing != null ? existing : ruleSet.addNewLocalTransformations();
    } else {
        // do nothing as no model exists yet
        return;
    }

    final DerivedField[] merged = appendDerivedFields(target.getDerivedFieldArray(), globalFields);
    target.setDerivedFieldArray(merged);
    // remove derived fields from TransformationDictionary
    dictionary.setDerivedFieldArray(new DerivedField[0]);
}
Aggregations