use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLBinningTranslator method initializeFrom.
/**
 * {@inheritDoc}
 * <p>
 * Only derived fields that carry a {@code Discretize} element are read; all
 * other entries are skipped. The bins of every consumed field are converted
 * to {@code NumericBin}s and registered in {@code m_columnToBins} and
 * {@code m_columnToAppend}.
 *
 * @param derivedFields the derived fields to read the binning from
 * @return the indices (into {@code derivedFields}) of the consumed entries
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    List<Integer> consumed = new ArrayList<Integer>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // only reading discretize entries other entries are skipped
            continue;
        }
        consumed.add(i);
        Discretize discretize = df.getDiscretize();
        DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            DiscretizeBin bin = pmmlBins[j];
            String binName = bin.getBinValue();
            Interval interval = bin.getInterval();
            // createDerivedFields() leaves the margin attribute out for infinite
            // bounds, so an absent margin has to be read back as "unbounded"
            // rather than as a numeric value.
            double leftValue = interval.isSetLeftMargin()
                    ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double rightValue = interval.isSetRightMargin()
                    ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            Closure.Enum closure = interval.getClosure();
            // Both ends are treated as open unless the closure says otherwise.
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * The display name contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A', A'' etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            // Column is replaced in place: nothing is appended.
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            // No display name: the binned values go into a column named after the derived field.
            String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLBinningTranslator method createDerivedFields.
/**
 * Builds one PMML derived field with a {@code Discretize} element for every
 * column registered in {@code m_columnToBins}.
 *
 * @return the derived fields, in the iteration order of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int next = 0;
    for (Map.Entry<String, Bin[]> columnEntry : m_columnToBins.entrySet()) {
        final String column = columnEntry.getKey();
        final DerivedField derived = DerivedField.Factory.newInstance();
        // Resolve the referenced field name before a new derived name is
        // created for this column: the mapper only holds the current mapping.
        final String sourceField = m_mapper.getDerivedFieldName(column);
        final Discretize discretize = derived.addNewDiscretize();
        discretize.setField(sourceField);

        final String appendedName = m_columnToAppend.get(column);
        if (appendedName == null) {
            // No explicit name registered: generate a derived name and keep
            // the column name as display name.
            derived.setName(m_mapper.createDerivedFieldName(column));
            derived.setDisplayName(column);
        } else {
            derived.setName(appendedName);
        }
        derived.setOptype(OPTYPE.CATEGORICAL);
        derived.setDataType(DATATYPE.STRING);

        for (Bin rawBin : columnEntry.getValue()) {
            final NumericBin numericBin = (NumericBin) rawBin;
            final boolean openOnLeft = numericBin.isLeftOpen();
            final boolean openOnRight = numericBin.isRightOpen();
            final double lower = numericBin.getLeftValue();
            final double upper = numericBin.getRightValue();

            final DiscretizeBin target = discretize.addNewDiscretizeBin();
            target.setBinValue(numericBin.getBinName());
            final Interval range = target.addNewInterval();
            // Infinite bounds are expressed by leaving the margin unset.
            if (!Double.isInfinite(lower)) {
                range.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                range.setRightMargin(upper);
            }
            range.setClosure(openOnLeft
                    ? (openOnRight ? Closure.OPEN_OPEN : Closure.OPEN_CLOSED)
                    : (openOnRight ? Closure.CLOSED_OPEN : Closure.CLOSED_CLOSED));
        }
        result[next] = derived;
        next++;
    }
    return result;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLMany2OneTranslator method createDerivedField.
/**
 * Builds the derived field for the appended column as a chain of nested PMML
 * {@code if} functions, one per source column. Each {@code if} tests the
 * column with {@code equal} and yields the column name as a string constant;
 * the innermost {@code if} additionally receives the constant
 * {@code "missing"}.
 *
 * @return the derived field named after {@code m_appendedCol}
 */
private DerivedField createDerivedField() {
    final DerivedField result = DerivedField.Factory.newInstance();
    result.setName(m_appendedCol);
    result.setDataType(DATATYPE.STRING);
    result.setOptype(OPTYPE.CATEGORICAL);

    Apply enclosing = null;
    for (String column : m_sourceCols) {
        // The first "if" hangs off the derived field, every further one is
        // nested inside the previous "if".
        final Apply branch = (enclosing == null) ? result.addNewApply() : enclosing.addNewApply();
        branch.setFunction("if");

        final Apply condition = branch.addNewApply();
        condition.setFunction("equal");
        condition.addNewFieldRef().setField(column);
        if (m_method != IncludeMethod.Maximum && m_method != IncludeMethod.Minimum) {
            // Any other include method (presumably IncludeMethod.Binary):
            // compare the column against the constant "1".
            condition.addNewConstant().setStringValue("1");
        } else {
            // Compare the column against the max/min over all source columns.
            final Apply extremum = condition.addNewApply();
            extremum.setFunction(m_method == IncludeMethod.Maximum ? "max" : "min");
            for (String other : m_sourceCols) {
                extremum.addNewFieldRef().setField(other);
            }
        }
        branch.addNewConstant().setStringValue(column);
        enclosing = branch;
    }
    if (enclosing != null) {
        enclosing.addNewConstant().setStringValue("missing");
    }
    return result;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class MissingCellHandler method createExtensionDerivedField.
/**
 * Creates a derived field carrying an extension that names the factory to use
 * for the replacement. The handler's settings are serialized to XML and
 * attached below that extension. The result may be adjusted by the caller to
 * contain further information needed by the handler.
 *
 * @param dataType the data type of the derived field
 * @param factoryID the id of the factory
 * @return the derived field, or {@code null} if the settings could not be
 *         written into the extension
 */
protected DerivedField createExtensionDerivedField(final DATATYPE.Enum dataType, final String factoryID) {
    final DerivedField derived = DerivedField.Factory.newInstance();
    final boolean categorical =
        dataType == org.dmg.pmml.DATATYPE.STRING || dataType == org.dmg.pmml.DATATYPE.BOOLEAN;
    derived.setOptype(categorical ? org.dmg.pmml.OPTYPE.CATEGORICAL : org.dmg.pmml.OPTYPE.CONTINUOUS);

    final Extension extension = derived.addNewExtension();
    extension.setName(CUSTOM_HANDLER_EXTENSION_NAME);
    extension.setValue(factoryID);

    final String columnName = m_col.getName();
    derived.setDataType(dataType);
    derived.setName(columnName);
    derived.setDisplayName(columnName);

    // Serialize the handler settings and embed them in the extension element.
    final NodeSettings settings = new NodeSettings("");
    saveSettingsTo(settings);
    try {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        settings.saveToXML(buffer);
        final ByteArrayInputStream xmlInput = new ByteArrayInputStream(buffer.toByteArray());
        final Document settingsDoc =
            javax.xml.parsers.DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(xmlInput);
        // NOTE(review): getFirstChild() is taken to be the settings root
        // element - confirm the serialized XML has no leading comment/doctype.
        final Node extensionNode = extension.getDomNode();
        final Node imported = extensionNode.getOwnerDocument().importNode(settingsDoc.getFirstChild(), true);
        extensionNode.appendChild(imported);
    } catch (Exception ex) {
        LOGGER.error("An error occurred while writing settings to PMML.\n" + ex.getMessage());
        return null;
    }
    return derived;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class MissingCellHandler method createValueReplacingDerivedField.
/**
 * Helper that creates a derived field which substitutes a fixed value for
 * missing values of this handler's column.
 *
 * @param dataType the data type of the field
 * @param value the replacement value for the field
 * @return the derived field
 */
protected DerivedField createValueReplacingDerivedField(final DATATYPE.Enum dataType, final String value) {
    final String columnName = m_col.getName();
    final DerivedField derived = DerivedField.Factory.newInstance();
    final boolean categorical =
        dataType == org.dmg.pmml.DATATYPE.STRING || dataType == org.dmg.pmml.DATATYPE.BOOLEAN;
    derived.setOptype(categorical ? org.dmg.pmml.OPTYPE.CATEGORICAL : org.dmg.pmml.OPTYPE.CONTINUOUS);

    /*
     * PMML equivalent of "if fieldVal is missing then x else fieldVal":
     *
     * <Apply function="if">
     *   <Apply function="isMissing">
     *     <FieldRef field="fieldVal"/>
     *   </Apply>
     *   <Constant dataType="___" value="x"/>
     *   <FieldRef field="fieldVal"/>
     * </Apply>
     */
    final Apply conditional = derived.addNewApply();
    conditional.setFunction(IF_FUNCTION_NAME);

    // Reference to the column; used in the isMissing test and as else-value.
    final FieldRef columnRef = FieldRef.Factory.newInstance();
    columnRef.setField(columnName);

    final Apply missingTest = Apply.Factory.newInstance();
    missingTest.setFieldRefArray(new FieldRef[] { columnRef });
    missingTest.setFunction(IS_MISSING_FUNCTION_NAME);
    conditional.setApplyArray(new Apply[] { missingTest });

    final Constant fixedValue = Constant.Factory.newInstance();
    fixedValue.setDataType(dataType);
    fixedValue.setStringValue(value);
    conditional.setConstantArray(new Constant[] { fixedValue });
    conditional.setFieldRefArray(new FieldRef[] { columnRef });

    derived.setDataType(dataType);
    derived.setName(columnName);
    derived.setDisplayName(columnName);
    return derived;
}
Aggregations