use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLStringConversionTranslator method createDerivedFields.
/**
 * Builds one PMML derived field for every column listed in
 * {@code m_includeCols}. Each derived field gets the PMML data type and
 * optype obtained from {@code m_parseType}, the column name as display name,
 * a newly created derived field name, and a field reference to the name the
 * mapper held for the column before the new name was created.
 *
 * @return the derived fields, in the order of the included columns
 */
private DerivedField[] createDerivedFields() {
    final DATATYPE.Enum pmmlDataType = PMMLDataDictionaryTranslator.getPMMLDataType(m_parseType);
    final OPTYPE.Enum pmmlOptype = PMMLDataDictionaryTranslator.getOptype(m_parseType);
    final int count = m_includeCols.size();
    final DerivedField[] result = new DerivedField[count];
    for (int idx = 0; idx < count; idx++) {
        final String column = m_includeCols.get(idx);
        // Order matters: the currently mapped name has to be looked up
        // BEFORE a new derived name is created, because the mapper only
        // keeps the most recent mapping for a column.
        final String referencedName = m_mapper.getDerivedFieldName(column);
        final String derivedName = m_mapper.createDerivedFieldName(column);

        final DerivedField field = DerivedField.Factory.newInstance();
        field.setDisplayName(column);
        field.setName(derivedName);
        field.setDataType(pmmlDataType);
        field.setOptype(pmmlOptype);
        field.addNewFieldRef().setField(referencedName);
        result[idx] = field;
    }
    return result;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLStringConversionTranslator method initializeFrom.
/**
 * {@inheritDoc}
 * <p>
 * Consumes every derived field that consists of a plain field reference and
 * whose data type maps to an int, double or string KNIME type. For each
 * consumed field the corresponding KNIME column name is appended to
 * {@code m_includeCols} and {@code m_parseType} is set to the matching cell
 * type (the last consumed field determines the final value).
 *
 * @param derivedFields the derived fields to read, may be {@code null}
 * @return the indices of the consumed derived fields; an empty list if
 *         {@code derivedFields} is {@code null}
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    if (derivedFields == null) {
        // Type-safe replacement for the raw Collections.EMPTY_LIST
        return Collections.emptyList();
    }
    List<Integer> consumed = new ArrayList<Integer>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetFieldRef()) {
            // only reading field references
            continue;
        }
        DataType dataType = PMMLDataDictionaryTranslator.getKNIMEDataType(df.getDataType());
        if (dataType.isCompatible(IntValue.class)) {
            m_parseType = IntCell.TYPE;
        } else if (dataType.isCompatible(DoubleValue.class)) {
            m_parseType = DoubleCell.TYPE;
        } else if (StringCell.TYPE.equals(dataType)) {
            m_parseType = StringCell.TYPE;
        } else {
            // only processing int, double and string conversions
            continue;
        }
        /*
         * The display name contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A*, A** etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            m_includeCols.add(displayName);
        } else {
            // No display name: resolve the column through the referenced field
            m_includeCols.add(m_mapper.getColumnName(df.getFieldRef().getField()));
        }
        consumed.add(i);
    }
    return consumed;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class CategoryToNumberApplyNodeModel method getMapValues.
/**
 * Collects the derived fields of the given model that contain a MapValues
 * element, keyed by the input field of that element.
 * <p>
 * Derived fields without a MapValues element, or whose MapValues element
 * contains no FieldColumnPair, are skipped. If several derived fields are
 * based on the same input field, the last one wins. The returned map keeps
 * the order in which the keys were first encountered.
 *
 * @param model the PMML model
 * @return the field in the first FieldColumnPair of the MapValues mapped
 * to the MapValues Model
 */
private Map<String, DerivedField> getMapValues(final PMMLPortObject model) {
    Map<String, DerivedField> mapValues = new LinkedHashMap<String, DerivedField>();
    for (DerivedField derivedField : model.getDerivedFields()) {
        MapValues map = derivedField.getMapValues();
        if (null == map) {
            continue;
        }
        // A MapValues element without any FieldColumnPair has no input
        // field to key on; skip it instead of failing on index 0.
        if (map.getFieldColumnPairArray().length == 0) {
            continue;
        }
        // This is the field name the mapValues is based on
        String name = map.getFieldColumnPairArray()[0].getField();
        mapValues.put(name, derivedField);
    }
    return mapValues;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class NumberToCategoryApplyNodeModel method createRearranger.
/**
 * Creates a rearranger that processes the derived fields with MapValues
 * in the given model.
 * <p>
 * For every MapValues entry of the model whose input column is present in
 * {@code spec} and compatible with {@code IntValue}, a cell factory is
 * either appended or used to replace the input column, depending on
 * {@code m_settings.getAppendColumns()}. Entries whose input column is
 * missing or has another type are skipped with a logged warning.
 *
 * @param spec the spec of the input table
 * @param model the PMML model, or {@code null} during configure, in which
 *            case the returned rearranger leaves the spec untouched
 * @return the rearranger for the given spec
 */
private ColumnRearranger createRearranger(final DataTableSpec spec, final PMMLPortObject model) {
    // Partition the input columns into IntValue-compatible and all others
    Set<String> intCols = new LinkedHashSet<String>();
    Set<String> otherCols = new LinkedHashSet<String>();
    for (DataColumnSpec colSpec : spec) {
        if (colSpec.getType().isCompatible(IntValue.class)) {
            intCols.add(colSpec.getName());
        } else {
            otherCols.add(colSpec.getName());
        }
    }
    if (intCols.isEmpty()) {
        if (null == model) {
            // during configure
            setWarningMessage("No columns to process.");
        } else {
            // during execute
            setWarningMessage("No columns to process, returning input.");
        }
    }
    // The map values in the model if present; empty without a model, so the
    // loop below only ever runs when a model is available
    Map<String, DerivedField> mapValues = null != model ? getMapValues(model) : Collections.<String, DerivedField>emptyMap();
    // Create rearranger
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    for (Map.Entry<String, DerivedField> entry : mapValues.entrySet()) {
        String col = entry.getKey();
        DerivedField derivedField = entry.getValue();
        // Name used in warnings for the column that would have been created:
        // the display name if it is set and not blank, the field name otherwise
        String displayName = derivedField.getDisplayName();
        String outColumn = null == displayName || displayName.trim().isEmpty() ? derivedField.getName() : displayName;
        // the input column of this PMML MapValues model is found but has
        // the wrong type
        if (otherCols.contains(col)) {
            LOGGER.warn("Cannot create column \"" + outColumn + "\" since the input column \"" + col + "\" is not of type IntValue.");
            continue;
        }
        // the input column of this PMML MapValues model is not part of the
        // input table
        if (!intCols.contains(col)) {
            LOGGER.warn("Cannot create column \"" + outColumn + "\" since the column \"" + col + "\" is not in the input.");
            continue;
        }
        NumberToCategoryApplyCellFactory factory = new NumberToCategoryApplyCellFactory(spec, col, m_settings, derivedField.getMapValues());
        if (m_settings.getAppendColumns()) {
            rearranger.append(factory);
        } else {
            rearranger.replace(factory, col);
        }
    }
    return rearranger;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLNeuralNetworkTranslator method addOutputLayer.
/**
 * Writes the PMML output layer of the MLP: one NeuralOutput per perceptron
 * of the last layer, each with a derived field that refers to the first
 * target field of the spec.
 *
 * @param nnModel
 *            the neural network model.
 * @param mlp
 *            the underlying {@link MultiLayerPerceptron}.
 * @param spec
 *            the port object spec
 */
protected void addOutputLayer(final NeuralNetwork nnModel, final MultiLayerPerceptron mlp, final PMMLPortObjectSpec spec) {
    final int outputLayerIdx = mlp.getNrLayers() - 1;
    final String target = spec.getTargetFields().iterator().next();
    final Perceptron[] neurons = mlp.getLayer(outputLayerIdx).getPerceptrons();
    final HashMap<DataCell, Integer> classMapping = mlp.getClassMapping();

    final NeuralOutputs outputs = nnModel.addNewNeuralOutputs();
    outputs.setNumberOfOutputs(BigInteger.valueOf(neurons.length));

    final boolean classification = mlp.getMode() == MultiLayerPerceptron.CLASSIFICATION_MODE;
    final boolean regression = !classification && mlp.getMode() == MultiLayerPerceptron.REGRESSION_MODE;

    for (int neuron = 0; neuron < neurons.length; neuron++) {
        final NeuralOutput output = outputs.addNewNeuralOutput();
        output.setOutputNeuron(outputLayerIdx + "," + neuron);

        // Find the class value mapped to this neuron; stays empty if there
        // is none, and the last matching entry wins if there are several.
        String className = "";
        for (Entry<DataCell, Integer> mapping : classMapping.entrySet()) {
            if (mapping.getValue().equals(neuron)) {
                className = ((StringValue) mapping.getKey()).getStringValue();
            }
        }

        final DerivedField derived = output.addNewDerivedField();
        // Categorical/string is used for classification and for any mode
        // that is neither classification nor regression.
        derived.setOptype(regression ? OPTYPE.CONTINUOUS : OPTYPE.CATEGORICAL);
        derived.setDataType(regression ? DATATYPE.DOUBLE : DATATYPE.STRING);

        if (classification) {
            final NormDiscrete discrete = derived.addNewNormDiscrete();
            discrete.setField(target);
            discrete.setValue(className);
        } else if (regression) {
            derived.addNewFieldRef().setField(target);
        }
    }
}
Aggregations