Use of org.dmg.pmml.TrainingInstancesDocument.TrainingInstances in project knime-core by knime.
From the class PMMLKNNTranslator, method exportTo.
/**
 * {@inheritDoc}
 * <p>
 * Adds a {@code NearestNeighborModel} element to the PMML element of the given document. The model
 * receives a mining schema, a Euclidean comparison measure, one {@code KNNInput} per included learning
 * column, and training instances consisting of a field mapping plus an inline table. Each inline-table
 * row carries the values of the learning columns followed by the value of the first target field of
 * {@code spec}.
 *
 * @param pmmlDoc the document whose PMML element receives the new model
 * @param spec the port object spec used to write the mining schema and to look up the target field
 * @return {@code NearestNeighborModel.type}
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    final DataTableSpec tableSpec = m_table.getDataTableSpec();

    // Table column index -> name of the inline-table element that carries this column's values.
    // The element name is "col" followed by the map size at insertion time. Insertion order is
    // retained, so every loop below visits the learning columns in the same order.
    final LinkedHashMap<Integer, String> elementNameByColumn = new LinkedHashMap<Integer, String>();
    for (String included : m_includes) {
        final int includedIndex = tableSpec.findColumnIndex(included);
        elementNameByColumn.put(includedIndex, "col" + elementNameByColumn.size());
    }

    // Model element with its basic attributes
    final NearestNeighborModel model = pmmlDoc.getPMML().addNewNearestNeighborModel();
    PMMLMiningSchemaTranslator.writeMiningSchema(spec, model);
    model.setAlgorithmName("K-Nearest Neighbors");
    model.setFunctionName(org.dmg.pmml.MININGFUNCTION.CLASSIFICATION);
    model.setNumberOfNeighbors(BigInteger.valueOf(m_numNeighbors));

    // Euclidean is the only comparison measure written here
    model.addNewComparisonMeasure().addNewEuclidean();

    // One KNNInput per learning column: these are the fields that take part in the distance
    final KNNInputs knnInputs = model.addNewKNNInputs();
    for (int columnIndex : elementNameByColumn.keySet()) {
        final KNNInput knnInput = knnInputs.addNewKNNInput();
        knnInput.setField(tableSpec.getColumnSpec(columnIndex).getName());
        knnInput.setCompareFunction(COMPAREFUNCTION.ABS_DIFF);
    }

    // Field mapping: column name -> name of the XML element holding that column's values
    final TrainingInstances trainingInstances = model.addNewTrainingInstances();
    final InstanceFields fieldMapping = trainingInstances.addNewInstanceFields();
    for (java.util.Map.Entry<Integer, String> mapping : elementNameByColumn.entrySet()) {
        final InstanceField learnField = fieldMapping.addNewInstanceField();
        final int columnIndex = mapping.getKey();
        learnField.setField(tableSpec.getColumnSpec(columnIndex).getName());
        learnField.setColumn(mapping.getValue());
    }

    // The first target field of the spec is mapped to the fixed element name "target"
    final int targetColumn = tableSpec.findColumnIndex(spec.getTargetFields().get(0));
    final InstanceField targetMapping = fieldMapping.addNewInstanceField();
    targetMapping.setField(spec.getTargetFields().get(0));
    targetMapping.setColumn("target");

    // The inline table holds the actual data; its child elements are created through the DOM
    // document that owns the table node, using the element names chosen above.
    final InlineTable inlineTable = trainingInstances.addNewInlineTable();
    final Document ownerDocument = inlineTable.getDomNode().getOwnerDocument();
    int rowsSeen = 0;
    for (DataRow dataRow : m_table) {
        // A limit of -1 or lower disables the cap; otherwise stop once it is exceeded
        if (m_maxRecords > -1) {
            rowsSeen++;
            if (rowsSeen > m_maxRecords) {
                break;
            }
        }

        final Element rowElement = (Element) inlineTable.addNewRow().getDomNode();
        for (java.util.Map.Entry<Integer, String> mapping : elementNameByColumn.entrySet()) {
            final int columnIndex = mapping.getKey();
            final Element valueElement =
                ownerDocument.createElementNS(PMMLUtils.getPMMLCurrentVersionNamespace(), mapping.getValue());
            valueElement.appendChild(ownerDocument.createTextNode(dataRow.getCell(columnIndex).toString()));
            rowElement.appendChild(valueElement);
        }

        final Element targetElement =
            ownerDocument.createElementNS(PMMLUtils.getPMMLCurrentVersionNamespace(), "target");
        targetElement.appendChild(ownerDocument.createTextNode(dataRow.getCell(targetColumn).toString()));
        rowElement.appendChild(targetElement);
    }

    return NearestNeighborModel.type;
}
Aggregations