Search in sources :

Example 1 with InstanceFields

Use of org.dmg.pmml.InstanceFieldsDocument.InstanceFields in the knime-core project by knime.

From the class PMMLKNNTranslator, method exportTo.

/**
 * {@inheritDoc}
 * <p>
 * Adds a NearestNeighborModel element to the given PMML document. The element receives the mining
 * schema, the configured number of neighbors, an euclidean comparison measure, one KNNInput per
 * included column and the training rows (limited by {@code m_maxRecords} when that value is greater
 * than -1) as an inline table.
 *
 * @param pmmlDoc the PMML document the model element is added to
 * @param spec the port object spec used to write the mining schema; its first target field is
 *            exported as the "target" column of the training instances
 * @return the schema type of the NearestNeighborModel element
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    final DataTableSpec tableSpec = m_table.getDataTableSpec();

    // Maps the table index of every learning column to the XML element name ("col0", "col1", ...)
    // under which its values are written to the inline table. Insertion order is kept.
    final LinkedHashMap<Integer, String> elementNameByIndex = new LinkedHashMap<Integer, String>();
    for (String included : m_includes) {
        final int index = tableSpec.findColumnIndex(included);
        elementNameByIndex.put(index, "col" + elementNameByIndex.size());
    }

    // Model element with its schema and general attributes
    final NearestNeighborModel model = pmmlDoc.getPMML().addNewNearestNeighborModel();
    PMMLMiningSchemaTranslator.writeMiningSchema(spec, model);
    model.setAlgorithmName("K-Nearest Neighbors");
    model.setFunctionName(org.dmg.pmml.MININGFUNCTION.CLASSIFICATION);
    model.setNumberOfNeighbors(BigInteger.valueOf(m_numNeighbors));

    // Euclidean is the only distance measure written by this export
    model.addNewComparisonMeasure().addNewEuclidean();

    // One KNNInput per learning column: these are the fields the distance is computed on
    final KNNInputs knnInputs = model.addNewKNNInputs();
    for (final int columnIndex : elementNameByIndex.keySet()) {
        final KNNInput knnInput = knnInputs.addNewKNNInput();
        knnInput.setField(tableSpec.getColumnSpec(columnIndex).getName());
        knnInput.setCompareFunction(COMPAREFUNCTION.ABS_DIFF);
    }

    // The instance fields tie each column name to the element name used in the inline table
    final TrainingInstances trainingInstances = model.addNewTrainingInstances();
    final InstanceFields instanceFields = trainingInstances.addNewInstanceFields();
    for (java.util.Map.Entry<Integer, String> column : elementNameByIndex.entrySet()) {
        final int columnIndex = column.getKey();
        final InstanceField learningField = instanceFields.addNewInstanceField();
        learningField.setField(tableSpec.getColumnSpec(columnIndex).getName());
        learningField.setColumn(column.getValue());
    }

    // The first target field of the spec is exported under the fixed element name "target"
    final String targetName = spec.getTargetFields().get(0);
    final int targetIndex = tableSpec.findColumnIndex(targetName);
    final InstanceField targetInstanceField = instanceFields.addNewInstanceField();
    targetInstanceField.setField(targetName);
    targetInstanceField.setColumn("target");

    // The inline table carries the data itself: one Row element per table row, holding one child
    // element per learning column followed by the target element
    final InlineTable inlineTable = trainingInstances.addNewInlineTable();
    final Document owner = inlineTable.getDomNode().getOwnerDocument();
    int visited = 0;
    for (DataRow dataRow : m_table) {
        // A record limit greater than -1 caps the number of exported rows
        if (m_maxRecords > -1) {
            visited++;
            if (visited > m_maxRecords) {
                break;
            }
        }
        final Element rowElement = (Element) inlineTable.addNewRow().getDomNode();
        for (java.util.Map.Entry<Integer, String> column : elementNameByIndex.entrySet()) {
            final int columnIndex = column.getKey();
            final Element cellElement =
                owner.createElementNS(PMMLUtils.getPMMLCurrentVersionNamespace(), column.getValue());
            final String cellText = dataRow.getCell(columnIndex).toString();
            cellElement.appendChild(owner.createTextNode(cellText));
            rowElement.appendChild(cellElement);
        }
        final Element targetElement =
            owner.createElementNS(PMMLUtils.getPMMLCurrentVersionNamespace(), "target");
        final String targetText = dataRow.getCell(targetIndex).toString();
        targetElement.appendChild(owner.createTextNode(targetText));
        rowElement.appendChild(targetElement);
    }
    return NearestNeighborModel.type;
}
Also used : NearestNeighborModel(org.dmg.pmml.NearestNeighborModelDocument.NearestNeighborModel) InlineTable(org.dmg.pmml.InlineTableDocument.InlineTable) DataTableSpec(org.knime.core.data.DataTableSpec) InstanceFields(org.dmg.pmml.InstanceFieldsDocument.InstanceFields) Element(org.w3c.dom.Element) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) KNNInput(org.dmg.pmml.KNNInputDocument.KNNInput) BigInteger(java.math.BigInteger) TrainingInstances(org.dmg.pmml.TrainingInstancesDocument.TrainingInstances) KNNInputs(org.dmg.pmml.KNNInputsDocument.KNNInputs) InstanceField(org.dmg.pmml.InstanceFieldDocument.InstanceField) PMML(org.dmg.pmml.PMMLDocument.PMML) DataRow(org.knime.core.data.DataRow) Row(org.dmg.pmml.RowDocument.Row) ComparisonMeasure(org.dmg.pmml.ComparisonMeasureDocument.ComparisonMeasure)

Aggregations

BigInteger (java.math.BigInteger)1 LinkedHashMap (java.util.LinkedHashMap)1 ComparisonMeasure (org.dmg.pmml.ComparisonMeasureDocument.ComparisonMeasure)1 InlineTable (org.dmg.pmml.InlineTableDocument.InlineTable)1 InstanceField (org.dmg.pmml.InstanceFieldDocument.InstanceField)1 InstanceFields (org.dmg.pmml.InstanceFieldsDocument.InstanceFields)1 KNNInput (org.dmg.pmml.KNNInputDocument.KNNInput)1 KNNInputs (org.dmg.pmml.KNNInputsDocument.KNNInputs)1 NearestNeighborModel (org.dmg.pmml.NearestNeighborModelDocument.NearestNeighborModel)1 PMMLDocument (org.dmg.pmml.PMMLDocument)1 PMML (org.dmg.pmml.PMMLDocument.PMML)1 Row (org.dmg.pmml.RowDocument.Row)1 TrainingInstances (org.dmg.pmml.TrainingInstancesDocument.TrainingInstances)1 DataRow (org.knime.core.data.DataRow)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 Document (org.w3c.dom.Document)1 Element (org.w3c.dom.Element)1