Search in sources :

Example 1 with ClusteringField

Use of org.dmg.pmml.ClusteringFieldDocument.ClusteringField in the project knime-core by knime.

In the class PMMLClusterTranslator, method initializeFrom.

/**
 * {@inheritDoc}
 *
 * <p>Reads the first clustering model of the given PMML document and
 * initializes this translator from it: the used column names (one per
 * ClusteringField), the number of clusters, the cluster labels, the
 * prototype vectors, the cluster coverage (for clusters that have a size
 * set) and the comparison measure.</p>
 *
 * <p>Missing value weights, when present, are only validated: every weight
 * that is not equal to one is reported as an error in the log, for quoted
 * and unquoted array content alike.</p>
 *
 * @throws IllegalArgumentException if a clustering field uses a compare
 *             function other than absDiff, if a cluster array contains
 *             fewer values than there are used columns, or if the
 *             comparison measure is neither euclidean nor squaredEuclidean
 * @throws NumberFormatException if a prototype value or a missing value
 *             weight cannot be parsed as a double
 */
@Override
public void initializeFrom(final PMMLDocument pmmlDoc) {
    PMML pmml = pmmlDoc.getPMML();
    DerivedFieldMapper mapper = new DerivedFieldMapper(pmmlDoc);
    ClusteringModelDocument.ClusteringModel pmmlClusteringModel = pmml.getClusteringModelArray(0);
    // initialize m_usedColumns from the ClusteringFields; this is done
    // exactly once so that the column count used to size the prototypes
    // below cannot drift from the collected columns
    for (ClusteringField cf : pmmlClusteringModel.getClusteringFieldArray()) {
        m_usedColumns.add(mapper.getColumnName(cf.getField()));
        if (COMPAREFUNCTION.ABS_DIFF != cf.getCompareFunction()) {
            // plain concatenation (no toString() call) so that a null
            // compare function still ends in the IllegalArgumentException
            // below instead of a NullPointerException
            LOGGER.error("Comparison Function " + cf.getCompareFunction() + " is not supported!");
            throw new IllegalArgumentException("Only the absolute difference (\"absDiff\") as " + "compare function is supported!");
        }
    }
    // ---------------------------------------------------
    // initialize Clusters
    m_nrOfClusters = pmmlClusteringModel.sizeOfClusterArray();
    final int nrOfColumns = m_usedColumns.size();
    m_prototypes = new double[m_nrOfClusters][nrOfColumns];
    m_labels = new String[m_nrOfClusters];
    m_clusterCoverage = new int[m_nrOfClusters];
    for (int i = 0; i < m_nrOfClusters; i++) {
        ClusterDocument.Cluster currentCluster = pmmlClusteringModel.getClusterArray(i);
        m_labels[i] = currentCluster.getName();
        // in KNIME learner: m_labels[i] = "cluster_" + i;
        ArrayType clusterArray = currentCluster.getArray();
        // NOTE(review): the cursor returned by newCursor() is never
        // disposed - confirm whether it has to be released explicitly
        String[] stringValues = splitArrayContent(clusterArray.newCursor().getTextValue());
        if (stringValues.length < nrOfColumns) {
            // fail with a descriptive message instead of an
            // ArrayIndexOutOfBoundsException in the loop below
            throw new IllegalArgumentException("Cluster \"" + m_labels[i] + "\" contains " + stringValues.length + " value(s) but " + nrOfColumns + " clustering field(s) are used.");
        }
        for (int j = 0; j < nrOfColumns; j++) {
            m_prototypes[i][j] = Double.parseDouble(stringValues[j]);
        }
        if (currentCluster.isSetSize()) {
            m_clusterCoverage[i] = currentCluster.getSize().intValue();
        }
    }
    // ---------------------------------------------------
    // validate Missing Value Weights (only a weight of one is supported)
    if (pmmlClusteringModel.isSetMissingValueWeights()) {
        ArrayType weights = pmmlClusteringModel.getMissingValueWeights().getArray();
        for (String weight : splitArrayContent(weights.newCursor().getTextValue())) {
            if (weight.isEmpty()) {
                // an empty array yields a single empty token; nothing to check
                continue;
            }
            if (Double.parseDouble(weight) != 1.0) {
                LOGGER.error("Missing Value Weight not equals one is not supported!");
            }
        }
    }
    // --------------------------------------------
    // initialize Comparison Measure
    ComparisonMeasureDocument.ComparisonMeasure pmmlComparisonMeasure = pmmlClusteringModel.getComparisonMeasure();
    if (pmmlComparisonMeasure.isSetSquaredEuclidean()) {
        m_measure = ComparisonMeasure.squaredEuclidean;
    } else if (pmmlComparisonMeasure.isSetEuclidean()) {
        m_measure = ComparisonMeasure.euclidean;
    } else {
        // report the name of the first child node as the measure that was found
        String measure = pmmlComparisonMeasure.getDomNode().getFirstChild().getNodeName();
        throw new IllegalArgumentException("\"" + ComparisonMeasure.euclidean + "\" and \"" + ComparisonMeasure.squaredEuclidean + "\" are the only supported comparison " + "measures! Found " + measure + ".");
    }
    if (Kind.SIMILARITY == pmmlComparisonMeasure.getKind()) {
        LOGGER.error("A Similarity Kind of Comparison Measure is not " + "supported!");
    }
}

/**
 * Splits the text content of a PMML array element into its entries.
 *
 * <p>Content without double quotes is split at whitespace. Content with
 * double quotes is split at each double quote that is followed by a space;
 * the delimiting quotes are removed, escaped quotes are turned back into
 * plain double quotes and every entry is trimmed.</p>
 *
 * @param rawContent the text content of the array element
 * @return the entries of the array in document order
 */
private String[] splitArrayContent(final String rawContent) {
    String content = rawContent.trim();
    if (!content.contains(DOUBLE_QUOT)) {
        return content.split("\\s+");
    }
    // temporarily mask escaped quotes so that they survive the removal of
    // the delimiting quotes below
    content = content.replace(BACKSLASH + DOUBLE_QUOT, TAB);
    /* TODO We need to take care of the cases with double quotes,
     * e.g ==> <Array n="3" type="string">"Cheval  Blanc" "TABTAB"
     * "Latour"</Array> */
    String[] entries = content.split(DOUBLE_QUOT + SPACE);
    for (int j = 0; j < entries.length; j++) {
        entries[j] = entries[j].replace(DOUBLE_QUOT, "").replace(TAB, DOUBLE_QUOT).trim();
    }
    return entries;
}
Also used : ClusteringModel(org.dmg.pmml.ClusteringModelDocument.ClusteringModel) ClusteringModelDocument(org.dmg.pmml.ClusteringModelDocument) ClusteringField(org.dmg.pmml.ClusteringFieldDocument.ClusteringField) ComparisonMeasureDocument(org.dmg.pmml.ComparisonMeasureDocument) ClusterDocument(org.dmg.pmml.ClusterDocument) ArrayType(org.dmg.pmml.ArrayType) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) PMML(org.dmg.pmml.PMMLDocument.PMML)

Aggregations

ArrayType (org.dmg.pmml.ArrayType)1 ClusterDocument (org.dmg.pmml.ClusterDocument)1 ClusteringField (org.dmg.pmml.ClusteringFieldDocument.ClusteringField)1 ClusteringModelDocument (org.dmg.pmml.ClusteringModelDocument)1 ClusteringModel (org.dmg.pmml.ClusteringModelDocument.ClusteringModel)1 ComparisonMeasureDocument (org.dmg.pmml.ComparisonMeasureDocument)1 PMML (org.dmg.pmml.PMMLDocument.PMML)1 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)1