Use of org.dmg.pmml.ClusteringFieldDocument.ClusteringField in project knime-core by knime.
The method initializeFrom of the class PMMLClusterTranslator.
/**
 * {@inheritDoc}
 *
 * <p>Reads the first clustering model of the given PMML document and initializes the used columns, the
 * cluster prototypes, labels and coverage as well as the comparison measure of this translator from it.</p>
 *
 * @throws IllegalArgumentException if a clustering field uses a compare function other than
 *             {@code absDiff}, if a cluster array holds fewer values than there are clustering fields,
 *             if a value cannot be parsed as a number (as {@link NumberFormatException}), or if the
 *             comparison measure is missing or neither euclidean nor squared euclidean
 */
@Override
public void initializeFrom(final PMMLDocument pmmlDoc) {
    PMML pmml = pmmlDoc.getPMML();
    DerivedFieldMapper mapper = new DerivedFieldMapper(pmmlDoc);
    ClusteringModelDocument.ClusteringModel pmmlClusteringModel = pmml.getClusteringModelArray(0);

    // initialize ClusteringFields; this is the single place where m_usedColumns is filled so that
    // every column is registered exactly once
    for (ClusteringField cf : pmmlClusteringModel.getClusteringFieldArray()) {
        m_usedColumns.add(mapper.getColumnName(cf.getField()));
        if (COMPAREFUNCTION.ABS_DIFF != cf.getCompareFunction()) {
            // plain concatenation keeps the message null-safe in case no compare function is set
            LOGGER.error("Comparison Function " + cf.getCompareFunction() + " is not supported!");
            throw new IllegalArgumentException("Only the absolute difference (\"absDiff\") as " + "compare function is supported!");
        }
    }

    // ---------------------------------------------------
    // initialize Clusters
    m_nrOfClusters = pmmlClusteringModel.sizeOfClusterArray();
    final int nrOfColumns = m_usedColumns.size();
    m_prototypes = new double[m_nrOfClusters][nrOfColumns];
    m_labels = new String[m_nrOfClusters];
    m_clusterCoverage = new int[m_nrOfClusters];
    for (int i = 0; i < m_nrOfClusters; i++) {
        ClusterDocument.Cluster currentCluster = pmmlClusteringModel.getClusterArray(i);
        m_labels[i] = currentCluster.getName();
        // in KNIME learner: m_labels[i] = "cluster_" + i;
        ArrayType clusterArray = currentCluster.getArray();
        String[] stringValues = splitArrayContent(clusterArray.newCursor().getTextValue());
        if (stringValues.length < nrOfColumns) {
            // fail with a descriptive message instead of an ArrayIndexOutOfBoundsException
            throw new IllegalArgumentException("Cluster \"" + m_labels[i] + "\" defines " + stringValues.length
                    + " value(s) but " + nrOfColumns + " clustering field(s) are used.");
        }
        for (int j = 0; j < nrOfColumns; j++) {
            m_prototypes[i][j] = Double.parseDouble(stringValues[j]);
        }
        if (currentCluster.isSetSize()) {
            m_clusterCoverage[i] = currentCluster.getSize().intValue();
        }
    }

    // ---------------------------------------------------
    // check the missing value weights; every weight is inspected, no matter whether the array
    // content is quoted or not
    if (pmmlClusteringModel.isSetMissingValueWeights()) {
        ArrayType weights = pmmlClusteringModel.getMissingValueWeights().getArray();
        for (String weight : splitArrayContent(weights.newCursor().getTextValue())) {
            if (weight.isEmpty()) {
                // splitting empty content yields a single empty token; nothing to check
                continue;
            }
            if (Double.parseDouble(weight) != 1.0) {
                String msg = "Missing Value Weight not equals one" + " is not supported!";
                LOGGER.error(msg);
            }
        }
    }

    // --------------------------------------------
    // initialize Comparison Measure
    ComparisonMeasureDocument.ComparisonMeasure pmmlComparisonMeasure = pmmlClusteringModel.getComparisonMeasure();
    if (pmmlComparisonMeasure == null) {
        throw new IllegalArgumentException("The clustering model does not define a comparison measure!");
    }
    if (pmmlComparisonMeasure.isSetSquaredEuclidean()) {
        m_measure = ComparisonMeasure.squaredEuclidean;
    } else if (pmmlComparisonMeasure.isSetEuclidean()) {
        m_measure = ComparisonMeasure.euclidean;
    } else {
        // the element may have no child at all, so guard the lookup used for the message
        org.w3c.dom.Node firstChild = pmmlComparisonMeasure.getDomNode().getFirstChild();
        String measure = firstChild == null ? "none" : firstChild.getNodeName();
        throw new IllegalArgumentException("\"" + ComparisonMeasure.euclidean + "\" and \"" + ComparisonMeasure.squaredEuclidean + "\" are the only supported comparison " + "measures! Found " + measure + ".");
    }
    if (Kind.SIMILARITY == pmmlComparisonMeasure.getKind()) {
        LOGGER.error("A Similarity Kind of Comparison Measure is not " + "supported!");
    }
}

/**
 * Splits the text content of a PMML array element into its single values.
 *
 * <p>Content without double quotes is split at whitespace. Content with double quotes is split at
 * each quote followed by a space; the delimiting quotes are removed, escaped quotes are restored
 * as plain quotes and every value is trimmed.</p>
 *
 * @param rawContent the text content of the array element, {@code null} is treated as empty content
 * @return the values in document order, never {@code null}
 */
private String[] splitArrayContent(final String rawContent) {
    String content = rawContent == null ? "" : rawContent.trim();
    if (!content.contains(DOUBLE_QUOT)) {
        return content.split("\\s+");
    }
    // mask escaped quotes with TAB so they survive the removal of the delimiting quotes below
    content = content.replace(BACKSLASH + DOUBLE_QUOT, TAB);
    /* TODO We need to take care of the cases with double quotes,
     * e.g ==> <Array n="3" type="string">"Cheval Blanc" "TABTAB"
     "Latour"</Array> */
    String[] values = content.split(DOUBLE_QUOT + SPACE);
    for (int j = 0; j < values.length; j++) {
        String value = values[j].replace(DOUBLE_QUOT, "");
        // turn the masked escaped quotes back into real quotes
        value = value.replace(TAB, DOUBLE_QUOT);
        values[j] = value.trim();
    }
    return values;
}
Aggregations