use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.
the class BinarizerConverter method encodeFeatures.
/**
 * Encodes every input column of the {@link Binarizer} as a categorical derived field.
 *
 * <p>For each input column the derived field is defined by the PMML expression
 * {@code if(lessOrEqual(x, threshold), 0, 1)}, and is exposed as an
 * {@link IndexFeature} over the two values {@code 0d} and {@code 1d}.
 *
 * @param encoder the encoder used to look up input features and to create derived fields
 * @return one feature per input column, in input column order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer binarizer = getTransformer();
    Double cutoff = binarizer.getThreshold();
    InOutMode mode = getInputMode();

    List<Feature> features = new ArrayList<>();

    // Running position of the current column; it is handed to formatName() below.
    int index = 0;

    for (String column : mode.getInputCols(binarizer)) {
        ContinuousFeature input = encoder.getOnlyFeature(column).toContinuousFeature();

        // Condition part of the IF expression: input <= threshold.
        Apply atOrBelowCutoff = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, input.ref(), PMMLUtil.createConstant(cutoff));

        // IF(condition, 0, 1)
        Apply binarize = new Apply(PMMLFunctions.IF)
            .addExpressions(atOrBelowCutoff)
            .addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

        DerivedField binarized = encoder.createDerivedField(formatName(binarizer, index), OpType.CATEGORICAL, DataType.DOUBLE, binarize);

        features.add(new IndexFeature(encoder, binarized, Arrays.asList(0d, 1d)));

        index++;
    }

    return features;
}
use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.
the class ClusteringModelConverter method registerOutputFields.
/**
 * Registers the prediction column of a clustering model with the encoder.
 *
 * <p>Two derived output fields are created on {@code pmmlModel}: a non-final
 * predicted field of string data type whose name is built from {@code "pmml"} and the
 * prediction column name, and a transformed-value field of integer data type that is
 * named after the prediction column and references the first one. The second
 * field is then stored under the prediction column as an {@link IndexFeature}
 * over the cluster categories.
 *
 * @param label the model label (not used by this method)
 * @param pmmlModel the PMML model that receives the output fields
 * @param encoder the encoder that creates the derived fields and stores the feature
 * @return always an empty list
 */
@Override
public List<OutputField> registerOutputFields(Label label, org.dmg.pmml.Model pmmlModel, SparkMLEncoder encoder) {
    T model = getTransformer();

    List<Integer> clusters = LabelUtil.createTargetCategories(getNumberOfClusters());

    String predictionCol = model.getPredictionCol();

    // Intermediate predicted field, flagged as a non-final result.
    FieldName pmmlPredictionName = FieldNameUtil.create("pmml", predictionCol);
    OutputField pmmlPredictedOutputField = ModelUtil.createPredictedField(pmmlPredictionName, OpType.CATEGORICAL, DataType.STRING)
        .setFinalResult(false);
    DerivedOutputField pmmlPredictedField = encoder.createDerivedField(pmmlModel, pmmlPredictedOutputField, true);

    // Integer-typed field that refers back to the intermediate field.
    OutputField predictedOutputField = new OutputField(FieldName.create(predictionCol), OpType.CATEGORICAL, DataType.INTEGER)
        .setResultFeature(ResultFeature.TRANSFORMED_VALUE)
        .setExpression(new FieldRef(pmmlPredictedField.getName()));
    DerivedOutputField predictedField = encoder.createDerivedField(pmmlModel, predictedOutputField, true);

    IndexFeature predictionFeature = new IndexFeature(encoder, predictedField, clusters);
    encoder.putOnlyFeature(predictionCol, predictionFeature);

    return Collections.emptyList();
}
use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.
the class ClassificationModelConverter method registerOutputFields.
/**
 * Registers the prediction column, and the probability column when the model
 * has one, of a classification model with the encoder.
 *
 * <p>The prediction is exposed through two derived output fields: a non-final
 * predicted field carrying the label's own data type, and a transformed-value
 * field of double data type that maps the label values onto the target
 * categories via a {@code MapValues} expression. The latter is stored under the
 * prediction column as an {@link IndexFeature}.
 *
 * <p>If the model implements {@code HasProbabilityCol}, one probability output
 * field per label value is created, added to the returned list, and stored
 * under the probability column as a list of {@link ContinuousFeature}s.
 *
 * @param label the model label; it is cast to {@link CategoricalLabel}
 * @param pmmlModel the PMML model that receives the derived output fields
 * @param encoder the encoder that creates the derived fields and stores the features
 * @return the probability output fields, or an empty list if the model has no probability column
 */
@Override
public List<OutputField> registerOutputFields(Label label, Model pmmlModel, SparkMLEncoder encoder) {
    T model = getTransformer();

    CategoricalLabel categoricalLabel = (CategoricalLabel) label;
    List<Integer> categories = LabelUtil.createTargetCategories(categoricalLabel.size());

    String predictionCol = model.getPredictionCol();
    Boolean keepPredictionCol = (Boolean) getOption(HasPredictionModelOptions.OPTION_KEEP_PREDICTIONCOL, Boolean.TRUE);

    // Intermediate predicted field, flagged as a non-final result.
    FieldName pmmlPredictionName = FieldNameUtil.create("pmml", predictionCol);
    OutputField pmmlPredictedOutputField = ModelUtil.createPredictedField(pmmlPredictionName, OpType.CATEGORICAL, categoricalLabel.getDataType())
        .setFinalResult(false);
    DerivedOutputField pmmlPredictedField = encoder.createDerivedField(pmmlModel, pmmlPredictedOutputField, keepPredictionCol);

    // Maps the label values onto the target categories, typed as double.
    MapValues valueToCategory = PMMLUtil.createMapValues(pmmlPredictedField.getName(), categoricalLabel.getValues(), categories)
        .setDataType(DataType.DOUBLE);
    OutputField predictedOutputField = new OutputField(FieldName.create(predictionCol), OpType.CONTINUOUS, DataType.DOUBLE)
        .setResultFeature(ResultFeature.TRANSFORMED_VALUE)
        .setExpression(valueToCategory);
    DerivedOutputField predictedField = encoder.createDerivedField(pmmlModel, predictedOutputField, keepPredictionCol);

    encoder.putOnlyFeature(predictionCol, new IndexFeature(encoder, predictedField, categories));

    List<OutputField> probabilityFields = new ArrayList<>();

    // Nothing more to register when the model has no probability column.
    if (!(model instanceof HasProbabilityCol)) {
        return probabilityFields;
    }

    String probabilityCol = ((HasProbabilityCol) model).getProbabilityCol();

    List<Feature> probabilityFeatures = new ArrayList<>();

    for (int pos = 0; pos < categoricalLabel.size(); pos++) {
        Object value = categoricalLabel.getValue(pos);

        OutputField probabilityField = ModelUtil.createProbabilityField(FieldNameUtil.create(probabilityCol, value), DataType.DOUBLE, value);

        probabilityFields.add(probabilityField);
        probabilityFeatures.add(new ContinuousFeature(encoder, probabilityField));
    }

    // XXX (marker carried over from the original code; its intent is not documented here)
    encoder.putFeatures(probabilityCol, probabilityFeatures);

    return probabilityFields;
}
use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.
the class ModelConverter method getLabel.
/**
 * Resolves the label of the model from its label column.
 *
 * <p>When the model implements {@code HasLabelCol}, the feature registered under
 * the label column is turned into a label according to the mining function:
 * <ul>
 *   <li>{@code CLASSIFICATION}: a {@link BooleanFeature} or a
 *   {@link CategoricalFeature} becomes a {@link CategoricalLabel} directly; a
 *   {@link ContinuousFeature} is first converted to a categorical field over
 *   the target categories (the class count comes from the model if it is a
 *   {@code ClassificationModel}, otherwise it is 2), and the label column is
 *   re-registered as an {@link IndexFeature}.</li>
 *   <li>{@code REGRESSION}: the field is converted to continuous, its data type
 *   is set to double, and a {@link ContinuousLabel} is created.</li>
 * </ul>
 *
 * <p>For a {@code ClassificationModel}, the label size is finally checked
 * against the model's class count.
 *
 * @param encoder the encoder that holds the features and fields
 * @return the resolved label; the local stays {@code null} if the model does not implement {@code HasLabelCol}
 * @throws IllegalArgumentException if the label feature type or the mining function is not supported
 */
public Label getLabel(SparkMLEncoder encoder) {
    T model = getTransformer();

    Label label = null;

    if (model instanceof HasLabelCol) {
        String labelCol = ((HasLabelCol) model).getLabelCol();

        Feature labelFeature = encoder.getOnlyFeature(labelCol);

        MiningFunction miningFunction = getMiningFunction();
        switch (miningFunction) {
            case CLASSIFICATION: {
                if (labelFeature instanceof BooleanFeature) {
                    BooleanFeature booleanLabel = (BooleanFeature) labelFeature;

                    label = new CategoricalLabel(booleanLabel.getName(), booleanLabel.getDataType(), booleanLabel.getValues());
                } else if (labelFeature instanceof CategoricalFeature) {
                    DataField dataField = (DataField) ((CategoricalFeature) labelFeature).getField();

                    label = new CategoricalLabel(dataField);
                } else if (labelFeature instanceof ContinuousFeature) {
                    ContinuousFeature continuousLabel = (ContinuousFeature) labelFeature;

                    // Fall back to two classes when the model cannot report its class count.
                    int classCount = (model instanceof ClassificationModel)
                        ? ((ClassificationModel<?, ?>) model).numClasses()
                        : 2;

                    List<Integer> categories = LabelUtil.createTargetCategories(classCount);

                    Field<?> categoricalField = encoder.toCategorical(continuousLabel.getName(), categories);

                    // Replace the feature registered under the label column.
                    encoder.putOnlyFeature(labelCol, new IndexFeature(encoder, categoricalField, categories));

                    label = new CategoricalLabel(categoricalField.getName(), categoricalField.getDataType(), categories);
                } else {
                    throw new IllegalArgumentException("Expected a categorical or categorical-like continuous feature, got " + labelFeature);
                }
                break;
            }
            case REGRESSION: {
                Field<?> continuousField = encoder.toContinuous(labelFeature.getName());
                continuousField.setDataType(DataType.DOUBLE);

                label = new ContinuousLabel(continuousField.getName(), continuousField.getDataType());
                break;
            }
            default:
                throw new IllegalArgumentException("Mining function " + miningFunction + " is not supported");
        }
    }

    // Check that the label size agrees with the model's class count.
    if (model instanceof ClassificationModel) {
        int expectedClasses = ((ClassificationModel<?, ?>) model).numClasses();

        SchemaUtil.checkSize(expectedClasses, (CategoricalLabel) label);
    }

    return label;
}
use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.
the class BucketizerConverter method encodeFeatures.
/**
 * Encodes every input column of the {@link Bucketizer} as a categorical derived field.
 *
 * <p>For each input column a {@code Discretize} expression of integer data type
 * is built from the column's split points: every pair of consecutive splits
 * {@code splits[j]}, {@code splits[j + 1]} becomes one bin with category
 * {@code j}. All bins are closed-open except the last one, which is
 * closed-closed. The derived field is exposed as an {@link IndexFeature} over
 * the bin categories.
 *
 * @param encoder the encoder used to look up input features and to create derived fields
 * @return one feature per input column, in input column order
 * @throws IllegalArgumentException if the input mode is neither {@code SINGLE} nor {@code MULTIPLE}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    InOutMode inputMode = getInputMode();

    String[] inputCols;
    double[][] splitsArray;

    if ((InOutMode.SINGLE).equals(inputMode)) {
        inputCols = inputMode.getInputCols(transformer);
        // Wrap the single splits array so both modes share the loop below.
        splitsArray = new double[][] { transformer.getSplits() };
    } else if ((InOutMode.MULTIPLE).equals(inputMode)) {
        inputCols = inputMode.getInputCols(transformer);
        splitsArray = transformer.getSplitsArray();
    } else {
        // Name the offending value, so that the failure is diagnosable.
        throw new IllegalArgumentException("Input mode " + inputMode + " is not supported");
    }

    List<Feature> result = new ArrayList<>();

    for (int i = 0; i < inputCols.length; i++) {
        String inputCol = inputCols[i];
        double[] splits = splitsArray[i];

        Feature feature = encoder.getOnlyFeature(inputCol);
        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        Discretize discretize = new Discretize(continuousFeature.getName())
            .setDataType(DataType.INTEGER);

        List<Integer> categories = new ArrayList<>();

        // n split points define n - 1 bins.
        for (int j = 0; j < (splits.length - 1); j++) {
            Integer category = j;
            categories.add(category);

            // Only the last bin includes its right margin.
            Interval.Closure closure = (j < (splits.length - 2)) ? Interval.Closure.CLOSED_OPEN : Interval.Closure.CLOSED_CLOSED;

            Interval interval = new Interval(closure)
                .setLeftMargin(formatMargin(splits[j]))
                .setRightMargin(formatMargin(splits[j + 1]));

            discretize.addDiscretizeBins(new DiscretizeBin(category, interval));
        }

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CATEGORICAL, DataType.INTEGER, discretize);

        result.add(new IndexFeature(encoder, derivedField, categories));
    }

    return result;
}
Aggregations