use of org.dmg.pmml.DataField in project jpmml-r by jpmml.
the class KMeansConverter method encodeSchema.
/**
 * Declares the input schema of the k-means model: every name found in the
 * second dimension names of the "centers" element becomes a continuous,
 * double-typed data field that is registered as a feature.
 *
 * @param encoder the encoder that receives the created data fields and features.
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector kmeans = getObject();

    RDoubleVector centers = (RDoubleVector) kmeans.getValue("centers");

    // Names along dimension 1 of "centers" identify the input variables
    RStringVector centerColumns = centers.dimnames(1);

    for (String centerColumn : centerColumns.getValues()) {
        FieldName fieldName = FieldName.create(centerColumn);

        DataField inputField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.DOUBLE);

        encoder.addFeature(inputField);
    }
}
use of org.dmg.pmml.DataField in project jpmml-r by jpmml.
the class BinaryTreeConverter method encodeVariableList.
/**
 * Recursively visits a tree node and its "left" and "right" children, making
 * sure that the split variable of every non-terminal node has a data field,
 * a feature and an entry in {@code featureIndexes}.
 *
 * <p>A split variable that the encoder does not know yet is declared as
 * categorical when the split point is an integer vector (its categories are
 * taken from the "levels" attribute of the split point), or as a continuous
 * double when the split point is a double vector.</p>
 *
 * @param tree the tree node; read for its "terminal", "psplit", "left" and "right" elements.
 * @param encoder the encoder that is queried for, and receives, data fields and features.
 * @throws IllegalArgumentException if the split point is neither an integer nor a double vector.
 */
private void encodeVariableList(RGenericVector tree, RExpEncoder encoder) {
    RBooleanVector terminal = (RBooleanVector) tree.getValue("terminal");
    RGenericVector primarySplit = (RGenericVector) tree.getValue("psplit");
    RGenericVector leftChild = (RGenericVector) tree.getValue("left");
    RGenericVector rightChild = (RGenericVector) tree.getValue("right");

    boolean isTerminal = Boolean.TRUE.equals(terminal.asScalar());

    // Terminal nodes carry no split, so there is nothing to declare or descend into
    if (!isTerminal) {
        RNumberVector<?> splitpoint = (RNumberVector<?>) primarySplit.getValue("splitpoint");
        RStringVector variableName = (RStringVector) primarySplit.getValue("variableName");

        FieldName name = FieldName.create(variableName.asScalar());

        DataField splitField = encoder.getDataField(name);

        // Declare the variable only the first time it is encountered
        if (splitField == null) {

            if (splitpoint instanceof RIntegerVector) {
                RStringVector levels = (RStringVector) splitpoint.getAttributeValue("levels");

                splitField = encoder.createDataField(name, OpType.CATEGORICAL, null, levels.getValues());
            } else if (splitpoint instanceof RDoubleVector) {
                splitField = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
            } else {
                throw new IllegalArgumentException();
            }

            encoder.addFeature(splitField);

            // The next free index equals the number of variables registered so far
            this.featureIndexes.put(name, this.featureIndexes.size());
        }

        encodeVariableList(leftChild, encoder);
        encodeVariableList(rightChild, encoder);
    }
}
use of org.dmg.pmml.DataField in project jpmml-r by jpmml.
the class EarthConverter method encodeSchema.
/**
 * Encodes the schema of an "earth" model object.
 *
 * <p>The formula is rebuilt from the "terms" element (categories come from
 * "xlevels"), the label is the field named by the second dimension names of
 * "coefficients", and one feature is added per selected term, skipping the
 * first one (presumably the intercept - confirm against the earth object
 * layout). For each term, the matching rows of the "dirs" and "cuts" matrices
 * decide how every predictor takes part:</p>
 * <ul>
 *   <li>{@code 0} - the predictor is not part of the term;</li>
 *   <li>{@code -1} or {@code 1} - the predictor enters via a hinge function, which is
 *   materialized as a (reused, if already defined) derived field;</li>
 *   <li>{@code 2} - the predictor's feature is used as is.</li>
 * </ul>
 * <p>A term with several participating predictors becomes an interaction feature.</p>
 *
 * @param encoder the encoder that receives the label, the derived fields and the features.
 * @throws IllegalArgumentException if the "xlevels" element is missing, if the dimension
 *         names of "dirs" and "cuts" disagree, if a direction value is not one of
 *         {@code -1, 0, 1, 2}, or if a selected term references no predictors.
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector earth = getObject();

    RDoubleVector dirs = (RDoubleVector) earth.getValue("dirs");
    RDoubleVector cuts = (RDoubleVector) earth.getValue("cuts");
    RDoubleVector selectedTerms = (RDoubleVector) earth.getValue("selected.terms");
    RDoubleVector coefficients = (RDoubleVector) earth.getValue("coefficients");
    RExp terms = earth.getValue("terms");

    final RGenericVector xlevels;

    try {
        xlevels = (RGenericVector) earth.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }

    RStringVector dirsRows = dirs.dimnames(0);
    RStringVector dirsColumns = dirs.dimnames(1);

    RStringVector cutsRows = cuts.dimnames(0);
    RStringVector cutsColumns = cuts.dimnames(1);

    // Both matrices are indexed in lockstep below, so they must share their layout
    if (!(dirsRows.getValues()).equals(cutsRows.getValues()) || !(dirsColumns.getValues()).equals(cutsColumns.getValues())) {
        throw new IllegalArgumentException("The dimension names of the \'dirs\' and \'cuts\' elements do not match");
    }

    int rows = dirsRows.size();
    int columns = dirsColumns.size();

    List<String> predictorNames = dirsColumns.getValues();

    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {

            if (xlevels.hasValue(variable)) {
                RStringVector levels = (RStringVector) xlevels.getValue(variable);

                return levels.getValues();
            }

            return null;
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };

    Formula formula = FormulaUtil.createFormula(terms, context, encoder);

    // Dependent variable
    {
        RStringVector yNames = coefficients.dimnames(1);

        FieldName name = FieldName.create(yNames.asScalar());

        DataField dataField = (DataField) encoder.getField(name);

        encoder.setLabel(dataField);
    }

    // Independent variables
    for (int i = 1; i < selectedTerms.size(); i++) {
        // Selected terms are one-based row numbers of the "dirs" and "cuts" matrices
        int termIndex = ValueUtil.asInt(selectedTerms.getValue(i)) - 1;

        List<Double> dirsRow = FortranMatrixUtil.getRow(dirs.getValues(), rows, columns, termIndex);
        List<Double> cutsRow = FortranMatrixUtil.getRow(cuts.getValues(), rows, columns, termIndex);

        List<Feature> features = new ArrayList<>();

        for (int j = 0; j < predictorNames.size(); j++) {
            String predictorName = predictorNames.get(j);

            int dir = ValueUtil.asInt(dirsRow.get(j));
            double cut = cutsRow.get(j);

            // Zero direction: this predictor does not take part in the term
            if (dir == 0) {
                continue;
            }

            Feature feature = formula.resolveFeature(predictorName);

            switch (dir) {
                case -1:
                case 1:
                    {
                        feature = feature.toContinuousFeature();

                        FieldName name = FieldName.create(formatHingeFunction(dir, feature, cut));

                        // The same hinge function may occur in several terms; define it only once
                        DerivedField derivedField = encoder.getDerivedField(name);
                        if (derivedField == null) {
                            Apply apply = createHingeFunction(dir, feature, cut);

                            derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, apply);
                        }

                        feature = new ContinuousFeature(encoder, derivedField);
                    }
                    break;
                case 2:
                    // The resolved feature is used without a hinge transformation
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported direction " + dir + " for predictor " + predictorName);
            }

            features.add(feature);
        }

        Feature feature;

        if (features.size() == 1) {
            feature = features.get(0);
        } else if (features.size() > 1) {
            // Name the interaction after the row that its data was read from. Indexing the
            // row names with the loop counter would pick the wrong term name whenever the
            // selected terms are not the leading rows of the matrix.
            String termName = dirsRows.getValue(termIndex);

            feature = new InteractionFeature(encoder, FieldName.create(termName), DataType.DOUBLE, features);
        } else {
            throw new IllegalArgumentException("Selected term " + dirsRows.getValue(termIndex) + " does not reference any predictors");
        }

        encoder.addFeature(feature);
    }
}
use of org.dmg.pmml.DataField in project jpmml-r by jpmml.
the class SVMConverter method encodeNonFormula.
/**
 * Encodes the schema of an SVM model object that carries no formula information.
 *
 * <p>The label depends on the SVM type: the two classification types get a
 * categorical string field named "_target" whose categories are the "levels"
 * element, one-classification gets an anonymous continuous double label, and
 * the two regression types get a continuous double field named "_target".
 * Every name in the second dimension names of the "SV" element becomes a
 * continuous double data field; the resulting features are passed through
 * {@code scale(...)} before being added to the encoder.</p>
 *
 * @param encoder the encoder that receives the label, the data fields and the features.
 */
private void encodeNonFormula(RExpEncoder encoder) {
    RGenericVector svm = getObject();

    RDoubleVector type = (RDoubleVector) svm.getValue("type");
    RDoubleVector sv = (RDoubleVector) svm.getValue("SV");
    RVector<?> levels = (RVector<?>) svm.getValue("levels");

    // The "type" element is used as a zero-based position into the Type enum constants
    Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];

    // Only the column names are needed here; the row names of "SV" are not used
    RStringVector columnNames = sv.dimnames(1);

    // Dependent variable
    {
        FieldName name = FieldName.create("_target");

        switch (svmType) {
            case C_CLASSIFICATION:
            case NU_CLASSIFICATION:
                {
                    RStringVector stringLevels = (RStringVector) levels;

                    DataField dataField = encoder.createDataField(name, OpType.CATEGORICAL, DataType.STRING, stringLevels.getValues());

                    encoder.setLabel(dataField);
                }
                break;
            case ONE_CLASSIFICATION:
                {
                    encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
                }
                break;
            case EPS_REGRESSION:
            case NU_REGRESSION:
                {
                    DataField dataField = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);

                    encoder.setLabel(dataField);
                }
                break;
        }
    }

    List<Feature> features = new ArrayList<>();

    // Independent variables
    for (int i = 0; i < columnNames.size(); i++) {
        String columnName = columnNames.getValue(i);

        DataField dataField = encoder.createDataField(FieldName.create(columnName), OpType.CONTINUOUS, DataType.DOUBLE);

        features.add(new ContinuousFeature(encoder, dataField));
    }

    // Features are registered only after scale(...) has had the chance to replace them
    features = scale(features, encoder);

    for (Feature feature : features) {
        encoder.addFeature(feature);
    }
}
use of org.dmg.pmml.DataField in project jpmml-r by jpmml.
the class SVMConverter method encodeFormula.
/**
 * Encodes the schema of an SVM model object that was fitted via a formula.
 *
 * <p>The formula is rebuilt from the "terms" element (categories come from
 * "xlevels"). If the "response" attribute of the terms is non-zero, it is
 * treated as the one-based position of the response field within the formula:
 * for the two classification types the field is made categorical using the
 * "levels" element, for one-classification it must already be continuous.
 * If the attribute is zero, only one-classification is accepted and an
 * anonymous continuous double label is used. Every name in the second
 * dimension names of the "SV" element is resolved to a feature via the
 * formula; the features are passed through {@code scale(...)} before being
 * added to the encoder.</p>
 *
 * @param encoder the encoder that receives the label and the features.
 * @throws IllegalArgumentException if the "xlevels" element is missing, if a
 *         one-classification response field is not continuous, or if there is no
 *         response and the SVM type is not one-classification.
 */
private void encodeFormula(RExpEncoder encoder) {
    RGenericVector svm = getObject();

    RDoubleVector type = (RDoubleVector) svm.getValue("type");
    RDoubleVector sv = (RDoubleVector) svm.getValue("SV");
    RVector<?> levels = (RVector<?>) svm.getValue("levels");
    RExp terms = svm.getValue("terms");

    final RGenericVector xlevels;

    try {
        xlevels = (RGenericVector) svm.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }

    // The "type" element is used as a zero-based position into the Type enum constants
    Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];

    // Only the column names are needed here; the row names of "SV" are not used
    RStringVector columnNames = sv.dimnames(1);

    RIntegerVector response = (RIntegerVector) terms.getAttributeValue("response");

    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {

            if (xlevels.hasValue(variable)) {
                RStringVector levels = (RStringVector) xlevels.getValue(variable);

                return levels.getValues();
            }

            return null;
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };

    Formula formula = FormulaUtil.createFormula(terms, context, encoder);

    // Dependent variable
    int responseIndex = response.asScalar();
    if (responseIndex != 0) {
        // Non-zero response values are one-based positions of formula fields
        DataField dataField = (DataField) formula.getField(responseIndex - 1);

        switch (svmType) {
            case C_CLASSIFICATION:
            case NU_CLASSIFICATION:
                {
                    RStringVector stringLevels = (RStringVector) levels;

                    dataField = (DataField) encoder.toCategorical(dataField.getName(), stringLevels.getValues());
                }
                break;
            case ONE_CLASSIFICATION:
                {
                    OpType opType = dataField.getOpType();

                    if (!(OpType.CONTINUOUS).equals(opType)) {
                        throw new IllegalArgumentException("Expected a continuous response field for SVM type " + svmType + ", got operational type " + opType);
                    }
                }
                break;
            default:
                break;
        }

        encoder.setLabel(dataField);
    } else {
        // Without a response field, only one-classification can be encoded
        switch (svmType) {
            case ONE_CLASSIFICATION:
                break;
            default:
                throw new IllegalArgumentException("SVM type " + svmType + " requires a response variable, but the formula does not declare one");
        }

        encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
    }

    List<Feature> features = new ArrayList<>();

    // Independent variables
    for (int i = 0; i < columnNames.size(); i++) {
        String columnName = columnNames.getValue(i);

        Feature feature = formula.resolveFeature(columnName);

        features.add(feature);
    }

    // Features are registered only after scale(...) has had the chance to replace them
    features = scale(features, encoder);

    for (Feature feature : features) {
        encoder.addFeature(feature);
    }
}
Aggregations