use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.
the class DBAutoBinner method intoBinnerMaps.
/**
 * Translates a {@link PMMLPortObject} into a {@link DBBinnerMaps} object which holds the maps
 * needed to create a binner statement in {@link StatementManipulator}.
 *
 * <p>Every derived field of the port object contributes one entry per map, keyed by the name of
 * the field referenced by its discretize element:
 * <ul>
 * <li>the (left, right) margins of each bin, where a margin that is not set becomes negative or
 * positive infinity respectively,</li>
 * <li>the (leftOpen, rightOpen) flags of each bin,</li>
 * <li>the bin names,</li>
 * <li>the unique name derived from the derived field's name, or {@code null} if that name
 * contains the original column name directly followed by an asterisk.</li>
 * </ul>
 *
 * @param pmmlPortObject A {@link PMMLPortObject} containing all necessary information about binning operation
 * @param dataTableSpec Incoming {@link DataTableSpec}
 * @return a {@link DBBinnerMaps} object containing required parameters for {@link StatementManipulator}
 */
public static DBBinnerMaps intoBinnerMaps(final PMMLPortObject pmmlPortObject, final DataTableSpec dataTableSpec) {
    Map<String, List<Pair<Double, Double>>> boundariesMap = new LinkedHashMap<>();
    Map<String, List<Pair<Boolean, Boolean>>> boundariesOpenMap = new LinkedHashMap<>();
    Map<String, List<String>> namingMap = new LinkedHashMap<>();
    Map<String, String> appendMap = new LinkedHashMap<>();
    for (DerivedField derivedField : pmmlPortObject.getDerivedFields()) {
        // each column has its own derived field
        List<Pair<Double, Double>> boundaries = new ArrayList<>();
        List<String> names = new ArrayList<>();
        List<Pair<Boolean, Boolean>> boundariesOpen = new ArrayList<>();
        List<DiscretizeBin> discretizeBinList = derivedField.getDiscretize().getDiscretizeBinList();
        String replacedColumnName = DataTableSpec.getUniqueColumnName(dataTableSpec, derivedField.getName());
        String originalColumnName = derivedField.getDiscretize().getField();
        for (DiscretizeBin discBin : discretizeBinList) {
            Interval interval = discBin.getInterval();
            // a missing margin means the bin is unbounded on that side
            double left = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double right = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            boundaries.add(new Pair<>(left, right));
            names.add(discBin.getBinValue());
            boolean leftOpen;
            boolean rightOpen;
            int closure = interval.xgetClosure().enumValue().intValue();
            /*
             * static final int INT_OPEN_CLOSED = 1;
             * static final int INT_OPEN_OPEN = 2;
             * static final int INT_CLOSED_OPEN = 3;
             * static final int INT_CLOSED_CLOSED = 4;
             */
            switch (closure) {
                case 2:
                    leftOpen = true;
                    rightOpen = true;
                    break;
                case 3:
                    leftOpen = false;
                    rightOpen = true;
                    break;
                case 4:
                    leftOpen = false;
                    rightOpen = false;
                    break;
                case 1:
                default:
                    // open-closed is also the fallback for any unexpected closure value
                    leftOpen = true;
                    rightOpen = false;
                    break;
            }
            boundariesOpen.add(new Pair<>(leftOpen, rightOpen));
        }
        boundariesMap.put(originalColumnName, boundaries);
        namingMap.put(originalColumnName, names);
        boundariesOpenMap.put(originalColumnName, boundariesOpen);
        // Literal substring test instead of a regular expression assembled from the column name:
        // names containing regex metacharacters (e.g. "(", "[", "+") must not be interpreted as a pattern.
        // NOTE(review): presumably an asterisk-suffixed derived name marks a column that is replaced
        // rather than appended - confirm against the code creating the derived field names.
        if (replacedColumnName.contains(originalColumnName + "*")) {
            appendMap.put(originalColumnName, null);
        } else {
            appendMap.put(originalColumnName, replacedColumnName);
        }
    }
    return new DBBinnerMaps(boundariesMap, boundariesOpenMap, namingMap, appendMap);
}
use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.
the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.
/**
 * Creates one {@link DataColumnSpec} for every data field of the PMML data dictionary, adds it to
 * {@code colSpecs} and records the field name in {@code m_dictFields}.
 *
 * <p>For string columns the domain consists of the values listed for the data field. For columns
 * compatible with {@link DoubleValue} the domain bounds are taken from the first interval of the
 * data field or, if no interval is defined, from the minimum and maximum of the listed values.
 * Bounds are only set if both a lower and an upper bound are available.
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if intervals are defined for a string field or if a listed value
 *             of a numeric field cannot be parsed as a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            Value[] valueArray = dataField.getValueArray();
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                Interval interval = intervalArray[0];
                // Only take margins that are actually present: the getters return a primitive and
                // therefore cannot signal an absent margin, which must not be used as a bound.
                if (interval.isSetLeftMargin()) {
                    leftMargin = interval.getLeftMargin();
                }
                if (interval.isSetRightMargin()) {
                    rightMargin = interval.getRightMargin();
                }
            } else if (dataField.getValueArray() != null && dataField.getValueArray().length > 0) {
                // try to derive the bounds from the values
                Value[] valueArray = dataField.getValueArray();
                List<Double> values = new ArrayList<Double>();
                for (int j = 0; j < valueArray.length; j++) {
                    String value = "";
                    try {
                        value = valueArray[j].getValue();
                        values.add(Double.parseDouble(value));
                    } catch (Exception e) {
                        // keep the original failure as cause so the root problem stays visible
                        throw new IllegalArgumentException(
                            "Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                    }
                }
                leftMargin = Collections.min(values);
                rightMargin = Collections.max(values);
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.
the class PMMLBinningTranslator method createDerivedFields.
/**
 * Builds one PMML {@link DerivedField} carrying a {@link Discretize} element for every entry of
 * {@code m_columnToBins}.
 *
 * <p>Each bin becomes a discretize bin with an interval; infinite margins are left unset and the
 * closure is chosen from the bin's open/closed flags.
 *
 * @return the derived fields in the iteration order of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int next = 0;
    for (Map.Entry<String, Bin[]> columnEntry : m_columnToBins.entrySet()) {
        final String column = columnEntry.getKey();
        final DerivedField derived = DerivedField.Factory.newInstance();
        // Resolve the currently mapped field name BEFORE a new derived name is created below,
        // because the mapper only keeps the current mapping.
        final String sourceField = m_mapper.getDerivedFieldName(column);
        final Discretize discretize = derived.addNewDiscretize();
        discretize.setField(sourceField);

        final String appendName = m_columnToAppend.get(column);
        if (appendName == null) {
            derived.setName(m_mapper.createDerivedFieldName(column));
            derived.setDisplayName(column);
        } else {
            derived.setName(appendName);
        }
        derived.setOptype(OPTYPE.CATEGORICAL);
        derived.setDataType(DATATYPE.STRING);

        final Bin[] columnBins = columnEntry.getValue();
        for (int b = 0; b < columnBins.length; b++) {
            final NumericBin numericBin = (NumericBin) columnBins[b];
            final boolean openOnLeft = numericBin.isLeftOpen();
            final boolean openOnRight = numericBin.isRightOpen();
            final double lower = numericBin.getLeftValue();
            final double upper = numericBin.getRightValue();

            final DiscretizeBin target = discretize.addNewDiscretizeBin();
            target.setBinValue(numericBin.getBinName());
            final Interval range = target.addNewInterval();
            // infinite margins are expressed by omitting the margin
            if (!Double.isInfinite(lower)) {
                range.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                range.setRightMargin(upper);
            }

            final Closure.Enum closure;
            if (openOnLeft) {
                closure = openOnRight ? Closure.OPEN_OPEN : Closure.OPEN_CLOSED;
            } else {
                closure = openOnRight ? Closure.CLOSED_OPEN : Closure.CLOSED_CLOSED;
            }
            range.setClosure(closure);
        }
        result[next] = derived;
        next++;
    }
    return result;
}
use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.
the class PMMLBinningTranslator method initializeFrom.
/**
 * {@inheritDoc}
 *
 * <p>Only derived fields with a discretize element are read; their bins are converted to
 * {@link NumericBin}s and stored in {@code m_columnToBins} and {@code m_columnToAppend}. A margin
 * that is not set on an interval is treated as unbounded (negative infinity on the left, positive
 * infinity on the right).
 *
 * @return the indices of the derived fields that carry a discretize element
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    List<Integer> consumed = new ArrayList<Integer>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // only reading discretize entries other entries are skipped
            continue;
        }
        consumed.add(i);
        Discretize discretize = df.getDiscretize();
        DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            DiscretizeBin bin = pmmlBins[j];
            String binName = bin.getBinValue();
            Interval interval = bin.getInterval();
            // createDerivedFields() omits infinite margins, so an absent margin has to be read
            // back as infinity instead of whatever the primitive getter yields for a missing value.
            double leftValue = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double rightValue = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            Closure.Enum closure = interval.getClosure();
            // both sides are open unless the closure says otherwise
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * This field contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A', A'' etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.
the class PMMLBinningTranslator method createDerivedFields.
/**
 * Converts the bins stored in {@code m_columnToBins} into PMML derived fields, one
 * {@link DerivedField} with a {@link Discretize} element per column.
 *
 * <p>Margins are only written for finite bin borders; the interval closure mirrors the
 * left/right open flags of the bin.
 *
 * @return an array with one derived field per entry of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    DerivedField[] fields = new DerivedField[m_columnToBins.size()];
    int index = 0;
    for (Map.Entry<String, Bin[]> binsOfColumn : m_columnToBins.entrySet()) {
        String columnName = binsOfColumn.getKey();
        DerivedField field = DerivedField.Factory.newInstance();
        /* Fetch the mapped field name first; creating a new derived name afterwards
         * changes the mapping, which only holds the current state. */
        String mappedName = m_mapper.getDerivedFieldName(columnName);
        Discretize discretize = field.addNewDiscretize();
        discretize.setField(mappedName);

        String nameToAppend = m_columnToAppend.get(columnName);
        if (nameToAppend == null) {
            field.setName(m_mapper.createDerivedFieldName(columnName));
            field.setDisplayName(columnName);
        } else {
            field.setName(nameToAppend);
        }
        field.setOptype(OPTYPE.CATEGORICAL);
        field.setDataType(DATATYPE.STRING);

        for (Bin rawBin : binsOfColumn.getValue()) {
            NumericBin source = (NumericBin) rawBin;
            boolean leftIsOpen = source.isLeftOpen();
            boolean rightIsOpen = source.isRightOpen();
            double leftBorder = source.getLeftValue();
            double rightBorder = source.getRightValue();

            DiscretizeBin discretizeBin = discretize.addNewDiscretizeBin();
            discretizeBin.setBinValue(source.getBinName());
            Interval pmmlInterval = discretizeBin.addNewInterval();
            if (!Double.isInfinite(leftBorder)) {
                pmmlInterval.setLeftMargin(leftBorder);
            }
            if (!Double.isInfinite(rightBorder)) {
                pmmlInterval.setRightMargin(rightBorder);
            }

            if (leftIsOpen) {
                if (rightIsOpen) {
                    pmmlInterval.setClosure(Closure.OPEN_OPEN);
                } else {
                    pmmlInterval.setClosure(Closure.OPEN_CLOSED);
                }
            } else {
                if (rightIsOpen) {
                    pmmlInterval.setClosure(Closure.CLOSED_OPEN);
                } else {
                    pmmlInterval.setClosure(Closure.CLOSED_CLOSED);
                }
            }
        }
        fields[index++] = field;
    }
    return fields;
}
Aggregations