use of org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin in project knime-core by knime.
the class DBAutoBinner method intoBinnerMaps.
/**
 * Translates a {@link PMMLPortObject} into a {@link DBBinnerMaps} object which holds the maps needed to create a
 * binner statement in {@link StatementManipulator}.
 *
 * <p>For every derived field of the port object the bins of its discretize element are read and stored under the
 * name of the discretized field:
 * <ul>
 * <li>the interval boundaries, where a margin that is not set becomes negative resp. positive infinity,</li>
 * <li>whether each boundary is open,</li>
 * <li>the bin names,</li>
 * <li>the name of the column to append, or {@code null} when the unique name derived from the derived field's
 * name contains the discretized field name followed by {@code *}.</li>
 * </ul>
 * All maps keep the iteration order of the derived fields.
 *
 * @param pmmlPortObject A {@link PMMLPortObject} containing all necessary information about binning operation
 * @param dataTableSpec Incoming {@link DataTableSpec}
 * @return a {@link DBBinnerMaps} object containing required parameters for {@link StatementManipulator}
 */
public static DBBinnerMaps intoBinnerMaps(final PMMLPortObject pmmlPortObject, final DataTableSpec dataTableSpec) {
    final Map<String, List<Pair<Double, Double>>> boundariesMap = new LinkedHashMap<>();
    final Map<String, List<Pair<Boolean, Boolean>>> boundariesOpenMap = new LinkedHashMap<>();
    final Map<String, List<String>> namingMap = new LinkedHashMap<>();
    final Map<String, String> appendMap = new LinkedHashMap<>();

    for (final DerivedField derivedField : pmmlPortObject.getDerivedFields()) {
        // each column has its own derived field
        final List<Pair<Double, Double>> boundaries = new ArrayList<>();
        final List<String> names = new ArrayList<>();
        final List<Pair<Boolean, Boolean>> boundariesOpen = new ArrayList<>();

        final List<DiscretizeBin> discretizeBinList = derivedField.getDiscretize().getDiscretizeBinList();
        final String replacedColumnName =
            DataTableSpec.getUniqueColumnName(dataTableSpec, derivedField.getName());
        final String originalColumnName = derivedField.getDiscretize().getField();

        for (final DiscretizeBin discBin : discretizeBinList) {
            final Interval interval = discBin.getInterval();
            // a missing margin denotes an unbounded interval on that side
            final double left = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            final double right = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            boundaries.add(new Pair<>(left, right));
            names.add(discBin.getBinValue());

            /*
             * Integer values of the closure enumeration:
             *   INT_OPEN_CLOSED   = 1
             *   INT_OPEN_OPEN     = 2
             *   INT_CLOSED_OPEN   = 3
             *   INT_CLOSED_CLOSED = 4
             */
            final int closure = interval.xgetClosure().enumValue().intValue();
            final boolean leftOpen;
            final boolean rightOpen;
            switch (closure) {
                case 2:
                    leftOpen = true;
                    rightOpen = true;
                    break;
                case 3:
                    leftOpen = false;
                    rightOpen = true;
                    break;
                case 4:
                    leftOpen = false;
                    rightOpen = false;
                    break;
                case 1:
                default:
                    // open-closed is also the fallback for any unexpected value
                    leftOpen = true;
                    rightOpen = false;
                    break;
            }
            boundariesOpen.add(new Pair<>(leftOpen, rightOpen));
        }

        boundariesMap.put(originalColumnName, boundaries);
        namingMap.put(originalColumnName, names);
        boundariesOpenMap.put(originalColumnName, boundariesOpen);

        // Literal substring test instead of a regex assembled from the column name: a name containing regex
        // metacharacters (parentheses, brackets, '+', ...) must not be interpreted as a pattern.
        // NOTE(review): "<column>*" presumably marks a replaced column - confirm against the PMML producer.
        if (replacedColumnName.contains(originalColumnName + "*")) {
            appendMap.put(originalColumnName, null);
        } else {
            appendMap.put(originalColumnName, replacedColumnName);
        }
    }
    return new DBBinnerMaps(boundariesMap, boundariesOpenMap, namingMap, appendMap);
}
use of org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin in project knime-core by knime.
the class PMMLBinningTranslator method createDerivedFields.
/**
 * Builds one PMML derived field with a discretize element for every entry of {@code m_columnToBins}.
 *
 * <p>Each derived field is categorical with string data type. Its name is the entry of {@code m_columnToAppend}
 * for the column if there is one; otherwise a new derived field name is created by the mapper and the column name
 * is stored as display name. Every bin becomes a discretize bin whose interval only carries finite margins.
 *
 * @return the derived fields, in the iteration order of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int next = 0;
    for (final Map.Entry<String, Bin[]> columnEntry : m_columnToBins.entrySet()) {
        final String columnName = columnEntry.getKey();
        final DerivedField derivedField = DerivedField.Factory.newInstance();

        // Resolve the referenced field name first: the mapper only holds the current mapping, so this lookup
        // has to happen before a new derived field name is created for the column below.
        final String referencedField = m_mapper.getDerivedFieldName(columnName);
        final Discretize discretize = derivedField.addNewDiscretize();
        discretize.setField(referencedField);

        final String appendedName = m_columnToAppend.get(columnName);
        if (appendedName == null) {
            derivedField.setName(m_mapper.createDerivedFieldName(columnName));
            derivedField.setDisplayName(columnName);
        } else {
            derivedField.setName(appendedName);
        }
        derivedField.setOptype(OPTYPE.CATEGORICAL);
        derivedField.setDataType(DATATYPE.STRING);

        for (final Bin rawBin : columnEntry.getValue()) {
            final NumericBin numericBin = (NumericBin) rawBin;
            final boolean openOnLeft = numericBin.isLeftOpen();
            final boolean openOnRight = numericBin.isRightOpen();
            final double lower = numericBin.getLeftValue();
            final double upper = numericBin.getRightValue();

            final DiscretizeBin discretizeBin = discretize.addNewDiscretizeBin();
            discretizeBin.setBinValue(numericBin.getBinName());
            final Interval interval = discretizeBin.addNewInterval();

            // infinite margins are left unset
            if (!Double.isInfinite(lower)) {
                interval.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                interval.setRightMargin(upper);
            }

            if (openOnLeft) {
                if (openOnRight) {
                    interval.setClosure(Closure.OPEN_OPEN);
                } else {
                    interval.setClosure(Closure.OPEN_CLOSED);
                }
            } else {
                if (openOnRight) {
                    interval.setClosure(Closure.CLOSED_OPEN);
                } else {
                    interval.setClosure(Closure.CLOSED_CLOSED);
                }
            }
        }
        result[next] = derivedField;
        next++;
    }
    return result;
}
use of org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin in project knime-core by knime.
the class PMMLBinningTranslator method initializeFrom.
/**
 * {@inheritDoc}
 *
 * <p>Re-creates the field mapper from the given derived fields and reads every derived field that has a
 * discretize element into {@code m_columnToBins} and {@code m_columnToAppend}. Derived fields without a
 * discretize element are skipped. An interval margin that is not set is read as negative (left) resp. positive
 * (right) infinity.
 *
 * @param derivedFields the derived fields to read the binning from
 * @return the indices of the derived fields that have a discretize element
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    List<Integer> consumed = new ArrayList<>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // only reading discretize entries other entries are skipped
            continue;
        }
        consumed.add(i);
        Discretize discretize = df.getDiscretize();
        DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            DiscretizeBin bin = pmmlBins[j];
            String binName = bin.getBinValue();
            Interval interval = bin.getInterval();
            // Infinite margins are not written when the derived fields are created, so a margin that is not
            // set has to be read back as infinity instead of the getter's value for a missing attribute.
            double leftValue = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double rightValue =
                interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            Closure.Enum closure = interval.getClosure();
            // both sides are open unless the closure says otherwise
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * The display name contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A', A'' etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
use of org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin in project knime-core by knime.
the class PMMLBinningTranslator method createDerivedFields.
/**
 * Converts the bins stored in {@code m_columnToBins} into PMML derived fields, one per column.
 *
 * <p>A column with an entry in {@code m_columnToAppend} uses that entry as derived field name. Any other column
 * gets a derived field name created by the mapper and keeps its own name as display name. All derived fields are
 * categorical strings, and margins are only written for finite bin boundaries.
 *
 * @return an array with one derived field per entry of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] fields = new DerivedField[m_columnToBins.size()];
    int pos = 0;
    for (final Map.Entry<String, Bin[]> binsOfColumn : m_columnToBins.entrySet()) {
        final Bin[] columnBins = binsOfColumn.getValue();
        final DerivedField field = DerivedField.Factory.newInstance();
        final String column = binsOfColumn.getKey();

        // This lookup must precede createDerivedFieldName: the mapper only knows the current mapping.
        final String mappedFieldName = m_mapper.getDerivedFieldName(column);
        final Discretize discretizeElement = field.addNewDiscretize();
        discretizeElement.setField(mappedFieldName);

        final String nameToAppend = m_columnToAppend.get(column);
        final boolean appendsColumn = nameToAppend != null;
        if (appendsColumn) {
            field.setName(nameToAppend);
        } else {
            field.setName(m_mapper.createDerivedFieldName(column));
            field.setDisplayName(column);
        }
        field.setOptype(OPTYPE.CATEGORICAL);
        field.setDataType(DATATYPE.STRING);

        for (final Bin columnBin : columnBins) {
            final NumericBin source = (NumericBin) columnBin;
            final boolean leftIsOpen = source.isLeftOpen();
            final boolean rightIsOpen = source.isRightOpen();
            final double leftBound = source.getLeftValue();
            final double rightBound = source.getRightValue();

            final DiscretizeBin target = discretizeElement.addNewDiscretizeBin();
            target.setBinValue(source.getBinName());
            final Interval range = target.addNewInterval();
            if (!Double.isInfinite(leftBound)) {
                range.setLeftMargin(leftBound);
            }
            if (!Double.isInfinite(rightBound)) {
                range.setRightMargin(rightBound);
            }

            if (leftIsOpen == rightIsOpen) {
                // both sides share the same kind of boundary
                range.setClosure(leftIsOpen ? Closure.OPEN_OPEN : Closure.CLOSED_CLOSED);
            } else {
                range.setClosure(leftIsOpen ? Closure.OPEN_CLOSED : Closure.CLOSED_OPEN);
            }
        }
        fields[pos++] = field;
    }
    return fields;
}
use of org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin in project knime-core by knime.
the class PMMLBinningTranslator method initializeFrom.
/**
 * {@inheritDoc}
 *
 * <p>Re-creates the field mapper from the given derived fields and reads every derived field that has a
 * discretize element into {@code m_columnToBins} and {@code m_columnToAppend}. Derived fields without a
 * discretize element are skipped. An interval margin that is not set is read as negative (left) resp. positive
 * (right) infinity.
 *
 * @param derivedFields the derived fields to read the binning from
 * @return the indices of the derived fields that have a discretize element
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    final List<Integer> consumed = new ArrayList<>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        final DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // only reading discretize entries other entries are skipped
            continue;
        }
        consumed.add(i);
        final Discretize discretize = df.getDiscretize();
        @SuppressWarnings("deprecation") final DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        final NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            final DiscretizeBin bin = pmmlBins[j];
            final String binName = bin.getBinValue();
            final Interval interval = bin.getInterval();
            // Infinite margins are not written when the derived fields are created, so a margin that is not
            // set has to be read back as infinity instead of the getter's value for a missing attribute.
            final double leftValue =
                interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            final double rightValue =
                interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            final Closure.Enum closure = interval.getClosure();
            // both sides are open unless the closure says otherwise
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * The display name contains the name of the column in KNIME that corresponds to the derived field in
         * PMML. This is necessary if derived fields are defined on other derived fields and the columns in KNIME
         * are replaced with the preprocessed values. In this case KNIME has to know the original names (e.g. A)
         * while PMML references to A', A'' etc.
         */
        final String displayName = df.getDisplayName();
        if (displayName != null) {
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            final String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
Aggregations