use of org.knime.base.node.preproc.autobinner.pmml.PMMLInterval in project knime-core by knime.
the class AutoBinner method createBins.
/**
 * Creates the discretization bins of every target column from the supplied bin edges.
 *
 * <p>A column with {@code n} edges ({@code n > 1}) yields {@code n - 1} bins. The first bin is
 * closed on both sides; every following bin is open on the left and closed on the right. A column
 * without usable edges (no map, no entry, or fewer than two edges) is mapped to an empty list.
 *
 * @param edgesMap the bin edges per column name, may be {@code null}
 * @return the bins per target column name
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    final BinnerNumberFormat formatter = new BinnerNumberFormat();
    final Map<String, List<PMMLDiscretizeBin>> result = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (final String column : m_settings.getTargetColumn()) {
        final List<PMMLDiscretizeBin> columnBins = new ArrayList<PMMLDiscretizeBin>();
        result.put(column, columnBins);

        final double[] edges = (edgesMap == null) ? null : edgesMap.get(column);
        if (edges == null || edges.length <= 1) {
            // Nothing to bin for this column; it keeps the empty list.
            continue;
        }

        // Resolve the naming scheme once per column.
        final boolean numbered = m_settings.getBinNaming().equals(BinNaming.numbered);
        final boolean byEdges = !numbered && m_settings.getBinNaming().equals(BinNaming.edges);

        final int binCount = edges.length - 1;
        for (int idx = 0; idx < binCount; idx++) {
            final double lower = edges[idx];
            final double upper = edges[idx + 1];
            final boolean first = idx == 0;

            final String name;
            if (numbered) {
                name = "Bin " + (idx + 1);
            } else if (byEdges) {
                // Only the first bin includes its lower edge.
                name = (first ? "[" : "(") + formatter.format(lower) + "," + formatter.format(upper) + "]";
            } else {
                // BinNaming.midpoints
                name = formatter.format((upper - lower) / 2 + lower);
            }

            final Closure closure = first ? Closure.closedClosed : Closure.openClosed;
            columnBins.add(new PMMLDiscretizeBin(name, Arrays.asList(new PMMLInterval(lower, upper, closure))));
        }
    }
    return result;
}
use of org.knime.base.node.preproc.autobinner.pmml.PMMLInterval in project knime-core by knime.
the class DBAutoBinner method translate.
/**
 * This method translates a {@link PMMLPreprocDiscretize} object into {@link PMMLPortObject}.
 *
 * <p>For every configured column the discretize bins are converted into {@link NumericBin}s
 * (name, left/right openness and margins) and exported as global transformations of the
 * returned port object.
 *
 * @param pmmlDiscretize {@link PMMLPreprocDiscretize} object
 * @param dataTableSpec {@link DataTableSpec} of incoming {@link BufferedDataTable}
 * @return a {@link PMMLPortObject} containing required parameters for binning operation
 */
public static PMMLPortObject translate(final PMMLPreprocDiscretize pmmlDiscretize, final DataTableSpec dataTableSpec) {
    final Map<String, Bin[]> columnToBins = new HashMap<>();
    final Map<String, String> columnToAppend = new HashMap<>();
    List<String> replacedColumnNames = pmmlDiscretize.getConfiguration().getNames();
    for (String replacedColumnName : replacedColumnNames) {
        PMMLDiscretize discretize = pmmlDiscretize.getConfiguration().getDiscretize(replacedColumnName);
        List<PMMLDiscretizeBin> bins = discretize.getBins();
        String originalColumnName = discretize.getField();
        if (replacedColumnName.equals(originalColumnName)) {
            // column is replaced in place, so nothing is appended
            columnToAppend.put(originalColumnName, null);
        } else {
            // column is not replaced -> the binned column is appended under the new name
            columnToAppend.put(originalColumnName, replacedColumnName);
        }
        NumericBin[] numericBin = new NumericBin[bins.size()];
        int counter = 0;
        for (PMMLDiscretizeBin bin : bins) {
            String binName = bin.getBinValue();
            List<PMMLInterval> intervals = bin.getIntervals();
            boolean leftOpen = false;
            boolean rightOpen = false;
            double leftMargin = 0;
            double rightMargin = 0;
            // always returns only one interval; if there were several, the last one wins
            for (PMMLInterval interval : intervals) {
                Closure closure = interval.getClosure();
                switch (closure) {
                    case openClosed:
                        leftOpen = true;
                        rightOpen = false;
                        break;
                    case openOpen:
                        leftOpen = true;
                        rightOpen = true;
                        break;
                    case closedOpen:
                        leftOpen = false;
                        rightOpen = true;
                        // This break was missing: the case fell through into closedClosed
                        // and the right side was wrongly reported as closed.
                        break;
                    case closedClosed:
                        leftOpen = false;
                        rightOpen = false;
                        break;
                    default:
                        // unknown closure: fall back to open-closed
                        leftOpen = true;
                        rightOpen = false;
                        break;
                }
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            }
            numericBin[counter] = new NumericBin(binName, leftOpen, leftMargin, rightOpen, rightMargin);
            counter++;
        }
        columnToBins.put(originalColumnName, numericBin);
    }
    DataTableSpec newDataTableSpec = createNewDataTableSpec(dataTableSpec, columnToAppend);
    PMMLPortObjectSpecCreator pmmlSpecCreator = new PMMLPortObjectSpecCreator(newDataTableSpec);
    PMMLPortObject pmmlPortObject = new PMMLPortObject(pmmlSpecCreator.createSpec(), null, newDataTableSpec);
    PMMLBinningTranslator trans = new PMMLBinningTranslator(columnToBins, columnToAppend, new DerivedFieldMapper(pmmlPortObject));
    TransformationDictionary exportToTransDict = trans.exportToTransDict();
    pmmlPortObject.addGlobalTransformations(exportToTransDict);
    return pmmlPortObject;
}
use of org.knime.base.node.preproc.autobinner.pmml.PMMLInterval in project knime-core by knime.
the class AutoBinner method createBins.
/**
 * Creates the discretization bins of every target column from the supplied bin edges.
 *
 * <p>Bins are named either by running number ("Bin 1", "Bin 2", ...) or by their formatted
 * edges. The first bin is closed on both sides; all later bins are open on the left and closed
 * on the right. Columns without at least two edges are mapped to an empty list.
 *
 * @param edgesMap the bin edges per column name, may be {@code null}
 * @return the bins per target column name
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    final Map<String, List<PMMLDiscretizeBin>> result = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (final String column : m_settings.getTargetColumn()) {
        final List<PMMLDiscretizeBin> columnBins = new ArrayList<PMMLDiscretizeBin>();
        final double[] edges = (edgesMap != null) ? edgesMap.get(column) : null;
        if (edges != null && edges.length > 1) {
            // Anything other than "numbered" is treated as BinNaming.edges.
            final boolean numbered = m_settings.getBinNaming().equals(BinNaming.numbered);
            final int binCount = edges.length - 1;
            for (int k = 0; k < binCount; k++) {
                final double from = edges[k];
                final double to = edges[k + 1];
                final boolean first = k == 0;

                final String label;
                if (numbered) {
                    label = "Bin " + (k + 1);
                } else {
                    // Only the first bin includes its lower edge.
                    label = (first ? "[" : "(") + BinnerNumberFormat.format(from) + ","
                        + BinnerNumberFormat.format(to) + "]";
                }

                final Closure closure = first ? Closure.closedClosed : Closure.openClosed;
                columnBins.add(new PMMLDiscretizeBin(label, Arrays.asList(new PMMLInterval(from, to, closure))));
            }
        }
        result.put(column, columnBins);
    }
    return result;
}
use of org.knime.base.node.preproc.autobinner.pmml.PMMLInterval in project knime-core by knime.
the class AutoBinner method createBins.
/**
 * Creates the discretization bins of every included column from the supplied bin edges.
 *
 * <p>Depending on the configured bin naming, bins are labelled by running number, by their
 * formatted edges, or by their formatted midpoint. The first bin is closed on both sides; all
 * later bins are open on the left and closed on the right. An included column without at least
 * two edges is mapped to an empty list.
 *
 * @param edgesMap the bin edges per column name, may be {@code null}
 * @return the bins per included column name
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    final BinnerNumberFormat formatter = new BinnerNumberFormat();
    final Map<String, List<PMMLDiscretizeBin>> result = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (final String column : m_included) {
        final List<PMMLDiscretizeBin> columnBins = new ArrayList<PMMLDiscretizeBin>();
        result.put(column, columnBins);

        final double[] edges = (edgesMap == null) ? null : edgesMap.get(column);
        if (edges == null || edges.length <= 1) {
            // No usable edges: the column keeps its empty bin list.
            continue;
        }

        // Resolve the naming scheme once per column.
        final boolean numbered = m_settings.getBinNaming().equals(BinNaming.numbered);
        final boolean byEdges = !numbered && m_settings.getBinNaming().equals(BinNaming.edges);

        int pos = 0;
        while (pos < edges.length - 1) {
            final double lower = edges[pos];
            final double upper = edges[pos + 1];
            final boolean first = pos == 0;

            final String label;
            if (numbered) {
                label = "Bin " + (pos + 1);
            } else if (byEdges) {
                // Only the first bin includes its lower edge.
                label = (first ? "[" : "(") + formatter.format(lower) + "," + formatter.format(upper) + "]";
            } else {
                // BinNaming.midpoints
                label = formatter.format((upper - lower) / 2 + lower);
            }

            final Closure closure = first ? Closure.closedClosed : Closure.openClosed;
            columnBins.add(new PMMLDiscretizeBin(label, Arrays.asList(new PMMLInterval(lower, upper, closure))));
            pos++;
        }
    }
    return result;
}
Aggregations