Search in sources :

Example 1 with PMMLDiscretizeBin

use of org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin in project knime-core by knime.

the class AutoBinner method createBins.

/**
 * Builds the list of PMML bins for every target column from that column's bin edges.
 *
 * <p>A column with {@code n} edges ({@code n > 1}) yields {@code n - 1} bins. The first bin is
 * closed on both sides, every following bin is open on the left and closed on the right. Columns
 * without edges (no map, no entry, or fewer than two edges) are mapped to an empty list.
 *
 * @param edgesMap bin edges per column name; may be {@code null}
 * @return map from each target column name to its (possibly empty) list of bins
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    BinnerNumberFormat formatter = new BinnerNumberFormat();
    Map<String, List<PMMLDiscretizeBin>> binMap = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (String target : m_settings.getTargetColumn()) {
        double[] edges = (edgesMap == null) ? null : edgesMap.get(target);
        if (edges == null || edges.length <= 1) {
            // Not enough edges to form a single interval: register the column without bins.
            binMap.put(target, new ArrayList<PMMLDiscretizeBin>());
            continue;
        }

        // One label per interval between two consecutive edges.
        final int binCount = edges.length - 1;
        String[] labels = new String[binCount];
        if (m_settings.getBinNaming().equals(BinNaming.numbered)) {
            for (int k = 0; k < binCount; k++) {
                labels[k] = "Bin " + (k + 1);
            }
        } else if (m_settings.getBinNaming().equals(BinNaming.edges)) {
            for (int k = 0; k < binCount; k++) {
                // Only the very first interval includes its lower edge.
                String opening = (k == 0) ? "[" : "(";
                labels[k] = opening + formatter.format(edges[k]) + "," + formatter.format(edges[k + 1]) + "]";
            }
        } else {
            // BinNaming.midpoints: label each bin with the centre of its interval.
            for (int k = 0; k < binCount; k++) {
                labels[k] = formatter.format((edges[k + 1] - edges[k]) / 2 + edges[k]);
            }
        }

        List<PMMLDiscretizeBin> bins = new ArrayList<PMMLDiscretizeBin>();
        for (int k = 0; k < binCount; k++) {
            Closure closure = (k == 0) ? Closure.closedClosed : Closure.openClosed;
            PMMLInterval interval = new PMMLInterval(edges[k], edges[k + 1], closure);
            bins.add(new PMMLDiscretizeBin(labels[k], Arrays.asList(interval)));
        }
        binMap.put(target, bins);
    }
    return binMap;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval) ArrayList(java.util.ArrayList) List(java.util.List)

Example 2 with PMMLDiscretizeBin

use of org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin in project knime-core by knime.

the class DBAutoBinner method translate.

/**
 * Translates a {@link PMMLPreprocDiscretize} object into a {@link PMMLPortObject}.
 *
 * <p>For every configured column the discretization bins are converted into {@link NumericBin}s
 * and exported as a global transformation dictionary of the returned port object. A column whose
 * configured name equals the discretized field name is treated as replaced; otherwise the
 * configured name is recorded as the name of the column to append.
 *
 * @param pmmlDiscretize {@link PMMLPreprocDiscretize} object
 * @param dataTableSpec {@link DataTableSpec} of the incoming {@link BufferedDataTable}
 * @return a {@link PMMLPortObject} containing required parameters for binning operation
 */
public static PMMLPortObject translate(final PMMLPreprocDiscretize pmmlDiscretize, final DataTableSpec dataTableSpec) {
    final Map<String, Bin[]> columnToBins = new HashMap<>();
    final Map<String, String> columnToAppend = new HashMap<>();
    List<String> replacedColumnNames = pmmlDiscretize.getConfiguration().getNames();
    for (String replacedColumnName : replacedColumnNames) {
        PMMLDiscretize discretize = pmmlDiscretize.getConfiguration().getDiscretize(replacedColumnName);
        List<PMMLDiscretizeBin> bins = discretize.getBins();
        String originalColumnName = discretize.getField();
        if (replacedColumnName.equals(originalColumnName)) {
            // column is replaced in place, so nothing is appended
            columnToAppend.put(originalColumnName, null);
        } else {
            // column is not replaced -> append the binned column under the new name
            columnToAppend.put(originalColumnName, replacedColumnName);
        }
        NumericBin[] numericBin = new NumericBin[bins.size()];
        int counter = 0;
        for (PMMLDiscretizeBin bin : bins) {
            String binName = bin.getBinValue();
            List<PMMLInterval> intervals = bin.getIntervals();
            // Defaults (closed/closed, margins 0) are kept if the bin carries no interval.
            boolean leftOpen = false;
            boolean rightOpen = false;
            double leftMargin = 0;
            double rightMargin = 0;
            // always returns only one interval; if there were several, the last one would win
            for (PMMLInterval interval : intervals) {
                Closure closure = interval.getClosure();
                switch(closure) {
                    case openClosed:
                        leftOpen = true;
                        rightOpen = false;
                        break;
                    case openOpen:
                        leftOpen = true;
                        rightOpen = true;
                        break;
                    case closedOpen:
                        leftOpen = false;
                        rightOpen = true;
                        // This break was missing: the case fell through to closedClosed and
                        // the right boundary was wrongly reported as closed.
                        break;
                    case closedClosed:
                        leftOpen = false;
                        rightOpen = false;
                        break;
                    default:
                        leftOpen = true;
                        rightOpen = false;
                        break;
                }
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            }
            numericBin[counter] = new NumericBin(binName, leftOpen, leftMargin, rightOpen, rightMargin);
            counter++;
        }
        columnToBins.put(originalColumnName, numericBin);
    }
    DataTableSpec newDataTableSpec = createNewDataTableSpec(dataTableSpec, columnToAppend);
    PMMLPortObjectSpecCreator pmmlSpecCreator = new PMMLPortObjectSpecCreator(newDataTableSpec);
    PMMLPortObject pmmlPortObject = new PMMLPortObject(pmmlSpecCreator.createSpec(), null, newDataTableSpec);
    PMMLBinningTranslator trans = new PMMLBinningTranslator(columnToBins, columnToAppend, new DerivedFieldMapper(pmmlPortObject));
    TransformationDictionary exportToTransDict = trans.exportToTransDict();
    pmmlPortObject.addGlobalTransformations(exportToTransDict);
    return pmmlPortObject;
}
Also used : NumericBin(org.knime.base.node.preproc.pmml.binner.NumericBin) DataTableSpec(org.knime.core.data.DataTableSpec) Closure(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval.Closure) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMMLBinningTranslator(org.knime.base.node.preproc.pmml.binner.PMMLBinningTranslator) PMMLDiscretize(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretize) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 3 with PMMLDiscretizeBin

use of org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin in project knime-core by knime.

the class AutoBinner method createBins.

/**
 * Builds the list of PMML bins for every target column from that column's bin edges.
 *
 * <p>Bins are named either by running number or, for any other naming setting, by their
 * edges. The first bin is closed on both sides, all later bins are open on the left and closed
 * on the right. Columns without at least two edges are mapped to an empty list.
 *
 * @param edgesMap bin edges per column name; may be {@code null}
 * @return map from each target column name to its (possibly empty) list of bins
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    Map<String, List<PMMLDiscretizeBin>> binMap = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (String target : m_settings.getTargetColumn()) {
        double[] edges = (edgesMap == null) ? null : edgesMap.get(target);
        if (edges == null || edges.length <= 1) {
            // No interval can be formed: the column gets an empty bin list.
            binMap.put(target, new ArrayList<PMMLDiscretizeBin>());
            continue;
        }

        // One label per pair of consecutive edges.
        final int binCount = edges.length - 1;
        String[] labels = new String[binCount];
        final boolean numbered = m_settings.getBinNaming().equals(BinNaming.numbered);
        for (int k = 0; k < binCount; k++) {
            if (numbered) {
                labels[k] = "Bin " + (k + 1);
            } else {
                // BinNaming.edges: only the first interval includes its lower edge.
                String opening = (k == 0) ? "[" : "(";
                labels[k] = opening + BinnerNumberFormat.format(edges[k]) + ","
                    + BinnerNumberFormat.format(edges[k + 1]) + "]";
            }
        }

        List<PMMLDiscretizeBin> bins = new ArrayList<PMMLDiscretizeBin>();
        for (int k = 0; k < binCount; k++) {
            Closure closure = (k == 0) ? Closure.closedClosed : Closure.openClosed;
            PMMLInterval interval = new PMMLInterval(edges[k], edges[k + 1], closure);
            bins.add(new PMMLDiscretizeBin(labels[k], Arrays.asList(interval)));
        }
        binMap.put(target, bins);
    }
    return binMap;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval)

Example 4 with PMMLDiscretizeBin

use of org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin in project knime-core by knime.

the class AutoBinner method createBins.

/**
 * Builds the list of PMML bins for every included column from that column's bin edges.
 *
 * <p>Depending on the bin naming setting, bins are labelled by running number, by their edges,
 * or by the midpoint of their interval. The first bin is closed on both sides, all later bins
 * are open on the left and closed on the right. Columns without at least two edges are mapped
 * to an empty list.
 *
 * @param edgesMap bin edges per column name; may be {@code null}
 * @return map from each included column name to its (possibly empty) list of bins
 */
private Map<String, List<PMMLDiscretizeBin>> createBins(final Map<String, double[]> edgesMap) {
    BinnerNumberFormat formatter = new BinnerNumberFormat();
    Map<String, List<PMMLDiscretizeBin>> binMap = new HashMap<String, List<PMMLDiscretizeBin>>();
    for (String target : m_included) {
        double[] edges = (edgesMap == null) ? null : edgesMap.get(target);
        boolean hasInterval = edges != null && edges.length > 1;
        if (!hasInterval) {
            // Fewer than two edges: nothing to bin for this column.
            binMap.put(target, new ArrayList<PMMLDiscretizeBin>());
            continue;
        }

        // Labels, one per interval between neighbouring edges.
        final int binCount = edges.length - 1;
        String[] labels = new String[binCount];
        if (m_settings.getBinNaming().equals(BinNaming.numbered)) {
            for (int idx = 0; idx < binCount; idx++) {
                labels[idx] = "Bin " + (idx + 1);
            }
        } else if (m_settings.getBinNaming().equals(BinNaming.edges)) {
            for (int idx = 0; idx < binCount; idx++) {
                // Only the very first interval includes its lower edge.
                String opening = (idx == 0) ? "[" : "(";
                labels[idx] = opening + formatter.format(edges[idx]) + "," + formatter.format(edges[idx + 1]) + "]";
            }
        } else {
            // BinNaming.midpoints: label each bin with the centre of its interval.
            for (int idx = 0; idx < binCount; idx++) {
                labels[idx] = formatter.format((edges[idx + 1] - edges[idx]) / 2 + edges[idx]);
            }
        }

        List<PMMLDiscretizeBin> bins = new ArrayList<PMMLDiscretizeBin>();
        for (int idx = 0; idx < binCount; idx++) {
            Closure closure = (idx == 0) ? Closure.closedClosed : Closure.openClosed;
            PMMLInterval interval = new PMMLInterval(edges[idx], edges[idx + 1], closure);
            bins.add(new PMMLDiscretizeBin(labels[idx], Arrays.asList(interval)));
        }
        binMap.put(target, bins);
    }
    return binMap;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

HashMap (java.util.HashMap)4 LinkedHashMap (java.util.LinkedHashMap)4 PMMLDiscretizeBin (org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin)4 PMMLInterval (org.knime.base.node.preproc.autobinner.pmml.PMMLInterval)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 TransformationDictionary (org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary)1 PMMLDiscretize (org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretize)1 Closure (org.knime.base.node.preproc.autobinner.pmml.PMMLInterval.Closure)1 NumericBin (org.knime.base.node.preproc.pmml.binner.NumericBin)1 PMMLBinningTranslator (org.knime.base.node.preproc.pmml.binner.PMMLBinningTranslator)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)1 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)1 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)1