Search in sources:

Example 1 with Interval

use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.

the class DBAutoBinner method intoBinnerMaps.

/**
 * Translates a {@link PMMLPortObject} into a {@link DBBinnerMaps} object which holds several maps
 * needed to create a binner statement in {@link StatementManipulator}.
 *
 * <p>For every derived field of the port object the bins of its discretize element are collected into three
 * parallel lists (boundaries, open/closed flags and bin names). All maps are keyed by the name of the field
 * the discretize element refers to. A bin whose interval has no left (right) margin gets
 * {@link Double#NEGATIVE_INFINITY} ({@link Double#POSITIVE_INFINITY}) as boundary.
 *
 * @param pmmlPortObject A {@link PMMLPortObject} containing all necessary information about binning operation
 * @param dataTableSpec Incoming {@link DataTableSpec}
 * @return a {@link DBBinnerMaps} object containing required parameters for {@link StatementManipulator}
 */
public static DBBinnerMaps intoBinnerMaps(final PMMLPortObject pmmlPortObject, final DataTableSpec dataTableSpec) {
    Map<String, List<Pair<Double, Double>>> boundariesMap = new LinkedHashMap<>();
    Map<String, List<Pair<Boolean, Boolean>>> boundariesOpenMap = new LinkedHashMap<>();
    Map<String, List<String>> namingMap = new LinkedHashMap<>();
    Map<String, String> appendMap = new LinkedHashMap<>();
    for (DerivedField derivedField : pmmlPortObject.getDerivedFields()) {
        // each column has its own derived field
        List<Pair<Double, Double>> boundaries = new ArrayList<>();
        List<String> names = new ArrayList<>();
        List<Pair<Boolean, Boolean>> boundariesOpen = new ArrayList<>();
        List<DiscretizeBin> discretizeBinList = derivedField.getDiscretize().getDiscretizeBinList();
        String replacedColumnName = DataTableSpec.getUniqueColumnName(dataTableSpec, derivedField.getName());
        String originalColumnName = derivedField.getDiscretize().getField();
        for (DiscretizeBin discBin : discretizeBinList) {
            Interval interval = discBin.getInterval();
            double left = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double right = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            boundaries.add(new Pair<>(left, right));
            names.add(discBin.getBinValue());
            boolean leftOpen;
            boolean rightOpen;
            int closure = interval.xgetClosure().enumValue().intValue();
            /*
             * Closure codes of the PMML interval:
             *   INT_OPEN_CLOSED = 1, INT_OPEN_OPEN = 2, INT_CLOSED_OPEN = 3, INT_CLOSED_CLOSED = 4
             * Any other code is handled like INT_OPEN_CLOSED.
             */
            switch (closure) {
                case 2:
                    leftOpen = true;
                    rightOpen = true;
                    break;
                case 3:
                    leftOpen = false;
                    rightOpen = true;
                    break;
                case 4:
                    leftOpen = false;
                    rightOpen = false;
                    break;
                case 1:
                default:
                    leftOpen = true;
                    rightOpen = false;
                    break;
            }
            boundariesOpen.add(new Pair<>(leftOpen, rightOpen));
        }
        boundariesMap.put(originalColumnName, boundaries);
        namingMap.put(originalColumnName, names);
        boundariesOpenMap.put(originalColumnName, boundariesOpen);
        // If the unique name contains "<original column name>*", no append name is recorded (null).
        // The name is compared literally: building a regular expression out of the raw column name would
        // misinterpret (or fail on) names that contain regex metacharacters such as '(', '[', '+' or '.'.
        boolean containsStarredOriginal = replacedColumnName.contains(originalColumnName + "*");
        appendMap.put(originalColumnName, containsStarredOriginal ? null : replacedColumnName);
    }
    return new DBBinnerMaps(boundariesMap, boundariesOpenMap, namingMap, appendMap);
}
Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) Pair(org.knime.core.util.Pair) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 2 with Interval

use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * Creates a {@link DataColumnSpec} for every data field of the PMML data dictionary, appends it to
 * {@code colSpecs} and records the field name in {@code m_dictFields}.
 *
 * <p>String columns get a domain built from the values listed for the field. Columns compatible with
 * {@link DoubleValue} get lower/upper bounds taken from the first interval of the field or, if the field has
 * no interval, from the minimum and maximum of its listed values. Bounds are only set when both of them are
 * known.
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if an interval is defined for a string field, or if a listed value of a
 *             numeric field cannot be converted to a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            // only plain string columns get a value domain; other nominal types keep a null domain
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                Value[] valueArray = dataField.getValueArray();
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            Value[] valueArray = dataField.getValueArray();
            if (intervalArray != null && intervalArray.length > 0) {
                Interval interval = intervalArray[0];
                // The margin getters return a primitive and therefore cannot signal an absent margin.
                // Only take over margins that are really set so that a missing margin is not turned into
                // a made-up numeric bound.
                if (interval.isSetLeftMargin()) {
                    leftMargin = interval.getLeftMargin();
                }
                if (interval.isSetRightMargin()) {
                    rightMargin = interval.getRightMargin();
                }
            } else if (valueArray != null && valueArray.length > 0) {
                // try to derive the bounds from the values
                List<Double> values = new ArrayList<Double>(valueArray.length);
                for (Value pmmlValue : valueArray) {
                    String value = "";
                    try {
                        value = pmmlValue.getValue();
                        values.add(Double.parseDouble(value));
                    } catch (RuntimeException e) {
                        // keep the original failure as cause so the root problem stays visible
                        throw new IllegalArgumentException(
                            "Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.",
                            e);
                    }
                }
                leftMargin = Collections.min(values);
                rightMargin = Collections.max(values);
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 3 with Interval

use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.

the class PMMLBinningTranslator method createDerivedFields.

/**
 * Builds one PMML {@link DerivedField} carrying a discretize element for every entry of
 * {@code m_columnToBins}.
 *
 * @return the created derived fields, in the iteration order of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int next = 0;
    for (Map.Entry<String, Bin[]> columnEntry : m_columnToBins.entrySet()) {
        final String column = columnEntry.getKey();
        final Bin[] columnBins = columnEntry.getValue();
        final DerivedField field = DerivedField.Factory.newInstance();
        // Resolve the current field name BEFORE a new derived name is created for this column:
        // the mapper only knows the current mapping.
        final String sourceField = m_mapper.getDerivedFieldName(column);
        final Discretize discretize = field.addNewDiscretize();
        discretize.setField(sourceField);
        final String appendedName = m_columnToAppend.get(column);
        if (appendedName == null) {
            field.setName(m_mapper.createDerivedFieldName(column));
            field.setDisplayName(column);
        } else {
            field.setName(appendedName);
        }
        field.setOptype(OPTYPE.CATEGORICAL);
        field.setDataType(DATATYPE.STRING);
        for (int b = 0; b < columnBins.length; b++) {
            final NumericBin numericBin = (NumericBin) columnBins[b];
            final boolean openLeft = numericBin.isLeftOpen();
            final boolean openRight = numericBin.isRightOpen();
            final double lower = numericBin.getLeftValue();
            final double upper = numericBin.getRightValue();
            final DiscretizeBin discretizeBin = discretize.addNewDiscretizeBin();
            discretizeBin.setBinValue(numericBin.getBinName());
            final Interval range = discretizeBin.addNewInterval();
            // infinite boundaries are expressed by leaving the margin unset
            if (!Double.isInfinite(lower)) {
                range.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                range.setRightMargin(upper);
            }
            if (openLeft) {
                range.setClosure(openRight ? Closure.OPEN_OPEN : Closure.OPEN_CLOSED);
            } else {
                range.setClosure(openRight ? Closure.CLOSED_OPEN : Closure.CLOSED_CLOSED);
            }
        }
        result[next] = field;
        next++;
    }
    return result;
}
Also used : DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) TreeMap(java.util.TreeMap) Map(java.util.Map) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 4 with Interval

use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.

the class PMMLBinningTranslator method initializeFrom.

/**
 * {@inheritDoc}
 *
 * <p>Only derived fields that carry a discretize element are read; their indices are returned. Every
 * discretize bin is converted into a {@link NumericBin}. A margin that is not set in the PMML interval is
 * read as negative (left) or positive (right) infinity.
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    List<Integer> consumed = new ArrayList<Integer>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // only reading discretize entries other entries are skipped
            continue;
        }
        consumed.add(i);
        Discretize discretize = df.getDiscretize();
        DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            DiscretizeBin bin = pmmlBins[j];
            String binName = bin.getBinValue();
            Interval interval = bin.getInterval();
            // The margin getters return a primitive and cannot signal an absent margin. An unset margin
            // stands for an unbounded side (infinite margins are not written when the derived fields are
            // created), so map it back to the matching infinity instead of reading a numeric default.
            double leftValue = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double rightValue =
                interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            Closure.Enum closure = interval.getClosure();
            // open/open is the fallback for OPEN_OPEN and for any closure not matched below
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * This field contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A', A'' etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
Also used : Closure(org.dmg.pmml.IntervalDocument.Interval.Closure) ArrayList(java.util.ArrayList) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 5 with Interval

use of org.dmg.pmml.IntervalDocument.Interval in project knime-core by knime.

the class PMMLBinningTranslator method createDerivedFields.

/**
 * Converts the bins stored in {@code m_columnToBins} into PMML derived fields, one discretize-based
 * {@link DerivedField} per map entry.
 *
 * @return an array with one derived field per entry of {@code m_columnToBins}, in the map's iteration order
 */
private DerivedField[] createDerivedFields() {
    DerivedField[] fields = new DerivedField[m_columnToBins.size()];
    int position = 0;
    for (Map.Entry<String, Bin[]> binsOfColumn : m_columnToBins.entrySet()) {
        String columnName = binsOfColumn.getKey();
        DerivedField derived = DerivedField.Factory.newInstance();
        // The referenced field name has to be fetched before a new derived name is created for this
        // column, because the mapper only holds the current mapping.
        String referencedField = m_mapper.getDerivedFieldName(columnName);
        Discretize discretizeElement = derived.addNewDiscretize();
        discretizeElement.setField(referencedField);
        String nameToAppend = m_columnToAppend.get(columnName);
        if (nameToAppend == null) {
            derived.setName(m_mapper.createDerivedFieldName(columnName));
            derived.setDisplayName(columnName);
        } else {
            derived.setName(nameToAppend);
        }
        derived.setOptype(OPTYPE.CATEGORICAL);
        derived.setDataType(DATATYPE.STRING);
        for (Bin rawBin : binsOfColumn.getValue()) {
            NumericBin source = (NumericBin) rawBin;
            boolean isLeftOpen = source.isLeftOpen();
            boolean isRightOpen = source.isRightOpen();
            double lowerMargin = source.getLeftValue();
            double upperMargin = source.getRightValue();
            DiscretizeBin target = discretizeElement.addNewDiscretizeBin();
            target.setBinValue(source.getBinName());
            Interval targetInterval = target.addNewInterval();
            // a margin is only written when it is finite (or NaN); infinite sides stay unset
            if (!Double.isInfinite(lowerMargin)) {
                targetInterval.setLeftMargin(lowerMargin);
            }
            if (!Double.isInfinite(upperMargin)) {
                targetInterval.setRightMargin(upperMargin);
            }
            if (isLeftOpen) {
                if (isRightOpen) {
                    targetInterval.setClosure(Closure.OPEN_OPEN);
                } else {
                    targetInterval.setClosure(Closure.OPEN_CLOSED);
                }
            } else if (isRightOpen) {
                targetInterval.setClosure(Closure.CLOSED_OPEN);
            } else {
                targetInterval.setClosure(Closure.CLOSED_CLOSED);
            }
        }
        fields[position] = derived;
        position += 1;
    }
    return fields;
}
Also used : DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) Bin(org.knime.base.node.preproc.pmml.binner.BinnerColumnFactory.Bin) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) TreeMap(java.util.TreeMap) Map(java.util.Map) Interval(org.dmg.pmml.IntervalDocument.Interval)

Aggregations

Interval (org.dmg.pmml.IntervalDocument.Interval)7 DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField)5 DiscretizeBin (org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin)5 ArrayList (java.util.ArrayList)4 Discretize (org.dmg.pmml.DiscretizeDocument.Discretize)4 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 DataDictionary (org.dmg.pmml.DataDictionaryDocument.DataDictionary)2 DataField (org.dmg.pmml.DataFieldDocument.DataField)2 Closure (org.dmg.pmml.IntervalDocument.Interval.Closure)2 Value (org.dmg.pmml.ValueDocument.Value)2 BooleanValue (org.knime.core.data.BooleanValue)2 DataCell (org.knime.core.data.DataCell)2 DataType (org.knime.core.data.DataType)2 DoubleValue (org.knime.core.data.DoubleValue)2 IntValue (org.knime.core.data.IntValue)2 NominalValue (org.knime.core.data.NominalValue)2 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)2 LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1