Search in sources :

Example 41 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

In class PolyRegLearnerNodeModel, method getCellFactory:

/**
 * Builds a cell factory that computes, for every row, the polynomial regression prediction and the
 * absolute difference between that prediction and the value in the dependent column.
 *
 * <p>Each call to {@code getCells} also adds the squared difference to {@code m_squaredError}.</p>
 *
 * @param dependentIndex index of the dependent column within the rows handed to the factory
 * @return a factory producing two double cells per row: the prediction and the prediction error
 */
private CellFactory getCellFactory(final int dependentIndex) {
    final int maxPower = m_settings.getDegree();
    return new CellFactory() {

        @Override
        public DataCell[] getCells(final DataRow row) {
            // the sum starts at m_betas[0]; the remaining coefficients are consumed in order,
            // maxPower of them for every selected column
            double prediction = m_betas[0];
            int nextBeta = 1;
            double observed = 0;
            for (int c = 0; c < row.getNumCells(); c++) {
                if (c == dependentIndex) {
                    // the dependent column never contributes to the prediction
                    observed = ((DoubleValue) row.getCell(c)).getDoubleValue();
                    continue;
                }
                if (!m_colSelected[c]) {
                    continue;
                }
                final double x = ((DoubleValue) row.getCell(c)).getDoubleValue();
                double power = 1;
                for (int remaining = maxPower; remaining > 0; remaining--) {
                    power *= x;
                    prediction += m_betas[nextBeta] * power;
                    nextBeta++;
                }
            }
            final double absError = Math.abs(prediction - observed);
            // side effect on the enclosing instance: accumulate the squared error
            m_squaredError += absError * absError;
            return new DataCell[] { new DoubleCell(prediction), new DoubleCell(absError) };
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            final DataColumnSpec predictionSpec =
                new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
            final DataColumnSpec errorSpec =
                new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();
            return new DataColumnSpec[] { predictionSpec, errorSpec };
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor execMon) {
            // intentionally empty: this factory does not report progress
        }
    };
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleValue(org.knime.core.data.DoubleValue) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CellFactory(org.knime.core.data.container.CellFactory) DataRow(org.knime.core.data.DataRow)

Example 42 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

In class RegressionPredictorNodeModel, method createRearranger:

/**
 * Creates a column rearranger that appends one double column holding the regression prediction.
 *
 * <p>The required input columns are the variables of the model's regression table or, when no
 * model is given, the learning fields of the model spec. Every required column must be present in
 * {@code inSpec} and be compatible with {@link DoubleValue}.</p>
 *
 * <p>NOTE(review): the cell factory dereferences {@code regModel}; a rearranger created with a
 * {@code null} model is presumably only used for spec computation - confirm against the callers.</p>
 *
 * @param inSpec spec of the table the prediction column is appended to
 * @param regModelSpec PMML spec of the regression model, must not be {@code null}
 * @param regModel the regression model, may be {@code null} (see note above)
 * @return the rearranger appending the prediction column
 * @throws InvalidSettingsException if {@code regModelSpec} is {@code null}, a required column is
 *             missing in {@code inSpec}, or a required column is not double compatible
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final PMMLPortObjectSpec regModelSpec, final PMMLRegressionTranslator regModel) throws InvalidSettingsException {
    if (regModelSpec == null) {
        throw new InvalidSettingsException("No input");
    }
    // the first target field (if any) names the response; otherwise fall back to "Response"
    String targetCol = "Response";
    for (String s : regModelSpec.getTargetFields()) {
        targetCol = s;
        break;
    }
    final List<String> learnFields;
    if (regModel != null) {
        RegressionTable regTable = regModel.getRegressionTable();
        learnFields = new ArrayList<String>();
        for (NumericPredictor p : regTable.getVariables()) {
            learnFields.add(p.getName());
        }
    } else {
        learnFields = new ArrayList<String>(regModelSpec.getLearningFields());
    }
    // resolve every regressor to its column index, validating presence and type on the way
    final int[] colIndices = new int[learnFields.size()];
    int k = 0;
    for (String learnCol : learnFields) {
        int index = inSpec.findColumnIndex(learnCol);
        if (index < 0) {
            throw new InvalidSettingsException("Missing column for " + "regressor variable : \"" + learnCol + "\"");
        }
        DataColumnSpec col = inSpec.getColumnSpec(index);
        if (!col.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Incompatible type of " + "column \"" + col.getName() + "\": " + col.getType());
        }
        colIndices[k++] = index;
    }
    // try to use some smart naming scheme for the append column
    String oldName = targetCol;
    // Locale.ROOT keeps the suffix check independent of the default locale (e.g. under a Turkish
    // locale "(PREDICTION)" would otherwise not lower-case to "(prediction)")
    if (inSpec.containsName(oldName) && !oldName.toLowerCase(java.util.Locale.ROOT).endsWith("(prediction)")) {
        oldName = oldName + " (prediction)";
    }
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, oldName);
    DataColumnSpec newCol = new DataColumnSpecCreator(newColName, DoubleCell.TYPE).createSpec();
    SingleCellFactory fac = new SingleCellFactory(newCol) {

        @Override
        public DataCell getCell(final DataRow row) {
            RegressionTable t = regModel.getRegressionTable();
            // colIndices is consumed in the iteration order of the table's variables
            int j = 0;
            double result = t.getIntercept();
            for (NumericPredictor p : t.getVariables()) {
                DataCell c = row.getCell(colIndices[j++]);
                if (c.isMissing()) {
                    // a single missing regressor makes the whole prediction missing
                    return DataType.getMissingCell();
                }
                double v = ((DoubleValue) c).getDoubleValue();
                if (p.getExponent() != 1) {
                    v = Math.pow(v, p.getExponent());
                }
                result += p.getCoefficient() * v;
            }
            return new DoubleCell(result);
        }
    };
    ColumnRearranger c = new ColumnRearranger(inSpec);
    c.append(fac);
    return c;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) NumericPredictor(org.knime.base.node.mine.regression.PMMLRegressionTranslator.NumericPredictor) DataRow(org.knime.core.data.DataRow) RegressionTable(org.knime.base.node.mine.regression.PMMLRegressionTranslator.RegressionTable) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 43 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

In class MissingValueHandling3Table, method createTableSpecPrivate:

/*
 * Private helper that assumes the ColSetting array to have the right format, i.e. one setting per
 * column of spec. For columns whose missing values are replaced by a fixed value, the column domain
 * is extended (bounds and possible values) so that it covers that fixed value; all other columns
 * keep their spec unchanged.
 */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final MissingValueHandling2ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    final DataColumnSpec[] result = new DataColumnSpec[sets.length];
    for (int idx = 0; idx < sets.length; idx++) {
        final DataColumnSpec current = spec.getColumnSpec(idx);
        // default: keep the column spec as it is
        result[idx] = current;
        if (sets[idx].getMethod() != MissingValueHandling2ColSetting.METHOD_FIX_VAL) {
            continue;
        }
        final DataColumnDomain domain = current.getDomain();
        final Comparator<DataCell> order = current.getType().getComparator();
        final DataCell replacement = sets[idx].getFixCell();

        // an unset bound is expected to be null rather than a missing cell; a missing cell may
        // show up anyway, in which case the bound is left untouched
        DataCell lower = domain.getLowerBound();
        final boolean belowLower = lower != null && !lower.isMissing() && (order.compare(replacement, lower) < 0);
        if (belowLower) {
            lower = replacement;
        }
        DataCell upper = domain.getUpperBound();
        final boolean aboveUpper = upper != null && !upper.isMissing() && (order.compare(replacement, upper) > 0);
        if (aboveUpper) {
            upper = replacement;
        }
        Set<DataCell> possibleValues = domain.getValues();
        final boolean unknownValue = possibleValues != null && !possibleValues.contains(replacement);
        if (unknownValue) {
            // copy before adding so the set obtained from the domain is not modified
            possibleValues = new LinkedHashSet<DataCell>(possibleValues);
            possibleValues.add(replacement);
        }

        if (belowLower || aboveUpper || unknownValue) {
            final DataColumnDomain widened = new DataColumnDomainCreator(possibleValues, lower, upper).createDomain();
            final DataColumnSpecCreator creator = new DataColumnSpecCreator(current);
            creator.setDomain(widened);
            result[idx] = creator.createSpec();
        }
    }
    return new DataTableSpec(result);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)

Example 44 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

In class Rule, method main:

/**
 * For testing: reads lines from standard input, parses each line as a rule against a fixed table
 * spec (int columns A, B, C; string column S; double columns X, Y, Z) and prints the parsed rule.
 * A line that fails with a {@link ParseException} gets its stack trace printed and reading goes on
 * with the next line.
 *
 * @param args not used
 * @throws Exception any exception other than a {@code ParseException} raised while reading or
 *             parsing
 */
public static void main(final String[] args) throws Exception {
    final DataColumnSpec[] columns = new DataColumnSpec[] {
        new DataColumnSpecCreator("A", IntCell.TYPE).createSpec(),
        new DataColumnSpecCreator("B", IntCell.TYPE).createSpec(),
        new DataColumnSpecCreator("C", IntCell.TYPE).createSpec(),
        new DataColumnSpecCreator("S", StringCell.TYPE).createSpec(),
        new DataColumnSpecCreator("X", DoubleCell.TYPE).createSpec(),
        new DataColumnSpecCreator("Y", DoubleCell.TYPE).createSpec(),
        new DataColumnSpecCreator("Z", DoubleCell.TYPE).createSpec()
    };
    final DataTableSpec tableSpec = new DataTableSpec(columns);
    final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    // one rule per input line, until end of input
    for (String text = stdin.readLine(); text != null; text = stdin.readLine()) {
        try {
            final Rule parsed = new Rule(text, tableSpec);
            System.out.println(parsed.toString());
        } catch (ParseException ex) {
            ex.printStackTrace();
        }
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) ParseException(java.text.ParseException)

Example 45 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

In class PMMLDataDictionaryTranslator, method addColSpecsForDataFields:

/**
 * Creates one column spec per data field of the PMML data dictionary, appends it to
 * {@code colSpecs} and records the field name in {@code m_dictFields}.
 *
 * <p>Domain handling depends on the KNIME type derived from the field:</p>
 * <ul>
 * <li>string columns get the field's values as possible values,</li>
 * <li>double compatible columns get bounds taken from the first interval or, if there is no
 * interval, from the minimum and maximum of the field's values; without either an empty domain is
 * set,</li>
 * <li>all other columns get no explicit domain.</li>
 * </ul>
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if a string field defines intervals, or if a value of a double
 *             compatible field cannot be converted to a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            // only string columns receive possible values; other nominal types keep a null domain
            if (DataType.getType(StringCell.class).equals(dataType)) {
                Value[] valueArray = dataField.getValueArray();
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                // only the first interval is taken into account
                Interval interval = dataField.getIntervalArray(0);
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else {
                // fetch the values once instead of once per check
                Value[] valueArray = dataField.getValueArray();
                if (valueArray != null && valueArray.length > 0) {
                    // try to derive the bounds from the values
                    List<Double> values = new ArrayList<Double>(valueArray.length);
                    for (int j = 0; j < valueArray.length; j++) {
                        String value = "";
                        try {
                            value = valueArray[j].getValue();
                            values.add(Double.parseDouble(value));
                        } catch (Exception e) {
                            // keep the original failure as cause so the root problem stays visible
                            // NOTE(review): the message says "Skipping" although the exception
                            // aborts the whole method - confirm which behavior is intended
                            throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                        }
                    }
                    leftMargin = Collections.min(values);
                    rightMargin = Collections.max(values);
                }
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available; for double compatible types other
                // than int and double both bounds stay null
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Aggregations

DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)267 DataColumnSpec (org.knime.core.data.DataColumnSpec)210 DataTableSpec (org.knime.core.data.DataTableSpec)132 DataCell (org.knime.core.data.DataCell)92 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)77 DataType (org.knime.core.data.DataType)74 DataRow (org.knime.core.data.DataRow)73 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)57 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)51 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)48 ArrayList (java.util.ArrayList)46 DoubleCell (org.knime.core.data.def.DoubleCell)45 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)44 StringCell (org.knime.core.data.def.StringCell)29 BufferedDataTable (org.knime.core.node.BufferedDataTable)23 DoubleValue (org.knime.core.data.DoubleValue)22 HashSet (java.util.HashSet)19 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)17 DataColumnDomain (org.knime.core.data.DataColumnDomain)16 DefaultRow (org.knime.core.data.def.DefaultRow)16