Search in sources:

Example 36 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class RPropNodeModel method execute.

/**
 * Trains a multilayer perceptron on the input table and returns it wrapped
 * in a PMML port object.
 *
 * The execution consists of three steps:
 * <ol>
 * <li>A neural network is built with the inputs and outputs according to
 * the input datatable, number of hidden layers as specified.</li>
 * <li>Input DataTables are converted into double-arrays so they can be
 * attached to the neural net.</li>
 * <li>The neural net is trained.</li>
 * </ol>
 *
 * A numeric class column switches the network to regression mode (one
 * output neuron); any other class column is treated as nominal and gets one
 * output neuron per domain value. Rows containing missing values are either
 * skipped or cause a failure, depending on the "ignore missing values"
 * setting.
 *
 * {@inheritDoc}
 *
 * @throws InvalidSettingsException if the numeric class column has domain
 *             bounds outside of [0,1]
 * @throws Exception if the nominal class column has no domain values, a
 *             non-class column is not double compatible, a missing value is
 *             found while missing values are not ignored, or a class value
 *             is not part of the class column's domain
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // If class column is not set, it is the last column.
    final DataTableSpec posSpec = (DataTableSpec) inData[INDATA].getSpec();
    if (m_classcol.getStringValue() == null) {
        m_classcol.setStringValue(posSpec.getColumnSpec(posSpec.getNumColumns() - 1).getName());
    }
    final String classColName = m_classcol.getStringValue();
    final List<String> learningCols = new LinkedList<String>();
    final List<String> targetCols = new LinkedList<String>();
    // Determine the number of inputs and the number of outputs. Make also
    // sure that the inputs are double values.
    int nrInputs = 0;
    int nrOutputs = 0;
    final HashMap<String, Integer> inputmap = new HashMap<String, Integer>();
    HashMap<DataCell, Integer> classMap = new HashMap<DataCell, Integer>();
    for (DataColumnSpec colspec : posSpec) {
        final String colName = colspec.getName();
        if (colName.equals(classColName)) {
            // this is the class (target) column
            targetCols.add(colName);
            if (colspec.getType().isCompatible(DoubleValue.class)) {
                // regression: check if the values are in range [0,1]
                final DataColumnDomain domain = colspec.getDomain();
                if (domain.hasBounds()) {
                    final double lower = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
                    final double upper = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
                    if (lower < 0 || upper > 1) {
                        throw new InvalidSettingsException("Domain range for regression in column " + colName + " not in range [0,1]");
                    }
                }
                nrOutputs = 1;
                // the single output neuron is registered under the column name
                classMap = new HashMap<DataCell, Integer>();
                classMap.put(new StringCell(colName), 0);
                m_regression = true;
            } else {
                // classification: one output neuron per possible class value
                m_regression = false;
                final DataColumnDomain domain = colspec.getDomain();
                if (!domain.hasValues()) {
                    throw new Exception("Could not find domain values in nominal column " + colName);
                }
                final Set<DataCell> allvalues = domain.getValues();
                classMap = new HashMap<DataCell, Integer>();
                int outputneuron = 0;
                for (DataCell value : allvalues) {
                    classMap.put(value, outputneuron);
                    outputneuron++;
                }
                nrOutputs = allvalues.size();
            }
        } else {
            if (!colspec.getType().isCompatible(DoubleValue.class)) {
                throw new Exception("Only double columns for input");
            }
            inputmap.put(colName, nrInputs);
            learningCols.add(colName);
            nrInputs++;
        }
    }
    assert targetCols.size() == 1 : "Only one class column allowed.";
    m_architecture.setNrInputNeurons(nrInputs);
    m_architecture.setNrHiddenLayers(m_nrHiddenLayers.getIntValue());
    m_architecture.setNrHiddenNeurons(m_nrHiddenNeuronsperLayer.getIntValue());
    m_architecture.setNrOutputNeurons(nrOutputs);
    final Random random = new Random();
    if (m_useRandomSeed.getBooleanValue()) {
        random.setSeed(m_randomSeed.getIntValue());
    }
    m_mlp = new MultiLayerPerceptron(m_architecture, random);
    if (m_regression) {
        m_mlp.setMode(MultiLayerPerceptron.REGRESSION_MODE);
    } else {
        m_mlp.setMode(MultiLayerPerceptron.CLASSIFICATION_MODE);
    }
    // Convert inputs to double arrays. Values from the class column are
    // encoded as bitvectors.
    final int classColNr = posSpec.findColumnIndex(classColName);
    final boolean ignoreMissing = m_ignoreMV.getBooleanValue();
    final List<Double[]> samples = new ArrayList<Double[]>();
    final List<Double[]> outputs = new ArrayList<Double[]>();
    final RowIterator rowIt = ((BufferedDataTable) inData[INDATA]).iterator();
    while (rowIt.hasNext()) {
        final DataRow row = rowIt.next();
        final Double[] sample = new Double[nrInputs];
        final Double[] output = new Double[nrOutputs];
        final int nrCells = row.getNumCells();
        boolean add = true;
        int index = 0;
        for (int i = 0; i < nrCells; i++) {
            final DataCell cell = row.getCell(i);
            if (cell.isMissing()) {
                if (!ignoreMissing) {
                    throw new Exception(i == classColNr ? "Missing value in class column" : "Missing values in input datatable");
                }
                // incomplete row: drop it from the training data
                add = false;
                break;
            }
            if (i != classColNr) {
                sample[index] = ((DoubleValue) cell).getDoubleValue();
                index++;
            } else if (m_regression) {
                output[0] = ((DoubleValue) cell).getDoubleValue();
            } else {
                // Look the neuron index up once; a value that is not part of
                // the domain would otherwise fail with a bare
                // NullPointerException while unboxing.
                final Integer classIndex = classMap.get(cell);
                if (classIndex == null) {
                    throw new Exception("Class value \"" + cell + "\" in column " + classColName + " is not contained in the domain of the column");
                }
                final int target = classIndex.intValue();
                for (int j = 0; j < nrOutputs; j++) {
                    output[j] = Double.valueOf(j == target ? 1.0 : 0.0);
                }
            }
        }
        if (add) {
            samples.add(sample);
            outputs.add(output);
        }
    }
    // One row per retained sample; the target rows have nrOutputs entries.
    final Double[][] samplesarr = samples.toArray(new Double[samples.size()][]);
    final Double[][] outputsarr = outputs.toArray(new Double[outputs.size()][]);
    // Now finally train the network.
    m_mlp.setClassMapping(classMap);
    m_mlp.setInputMapping(inputmap);
    final RProp myrprop = new RProp();
    final int nrIterations = m_nrIterations.getIntValue();
    m_errors = new double[nrIterations];
    for (int iteration = 0; iteration < nrIterations; iteration++) {
        exec.setProgress((double) iteration / (double) nrIterations, "Iteration " + iteration);
        myrprop.train(m_mlp, samplesarr, outputsarr);
        // sum of squared errors over all training rows and output neurons
        double error = 0;
        for (int j = 0; j < outputsarr.length; j++) {
            final double[] myoutput = m_mlp.output(samplesarr[j]);
            for (int o = 0; o < outputsarr[j].length; o++) {
                final double diff = myoutput[o] - outputsarr[j][o];
                error += diff * diff;
            }
        }
        m_errors[iteration] = error;
        exec.checkCanceled();
    }
    // handle the optional PMML input
    final PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[INMODEL] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    final PMMLPortObjectSpec outPortSpec = createPMMLPortObjectSpec(inPMMLSpec, posSpec, learningCols, targetCols);
    final PMMLPortObject outPMMLPort = new PMMLPortObject(outPortSpec, inPMMLPort, posSpec);
    outPMMLPort.addModelTranslater(new PMMLNeuralNetworkTranslator(m_mlp));
    return new PortObject[] { outPMMLPort };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) HashMap(java.util.HashMap) PMMLNeuralNetworkTranslator(org.knime.base.node.mine.neural.mlp2.PMMLNeuralNetworkTranslator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) Random(java.util.Random) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) LinkedList(java.util.LinkedList) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) IOException(java.io.IOException) MultiLayerPerceptron(org.knime.base.data.neural.MultiLayerPerceptron) SettingsModelInteger(org.knime.core.node.defaultnodesettings.SettingsModelInteger) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) RProp(org.knime.base.data.neural.methods.RProp)

Example 37 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class DecTreePredictorNodeModel method createOutTableSpec.

/**
 * Builds the output table spec: the input data spec followed by one
 * probability column per predicted class value (only if the distribution
 * is requested) and finally the prediction column.
 *
 * @param inSpecs the port specs; the model spec is read from
 *            {@code INMODELPORT}, the data spec from {@code INDATAPORT}
 * @return the combined output spec, or {@code null} if the distribution is
 *         requested but the prediction values cannot be determined
 * @throws InvalidSettingsException declared by the signature; presumably
 *             raised by one of the helpers called here (verify)
 */
private DataTableSpec createOutTableSpec(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    List<DataCell> classValues = null;
    if (m_showDistribution.getBooleanValue()) {
        classValues = getPredictionValues((PMMLPortObjectSpec) inSpecs[INMODELPORT]);
        if (classValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    final int nrDistributionCols = classValues == null ? 0 : classValues.size();
    final DataTableSpec dataSpec = (DataTableSpec) inSpecs[INDATAPORT];
    // distribution columns first, the prediction column takes the last slot
    final DataColumnSpec[] appended = new DataColumnSpec[nrDistributionCols + 1];
    // every probability column carries the fixed domain [0,1]
    final DataColumnDomain unitInterval = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    final PredictorHelper helper = PredictorHelper.getInstance();
    final PMMLPortObjectSpec modelSpec = (PMMLPortObjectSpec) inSpecs[INMODELPORT];
    final String targetName = modelSpec.getTargetFields().iterator().next();
    int col = 0;
    while (col < nrDistributionCols) {
        assert classValues != null;
        final String probabilityName = helper.probabilityColumnName(targetName, classValues.get(col).toString(), m_probabilitySuffix.getStringValue());
        final DataColumnSpecCreator probabilityCreator = new DataColumnSpecCreator(probabilityName, DoubleCell.TYPE);
        probabilityCreator.setDomain(unitInterval);
        appended[col] = probabilityCreator.createSpec();
        col++;
    }
    // the prediction column itself is a plain string column
    final String predictionName = helper.computePredictionColumnName(m_predictionColumn.getStringValue(), m_overridePrediction.getBooleanValue(), targetName);
    final DataColumnSpecCreator predictionCreator = new DataColumnSpecCreator(predictionName, StringCell.TYPE);
    appended[nrDistributionCols] = predictionCreator.createSpec();
    final DataTableSpec appendedSpec = new DataTableSpec(appended);
    return new DataTableSpec(dataSpec, appendedSpec);
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) PredictorHelper(org.knime.base.node.mine.util.PredictorHelper) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell)

Example 38 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class Pivot2NodeModel method createCombinedPivots.

/**
 * Collects, for every pivot column, the string form of its domain values.
 *
 * An entry stays {@code null} when the domain is ignored by setting or the
 * column has no domain values. Unless missing values are ignored, the
 * marker {@code "?"} is appended to each collected set.
 *
 * @param groupSpec the spec in which the pivot columns are looked up
 * @param pivotCols the names of the pivot columns
 * @return one (possibly {@code null}) set of value strings per pivot column,
 *         in the order of {@code pivotCols}
 */
private Set<String>[] createCombinedPivots(final DataTableSpec groupSpec, final List<String> pivotCols) {
    // resolve all column positions first
    final int[] columnPositions = new int[pivotCols.size()];
    int next = 0;
    for (final String pivotName : pivotCols) {
        columnPositions[next] = groupSpec.findColumnIndex(pivotName);
        next++;
    }
    @SuppressWarnings("unchecked") final Set<String>[] result = new Set[columnPositions.length];
    for (int p = 0; p < columnPositions.length; p++) {
        final DataColumnDomain pivotDomain = groupSpec.getColumnSpec(columnPositions[p]).getDomain();
        if (m_ignoreDomain.getBooleanValue() || !pivotDomain.hasValues()) {
            // nothing to collect for this column, the slot stays null
            continue;
        }
        final Set<String> names = new LinkedHashSet<String>();
        result[p] = names;
        for (final DataCell cell : pivotDomain.getValues()) {
            names.add(cell.toString());
        }
        if (!m_ignoreMissValues.getBooleanValue()) {
            names.add("?");
        }
    }
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString)

Example 39 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class RenameColumnSetting method configure.

/**
 * Called by configure in NodeModel to compute the new column spec.
 *
 * The resulting spec is derived from the input spec with the configured new
 * name (or the old one if none is set) and the type selected by the
 * configured value class index. When the column has to be converted via
 * {@code toString()}, the type becomes {@code StringCell.TYPE} and the
 * domain is not carried over.
 *
 * @param inSpec the original input spec (names must match)
 * @return the new column spec
 * @throws InvalidSettingsException if the column name differs from the
 *             configured one, or if the configured type index is negative
 *             or not smaller than the number of possible types
 */
public DataColumnSpec configure(final DataColumnSpec inSpec) throws InvalidSettingsException {
    final String name = inSpec.getName();
    final DataType oldType = inSpec.getType();
    if (!name.equals(m_name)) {
        throw new InvalidSettingsException("Column names don't match: \"" + m_name + "\" vs. \"" + name + "\"");
    }
    final Set<Class<? extends DataValue>> possibleTypeSet = constructPossibleTypes(oldType);
    // no generics in array definition
    @SuppressWarnings("unchecked") final Class<? extends DataValue>[] possibleTypes = possibleTypeSet.toArray(new Class[possibleTypeSet.size()]);
    // Read the index once and reject both ends of the range, so that a
    // negative index is reported as invalid settings instead of surfacing as
    // an ArrayIndexOutOfBoundsException below.
    final int typeIndex = getNewValueClassIndex();
    if (typeIndex < 0 || typeIndex >= possibleTypes.length) {
        throw new InvalidSettingsException("Invalid type index: " + typeIndex);
    }
    final String newName = m_newColumnName == null ? m_name : m_newColumnName;
    final Class<? extends DataValue> newVal = possibleTypes[typeIndex];
    // the type of the missing cell needs to be handled separately, bug #1939
    final boolean useToString = newVal.equals(StringValue.class)
        && (DataType.getMissingCell().getType().equals(oldType) || !oldType.isCompatible(StringValue.class));
    final DataColumnDomain newDomain;
    final DataType newType;
    if (useToString) {
        // the old domain is not carried over to the converted string column
        newDomain = null;
        newType = StringCell.TYPE;
    } else {
        newDomain = inSpec.getDomain();
        final Class<? extends DataValue> oldP = oldType.getPreferredValueClass();
        if (oldP.equals(newVal)) {
            newType = oldType;
        } else {
            newType = DataType.cloneChangePreferredValue(oldType, newVal);
        }
    }
    final DataColumnSpecCreator creator = new DataColumnSpecCreator(inSpec);
    creator.setName(newName);
    creator.setType(newType);
    creator.setDomain(newDomain);
    return creator.createSpec();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValue(org.knime.core.data.DataValue) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) StringValue(org.knime.core.data.StringValue)

Example 40 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class NominalAttributeColumnHelper method createMetaData.

/**
 * Creates the nominal column meta data from the possible values listed in
 * the column's domain; fails the argument check if the domain carries no
 * values.
 *
 * {@inheritDoc}
 */
@Override
protected TreeNominalColumnMetaData createMetaData(final DataColumnSpec nominalColSpec) {
    final DataColumnDomain colDomain = nominalColSpec.getDomain();
    final boolean domainHasValues = colDomain.hasValues();
    CheckUtils.checkArgument(domainHasValues, "The data dictionary doesn't contain domain information for column \"%s\".", nominalColSpec);
    // translate the domain values into their nominal representations
    final NominalValueRepresentation[] representations = NominalColumnHelperUtil.extractNomValReps(colDomain.getValues());
    final String columnName = nominalColSpec.getName();
    return new TreeNominalColumnMetaData(columnName, representations);
}
Also used : TreeNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeNominalColumnMetaData) DataColumnDomain(org.knime.core.data.DataColumnDomain) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation)

Aggregations

DataColumnDomain (org.knime.core.data.DataColumnDomain)46 DataColumnSpec (org.knime.core.data.DataColumnSpec)34 DataCell (org.knime.core.data.DataCell)32 DataTableSpec (org.knime.core.data.DataTableSpec)20 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 DoubleValue (org.knime.core.data.DoubleValue)13 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)12 DataType (org.knime.core.data.DataType)11 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)8 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)7 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)6 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LinkedHashSet (java.util.LinkedHashSet)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 HashMap (java.util.HashMap)3 LinkedHashMap (java.util.LinkedHashMap)3 LinkedList (java.util.LinkedList)3 Set (java.util.Set)3 DataRow (org.knime.core.data.DataRow)3