use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class RPropNodeModel method execute.
/**
 * Trains a multi layer perceptron on the input table using RProp and
 * returns the trained network as a PMML port object.
 *
 * The execution consists of three steps:
 * <ol>
 * <li>A neural network is built with the inputs and outputs according to
 * the input data table and the configured number of hidden layers.</li>
 * <li>The rows of the input table are converted into {@code Double} arrays
 * so they can be attached to the neural net. In classification mode the
 * class value is encoded as a one-hot vector.</li>
 * <li>The neural net is trained for the configured number of iterations;
 * the summed squared error of every iteration is stored in
 * {@code m_errors}.</li>
 * </ol>
 *
 * If no class column is configured, the last column of the input table is
 * used (and stored in the settings model).
 *
 * {@inheritDoc}
 *
 * @throws InvalidSettingsException if the class column is numeric and its
 *             domain bounds are outside of [0,1]
 * @throws IllegalStateException if a row contains a class value that is
 *             not part of the class column's domain
 * @throws Exception if an input column is not double compatible, if a
 *             nominal class column has no domain values, or if a missing
 *             value is encountered while missing values are not ignored
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // If class column is not set, it is the last column.
    DataTableSpec posSpec = (DataTableSpec) inData[INDATA].getSpec();
    if (m_classcol.getStringValue() == null) {
        m_classcol.setStringValue(posSpec.getColumnSpec(posSpec.getNumColumns() - 1).getName());
    }
    final String classColName = m_classcol.getStringValue();

    List<String> learningCols = new LinkedList<String>();
    List<String> targetCols = new LinkedList<String>();
    // Determine the number of inputs and the number of outputs. Make also
    // sure that the inputs are double values.
    int nrInputs = 0;
    int nrOutputs = 0;
    HashMap<String, Integer> inputmap = new HashMap<String, Integer>();
    HashMap<DataCell, Integer> classMap = new HashMap<DataCell, Integer>();
    for (DataColumnSpec colspec : posSpec) {
        if (colspec.getName().equals(classColName)) {
            // class (target) column
            targetCols.add(colspec.getName());
            if (colspec.getType().isCompatible(DoubleValue.class)) {
                // regression: check if the values are in range [0,1]
                DataColumnDomain domain = colspec.getDomain();
                if (domain.hasBounds()) {
                    double lower = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
                    double upper = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
                    if (lower < 0 || upper > 1) {
                        throw new InvalidSettingsException("Domain range for regression in column " + colspec.getName() + " not in range [0,1]");
                    }
                }
                nrOutputs = 1;
                // the single output neuron is registered under the column name
                classMap = new HashMap<DataCell, Integer>();
                classMap.put(new StringCell(colspec.getName()), 0);
                m_regression = true;
            } else {
                // classification: one output neuron per possible class value
                m_regression = false;
                DataColumnDomain domain = colspec.getDomain();
                if (!domain.hasValues()) {
                    throw new Exception("Could not find domain values in nominal column " + colspec.getName());
                }
                Set<DataCell> allvalues = domain.getValues();
                classMap = new HashMap<DataCell, Integer>();
                int outputneuron = 0;
                for (DataCell value : allvalues) {
                    classMap.put(value, outputneuron);
                    outputneuron++;
                }
                nrOutputs = allvalues.size();
            }
        } else {
            // learning (input) column
            if (!colspec.getType().isCompatible(DoubleValue.class)) {
                throw new Exception("Only double columns for input");
            }
            inputmap.put(colspec.getName(), nrInputs);
            learningCols.add(colspec.getName());
            nrInputs++;
        }
    }
    assert targetCols.size() == 1 : "Only one class column allowed.";

    m_architecture.setNrInputNeurons(nrInputs);
    m_architecture.setNrHiddenLayers(m_nrHiddenLayers.getIntValue());
    m_architecture.setNrHiddenNeurons(m_nrHiddenNeuronsperLayer.getIntValue());
    m_architecture.setNrOutputNeurons(nrOutputs);
    Random random = new Random();
    if (m_useRandomSeed.getBooleanValue()) {
        random.setSeed(m_randomSeed.getIntValue());
    }
    m_mlp = new MultiLayerPerceptron(m_architecture, random);
    if (m_regression) {
        m_mlp.setMode(MultiLayerPerceptron.REGRESSION_MODE);
    } else {
        m_mlp.setMode(MultiLayerPerceptron.CLASSIFICATION_MODE);
    }

    // Convert inputs to double arrays. Values from the class column are
    // encoded as bitvectors.
    final int classColNr = posSpec.findColumnIndex(classColName);
    final boolean ignoreMissing = m_ignoreMV.getBooleanValue();
    List<Double[]> samples = new ArrayList<Double[]>();
    List<Double[]> outputs = new ArrayList<Double[]>();
    final RowIterator rowIt = ((BufferedDataTable) inData[INDATA]).iterator();
    while (rowIt.hasNext()) {
        DataRow row = rowIt.next();
        Double[] sample = new Double[nrInputs];
        Double[] output = new Double[nrOutputs];
        // rows containing a missing value are skipped (if allowed)
        boolean add = true;
        int index = 0;
        final int nrCells = row.getNumCells();
        for (int i = 0; i < nrCells; i++) {
            final DataCell cell = row.getCell(i);
            if (i != classColNr) {
                if (cell.isMissing()) {
                    if (!ignoreMissing) {
                        throw new Exception("Missing values in input datatable");
                    }
                    add = false;
                    break;
                }
                sample[index] = ((DoubleValue) cell).getDoubleValue();
                index++;
            } else {
                if (cell.isMissing()) {
                    if (!ignoreMissing) {
                        throw new Exception("Missing value in class column");
                    }
                    add = false;
                    break;
                }
                if (m_regression) {
                    output[0] = ((DoubleValue) cell).getDoubleValue();
                } else {
                    // Fail with a clear message instead of a NullPointerException
                    // from unboxing when the value is unknown to the domain.
                    final Integer classIndex = classMap.get(cell);
                    if (classIndex == null) {
                        throw new IllegalStateException("Class value \"" + cell + "\" in row \"" + row.getKey()
                            + "\" is not contained in the domain of column \"" + classColName + "\"");
                    }
                    for (int j = 0; j < nrOutputs; j++) {
                        output[j] = (j == classIndex.intValue()) ? 1.0 : 0.0;
                    }
                }
            }
        }
        if (add) {
            samples.add(sample);
            outputs.add(output);
        }
    }
    // One row per accepted sample; the target rows have nrOutputs entries.
    Double[][] samplesarr = samples.toArray(new Double[samples.size()][]);
    Double[][] outputsarr = outputs.toArray(new Double[outputs.size()][]);

    // Now finally train the network.
    m_mlp.setClassMapping(classMap);
    m_mlp.setInputMapping(inputmap);
    RProp myrprop = new RProp();
    final int nrIterations = m_nrIterations.getIntValue();
    m_errors = new double[nrIterations];
    for (int iteration = 0; iteration < nrIterations; iteration++) {
        exec.setProgress((double) iteration / (double) nrIterations, "Iteration " + iteration);
        myrprop.train(m_mlp, samplesarr, outputsarr);
        // summed squared error over all samples and output neurons
        double error = 0;
        for (int j = 0; j < outputsarr.length; j++) {
            double[] myoutput = m_mlp.output(samplesarr[j]);
            for (int o = 0; o < outputsarr[j].length; o++) {
                double diff = myoutput[o] - outputsarr[j][o];
                error += diff * diff;
            }
        }
        m_errors[iteration] = error;
        exec.checkCanceled();
    }

    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[INMODEL] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec outPortSpec = createPMMLPortObjectSpec(inPMMLSpec, posSpec, learningCols, targetCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(outPortSpec, inPMMLPort, posSpec);
    outPMMLPort.addModelTranslater(new PMMLNeuralNetworkTranslator(m_mlp));
    return new PortObject[] { outPMMLPort };
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class DecTreePredictorNodeModel method createOutTableSpec.
/**
 * Creates the output table spec: the input data spec followed by the
 * (optional) class probability columns and the prediction column.
 *
 * Probability columns are only appended when the distribution is to be
 * shown; each of them is a double column with the domain [0,1]. (A preferred
 * bar renderer property was once set alongside that domain; it is not set
 * here.)
 *
 * @param inSpecs the input port specs; the entry at {@code INMODELPORT} is
 *            read as PMML spec, the one at {@code INDATAPORT} as table spec
 * @return the output spec, or {@code null} if the distribution is requested
 *         but the prediction values cannot be determined from the model spec
 * @throws InvalidSettingsException declared by the signature; presumably
 *             raised by one of the called helpers
 */
private DataTableSpec createOutTableSpec(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    List<DataCell> classValues = null;
    if (m_showDistribution.getBooleanValue()) {
        classValues = getPredictionValues((PMMLPortObjectSpec) inSpecs[INMODELPORT]);
        if (classValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    final int distributionCount = classValues == null ? 0 : classValues.size();
    final DataTableSpec dataSpec = (DataTableSpec) inSpecs[INDATAPORT];
    // one slot per probability column plus one for the prediction itself
    final DataColumnSpec[] appended = new DataColumnSpec[distributionCount + 1];

    // every probability column shares the same [0,1] domain
    final DataColumnDomain unitInterval =
        new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    final PredictorHelper helper = PredictorHelper.getInstance();
    final PMMLPortObjectSpec modelSpec = (PMMLPortObjectSpec) inSpecs[INMODELPORT];
    final String targetName = modelSpec.getTargetFields().iterator().next();

    // the distribution columns come first
    int slot = 0;
    while (slot < distributionCount) {
        assert classValues != null;
        final String valueLabel = classValues.get(slot).toString();
        final String probabilityName =
            helper.probabilityColumnName(targetName, valueLabel, m_probabilitySuffix.getStringValue());
        final DataColumnSpecCreator probabilityCreator =
            new DataColumnSpecCreator(probabilityName, DoubleCell.TYPE);
        probabilityCreator.setDomain(unitInterval);
        appended[slot] = probabilityCreator.createSpec();
        slot++;
    }

    // the prediction column always occupies the last slot
    final String predictionName = helper.computePredictionColumnName(m_predictionColumn.getStringValue(),
        m_overridePrediction.getBooleanValue(), targetName);
    appended[distributionCount] = new DataColumnSpecCreator(predictionName, StringCell.TYPE).createSpec();

    return new DataTableSpec(dataSpec, new DataTableSpec(appended));
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class Pivot2NodeModel method createCombinedPivots.
/**
 * Collects, for every pivot column, the string representations of the
 * possible values listed in the column's domain.
 *
 * @param groupSpec the spec in which the pivot columns are looked up
 * @param pivotCols the names of the pivot columns
 * @return an array with one entry per pivot column, in the order of
 *         {@code pivotCols}; an entry stays {@code null} if the domain is to
 *         be ignored or the column has no domain values. Unless missing
 *         values are ignored, each non-null set additionally ends with "?".
 */
private Set<String>[] createCombinedPivots(final DataTableSpec groupSpec, final List<String> pivotCols) {
    final int pivotCount = pivotCols.size();
    @SuppressWarnings("unchecked") final Set<String>[] result = new Set[pivotCount];
    for (int col = 0; col < pivotCount; col++) {
        final int specIndex = groupSpec.findColumnIndex(pivotCols.get(col));
        final DataColumnDomain colDomain = groupSpec.getColumnSpec(specIndex).getDomain();
        if (m_ignoreDomain.getBooleanValue() || !colDomain.hasValues()) {
            // nothing known up front for this column; leave the entry null
            continue;
        }
        // insertion order of the domain values is kept
        final Set<String> labels = new LinkedHashSet<String>();
        for (final DataCell cell : colDomain.getValues()) {
            labels.add(cell.toString());
        }
        if (!m_ignoreMissValues.getBooleanValue()) {
            labels.add("?");
        }
        result[col] = labels;
    }
    return result;
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class RenameColumnSetting method configure.
/**
 * Called by configure in NodeModel to compute the new column spec.
 *
 * The new spec is a copy of the input spec with the configured new name
 * (or the old name if none is set) and the selected new type. If the column
 * is converted to a string via {@code toString()}, the domain is dropped;
 * otherwise the input domain is kept.
 *
 * @param inSpec the original input spec (names must match)
 * @return the new column spec
 * @throws InvalidSettingsException if the column name does not match the
 *             name stored in this setting, or if the configured type index
 *             is negative or not smaller than the number of possible types
 */
public DataColumnSpec configure(final DataColumnSpec inSpec) throws InvalidSettingsException {
    final String name = inSpec.getName();
    final DataType oldType = inSpec.getType();
    if (!name.equals(m_name)) {
        throw new InvalidSettingsException("Column names don't match: \"" + m_name + "\" vs. \"" + name + "\"");
    }
    Set<Class<? extends DataValue>> possibleTypeSet = constructPossibleTypes(oldType);
    // no generics in array definition
    @SuppressWarnings("unchecked") Class<? extends DataValue>[] possibleTypes = possibleTypeSet.toArray(new Class[possibleTypeSet.size()]);
    // Reject indices on both ends so that a bad setting surfaces as
    // InvalidSettingsException rather than ArrayIndexOutOfBoundsException.
    final int typeIndex = getNewValueClassIndex();
    if (typeIndex < 0 || typeIndex >= possibleTypes.length) {
        throw new InvalidSettingsException("Invalid type index: " + typeIndex);
    }
    final String newName = m_newColumnName == null ? m_name : m_newColumnName;
    final Class<? extends DataValue> newVal = possibleTypes[typeIndex];
    // Conversion to string of a column that is either of the missing cell's
    // type or not string compatible needs to be handled separately
    // (bug #1939): the values are converted via toString().
    final boolean useToString = newVal.equals(StringValue.class)
        && (DataType.getMissingCell().getType().equals(oldType) || !oldType.isCompatible(StringValue.class));
    final DataColumnDomain newDomain;
    final DataType newType;
    if (useToString) {
        // the old domain does not apply to the string representation
        newDomain = null;
        newType = StringCell.TYPE;
    } else {
        newDomain = inSpec.getDomain();
        Class<? extends DataValue> oldP = oldType.getPreferredValueClass();
        if (oldP.equals(newVal)) {
            newType = oldType;
        } else {
            newType = DataType.cloneChangePreferredValue(oldType, newVal);
        }
    }
    DataColumnSpecCreator creator = new DataColumnSpecCreator(inSpec);
    creator.setName(newName);
    creator.setType(newType);
    creator.setDomain(newDomain);
    return creator.createSpec();
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class NominalAttributeColumnHelper method createMetaData.
/**
 * {@inheritDoc}
 *
 * The meta data consists of the column name and the nominal value
 * representations extracted from the possible values of the column's
 * domain. The argument check fails if the domain lists no values.
 */
@Override
protected TreeNominalColumnMetaData createMetaData(final DataColumnSpec nominalColSpec) {
    final DataColumnDomain columnDomain = nominalColSpec.getDomain();
    final boolean hasPossibleValues = columnDomain.hasValues();
    CheckUtils.checkArgument(hasPossibleValues,
        "The data dictionary doesn't contain domain information for column \"%s\".", nominalColSpec);
    final NominalValueRepresentation[] representations =
        NominalColumnHelperUtil.extractNomValReps(columnDomain.getValues());
    final String columnName = nominalColSpec.getName();
    return new TreeNominalColumnMetaData(columnName, representations);
}
Aggregations