Use of org.knime.core.data.DataRow in project knime-core by knime.
The class AppendVariableToTableNodeModel, method createColumnRearranger: appends the selected flow variables to the input table as constant columns, uniquifying the new column names against the existing ones.
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    ColumnRearranger arranger = new ColumnRearranger(spec);
    Set<String> nameHash = new HashSet<String>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        DataType type;
        switch (c.getSecond()) {
            case DOUBLE:
                type = DoubleCell.TYPE;
                try {
                    double dValue = peekFlowVariableDouble(name);
                    values[i] = new DoubleCell(dValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type double): " + name);
                }
                break;
            case INTEGER:
                type = IntCell.TYPE;
                try {
                    int iValue = peekFlowVariableInt(name);
                    values[i] = new IntCell(iValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type int): " + name);
                }
                break;
            case STRING:
                type = StringCell.TYPE;
                try {
                    String sValue = peekFlowVariableString(name);
                    sValue = sValue == null ? "" : sValue;
                    values[i] = new StringCell(sValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type String): " + name);
                }
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        if (nameHash.contains(name) && !name.toLowerCase().endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        String newName = name;
        int uniquifier = 1;
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {
        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // the appended columns are constant: the same variable values for every row
            return values;
        }
    });
    return arranger;
}
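For context, here is a minimal sketch (not part of the KNIME sources; the single-table port layout is an assumption) of how such a rearranger is typically consumed: configure() derives the output spec from it without touching any rows, and execute() applies it to the input table.

@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    // the rearranger knows the output spec before any row exists
    return new DataTableSpec[]{createColumnRearranger(inSpecs[0]).createSpec()};
}

@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    ColumnRearranger arranger = createColumnRearranger(inData[0].getDataTableSpec());
    // appends the constant variable columns to every row of the input table
    return new BufferedDataTable[]{exec.createColumnRearrangeTable(inData[0], arranger, exec)};
}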
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class VariableFileReaderNodeModel, method execute: reads the source file row by row and caches all rows in a BufferedDataContainer.
/**
* {@inheritDoc}
*/
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    Map<String, FlowVariable> stack = createStack(m_frSettings.getVariableName());
    VariableFileReaderNodeSettings settings = m_frSettings.createSettingsFrom(stack);
    LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
    // check the settings again - especially file existence (under Linux,
    // files could have been deleted/renamed since the last configure call)
    SettingsStatus status = settings.getStatusOfSettings(true, null);
    if (status.getNumOfErrors() > 0) {
        throw new InvalidSettingsException(status.getAllErrorMessages(10));
    }
    DataTableSpec tSpec = settings.createDataTableSpec();
    FileTable fTable = new FileTable(tSpec, settings, settings.getSkippedColumns(), exec);
    // create a DataContainer and fill it with the rows read. It is faster
    // than reading the file every time (for each row iterator), and it
    // collects the domain for each column for us. Also, if things fail,
    // the error message is printed during file reader execution (where it
    // belongs) and not some time later when a node uses the row iterator
    // from the file table.
    BufferedDataContainer c = exec.createDataContainer(fTable.getDataTableSpec(), /* initDomain= */ true);
    int row = 0;
    FileRowIterator it = fTable.iterator();
    try {
        if (it.getZipEntryName() != null) {
            // seems we are reading a ZIP archive
            LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
        }
        while (it.hasNext()) {
            row++;
            DataRow next = it.next();
            String message = "Caching row #" + row + " (\"" + next.getKey() + "\")";
            exec.setMessage(message);
            exec.checkCanceled();
            c.addRowToTable(next);
        }
        if (it.zippedSourceHasMoreEntries()) {
            // after reading to the end of the file this returns a valid result
            setWarningMessage("Source is a ZIP archive with multiple entries. Only reading first entry!");
        }
    } catch (DuplicateKeyException dke) {
        String msg = dke.getMessage();
        if (msg == null) {
            msg = "Duplicate row IDs";
        }
        msg += ". Consider making IDs unique in the advanced settings.";
        DuplicateKeyException newDKE = new DuplicateKeyException(msg);
        newDKE.initCause(dke);
        throw newDKE;
    } finally {
        c.close();
    }
    // user settings allow for truncating the table
    if (it.iteratorEndedEarly()) {
        setWarningMessage("Data was truncated due to user settings.");
    }
    BufferedDataTable out = c.getTable();
    // closes all sources
    fTable.dispose();
    return new BufferedDataTable[]{out};
}
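Stripped of the file-reader specifics, the caching code above is the standard BufferedDataContainer fill loop. A minimal sketch, assuming a row source named source and an output spec named spec:

BufferedDataContainer c = exec.createDataContainer(spec, /* initDomain= */ true);
try {
    for (DataRow r : source) {
        exec.checkCanceled(); // stay responsive to cancel requests
        c.addRowToTable(r);   // throws DuplicateKeyException on duplicate row IDs
    }
} finally {
    c.close();                // always close, even when filling fails
}
BufferedDataTable table = c.getTable();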
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class DecTreePredictorNodeModel, method execute: classifies every input row with the decision tree loaded from the PMML port, optionally appending the class distribution.
/**
* {@inheritDoc}
*/
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    exec.setMessage("Decision Tree Predictor: Loading predictor...");
    PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
    if (models.isEmpty()) {
        String msg = "Decision Tree evaluation failed: No tree model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
    port.initializeModelTranslator(trans);
    DecisionTree decTree = trans.getDecisionTree();
    decTree.resetColorInformation();
    BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
    // get the column with color information
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    decTree.setColorColumn(colorColumn);
    exec.setMessage("Decision Tree Predictor: start execution.");
    PortObjectSpec[] inSpecs = new PortObjectSpec[]{inPorts[0].getSpec(), inPorts[1].getSpec()};
    DataTableSpec outSpec = createOutTableSpec(inSpecs);
    BufferedDataContainer outData = exec.createDataContainer(outSpec);
    long coveredPattern = 0;
    long nrPattern = 0;
    long rowCount = 0;
    long numberRows = inData.size();
    exec.setMessage("Classifying...");
    for (DataRow thisRow : inData) {
        DataCell cl = null;
        LinkedHashMap<String, Double> classDistrib = null;
        try {
            Pair<DataCell, LinkedHashMap<DataCell, Double>> pair =
                decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
            cl = pair.getFirst();
            LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
            classDistrib = getDistribution(classCounts);
            if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
                // remember this one for HiLite support
                decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
                coveredPattern++;
            } else {
                // too many patterns for HiLite - at least remember the color
                decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
            }
            nrPattern++;
        } catch (Exception e) {
            LOGGER.error("Decision Tree evaluation failed: " + e.getMessage());
            throw e;
        }
        if (cl == null) {
            LOGGER.error("Decision Tree evaluation failed: result empty");
            throw new Exception("Decision Tree evaluation failed.");
        }
        DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
        int numInCells = thisRow.getNumCells();
        for (int i = 0; i < numInCells; i++) {
            newCells[i] = thisRow.getCell(i);
        }
        if (m_showDistribution.getBooleanValue()) {
            for (int i = numInCells; i < newCells.length - 1; i++) {
                String predClass = outSpec.getColumnSpec(i).getName();
                if (classDistrib != null && classDistrib.get(predClass) != null) {
                    newCells[i] = new DoubleCell(classDistrib.get(predClass));
                } else {
                    newCells[i] = new DoubleCell(0.0);
                }
            }
        }
        newCells[newCells.length - 1] = cl;
        outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
        rowCount++;
        if (rowCount % 100 == 0) {
            exec.setProgress(rowCount / (double) numberRows,
                "Classifying... Row " + rowCount + " of " + numberRows);
        }
        exec.checkCanceled();
    }
    if (coveredPattern < nrPattern) {
        // let the user know that we did not store all available patterns for HiLiting
        this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue()
            + " (of " + nrPattern + ") rows for HiLiting!");
    }
    outData.close();
    m_decTree = decTree;
    exec.setMessage("Decision Tree Predictor: end execution.");
    return new BufferedDataTable[]{outData.getTable()};
}
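The getDistribution helper is not shown in this snippet. A plausible reconstruction, inferred from the call site (it must turn class counts keyed by DataCell into probabilities keyed by the class name, and the caller's null check suggests it returns null when there are no counts); treat it as hypothetical:

private static LinkedHashMap<String, Double> getDistribution(final LinkedHashMap<DataCell, Double> classCounts) {
    double total = 0.0;
    for (Double count : classCounts.values()) {
        total += count;
    }
    if (total <= 0.0) {
        return null; // the caller guards against a null distribution
    }
    LinkedHashMap<String, Double> dist = new LinkedHashMap<>(classCounts.size());
    for (Map.Entry<DataCell, Double> e : classCounts.entrySet()) {
        dist.put(e.getKey().toString(), e.getValue() / total);
    }
    return dist;
}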
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class RegressionPredictorNodeModel, method configure: computes the output table spec via a ColumnRearranger whose cell factory is never asked for cells at configure time.
/**
* {@inheritDoc}
*/
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    PMMLPortObjectSpec regModelSpec = (PMMLPortObjectSpec) inSpecs[0];
    DataTableSpec dataSpec = (DataTableSpec) inSpecs[1];
    if (dataSpec == null || regModelSpec == null) {
        throw new InvalidSettingsException("No input specification available");
    }
    if (regModelSpec.getTargetCols().get(0).getType().isCompatible(DoubleValue.class)
            && m_settings.getIncludeProbabilities()) {
        setWarningMessage("The option \"Append columns with predicted probabilities\""
            + " has only an effect for nominal targets");
    }
    if (null != RegressionPredictorCellFactory.createColumnSpec(regModelSpec, dataSpec, m_settings)) {
        ColumnRearranger c = new ColumnRearranger(dataSpec);
        c.append(new RegressionPredictorCellFactory(regModelSpec, dataSpec, m_settings) {
            @Override
            public DataCell[] getCells(final DataRow row) {
                // not called during configure
                return null;
            }
        });
        DataTableSpec outSpec = c.createSpec();
        return new DataTableSpec[]{outSpec};
    } else {
        return null;
    }
}
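Returning null from getCells is safe here because configure only calls ColumnRearranger.createSpec(), which consults the factory's column specs and never asks for cells; at execute time a factory that actually computes the predictions is used instead.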
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class BitVectorGeneratorNodeModel, method calculateMeanValues: computes the per-column mean over all non-missing numeric cells.
private double[] calculateMeanValues(final DataTable input) {
    double[] meanValues = new double[input.getDataTableSpec().getNumColumns()];
    int nrOfRows = 0;
    for (DataRow row : input) {
        for (int i = 0; i < row.getNumCells(); i++) {
            // missing cells and non-numeric columns contribute nothing to the sum
            if (row.getCell(i).isMissing() || !row.getCell(i).getType().isCompatible(DoubleValue.class)) {
                continue;
            }
            meanValues[i] += ((DoubleValue) row.getCell(i)).getDoubleValue();
        }
        nrOfRows++;
    }
    // note: the divisor is the total row count, including rows whose cells were skipped above
    for (int i = 0; i < meanValues.length; i++) {
        meanValues[i] = meanValues[i] / nrOfRows;
    }
    return meanValues;
}
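A minimal sketch (not the node's actual code, which has its own bit vector cell types and threshold settings) of how the per-column means are typically used as thresholds when the bit vectors are generated:

private BitSet createBitVector(final DataRow row, final double[] meanValues) {
    BitSet bits = new BitSet(meanValues.length);
    for (int i = 0; i < row.getNumCells(); i++) {
        DataCell cell = row.getCell(i);
        // set the bit when the cell holds a number at or above the column mean
        if (!cell.isMissing() && cell.getType().isCompatible(DoubleValue.class)
                && ((DoubleValue) cell).getDoubleValue() >= meanValues[i]) {
            bits.set(i);
        }
    }
    return bits;
}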