use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class CollectionSplitNodeModel method getColSpecsByElementNames.
/**
 * Get new column specs as inferred from the element names in the
 * collection column.
 *
 * <p>Each element name of the target column becomes the name of one new
 * column. A name that is already taken, either by a column of the input
 * spec or by a previously generated column, gets a suffix "(#1)", "(#2)",
 * ... until it is unique. If the input column is going to be replaced, its
 * own name is not considered taken.
 *
 * @param spec the input table spec
 * @return one column spec per element name, each typed with the
 *         collection element type of the target column
 * @throws InvalidSettingsException if the target column does not provide
 *             any element names
 */
private DataColumnSpec[] getColSpecsByElementNames(final DataTableSpec spec) throws InvalidSettingsException {
    int colIndex = getTargetColIndex(spec);
    DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    List<String> elementNames = colSpec.getElementNames();
    if (elementNames.isEmpty()) {
        // Note the trailing blank after "that": without it the message
        // reads "...make sure thatthe input table...".
        throw new InvalidSettingsException("Input column \"" + colSpec.getName()
            + "\" does not provide element names; "
            + "consider to change option in dialog or make sure that "
            + "the input table contains the necessary information.");
    }
    DataType type = colSpec.getType().getCollectionElementType();
    // Names that are already in use and therefore must not be handed out again.
    HashSet<String> hashNames = new HashSet<String>();
    for (DataColumnSpec s : spec) {
        hashNames.add(s.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        // The replaced column disappears, so its name becomes available.
        hashNames.remove(colSpec.getName());
    }
    DataColumnSpec[] newColSpec = new DataColumnSpec[elementNames.size()];
    for (int i = 0; i < newColSpec.length; i++) {
        String elementName = elementNames.get(i);
        String uniqueName = elementName;
        int uniquifier = 1;
        // Set#add returns false for a duplicate; keep trying suffixed names.
        while (!hashNames.add(uniqueName)) {
            uniqueName = elementName + "(#" + (uniquifier++) + ")";
        }
        newColSpec[i] = new DataColumnSpecCreator(uniqueName, type).createSpec();
    }
    return newColSpec;
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class CollectionSplitNodeModel method refineTypes.
/**
 * Wraps the argument table with a spec in which every column created by
 * the cell factory carries the type and domain that the factory
 * determined; all other columns keep their original spec.
 *
 * @param table the table whose spec is to be refined
 * @param fac the factory providing the column specs, types and domains
 * @param exec the context used to create the spec-replacing table
 * @return the table with the refined spec
 */
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    final DataTableSpec tableSpec = table.getDataTableSpec();
    final int columnCount = tableSpec.getNumColumns();

    // Start from a copy of the current specs and remember where each name lives.
    final DataColumnSpec[] refinedSpecs = new DataColumnSpec[columnCount];
    final HashMap<String, Integer> positionByName = new HashMap<String, Integer>();
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec current = tableSpec.getColumnSpec(col);
        positionByName.put(current.getName(), col);
        refinedSpecs[col] = current;
    }

    final DataColumnSpec[] factorySpecs = fac.getColumnSpecs();
    final DataType[] refinedTypes = fac.getCommonTypes();
    final DataColumnDomain[] refinedDomains = fac.getDomains();

    // Overwrite type and domain of each factory column, matched by name.
    int factoryIdx = 0;
    for (DataColumnSpec factorySpec : factorySpecs) {
        final int target = positionByName.get(factorySpec.getName());
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(refinedSpecs[target]);
        specCreator.setType(refinedTypes[factoryIdx]);
        specCreator.setDomain(refinedDomains[factoryIdx]);
        refinedSpecs[target] = specCreator.createSpec();
        factoryIdx++;
    }

    return exec.createSpecReplacerTable(table, new DataTableSpec(tableSpec.getName(), refinedSpecs));
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class CollectionSplitNodeModel method countNewColumns.
/**
 * Scans the argument table for the largest collection in the target
 * column and returns that many freshly created column specs, named
 * "Split Value 1", "Split Value 2", ... and made unique against the
 * column names already in use.
 *
 * @param table the table to scan
 * @param exec receives progress and is polled for cancellation per row
 * @return the new column specs, one per element position
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec tableSpec = table.getDataTableSpec();
    final long totalRows = table.size();
    final int targetColIndex = getTargetColIndex(tableSpec);

    // Pass over the data: find the widest non-missing collection.
    int widest = 0;
    long rowsSeen = 0;
    for (DataRow row : table) {
        final DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > widest) {
                widest = elementCount;
            }
        }
        // The fraction uses the zero-based row index, the message the one-based one.
        final double fraction = rowsSeen / (double) totalRows;
        rowsSeen++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + row.getKey() + "\" (" + rowsSeen + "/" + totalRows + ")");
        exec.checkCanceled();
    }

    // Collect the names that must not be reused.
    final DataColumnSpec targetSpec = tableSpec.getColumnSpec(targetColIndex);
    final HashSet<String> takenNames = new HashSet<String>();
    for (DataColumnSpec existing : tableSpec) {
        takenNames.add(existing.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        takenNames.remove(targetSpec.getName());
    }

    final DataType elementType = targetSpec.getType().getCollectionElementType();
    final DataColumnSpec[] createdSpecs = new DataColumnSpec[widest];
    for (int pos = 0; pos < createdSpecs.length; pos++) {
        final String stem = "Split Value " + (pos + 1);
        String candidate = stem;
        // Append "(#k)" with increasing k until the name is accepted.
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = stem + "(#" + suffix + ")";
        }
        createdSpecs[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return createdSpecs;
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class RuleEngineNodeModel method createRearranger.
/**
 * Builds the rearranger that appends the rule engine's result column to
 * the input table.
 *
 * <p>The result column gets a name that is unique within {@code inSpec}.
 * Its type is the common super type of all rule outcome types plus the
 * type of the default label. If that common type only offers the generic
 * {@code DataValue} interface, or if no type could be collected at all,
 * the column is a string column.
 *
 * <p>For each row the outcome of the first matching rule is emitted. If no
 * rule matches, the default label (a constant or a referenced column) is
 * used, or a missing cell if the default label is empty.
 *
 * @param inSpec spec of the input table
 * @param rules the rules, evaluated in list order
 * @return a rearranger with the result column appended
 * @throws InvalidSettingsException if the default label is flagged as a
 *             column reference but is not enclosed in {@code $}, is too
 *             short, or names a column that is not in {@code inSpec}
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        String defaultLabel = m_settings.getDefaultLabel();
        if (defaultLabel.length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!defaultLabel.startsWith("$") || !defaultLabel.endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        // Strip the enclosing '$' characters to get the plain column name.
        String colRef = defaultLabel.substring(1, defaultLabel.length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + defaultLabel + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        Object outcome = r.getOutcome();
        if (outcome instanceof ColumnReference) {
            types.add(((ColumnReference) outcome).spec.getType());
        } else if (outcome instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (outcome instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (outcome.toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    // add the type of the default label
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (m_settings.getDefaultLabel().length() > 0) {
        // A constant label is an int if it parses as one, else a double, else a string.
        try {
            Integer.parseInt(m_settings.getDefaultLabel());
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(m_settings.getDefaultLabel());
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (!types.isEmpty()) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {
        @Override
        public DataCell getCell(final DataRow row) {
            // The first matching rule wins.
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                        }
                    } else if (outType.equals(IntCell.TYPE)) {
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        // The output type can be double although this
                        // particular outcome is an Integer (integer and
                        // double outcomes mixed). Casting to Double would
                        // then fail, so widen via Number instead.
                        return new DoubleCell(((Number) outcome).doubleValue());
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            // No rule matched: fall back to the default label.
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                // Missing cells are passed through unchanged, exactly as for
                // column-reference outcomes above; they must not be turned
                // into a string cell holding the missing cell's text.
                if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
                }
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class PolyRegLearnerNodeModel method configure.
/**
 * {@inheritDoc}
 *
 * <p>Validates that all selected columns exist and are numeric and that a
 * target column is selected and present, then returns the input table spec
 * extended by the two output columns together with the model spec.
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[0];
    final PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) inSpecs[1];

    // Every selected column has to be present and double-compatible.
    for (String colName : computeSelectedColumns(tableSpec)) {
        final DataColumnSpec selected = tableSpec.getColumnSpec(colName);
        if (selected == null) {
            throw new InvalidSettingsException("Selected column '" + colName + "' does not exist in input table");
        }
        final boolean numeric = selected.getType().isCompatible(DoubleValue.class);
        if (!numeric) {
            throw new InvalidSettingsException("Selected column '" + selected.getName() + "' from the input table is not a numeric column.");
        }
    }

    // The target column has to be configured and present.
    final String targetColumn = m_settings.getTargetColumn();
    if (targetColumn == null) {
        throw new InvalidSettingsException("No target column selected");
    }
    if (tableSpec.findColumnIndex(targetColumn) == -1) {
        throw new InvalidSettingsException("Target column '" + targetColumn + "' does not exist.");
    }

    // Two double columns are appended to the input table.
    final DataColumnSpec predictionSpec = new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
    final DataColumnSpec errorSpec = new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();
    return new PortObjectSpec[] {
        AppendedColumnTable.getTableSpec(tableSpec, predictionSpec, errorSpec),
        createModelSpec(pmmlSpec, tableSpec)
    };
}
Aggregations