use of org.knime.core.data.DataType in project knime-core by knime.
the class CollectionSplitNodeModel method refineTypes.
/**
 * Replaces the spec of the argument table with one in which every column
 * reported by the cell factory carries the type and domain the factory
 * determined. All other columns keep their current spec.
 *
 * @param table the table whose spec is to be refined
 * @param fac the factory supplying the column specs, common types and domains
 * @param exec context used to create the spec-replaced table
 * @return the table returned by {@code exec.createSpecReplacerTable}
 */
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    final DataTableSpec oldSpec = table.getDataTableSpec();
    final int columnCount = oldSpec.getNumColumns();
    // Start from the current column specs and remember each column's position by name.
    final DataColumnSpec[] refinedSpecs = new DataColumnSpec[columnCount];
    final HashMap<String, Integer> positionByName = new HashMap<String, Integer>();
    for (int pos = 0; pos < columnCount; pos++) {
        final DataColumnSpec current = oldSpec.getColumnSpec(pos);
        positionByName.put(current.getName(), pos);
        refinedSpecs[pos] = current;
    }
    final DataColumnSpec[] factorySpecs = fac.getColumnSpecs();
    final DataType[] refinedTypes = fac.getCommonTypes();
    final DataColumnDomain[] refinedDomains = fac.getDomains();
    // The factory's three arrays are parallel: entry k describes the k-th factory column.
    for (int k = 0; k < factorySpecs.length; k++) {
        final int target = positionByName.get(factorySpecs[k].getName());
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(refinedSpecs[target]);
        specCreator.setType(refinedTypes[k]);
        specCreator.setDomain(refinedDomains[k]);
        refinedSpecs[target] = specCreator.createSpec();
    }
    return exec.createSpecReplacerTable(table, new DataTableSpec(oldSpec.getName(), refinedSpecs));
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class CollectionSplitNodeModel method countNewColumns.
/**
 * Scans the argument table once to find the largest collection size in the
 * target column and returns one new column spec per element position.
 * The new column names are made unique against the existing column names.
 *
 * @param table the table to scan
 * @param exec monitor for progress reporting and cancellation checks
 * @return the new column specs; its length is the maximum element count found
 * @throws InvalidSettingsException declared by the signature; presumably raised by
 *             {@code getTargetColIndex} - confirm against that method
 * @throws CanceledExecutionException declared by the signature; {@code exec.checkCanceled()}
 *             is called once per row
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec tableSpec = table.getDataTableSpec();
    long processed = 0;
    final long total = table.size();
    int widest = 0;
    final int targetColIndex = getTargetColIndex(tableSpec);
    for (DataRow row : table) {
        final DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > widest) {
                widest = elementCount;
            }
        }
        // The fraction uses the zero-based row count, the message the one-based row number.
        final double fraction = processed / (double) total;
        processed++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + row.getKey() + "\" (" + processed + "/" + total + ")");
        exec.checkCanceled();
    }
    // Names that are already taken and must not be reused for the new columns.
    final HashSet<String> takenNames = new HashSet<String>();
    for (DataColumnSpec existing : tableSpec) {
        takenNames.add(existing.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        // The input column is being replaced, so its name is free again.
        takenNames.remove(tableSpec.getColumnSpec(targetColIndex).getName());
    }
    final DataType elementType = tableSpec.getColumnSpec(targetColIndex).getType().getCollectionElementType();
    final DataColumnSpec[] result = new DataColumnSpec[widest];
    for (int pos = 0; pos < result.length; pos++) {
        final String base = "Split Value " + (pos + 1);
        String candidate = base;
        // add() returns false while the candidate is already taken; try the next numbered variant.
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = base + "(#" + suffix + ")";
        }
        result[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class RuleEngineNodeModel method createRearranger.
/**
 * Creates the column rearranger that appends the rule engine's result
 * column to the input table.
 *
 * <p>The type of the new column is the common super type of all rule
 * outcomes and of the default label. If that super type has
 * {@code DataValue} as its only value class, or if no type could be
 * collected at all, the column is typed as string.
 *
 * <p>For each row the outcome of the first matching rule is used. If no
 * rule matches, the default label (a column reference or a literal) is
 * used, and if there is no default label a missing cell is returned.
 *
 * @param inSpec spec of the input table
 * @param rules the rules, checked in list order
 * @return a rearranger on {@code inSpec} with the result column appended
 * @throws InvalidSettingsException if the default label is configured as
 *             a column reference but is shorter than three characters,
 *             is not enclosed in {@code $}, or names a column that does
 *             not exist in {@code inSpec}
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        // a column reference needs at least one character between the two '$'
        if (m_settings.getDefaultLabel().length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!m_settings.getDefaultLabel().startsWith("$") || !m_settings.getDefaultLabel().endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        String colRef = m_settings.getDefaultLabel().substring(1, m_settings.getDefaultLabel().length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + m_settings.getDefaultLabel() + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        if (r.getOutcome() instanceof ColumnReference) {
            types.add(((ColumnReference) r.getOutcome()).spec.getType());
        } else if (r.getOutcome() instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (r.getOutcome() instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (r.getOutcome().toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    // add the type of the default label: column type, or int/double/string for a literal
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (m_settings.getDefaultLabel().length() > 0) {
        try {
            Integer.parseInt(m_settings.getDefaultLabel());
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(m_settings.getDefaultLabel());
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (!types.isEmpty()) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {
        @Override
        public DataCell getCell(final DataRow row) {
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                        }
                    } else if (outType.equals(IntCell.TYPE)) {
                        // NOTE(review): an outcome with an empty string representation adds no
                        // type above, so it could reach this cast as a non-Integer - confirm
                        // whether Rule can produce such outcomes.
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        // The output type becomes double as soon as one outcome is a double,
                        // so Integer literals can arrive here too; convert via Number instead
                        // of casting to Double, which would throw a ClassCastException.
                        return new DoubleCell(((Number) outcome).doubleValue());
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            // no rule matched: fall back to the default label
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                // keep missing cells missing, consistent with the rule-outcome branch above
                if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
                }
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class AbstractTreeEnsembleModel method createNominalNumericPredictorRecord.
/**
 * Converts a row into a predictor record keyed by column name.
 *
 * <p>Missing cells, nominal values not found in the model's meta data and
 * NaN doubles are stored as {@code PredictorRecord.NULL}. Known nominal
 * values are stored as their assigned integer, numeric values as doubles.
 *
 * @param filterRow the row to convert; cell {@code i} is read for column {@code i} of {@code trainSpec}
 * @param trainSpec the spec that supplies column names and types
 * @return the record built from the row
 * @throws IllegalStateException if a non-missing cell belongs to a column that is neither
 *             nominal nor double compatible
 */
private PredictorRecord createNominalNumericPredictorRecord(final DataRow filterRow, final DataTableSpec trainSpec) {
    final int columnCount = trainSpec.getNumColumns();
    final Map<String, Object> record = new LinkedHashMap<String, Object>((int) (columnCount / 0.75 + 1.0));
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
        final DataColumnSpec columnSpec = trainSpec.getColumnSpec(colIdx);
        final String name = columnSpec.getName();
        final DataType type = columnSpec.getType();
        final DataCell cell = filterRow.getCell(colIdx);
        final Object recordValue;
        if (cell.isMissing()) {
            recordValue = PredictorRecord.NULL;
        } else if (type.isCompatible(NominalValue.class)) {
            final TreeNominalColumnMetaData nominalMeta = (TreeNominalColumnMetaData) m_metaData.getAttributeMetaData(colIdx);
            final NominalValueRepresentation[] knownValues = nominalMeta.getValues();
            final String cellText = cell.toString();
            // -1 marks "no matching nominal value found"
            int code = -1;
            for (NominalValueRepresentation known : knownValues) {
                if (known.getNominalValue().equals(cellText)) {
                    code = known.getAssignedInteger();
                    break;
                }
            }
            if (code == -1) {
                // value unknown to the model: treat as missing value
                recordValue = PredictorRecord.NULL;
            } else {
                recordValue = Integer.valueOf(code);
            }
        } else if (type.isCompatible(DoubleValue.class)) {
            final double number = ((DoubleValue) cell).getDoubleValue();
            if (Double.isNaN(number)) {
                // NaNs must be treated as missing values (bug AP-7169)
                recordValue = PredictorRecord.NULL;
            } else {
                recordValue = Double.valueOf(number);
            }
        } else {
            throw new IllegalStateException("Expected nominal or numeric column type for column \"" + name + "\" but got \"" + type + "\"");
        }
        record.put(name, recordValue);
    }
    return new PredictorRecord(record);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class RegressionTreeModelPortObjectSpec method calculateFilterIndices.
/**
 * Maps each column of the learn table spec to the index of the same-named
 * column in the given table spec and checks that the types fit.
 *
 * <p>For each of the value kinds nominal, numeric (double), bit vector and
 * byte vector: if the learn column is compatible with that kind, the data
 * column must be compatible with it too.
 *
 * @param testTableInput spec of the table to map the learn columns onto
 * @return an array with one entry per learn column, holding the index of
 *         that column in {@code testTableInput}
 * @throws InvalidSettingsException if a learn column is absent from
 *             {@code testTableInput} or has an incompatible type there
 */
public int[] calculateFilterIndices(final DataTableSpec testTableInput) throws InvalidSettingsException {
    DataTableSpec learnSpec = getLearnTableSpec();
    // check existence and types of columns, create reordering
    int[] result = new int[learnSpec.getNumColumns()];
    for (int i = 0; i < learnSpec.getNumColumns(); i++) {
        DataColumnSpec learnCol = learnSpec.getColumnSpec(i);
        final String colName = learnCol.getName();
        int dataColIndex = testTableInput.findColumnIndex(colName);
        if (dataColIndex < 0) {
            throw new InvalidSettingsException("Required data column \"" + colName + "\" does not exist in table");
        }
        DataColumnSpec dataCol = testTableInput.getColumnSpec(dataColIndex);
        // expected type
        DataType eType = learnCol.getType();
        // actual type
        DataType aType = dataCol.getType();
        // The checks are independent; if several fail, the last failing one names the error.
        String errorType = null;
        if (eType.isCompatible(NominalValue.class) && !aType.isCompatible(NominalValue.class)) {
            errorType = "nominal";
        }
        if (eType.isCompatible(DoubleValue.class) && !aType.isCompatible(DoubleValue.class)) {
            errorType = "numeric";
        }
        if (eType.isCompatible(BitVectorValue.class) && !aType.isCompatible(BitVectorValue.class)) {
            errorType = "fingerprint/bitvector";
        }
        if (eType.isCompatible(ByteVectorValue.class) && !aType.isCompatible(ByteVectorValue.class)) {
            errorType = "fingerprint/bytevector";
        }
        if (errorType != null) {
            // message previously rendered as "butis not" because of a missing space
            throw new InvalidSettingsException("Column \"" + colName + "\" does exist in the data but is not of the expected " + errorType + " type");
        }
        result[i] = dataColIndex;
    }
    return result;
}
Aggregations