use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class DomainDialog method takeOverSettings.
/**
 * Builds a new column property from the values currently entered in this dialog.
 *
 * <p>The resulting column spec always carries the name and type of the column this dialog was
 * opened for. For columns compatible with {@link StringValue} the entries of the value list
 * become the possible values of the domain (only if the list is not empty). For columns of type
 * {@link IntCell} or {@link DoubleCell} the contents of the two bound fields become the lower
 * and upper bound of the domain.
 *
 * <p>NOTE(review): earlier documentation promised a <code>null</code> result and an error
 * message box for invalid settings. The code below never returns <code>null</code> and shows no
 * message box - confirm which of the two is intended.
 *
 * @return a new object holding the column spec (name, type and domain) as set by the user
 */
private ColProperty takeOverSettings() {
    final String colName = m_colProp.getColumnSpec().getName();
    final DataType colType = m_colProp.getColumnSpec().getType();
    final ColProperty settings = new ColProperty();
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(colName, colType);

    if (!colType.isCompatible(StringValue.class)) {
        // non-string column: only bounds are taken over, and only for int and double columns
        final DataColumnDomainCreator bounds = new DataColumnDomainCreator();
        if (colType.equals(IntCell.TYPE)) {
            bounds.setLowerBound(new IntCell((int) m_lowerBoundField.getValue()));
            bounds.setUpperBound(new IntCell((int) m_upperBoundField.getValue()));
        } else if (colType.equals(DoubleCell.TYPE)) {
            bounds.setLowerBound(new DoubleCell((double) m_lowerBoundField.getValue()));
            bounds.setUpperBound(new DoubleCell((double) m_upperBoundField.getValue()));
        }
        specCreator.setDomain(bounds.createDomain());
        settings.setColumnSpec(specCreator.createSpec());
        return settings;
    }

    // string compatible column: the check box (if the dialog has one) decides whether the
    // listed values are used
    final boolean hasCheckBox = m_containsVals != null;
    if (hasCheckBox) {
        settings.setReadPossibleValuesFromFile(m_containsVals.isSelected());
    }
    if (!hasCheckBox || m_containsVals.isSelected()) {
        // transfer the possible values, keeping the order in which the list shows them
        final int entries = m_valueList.getModel().getSize();
        final Set<DataCell> possibleValues = new LinkedHashSet<DataCell>();
        for (int pos = 0; pos < entries; pos++) {
            possibleValues.add((DataCell) m_valueList.getModel().getElementAt(pos));
        }
        // an empty list leaves the domain of the spec creator untouched
        if (!possibleValues.isEmpty()) {
            specCreator.setDomain(new DataColumnDomainCreator(possibleValues).createDomain());
        }
    }
    settings.setColumnSpec(specCreator.createSpec());
    return settings;
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class NominalTable method computeValues.
/**
 * Finds all possible values based on a table and a number of given column
 * indices by iterating through the table.
 *
 * <p>For each selected column the new domain contains every distinct cell found in that column.
 * Existing bounds of the old column spec are kept. If the old spec has no bounds and the column
 * is compatible with {@link DoubleValue}, the bounds are set to the smallest and largest cell
 * found (according to the comparator of the column type). If no cell was found at all (empty
 * table) the bounds stay unset.
 *
 * @param table the table to get values from
 * @param exec an object to check if user canceled and to report progress to; may be
 *            <code>null</code>, in which case neither is done
 * @param columnIndex an array of sorted column indices
 * @return a modified table spec containing all possible values
 * @throws NullPointerException if the table is <code>null</code>
 * @throws IllegalArgumentException if a column index is -1, or if the column indices contain
 *             duplicates or are not sorted in strictly ascending order
 * @throws IndexOutOfBoundsException if a column index is out of range
 * @throws CanceledExecutionException if user canceled operation
 */
public static final DataTableSpec computeValues(final BufferedDataTable table, final ExecutionMonitor exec, final int... columnIndex) throws CanceledExecutionException {
    DataTableSpec oldSpec = table.getDataTableSpec();
    // keep all possible values for each column (index); set[c] belongs to columnIndex[c]
    @SuppressWarnings("unchecked") Set<DataCell>[] set = new Set[columnIndex.length];
    HashSet<Integer> hash = new HashSet<Integer>();
    for (int c = 0; c < columnIndex.length; c++) {
        if (columnIndex[c] == -1) {
            throw new IllegalArgumentException("Column " + columnIndex[c] + " not found.");
        }
        if (hash.contains(columnIndex[c])) {
            throw new IllegalArgumentException("Column indices " + " contain duplicates: " + c);
        }
        if (c > 0 && columnIndex[c - 1] >= columnIndex[c]) {
            throw new IllegalArgumentException("Column indices are " + "not sorted.");
        }
        hash.add(columnIndex[c]);
        set[c] = new HashSet<DataCell>();
    }
    // overall rows in the table
    long rowCount = 0;
    for (DataRow row : table) {
        // get value for column indices
        // NOTE(review): missing cells are collected like any other value - confirm intended
        for (int c = 0; c < columnIndex.length; c++) {
            DataCell cell = row.getCell(columnIndex[c]);
            // adds only each value once
            set[c].add(cell);
        }
        if (exec != null) {
            // throws exception if user canceled
            exec.checkCanceled();
            exec.setProgress((double) ++rowCount / table.size(), "" + row.getKey());
        }
    }
    DataColumnSpec[] newColSpecs = new DataColumnSpec[oldSpec.getNumColumns()];
    // index within the set of possible values; relies on columnIndex being sorted ascending so
    // that the selected columns are visited in the same order as their sets were created
    int idx = 0;
    for (int i = 0; i < newColSpecs.length; i++) {
        DataColumnSpec oldColSpec = oldSpec.getColumnSpec(i);
        if (hash.contains(i)) {
            DataColumnSpecCreator creator = new DataColumnSpecCreator(oldColSpec);
            DataCell lower = null;
            DataCell upper = null;
            if (oldColSpec.getDomain().hasBounds()) {
                lower = oldColSpec.getDomain().getLowerBound();
                upper = oldColSpec.getDomain().getUpperBound();
            } else if (oldColSpec.getType().isCompatible(DoubleValue.class) && !set[idx].isEmpty()) {
                // TODO DoubleValue is to restrict
                // The emptiness check matters for tables without rows: first()/last() on an
                // empty TreeSet throw a NoSuchElementException. Bounds stay null in that case.
                TreeSet<DataCell> tSet = new TreeSet<DataCell>(oldColSpec.getType().getComparator());
                tSet.addAll(set[idx]);
                lower = tSet.first();
                upper = tSet.last();
            }
            DataColumnDomain dom = new DataColumnDomainCreator(set[idx], lower, upper).createDomain();
            creator.setDomain(dom);
            newColSpecs[i] = creator.createSpec();
            idx++;
        } else {
            newColSpecs[i] = oldColSpec;
        }
    }
    // create new table spec along with all column specs
    return new DataTableSpec(newColSpecs);
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class MissingValueHandlingTable method createTableSpecPrivate.
/*
 * Private helper that assumes the ColSetting array to have the right format, i.e. one entry per
 * column of the given spec. For every column that is handled with METHOD_FIX_VAL the domain is
 * widened, where necessary, so that it also covers the fixed replacement cell. All other columns
 * keep their spec unchanged.
 */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    final DataColumnSpec[] resultSpecs = new DataColumnSpec[sets.length];
    for (int col = 0; col < sets.length; col++) {
        final DataColumnSpec original = spec.getColumnSpec(col);
        // default: take the column over as it is
        resultSpecs[col] = original;
        if (sets[col].getMethod() != ColSetting.METHOD_FIX_VAL) {
            continue;
        }
        final DataColumnDomain domain = original.getDomain();
        final Comparator<DataCell> order = original.getType().getComparator();
        final DataCell replacement = sets[col].getFixCell();

        // Bounds are only adjusted if they are set. A bound should not be a missing cell (but
        // rather be null); it may happen anyway, so that case is skipped as well.
        DataCell lower = domain.getLowerBound();
        final boolean belowLower = lower != null && !lower.isMissing() && (order.compare(replacement, lower) < 0);
        if (belowLower) {
            lower = replacement;
        }
        DataCell upper = domain.getUpperBound();
        final boolean aboveUpper = upper != null && !upper.isMissing() && (order.compare(replacement, upper) > 0);
        if (aboveUpper) {
            upper = replacement;
        }

        // the list of possible values is only extended if the domain has one
        Set<DataCell> values = domain.getValues();
        final boolean newValue = values != null && !values.contains(replacement);
        if (newValue) {
            values = new LinkedHashSet<DataCell>(values);
            values.add(replacement);
        }

        if (belowLower || aboveUpper || newValue) {
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(original);
            specCreator.setDomain(new DataColumnDomainCreator(values, lower, upper).createDomain());
            resultSpecs[col] = specCreator.createSpec();
        }
    }
    return new DataTableSpec(resultSpecs);
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class ARFFTable method createDataTableSpecFromARFFfile.
/**
 * Reads in the header of the specified ARFF file and returns a
 * corresponding table spec object.
 *
 * <p>The header is read token by token until the "@DATA" statement is found. Each "@ATTRIBUTE"
 * statement becomes one column: NUMERIC and REAL map to double, INTEGER to int, STRING and DATE
 * to string, and a list of nominal values in braces maps to a string column whose domain
 * contains the listed values. The argument of "@RELATION" becomes the name of the table spec.
 * Other statements are ignored with a warning. The stream opened on the file is closed before
 * this method returns, no matter whether it returns normally or with an exception.
 *
 * @param fileLoc the location of the ARFF file to read
 * @param exec to enable users to cancel this process; may be <code>null</code>
 * @return a table spec reflecting the settings in the file header
 * @throws IOException if the file location couldn't be opened
 * @throws InvalidSettingsException if the file contains an invalid format, e.g. if there is no
 *             data section, an attribute statement is incomplete or has an unknown type, or
 *             two attributes have the same name
 * @throws CanceledExecutionException if user canceled
 */
public static DataTableSpec createDataTableSpecFromARFFfile(final URL fileLoc, final ExecutionMonitor exec) throws IOException, InvalidSettingsException, CanceledExecutionException {
    // prepare for creating a column spec for each "@attribute" read
    Vector<DataColumnSpec> colSpecs = new Vector<DataColumnSpec>();
    String tableName = null;
    // The stream is needed for the header only. try-with-resources makes sure it is released on
    // every way out of the parsing loop, including the exceptions thrown from within.
    // NOTE(review): the reader uses the platform default charset, as before.
    try (InputStream inStream = FileUtil.openStreamWithTimeout(fileLoc);
            BufferedReader reader = new BufferedReader(new InputStreamReader(inStream))) {
        // create a tokenizer to read the header
        Tokenizer tokenizer = new Tokenizer(reader);
        // create tokenizer settings that will deliver us the attributes and
        // arguments as tokens.
        tokenizer.setSettings(getTokenizerHeaderSettings());
        String token;
        // read the header until the data section begins.
        while (true) {
            if (exec != null) {
                // throws exception if user canceled.
                exec.checkCanceled();
            }
            DataCell[] possVals = null;
            DataType type;
            token = tokenizer.nextToken();
            if (token == null) {
                throw new InvalidSettingsException("Incorrect/Incomplete " + "ARFF file. No data section found.");
            }
            if (token.length() == 0) {
                // ignore empty lines
                continue;
            }
            if (token.equalsIgnoreCase("@DATA")) {
                // this starts the data section: we are done.
                break;
            }
            if (token.equalsIgnoreCase("@ATTRIBUTE")) {
                // defines a new data column
                String colName = tokenizer.nextToken();
                String colType = null;
                if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                    // The token contains a value list in braces, possibly right behind the
                    // name. Extract it from there and set it in the 'colType'
                    if (colName.charAt(0) == '{') {
                        // seems we only got a value list.
                        // The col name must be empty/missing then...
                        colType = colName;
                        colName = null;
                    } else {
                        int openBraceIdx = colName.indexOf('{');
                        int closeBraceIdx = colName.lastIndexOf('}');
                        colType = colName.substring(openBraceIdx + 1, closeBraceIdx);
                        colName = colName.substring(0, openBraceIdx);
                        // we ignore everything after the nominal value list
                    }
                } else {
                    colType = tokenizer.nextToken();
                }
                if ((colName == null) || (colType == null)) {
                    throw new InvalidSettingsException("Incomplete '@attribute' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
                }
                // map the ARFF type to a KNIME type
                if (colType.equalsIgnoreCase("NUMERIC") || colType.equalsIgnoreCase("REAL")) {
                    type = DoubleCell.TYPE;
                    // ignore whatever still comes in that line, warn though
                    readUntilEOL(tokenizer, fileLoc.toString());
                } else if (colType.equalsIgnoreCase("INTEGER")) {
                    type = IntCell.TYPE;
                    // ignore whatever still comes in that line, warn though
                    readUntilEOL(tokenizer, fileLoc.toString());
                } else if (colType.equalsIgnoreCase("STRING")) {
                    type = StringCell.TYPE;
                    // ignore whatever still comes in that line, warn though
                    readUntilEOL(tokenizer, fileLoc.toString());
                } else if (colType.equalsIgnoreCase("DATE")) {
                    // we use string cell for date ...
                    type = StringCell.TYPE;
                    // ignore whatever date format is specified
                    readUntilEOL(tokenizer, null);
                } else if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                    // the braces should be still in the string
                    int openBraceIdx = colType.indexOf('{');
                    int closeBraceIdx = colType.lastIndexOf('}');
                    if ((openBraceIdx >= 0) && (closeBraceIdx > 0) && (openBraceIdx < closeBraceIdx)) {
                        colType = colType.substring(openBraceIdx + 1, closeBraceIdx);
                    }
                    // the type was a list of nominal values
                    possVals = extractNominalVals(colType, fileLoc.toString(), tokenizer.getLineNumber());
                    // KNIME uses string cells for nominal values.
                    type = StringCell.TYPE;
                    readUntilEOL(tokenizer, fileLoc.toString());
                } else {
                    throw new InvalidSettingsException("Invalid column type" + " '" + colType + "' in attribute control " + "statement in ARFF file '" + fileLoc + "' at line " + tokenizer.getLineNumber() + ".");
                }
                DataColumnSpecCreator dcsc = new DataColumnSpecCreator(colName, type);
                if (possVals != null) {
                    dcsc.setDomain(new DataColumnDomainCreator(possVals).createDomain());
                }
                colSpecs.add(dcsc.createSpec());
            } else if (token.equalsIgnoreCase("@RELATION")) {
                tableName = tokenizer.nextToken();
                if (tableName == null) {
                    throw new InvalidSettingsException("Incomplete '@relation' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
                }
                // the rest of the line is of no interest.
                readUntilEOL(tokenizer, null);
            } else if (token.charAt(0) == '@') {
                // OOps. What's that?!?
                LOGGER.warn("ARFF reader WARNING: Unsupported control " + "statement '" + token + "' in line " + tokenizer.getLineNumber() + ". Ignoring it! File: " + fileLoc);
                readUntilEOL(tokenizer, null);
            } else if (!token.equals("\n")) {
                LOGGER.warn("ARFF reader WARNING: Unsupported " + "statement '" + token + "' in header of ARFF file '" + fileLoc + "', line " + tokenizer.getLineNumber() + ". Ignoring it!");
                readUntilEOL(tokenizer, null);
            }
            // else ignore empty lines
        }
        // end of while (not EOF)
    }
    // check uniqueness of column names
    HashSet<String> colNames = new HashSet<>();
    for (int c = 0; c < colSpecs.size(); c++) {
        if (!colNames.add(colSpecs.get(c).getName())) {
            throw new InvalidSettingsException("Two attributes with equal names defined in header of file '" + fileLoc + "'.");
        }
    }
    return new DataTableSpec(tableName, colSpecs.toArray(new DataColumnSpec[colSpecs.size()]));
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class Normalizer2NodeModel method calculate.
/**
 * Creates the normalized table for the first input port, depending on the configured mode.
 *
 * <p>In no-normalization mode the input table is passed through together with an empty model
 * spec and an empty configuration. In all other modes the selected columns are normalized, the
 * rows are copied into a new table and the configuration of the transformation is returned
 * along with it. In min/max mode the domain bounds of the normalized columns are additionally
 * set to the configured minimum and maximum.
 *
 * @param inData The input data; the first element must be a {@link BufferedDataTable}.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason, in particular if no known
 *             mode is set or the normalization reports an error.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = input.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inputSpec);
    final Normalizer2 normalizer = new Normalizer2(input, m_columns);
    final long rowcount = input.size();
    final ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);

    final AffineTransTable normalized;
    boolean minMaxMode = false;
    switch (m_mode) {
        case NONORM_MODE:
            return new CalculationResult(input, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            minMaxMode = true;
            normalized = normalizer.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            normalized = normalizer.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            normalized = normalizer.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }

    // an error of the transformation is fatal ...
    final String transformationError = normalized.getErrorMessage();
    if (transformationError != null) {
        throw new Exception(transformationError);
    }
    // ... whereas a problem during initialization only yields a warning
    final String initializationError = normalizer.getErrorMessage();
    if (initializationError != null) {
        setWarningMessage(initializationError);
    }

    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inputSpec, m_columns);
    final AffineTransConfiguration configuration = normalized.getConfiguration();
    DataTableSpec outSpec = normalized.getDataTableSpec();
    if (minMaxMode) {
        // Set the domain bounds of the normalized columns to the configured range explicitly
        // (presumably because the transformation is not guaranteed to snap to min/max).
        final int columnCount = outSpec.getNumColumns();
        final DataColumnSpec[] adjusted = new DataColumnSpec[columnCount];
        for (int col = 0; col < columnCount; col++) {
            adjusted[col] = outSpec.getColumnSpec(col);
        }
        for (String columnName : m_columns) {
            final int position = outSpec.findColumnIndex(columnName);
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(adjusted[position]);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(adjusted[position].getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_min));
            domainCreator.setUpperBound(new DoubleCell(m_max));
            specCreator.setDomain(domainCreator.createDomain());
            adjusted[position] = specCreator.createSpec();
        }
        outSpec = new DataTableSpec(outSpec.getName(), adjusted);
    }

    // copy the normalized rows into the output table
    final ExecutionMonitor normExec = exec.createSubProgress(.7);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    long rowNo = 0;
    for (DataRow row : normalized) {
        normExec.checkCanceled();
        rowNo++;
        normExec.setProgress(rowNo / (double) rowcount, "Normalizing row no. " + rowNo + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Aggregations