Search in sources :

Example 31 with DataColumnDomainCreator

Use of DataColumnDomainCreator in project knime-core by knime.

the class DomainDialog method takeOverSettings.

 * @return an object with domain values set by the user, or <code>null</code> if the settings are invalid, in which case an error
 *         message box is displayed.
// Builds a ColProperty from the values currently entered in the dialog.
// NOTE(review): this excerpt is truncated -- closing braces and the statements that fill pVals
// and attach the created domain to the spec creator / result are not visible here.
private ColProperty takeOverSettings() {
    ColProperty result = new ColProperty();
    // string-compatible columns are handled via a list of possible values
    if (m_colProp.getColumnSpec().getType().isCompatible(StringValue.class)) {
        DataColumnSpecCreator dcsc = new DataColumnSpecCreator(m_colProp.getColumnSpec().getName(), m_colProp.getColumnSpec().getType());
        if (m_containsVals != null) {
        if ((m_containsVals == null) || m_containsVals.isSelected()) {
            // if it's null we have a string column
            Set<DataCell> pVals = null;
            // transfer possible values
            int valCount = m_valueList.getModel().getSize();
            pVals = new LinkedHashSet<DataCell>();
            // walk over every entry of the value list model
            for (int i = 0; i < valCount; i++) {
                DataCell val = (DataCell) m_valueList.getModel().getElementAt(i);
            // a value domain is only created when at least one value is present
            if (pVals.size() > 0) {
                DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(pVals);
    } else {
        // all other columns: take lower/upper bounds from the bound fields (int and double types only)
        DataType type = m_colProp.getColumnSpec().getType();
        DataColumnSpecCreator dcsc = new DataColumnSpecCreator(m_colProp.getColumnSpec().getName(), type);
        DataColumnDomainCreator domainCreator = new DataColumnDomainCreator();
        if (type.equals(IntCell.TYPE)) {
            // the field value is cast to int before it is wrapped in an IntCell
            domainCreator.setLowerBound(new IntCell((int) m_lowerBoundField.getValue()));
            domainCreator.setUpperBound(new IntCell((int) m_upperBoundField.getValue()));
        } else if (type.equals(DoubleCell.TYPE)) {
            domainCreator.setLowerBound(new DoubleCell((double) m_lowerBoundField.getValue()));
            domainCreator.setUpperBound(new DoubleCell((double) m_upperBoundField.getValue()));
    return result;
Also used : DataColumnSpecCreator( DoubleCell( DataCell( DataColumnDomainCreator( DataType( IntCell(

Example 32 with DataColumnDomainCreator

Use of DataColumnDomainCreator in project knime-core by knime.

the class NominalTable method computeValues.

 * Finds all possible values based on a table and a number of given column
 * indices by iterating through the table.
 * @param table the table to get values from
 * @param columnIndex an array of sorted column indices
 * @param exec an object to check if user canceled
 * @return a modified table spec containing all possible values
 * @throws NullPointerException if the table is <code>null</code>
 * @throws IllegalArgumentException if column indices are not sorted
 * @throws IndexOutOfBoundsException if a column index is out of range
 * @throws CanceledExecutionException if user canceled operation
// Validates the given column indices, iterates over the table rows and returns a new table spec.
// NOTE(review): this excerpt is truncated -- closing braces and several statements (e.g. where
// hash, set[c] and tSet are filled, and where the new domain is set on the creator) are not visible.
public static final DataTableSpec computeValues(final BufferedDataTable table, final ExecutionMonitor exec, final int... columnIndex) throws CanceledExecutionException {
    DataTableSpec oldSpec = table.getDataTableSpec();
    // keep all possible values for each column (index)
    @SuppressWarnings("unchecked") Set<DataCell>[] set = new Set[columnIndex.length];
    // column indices seen so far; used for the duplicate check and for the lookup further below
    HashSet<Integer> hash = new HashSet<Integer>();
    for (int c = 0; c < columnIndex.length; c++) {
        // an index of -1 is reported as "not found"
        if (columnIndex[c] == -1) {
            throw new IllegalArgumentException("Column " + columnIndex[c] + " not found.");
        if (hash.contains(columnIndex[c])) {
            throw new IllegalArgumentException("Column indices " + " contain duplicates: " + c);
        // indices must be strictly increasing
        if (c > 0 && columnIndex[c - 1] >= columnIndex[c]) {
            throw new IllegalArgumentException("Column indices are " + "not sorted.");
        set[c] = new HashSet<DataCell>();
    // overall rows in the table
    long rowCount = 0;
    for (DataRow row : table) {
        // get value for column indices
        for (int c = 0; c < columnIndex.length; c++) {
            DataCell cell = row.getCell(columnIndex[c]);
            // adds only each value once
        // progress reporting is skipped when no monitor was passed in
        if (exec != null) {
            // throws exception if user canceled
            // progress is the fraction of rows processed so far; the row key is the message
            exec.setProgress((double) ++rowCount / table.size(), "" + row.getKey());
    DataColumnSpec[] newColSpecs = new DataColumnSpec[oldSpec.getNumColumns()];
    // index within the set of possible values
    int idx = 0;
    for (int i = 0; i < newColSpecs.length; i++) {
        DataColumnSpec oldColSpec = oldSpec.getColumnSpec(i);
        // only columns contained in hash get a new spec; all others keep their old spec
        if (hash.contains(i)) {
            DataColumnSpecCreator creator = new DataColumnSpecCreator(oldColSpec);
            DataCell lower = null;
            DataCell upper = null;
            // reuse the existing bounds if the old domain has some
            if (oldColSpec.getDomain().hasBounds()) {
                lower = oldColSpec.getDomain().getLowerBound();
                upper = oldColSpec.getDomain().getUpperBound();
            } else {
                // TODO DoubleValue is to restrict
                if (oldColSpec.getType().isCompatible(DoubleValue.class)) {
                    // sorted with the column type's comparator so first()/last() give the bounds
                    // NOTE(review): no values are added to tSet in the visible excerpt -- presumably a line is missing
                    TreeSet<DataCell> tSet = new TreeSet<DataCell>(oldColSpec.getType().getComparator());
                    lower = tSet.first();
                    upper = tSet.last();
            DataColumnDomain dom = new DataColumnDomainCreator(set[idx], lower, upper).createDomain();
            newColSpecs[i] = creator.createSpec();
        } else {
            newColSpecs[i] = oldColSpec;
    // create new table spec along with all column specs
    return new DataTableSpec(newColSpecs);
Also used : DataTableSpec( Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) DataColumnSpecCreator( DataColumnDomainCreator( DataRow( DataColumnSpec( DataColumnDomain( TreeSet(java.util.TreeSet) DataCell( HashSet(java.util.HashSet)

Example 33 with DataColumnDomainCreator

Use of DataColumnDomainCreator in project knime-core by knime.

the class MissingValueHandlingTable method createTableSpecPrivate.

/* private helper that assumes the ColSetting to have the right format. */
// Creates a table spec with one column spec per entry in sets. Columns whose setting uses
// ColSetting.METHOD_FIX_VAL get their domain (bounds and possible values) re-examined against
// the fix cell; all other columns keep their original spec.
// NOTE(review): this excerpt is truncated/garbled -- closing braces are missing, the comparison
// calls in the bound checks are damaged, and the statements that add fixCell to vals and set the
// new domain on the spec creator are not visible.
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    DataColumnSpec[] newSpecs = new DataColumnSpec[sets.length];
    for (int i = 0; i < sets.length; i++) {
        DataColumnSpec colSpec = spec.getColumnSpec(i);
        // default: keep the column spec unchanged
        DataColumnSpec newSpec = colSpec;
        if (sets[i].getMethod() == ColSetting.METHOD_FIX_VAL) {
            DataColumnDomain dom = colSpec.getDomain();
            Comparator<DataCell> comp = colSpec.getType().getComparator();
            DataCell fixCell = sets[i].getFixCell();
            // set as soon as any part of the domain has to be replaced
            boolean changed = false;
            DataCell l = dom.getLowerBound();
            // (but rather be null). It may happen anyway, we catch it here
            // NOTE(review): comparison is garbled here; presumably comp.compare(fixCell, l) -- confirm against the original source
            if (l != null && !l.isMissing() && (, l) < 0)) {
                changed = true;
                l = fixCell;
            DataCell u = dom.getUpperBound();
            // NOTE(review): comparison is garbled here; presumably comp.compare(fixCell, u) -- confirm against the original source
            if (u != null && !u.isMissing() && (, u) > 0)) {
                changed = true;
                u = fixCell;
            Set<DataCell> vals = dom.getValues();
            // the possible values are copied when they exist but do not contain the fix cell
            if (vals != null && !vals.contains(fixCell)) {
                changed = true;
                vals = new LinkedHashSet<DataCell>(vals);
            // only build a new domain/spec when something actually changed
            if (changed) {
                DataColumnDomain newDom = new DataColumnDomainCreator(vals, l, u).createDomain();
                DataColumnSpecCreator c = new DataColumnSpecCreator(colSpec);
                newSpec = c.createSpec();
        newSpecs[i] = newSpec;
    return new DataTableSpec(newSpecs);
Also used : DataTableSpec( DataColumnSpec( DataColumnDomain( DataColumnSpecCreator( DataCell( DataColumnDomainCreator(

Example 34 with DataColumnDomainCreator

Use of DataColumnDomainCreator in project knime-core by knime.

the class ARFFTable method createDataTableSpecFromARFFfile.

 * Reads in the header of the specified ARFF file and returns a
 * corresponding table spec object.
 * @param fileLoc the location of the ARFF file to read
 * @param exec to enable users to cancel this process
 * @return a table spec reflecting the settings in the file header
 * @throws IOException if the file location couldn't be opened
 * @throws InvalidSettingsException if the file contains an invalid format
 * @throws CanceledExecutionException if user canceled
// Reads the ARFF header at fileLoc token by token, handling "@ATTRIBUTE", "@RELATION" and
// "@DATA" statements, and returns a table spec named after the relation.
// NOTE(review): this excerpt is truncated -- closing braces, the loop exit, the tokenizer
// configuration and the statement that adds each created column spec to colSpecs are not visible.
public static DataTableSpec createDataTableSpecFromARFFfile(final URL fileLoc, final ExecutionMonitor exec) throws IOException, InvalidSettingsException, CanceledExecutionException {
    // create a tokenizer to read the header
    // NOTE(review): no close of inStream and no explicit charset are visible in this excerpt -- confirm in the full source
    InputStream inStream = FileUtil.openStreamWithTimeout(fileLoc);
    Tokenizer tokenizer = new Tokenizer(new BufferedReader(new InputStreamReader(inStream)));
    // create tokenizer settings that will deliver us the attributes and
    // arguments as tokens.
    // prepare for creating a column spec for each "@attribute" read
    Vector<DataColumnSpec> colSpecs = new Vector<DataColumnSpec>();
    String tableName = null;
    String token;
    // the data section begins.
    while (true) {
        if (exec != null) {
            // throws exception if user canceled.
        // possible values of a nominal attribute; stays null for all other attribute types
        DataCell[] possVals = null;
        DataType type;
        token = tokenizer.nextToken();
        // running out of tokens before "@DATA" means the file is incomplete
        if (token == null) {
            throw new InvalidSettingsException("Incorrect/Incomplete " + "ARFF file. No data section found.");
        if (token.length() == 0) {
            // ignore empty lines
        if (token.equalsIgnoreCase("@DATA")) {
            // this starts the data section: we are done.
        if (token.equalsIgnoreCase("@ATTRIBUTE")) {
            // defines a new data column
            String colName = tokenizer.nextToken();
            String colType = null;
            // the token just read was quoted with '{': it holds a value list, possibly after the name
            if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                // name. Extract it from there and set it in the 'colType'
                if (colName.charAt(0) == '{') {
                    // seems we only got a value list.
                    // The col name must be empty/missing then...
                    colType = colName;
                    colName = null;
                } else {
                    // split the token: text before '{' is the name, text between the braces is the type
                    int openBraceIdx = colName.indexOf('{');
                    int closeBraceIdx = colName.lastIndexOf('}');
                    colType = colName.substring(openBraceIdx + 1, closeBraceIdx);
                    colName = colName.substring(0, openBraceIdx);
                // we ignore everything after the nominal value list
            } else {
                colType = tokenizer.nextToken();
            if ((colName == null) || (colType == null)) {
                throw new InvalidSettingsException("Incomplete '@attribute' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
            // start the 'if' thing here.
            // map the ARFF type keyword (case-insensitive) to a cell type
            if (colType.equalsIgnoreCase("NUMERIC") || colType.equalsIgnoreCase("REAL")) {
                type = DoubleCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("INTEGER")) {
                type = IntCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("STRING")) {
                type = StringCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("DATE")) {
                // we use string cell for date ...
                type = StringCell.TYPE;
                // ignore whatever date format is specified
                readUntilEOL(tokenizer, null);
            } else if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                // the braces should be still in the string
                int openBraceIdx = colType.indexOf('{');
                int closeBraceIdx = colType.lastIndexOf('}');
                // strip the braces only when a well-formed pair is present
                if ((openBraceIdx >= 0) && (closeBraceIdx > 0) && (openBraceIdx < closeBraceIdx)) {
                    colType = colType.substring(openBraceIdx + 1, closeBraceIdx);
                // the type was a list of nominal values
                possVals = extractNominalVals(colType, fileLoc.toString(), tokenizer.getLineNumber());
                // KNIME uses string cells for nominal values.
                type = StringCell.TYPE;
                readUntilEOL(tokenizer, fileLoc.toString());
            } else {
                throw new InvalidSettingsException("Invalid column type" + " '" + colType + "' in attribute control " + "statement in ARFF file '" + fileLoc + "' at line " + tokenizer.getLineNumber() + ".");
            DataColumnSpecCreator dcsc = new DataColumnSpecCreator(colName, type);
            // nominal value lists become the domain of the column
            if (possVals != null) {
                dcsc.setDomain(new DataColumnDomainCreator(possVals).createDomain());
        } else if (token.equalsIgnoreCase("@RELATION")) {
            // the relation name is used as the name of the returned table spec
            tableName = tokenizer.nextToken();
            if (tableName == null) {
                throw new InvalidSettingsException("Incomplete '@relation' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
            // we just ignore the name of the data set.
            readUntilEOL(tokenizer, null);
        } else if (token.charAt(0) == '@') {
            // OOps. What's that?!?
            LOGGER.warn("ARFF reader WARNING: Unsupported control " + "statement '" + token + "' in line " + tokenizer.getLineNumber() + ". Ignoring it! File: " + fileLoc);
            readUntilEOL(tokenizer, null);
        } else if (!token.equals("\n")) {
            // any other non-newline token in the header is logged and skipped
            LOGGER.warn("ARFF reader WARNING: Unsupported " + "statement '" + token + "' in header of ARFF file '" + fileLoc + "', line " + tokenizer.getLineNumber() + ". Ignoring it!");
            readUntilEOL(tokenizer, null);
    // else ignore empty lines
    // end of while (not EOF)
    // check uniqueness of column names
    HashSet<String> colNames = new HashSet<>();
    for (int c = 0; c < colSpecs.size(); c++) {
        // Set.add returns false when the name is already present
        if (!colNames.add(colSpecs.get(c).getName())) {
            throw new InvalidSettingsException("Two attributes with equal names defined in header of file '" + fileLoc + "'.");
    return new DataTableSpec(tableName, colSpecs.toArray(new DataColumnSpec[colSpecs.size()]));
Also used : DataTableSpec( DataColumnSpecCreator( InputStreamReader( InputStream( DataColumnDomainCreator( DataColumnSpec( InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedReader( DataCell( DataType( Tokenizer(org.knime.core.util.tokenizer.Tokenizer) Vector(java.util.Vector) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 35 with DataColumnDomainCreator

Use of DataColumnDomainCreator in project knime-core by knime.

the class Normalizer2NodeModel method calculate.

 * A new normalized table is created depending
 * on the mode.
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
// Normalizes the selected columns of the first input table according to m_mode and returns the
// resulting table together with the model spec and the transformation configuration.
// NOTE(review): this excerpt is truncated -- closing braces, break statements and some case
// labels of the switch, the statement that sets the new domain on the spec creator, and the
// body of the final row loop beyond the progress call are not visible.
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    Normalizer2 ntable = new Normalizer2(inTable, m_columns);
    long rowcount = inTable.size();
    // the preparation step gets 0.3 of the overall progress
    ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);
    AffineTransTable outTable;
    // only set for min/max normalization, see below
    boolean fixDomainBounds = false;
    switch(m_mode) {
        case NONORM_MODE:
            // no normalization: return the input table with an empty spec and a default configuration
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            outTable = ntable.doDecimalScaling(prepareExec);
            throw new Exception("No mode set");
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
    // the model spec contains only the selected columns of the input spec
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // the same transformation, which is not guaranteed to snap to min/max)
    if (fixDomainBounds) {
        // start from a copy of all column specs of the normalized table
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        // for every selected column, prepare a domain whose bounds are m_min and m_max
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            newColSpecs[index] = creator.createSpec();
        // keep the table name, replace the column specs
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    // sub-progress for writing the rows (0.7; the preparation step above took 0.3)
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
    return new CalculationResult(container.getTable(), modelSpec, configuration);
Also used : DataTableSpec( DataColumnSpecCreator( BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer2( DoubleCell( DataColumnDomainCreator( DataRow( InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException( ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnSpec( BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable( AffineTransConfiguration( ExecutionMonitor(org.knime.core.node.ExecutionMonitor)


DataColumnDomainCreator ( DataColumnSpecCreator ( DataColumnSpec ( DoubleCell ( DataCell ( DataTableSpec ( InvalidSettingsException (org.knime.core.node.InvalidSettingsException)15 ArrayList (java.util.ArrayList)14 DataColumnDomain ( DataRow ( DataType ( DoubleValue ( StringCell ( BufferedDataTable (org.knime.core.node.BufferedDataTable)7 LinkedHashSet (java.util.LinkedHashSet)6 Coordinate (org.knime.base.util.coordinate.Coordinate)6 HashMap (java.util.HashMap)5 HashSet (java.util.HashSet)5 LinkedHashMap (java.util.LinkedHashMap)5 NumericCoordinate (org.knime.base.util.coordinate.NumericCoordinate)5