Example 31 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class DomainDialog method takeOverSettings.

/**
 * @return an object with domain values set by the user, or <code>null</code> if the settings are invalid; in that
 *         case an error message box is displayed.
 */
private ColProperty takeOverSettings() {
    ColProperty result = new ColProperty();
    if (m_colProp.getColumnSpec().getType().isCompatible(StringValue.class)) {
        DataColumnSpecCreator dcsc = new DataColumnSpecCreator(m_colProp.getColumnSpec().getName(), m_colProp.getColumnSpec().getType());
        if (m_containsVals != null) {
            result.setReadPossibleValuesFromFile(m_containsVals.isSelected());
        }
        if ((m_containsVals == null) || m_containsVals.isSelected()) {
            // if it's null we have a string column
            Set<DataCell> pVals = null;
            // transfer possible values
            int valCount = m_valueList.getModel().getSize();
            pVals = new LinkedHashSet<DataCell>();
            for (int i = 0; i < valCount; i++) {
                DataCell val = (DataCell) m_valueList.getModel().getElementAt(i);
                pVals.add(val);
            }
            if (pVals.size() > 0) {
                DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(pVals);
                dcsc.setDomain(domainCreator.createDomain());
            }
        }
        result.setColumnSpec(dcsc.createSpec());
    } else {
        DataType type = m_colProp.getColumnSpec().getType();
        DataColumnSpecCreator dcsc = new DataColumnSpecCreator(m_colProp.getColumnSpec().getName(), type);
        DataColumnDomainCreator domainCreator = new DataColumnDomainCreator();
        if (type.equals(IntCell.TYPE)) {
            domainCreator.setLowerBound(new IntCell((int) m_lowerBoundField.getValue()));
            domainCreator.setUpperBound(new IntCell((int) m_upperBoundField.getValue()));
        } else if (type.equals(DoubleCell.TYPE)) {
            domainCreator.setLowerBound(new DoubleCell((double) m_lowerBoundField.getValue()));
            domainCreator.setUpperBound(new DoubleCell((double) m_upperBoundField.getValue()));
        }
        dcsc.setDomain(domainCreator.createDomain());
        result.setColumnSpec(dcsc.createSpec());
    }
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataType(org.knime.core.data.DataType) IntCell(org.knime.core.data.def.IntCell)
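
A minimal, hedged sketch (not part of the KNIME sources; the column name and values are invented for illustration) of the core pattern in this example: collecting possible values into a LinkedHashSet and attaching them to a string column spec through a DataColumnDomainCreator.

import java.util.LinkedHashSet;
import java.util.Set;

import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.def.StringCell;

public class NominalDomainSketch {

    /** Builds a string column spec whose domain lists a fixed set of possible values. */
    public static DataColumnSpec createSpecWithPossibleValues() {
        // hypothetical column values, kept in insertion order
        Set<DataCell> possibleValues = new LinkedHashSet<>();
        possibleValues.add(new StringCell("red"));
        possibleValues.add(new StringCell("green"));
        possibleValues.add(new StringCell("blue"));

        DataColumnSpecCreator specCreator = new DataColumnSpecCreator("Color", StringCell.TYPE);
        // the domain creator turns the value set into an immutable domain
        specCreator.setDomain(new DataColumnDomainCreator(possibleValues).createDomain());
        return specCreator.createSpec();
    }
}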

Example 32 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class NominalTable method computeValues.

/**
 * Finds all possible values based on a table and a number of given column
 * indices by iterating through the table.
 *
 * @param table the table to get values from
 * @param columnIndex an array of sorted column indices
 * @param exec an object to check if user canceled
 * @return a modified table spec containing all possible values
 * @throws NullPointerException if the table is <code>null</code>
 * @throws IllegalArgumentException if column indices are not sorted
 * @throws IndexOutOfBoundsException if a column index is out of range
 * @throws CanceledExecutionException if user canceled operation
 */
public static final DataTableSpec computeValues(final BufferedDataTable table, final ExecutionMonitor exec, final int... columnIndex) throws CanceledExecutionException {
    DataTableSpec oldSpec = table.getDataTableSpec();
    // keep all possible values for each column (index)
    @SuppressWarnings("unchecked") Set<DataCell>[] set = new Set[columnIndex.length];
    HashSet<Integer> hash = new HashSet<Integer>();
    for (int c = 0; c < columnIndex.length; c++) {
        if (columnIndex[c] == -1) {
            throw new IllegalArgumentException("Column " + columnIndex[c] + " not found.");
        }
        if (hash.contains(columnIndex[c])) {
            throw new IllegalArgumentException("Column indices " + " contain duplicates: " + c);
        }
        if (c > 0 && columnIndex[c - 1] >= columnIndex[c]) {
            throw new IllegalArgumentException("Column indices are " + "not sorted.");
        }
        hash.add(columnIndex[c]);
        set[c] = new HashSet<DataCell>();
    }
    // overall rows in the table
    long rowCount = 0;
    for (DataRow row : table) {
        // get value for column indices
        for (int c = 0; c < columnIndex.length; c++) {
            DataCell cell = row.getCell(columnIndex[c]);
            // adds only each value once
            set[c].add(cell);
        }
        if (exec != null) {
            // throws exception if user canceled
            exec.checkCanceled();
            exec.setProgress((double) ++rowCount / table.size(), "" + row.getKey());
        }
    }
    DataColumnSpec[] newColSpecs = new DataColumnSpec[oldSpec.getNumColumns()];
    // index within the set of possible values
    int idx = 0;
    for (int i = 0; i < newColSpecs.length; i++) {
        DataColumnSpec oldColSpec = oldSpec.getColumnSpec(i);
        if (hash.contains(i)) {
            DataColumnSpecCreator creator = new DataColumnSpecCreator(oldColSpec);
            DataCell lower = null;
            DataCell upper = null;
            if (oldColSpec.getDomain().hasBounds()) {
                lower = oldColSpec.getDomain().getLowerBound();
                upper = oldColSpec.getDomain().getUpperBound();
            } else {
                // TODO DoubleValue is too restrictive
                if (oldColSpec.getType().isCompatible(DoubleValue.class)) {
                    TreeSet<DataCell> tSet = new TreeSet<DataCell>(oldColSpec.getType().getComparator());
                    tSet.addAll(set[idx]);
                    lower = tSet.first();
                    upper = tSet.last();
                }
            }
            DataColumnDomain dom = new DataColumnDomainCreator(set[idx], lower, upper).createDomain();
            creator.setDomain(dom);
            newColSpecs[i] = creator.createSpec();
            idx++;
        } else {
            newColSpecs[i] = oldColSpec;
        }
    }
    // create new table spec along with all column specs
    return new DataTableSpec(newColSpecs);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) TreeSet(java.util.TreeSet) DataCell(org.knime.core.data.DataCell) HashSet(java.util.HashSet)
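
A hedged sketch (not from the KNIME sources; the cells and column name are placeholders) of the pattern used above: deriving lower and upper bounds from a set of cells with the column type's comparator, then packing the values and both bounds into a single domain.

import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;

import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomain;
import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.def.DoubleCell;

public class BoundedDomainSketch {

    /** Builds a double column spec with possible values plus lower/upper bounds. */
    public static DataColumnSpec withValuesAndBounds() {
        Set<DataCell> values = new HashSet<>();
        values.add(new DoubleCell(1.5));
        values.add(new DoubleCell(3.0));
        values.add(new DoubleCell(-0.5));

        // sort with the column type's comparator to find the bounds
        TreeSet<DataCell> sorted = new TreeSet<>(DoubleCell.TYPE.getComparator());
        sorted.addAll(values);

        DataColumnDomain domain =
            new DataColumnDomainCreator(values, sorted.first(), sorted.last()).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator("Measurement", DoubleCell.TYPE);
        creator.setDomain(domain);
        return creator.createSpec();
    }
}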

Example 33 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class MissingValueHandlingTable method createTableSpecPrivate.

/* private helper that assumes the ColSetting to have the right format. */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    DataColumnSpec[] newSpecs = new DataColumnSpec[sets.length];
    for (int i = 0; i < sets.length; i++) {
        DataColumnSpec colSpec = spec.getColumnSpec(i);
        DataColumnSpec newSpec = colSpec;
        if (sets[i].getMethod() == ColSetting.METHOD_FIX_VAL) {
            DataColumnDomain dom = colSpec.getDomain();
            Comparator<DataCell> comp = colSpec.getType().getComparator();
            DataCell fixCell = sets[i].getFixCell();
            boolean changed = false;
            DataCell l = dom.getLowerBound();
            // bounds should not be missing cells (but rather be null); it may happen anyway, we catch it here
            if (l != null && !l.isMissing() && (comp.compare(fixCell, l) < 0)) {
                changed = true;
                l = fixCell;
            }
            DataCell u = dom.getUpperBound();
            if (u != null && !u.isMissing() && (comp.compare(fixCell, u) > 0)) {
                changed = true;
                u = fixCell;
            }
            Set<DataCell> vals = dom.getValues();
            if (vals != null && !vals.contains(fixCell)) {
                changed = true;
                vals = new LinkedHashSet<DataCell>(vals);
                vals.add(fixCell);
            }
            if (changed) {
                DataColumnDomain newDom = new DataColumnDomainCreator(vals, l, u).createDomain();
                DataColumnSpecCreator c = new DataColumnSpecCreator(colSpec);
                c.setDomain(newDom);
                newSpec = c.createSpec();
            }
        }
        newSpecs[i] = newSpec;
    }
    return new DataTableSpec(newSpecs);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)
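
A hedged sketch (not from the KNIME sources; written as a standalone helper) of the domain-widening idea in this example: if a fixed replacement cell falls outside the current bounds or value set, rebuild the domain so that it covers the new cell.

import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.Set;

import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomain;
import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;

public class WidenDomainSketch {

    /** Returns a copy of {@code colSpec} whose domain is guaranteed to cover {@code fixCell}. */
    public static DataColumnSpec widen(final DataColumnSpec colSpec, final DataCell fixCell) {
        DataColumnDomain dom = colSpec.getDomain();
        Comparator<DataCell> comp = colSpec.getType().getComparator();

        DataCell lower = dom.getLowerBound();
        if (lower != null && !lower.isMissing() && comp.compare(fixCell, lower) < 0) {
            lower = fixCell; // new value undercuts the old lower bound
        }
        DataCell upper = dom.getUpperBound();
        if (upper != null && !upper.isMissing() && comp.compare(fixCell, upper) > 0) {
            upper = fixCell; // new value exceeds the old upper bound
        }
        Set<DataCell> values = dom.getValues();
        if (values != null && !values.contains(fixCell)) {
            values = new LinkedHashSet<>(values); // copy before modifying
            values.add(fixCell);
        }
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpec);
        creator.setDomain(new DataColumnDomainCreator(values, lower, upper).createDomain());
        return creator.createSpec();
    }
}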

Example 34 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class ARFFTable method createDataTableSpecFromARFFfile.

/**
 * Reads in the header of the specified ARFF file and returns a
 * corresponding table spec object.
 *
 * @param fileLoc the location of the ARFF file to read
 * @param exec to enable users to cancel this process
 * @return a table spec reflecting the settings in the file header
 * @throws IOException if the file location couldn't be opened
 * @throws InvalidSettingsException if the file contains an invalid format
 * @throws CanceledExecutionException if user canceled
 */
public static DataTableSpec createDataTableSpecFromARFFfile(final URL fileLoc, final ExecutionMonitor exec) throws IOException, InvalidSettingsException, CanceledExecutionException {
    // create a tokenizer to read the header
    InputStream inStream = FileUtil.openStreamWithTimeout(fileLoc);
    Tokenizer tokenizer = new Tokenizer(new BufferedReader(new InputStreamReader(inStream)));
    // create tokenizer settings that will deliver us the attributes and
    // arguments as tokens.
    tokenizer.setSettings(getTokenizerHeaderSettings());
    // prepare for creating a column spec for each "@attribute" read
    Vector<DataColumnSpec> colSpecs = new Vector<DataColumnSpec>();
    String tableName = null;
    String token;
    // read the header token by token until the data section begins.
    while (true) {
        if (exec != null) {
            // throws exception if user canceled.
            exec.checkCanceled();
        }
        DataCell[] possVals = null;
        DataType type;
        token = tokenizer.nextToken();
        if (token == null) {
            throw new InvalidSettingsException("Incorrect/Incomplete " + "ARFF file. No data section found.");
        }
        if (token.length() == 0) {
            // ignore empty lines
            continue;
        }
        if (token.equalsIgnoreCase("@DATA")) {
            // this starts the data section: we are done.
            break;
        }
        if (token.equalsIgnoreCase("@ATTRIBUTE")) {
            // defines a new data column
            String colName = tokenizer.nextToken();
            String colType = null;
            if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                // the nominal value list may be glued to the column name. Extract it from there and set it in 'colType'
                if (colName.charAt(0) == '{') {
                    // seems we only got a value list.
                    // The col name must be empty/missing then...
                    colType = colName;
                    colName = null;
                } else {
                    int openBraceIdx = colName.indexOf('{');
                    int closeBraceIdx = colName.lastIndexOf('}');
                    colType = colName.substring(openBraceIdx + 1, closeBraceIdx);
                    colName = colName.substring(0, openBraceIdx);
                // we ignore everything after the nominal value list
                }
            } else {
                colType = tokenizer.nextToken();
            }
            if ((colName == null) || (colType == null)) {
                throw new InvalidSettingsException("Incomplete '@attribute' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
            }
            // map the ARFF column type onto a KNIME DataType.
            if (colType.equalsIgnoreCase("NUMERIC") || colType.equalsIgnoreCase("REAL")) {
                type = DoubleCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("INTEGER")) {
                type = IntCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("STRING")) {
                type = StringCell.TYPE;
                // ignore whatever still comes in that line, warn though
                readUntilEOL(tokenizer, fileLoc.toString());
            } else if (colType.equalsIgnoreCase("DATE")) {
                // we use string cell for date ...
                type = StringCell.TYPE;
                // ignore whatever date format is specified
                readUntilEOL(tokenizer, null);
            } else if (tokenizer.lastTokenWasQuoted() && tokenizer.getLastQuoteBeginPattern().equals("{")) {
                // the braces should be still in the string
                int openBraceIdx = colType.indexOf('{');
                int closeBraceIdx = colType.lastIndexOf('}');
                if ((openBraceIdx >= 0) && (closeBraceIdx > 0) && (openBraceIdx < closeBraceIdx)) {
                    colType = colType.substring(openBraceIdx + 1, closeBraceIdx);
                }
                // the type was a list of nominal values
                possVals = extractNominalVals(colType, fileLoc.toString(), tokenizer.getLineNumber());
                // KNIME uses string cells for nominal values.
                type = StringCell.TYPE;
                readUntilEOL(tokenizer, fileLoc.toString());
            } else {
                throw new InvalidSettingsException("Invalid column type" + " '" + colType + "' in attribute control " + "statement in ARFF file '" + fileLoc + "' at line " + tokenizer.getLineNumber() + ".");
            }
            DataColumnSpecCreator dcsc = new DataColumnSpecCreator(colName, type);
            if (possVals != null) {
                dcsc.setDomain(new DataColumnDomainCreator(possVals).createDomain());
            }
            colSpecs.add(dcsc.createSpec());
        } else if (token.equalsIgnoreCase("@RELATION")) {
            tableName = tokenizer.nextToken();
            if (tableName == null) {
                throw new InvalidSettingsException("Incomplete '@relation' statement at line " + tokenizer.getLineNumber() + " in ARFF file '" + fileLoc + "'.");
            }
            // we just ignore the name of the data set.
            readUntilEOL(tokenizer, null);
        } else if (token.charAt(0) == '@') {
            // OOps. What's that?!?
            LOGGER.warn("ARFF reader WARNING: Unsupported control " + "statement '" + token + "' in line " + tokenizer.getLineNumber() + ". Ignoring it! File: " + fileLoc);
            readUntilEOL(tokenizer, null);
        } else if (!token.equals("\n")) {
            LOGGER.warn("ARFF reader WARNING: Unsupported " + "statement '" + token + "' in header of ARFF file '" + fileLoc + "', line " + tokenizer.getLineNumber() + ". Ignoring it!");
            readUntilEOL(tokenizer, null);
        }
    // else ignore empty lines
    }
    // end of while (not EOF)
    // check uniqueness of column names
    HashSet<String> colNames = new HashSet<>();
    for (int c = 0; c < colSpecs.size(); c++) {
        if (!colNames.add(colSpecs.get(c).getName())) {
            throw new InvalidSettingsException("Two attributes with equal names defined in header of file '" + fileLoc + "'.");
        }
    }
    return new DataTableSpec(tableName, colSpecs.toArray(new DataColumnSpec[colSpecs.size()]));
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedReader(java.io.BufferedReader) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) Tokenizer(org.knime.core.util.tokenizer.Tokenizer) Vector(java.util.Vector) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)
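
A hedged sketch (not from the KNIME sources; the attribute name and parsed values are invented) of how a parsed nominal '@attribute' line maps to a column spec: the possible values become string cells and are handed to a DataColumnDomainCreator backed by a DataCell array.

import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.def.StringCell;

public class ArffNominalColumnSketch {

    /** E.g. for "@attribute class {yes,no}" the parsed values would be {"yes", "no"}. */
    public static DataColumnSpec nominalColumn(final String name, final String[] parsedValues) {
        DataCell[] possVals = new DataCell[parsedValues.length];
        for (int i = 0; i < parsedValues.length; i++) {
            possVals[i] = new StringCell(parsedValues[i]); // nominal values become string cells
        }
        DataColumnSpecCreator creator = new DataColumnSpecCreator(name, StringCell.TYPE);
        creator.setDomain(new DataColumnDomainCreator(possVals).createDomain());
        return creator.createSpec();
    }
}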

Example 35 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class Normalizer2NodeModel method calculate.

/**
 * Creates the new normalized {@link org.knime.core.data.DataTable} depending on the mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer2 ntable = new Normalizer2(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch(m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // fix the domain bounds to the configured min/max (the output values come from
    // the same transformation, which is not guaranteed to snap to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer2(org.knime.base.data.normalize.Normalizer2) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)
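
A hedged sketch (not from the KNIME sources; min and max are placeholders supplied by the caller) of the bound-fixing step above: copy the existing domain into a DataColumnDomainCreator, overwrite the bounds, and rebuild the column spec.

import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.def.DoubleCell;

public class FixBoundsSketch {

    /** Returns a copy of {@code colSpec} whose domain bounds are forced to [min, max]. */
    public static DataColumnSpec fixBounds(final DataColumnSpec colSpec,
            final double min, final double max) {
        // start from the existing domain so possible values etc. are preserved
        DataColumnDomainCreator domCreator = new DataColumnDomainCreator(colSpec.getDomain());
        domCreator.setLowerBound(new DoubleCell(min));
        domCreator.setUpperBound(new DoubleCell(max));

        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpec);
        creator.setDomain(domCreator.createDomain());
        return creator.createSpec();
    }
}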

Aggregations

DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator) 57
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator) 51
DataColumnSpec (org.knime.core.data.DataColumnSpec) 43
DoubleCell (org.knime.core.data.def.DoubleCell) 28
DataCell (org.knime.core.data.DataCell) 27
DataTableSpec (org.knime.core.data.DataTableSpec) 26
InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 15
ArrayList (java.util.ArrayList) 14
DataColumnDomain (org.knime.core.data.DataColumnDomain) 12
DataRow (org.knime.core.data.DataRow) 12
DataType (org.knime.core.data.DataType) 12
DoubleValue (org.knime.core.data.DoubleValue) 11
StringCell (org.knime.core.data.def.StringCell) 8
BufferedDataTable (org.knime.core.node.BufferedDataTable) 7
LinkedHashSet (java.util.LinkedHashSet) 6
Coordinate (org.knime.base.util.coordinate.Coordinate) 6
HashMap (java.util.HashMap) 5
HashSet (java.util.HashSet) 5
LinkedHashMap (java.util.LinkedHashMap) 5
NumericCoordinate (org.knime.base.util.coordinate.NumericCoordinate) 5