Example 56 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class KnnNodeModel method checkInputTables.

/**
 * Checks if the two input tables are correct and fills the last two
 * arguments with sensible values.
 *
 * @param inSpecs the input tables' specs
 * @param featureColumns a list that gets filled with the feature columns'
 *            indices; all columns with {@link DoubleValue}s are used as
 *            features
 * @param firstToSecond a map that afterwards maps the indices of the
 *            feature columns in the first table to the corresponding
 *            columns from the second table
 * @throws InvalidSettingsException if the two tables are not compatible
 */
private void checkInputTables(final DataTableSpec[] inSpecs, final List<Integer> featureColumns, final Map<Integer, Integer> firstToSecond) throws InvalidSettingsException {
    if (!inSpecs[0].containsCompatibleType(DoubleValue.class)) {
        throw new InvalidSettingsException("First input table does not contain a numeric column.");
    }
    if (!inSpecs[0].containsCompatibleType(StringValue.class)) {
        throw new InvalidSettingsException("First input table does not contain a class column of type " + "string.");
    }
    int i = 0;
    for (DataColumnSpec cs : inSpecs[0]) {
        if (cs.getType().isCompatible(DoubleValue.class)) {
            featureColumns.add(i);
        } else if (!cs.getName().equals(m_settings.classColumn())) {
            setWarningMessage("Input table contains more than one non-numeric column; they will be ignored.");
        }
        i++;
    }
    for (int k : featureColumns) {
        final DataColumnSpec cs0 = inSpecs[0].getColumnSpec(k);
        int secondColIndex = inSpecs[1].findColumnIndex(cs0.getName());
        if (secondColIndex == -1) {
            throw new InvalidSettingsException("Second input table does not contain a column: '" + cs0.getName() + "'");
        }
        final DataColumnSpec cs1 = inSpecs[1].getColumnSpec(secondColIndex);
        if (cs0.getName().equals(cs1.getName()) && cs1.getType().isCompatible(DoubleValue.class)) {
            firstToSecond.put(k, secondColIndex);
        } else {
            throw new InvalidSettingsException("Column '" + cs1.getName() + "' from second table is not compatible " + "with corresponding column '" + cs0.getName() + "' from first table.");
        }
    }
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringValue(org.knime.core.data.StringValue)
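
The essence of this check is pairing feature columns by name across two table specs and verifying type compatibility on both sides. Below is a minimal, KNIME-free sketch of that pairing logic; the ColumnSpec record and the matchColumns helper are hypothetical stand-ins for DataColumnSpec and the loop in checkInputTables, not KNIME API.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical stand-in for DataColumnSpec: a name plus a numeric flag
// (numeric == DoubleValue-compatible in KNIME terms).
record ColumnSpec(String name, boolean numeric) {}

public class ColumnMatchSketch {

    // Maps each numeric column index of the first spec to the index of the
    // same-named numeric column in the second spec.
    static Map<Integer, Integer> matchColumns(List<ColumnSpec> first, List<ColumnSpec> second) {
        Map<Integer, Integer> firstToSecond = new LinkedHashMap<>();
        for (int i = 0; i < first.size(); i++) {
            ColumnSpec cs0 = first.get(i);
            if (!cs0.numeric()) {
                continue; // only DoubleValue-compatible columns are features
            }
            int j = indexOf(second, cs0.name());
            if (j < 0) {
                throw new IllegalArgumentException("Second table does not contain a column: '" + cs0.name() + "'");
            }
            if (!second.get(j).numeric()) {
                throw new IllegalArgumentException("Column '" + cs0.name() + "' from second table is not numeric.");
            }
            firstToSecond.put(i, j);
        }
        return firstToSecond;
    }

    // Same convention as DataTableSpec.findColumnIndex: -1 if not found.
    static int indexOf(List<ColumnSpec> spec, String name) {
        for (int i = 0; i < spec.size(); i++) {
            if (spec.get(i).name().equals(name)) {
                return i;
            }
        }
        return -1;
    }

    public static void main(String[] args) {
        List<ColumnSpec> train = List.of(new ColumnSpec("x", true), new ColumnSpec("class", false));
        List<ColumnSpec> test = List.of(new ColumnSpec("x", true));
        System.out.println(matchColumns(train, test)); // prints {0=0}
    }
}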

Example 57 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class Distances method getCosinusDistance.

/**
 * Computes the cosine distance between the two given rows, using the given
 * offset.
 *
 * @param row1 the first row to compute the cosine distance of
 * @param row2 the second row to compute the cosine distance of
 * @param offset the offset from which the cosine value is subtracted; if 0,
 *            the plain cosine value is returned
 * @param fuzzy if <code>true</code> only fuzzy data is considered, if
 *            <code>false</code> only number data is considered
 * @return the cosine distance between the two given rows
 */
public static double getCosinusDistance(final DataRow row1, final DataRow row2, final double offset, final boolean fuzzy) {
    double distance = 0;
    double vectorMultRes = 0;
    double vector1Length = 0;
    double vector2Length = 0;
    for (int i = 0; i < row1.getNumCells(); i++) {
        DataType type1 = row1.getCell(i).getType();
        DataType type2 = row2.getCell(i).getType();
        if (SotaUtil.isNumberType(type1) && SotaUtil.isNumberType(type2) && !fuzzy) {
            vectorMultRes += ((DoubleValue) row1.getCell(i)).getDoubleValue() * ((DoubleValue) row2.getCell(i)).getDoubleValue();
            vector1Length += Math.pow(((DoubleValue) row1.getCell(i)).getDoubleValue(), 2);
            vector2Length += Math.pow(((DoubleValue) row2.getCell(i)).getDoubleValue(), 2);
        } else if (SotaUtil.isFuzzyIntervalType(type1) && SotaUtil.isFuzzyIntervalType(type2) && fuzzy) {
            vectorMultRes += SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(i)) * SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(i));
            vector1Length += Math.pow(SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(i)), 2);
            vector2Length += Math.pow(SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(i)), 2);
        }
    }
    vector1Length = Math.sqrt(vector1Length);
    vector2Length = Math.sqrt(vector2Length);
    distance = vectorMultRes / (vector1Length * vector2Length);
    if (offset != 0) {
        distance = offset - distance;
    }
    return distance;
}
Also used : FuzzyIntervalValue(org.knime.core.data.FuzzyIntervalValue) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataPoint(org.knime.base.node.mine.mds.DataPoint)
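
Stripped of the KNIME cell types, the method computes the cosine of the angle between the two row vectors (dot product divided by the product of the vector norms) and, for a non-zero offset, returns offset minus that value as a distance. A minimal sketch on plain double arrays, with an explicit guard for zero-length vectors, which the original would silently turn into NaN:

// Cosine measure between two equally long vectors; with offset != 0 the
// result is offset - cos(a, b), turning the similarity into a distance.
static double cosineDistance(double[] a, double[] b, double offset) {
    double dot = 0;
    double normA = 0;
    double normB = 0;
    for (int i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    double denom = Math.sqrt(normA) * Math.sqrt(normB);
    if (denom == 0) {
        throw new ArithmeticException("zero-length vector"); // original code yields NaN here
    }
    double cosine = dot / denom;
    return offset != 0 ? offset - cosine : cosine;
}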

Example 58 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class CreateBitVectorNodeModel method calculateMeanValues.

private double[] calculateMeanValues(final ExecutionMonitor exec, final BufferedDataTable input, final int[] colIndices) throws CanceledExecutionException {
    // note: sized to the table's full column count, though only the first
    // colIndices.length entries are filled below
    double[] meanValues = new double[input.getDataTableSpec().getNumColumns()];
    long nrOfRows = 0;
    final long rowCount = input.size();
    for (DataRow row : input) {
        exec.setProgress(nrOfRows / (double) rowCount, "Computing mean value. Processing row " + nrOfRows + " of " + rowCount);
        exec.checkCanceled();
        for (int i = 0; i < colIndices.length; i++) {
            DataCell cell = row.getCell(colIndices[i]);
            if (cell.isMissing()) {
                continue;
            }
            if (cell instanceof DoubleValue) {
                meanValues[i] += ((DoubleValue) cell).getDoubleValue();
            } else {
                throw new RuntimeException("Found incompatible type in row " + row.getKey().getString());
            }
        }
        nrOfRows++;
    }
    for (int i = 0; i < meanValues.length; i++) {
        meanValues[i] = meanValues[i] / nrOfRows;
    }
    return meanValues;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)
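
One subtlety worth noting: missing cells are skipped when summing, but every column's sum is later divided by the total row count, so a column containing missing values gets a mean biased toward zero. A sketch that keeps a per-column count of non-missing values avoids this; here Double.NaN is a hypothetical stand-in for a missing DataCell:

// Column-wise mean that skips NaN entries and divides each column's sum
// by its own count of non-missing values.
static double[] columnMeans(double[][] rows, int numColumns) {
    double[] sums = new double[numColumns];
    long[] counts = new long[numColumns];
    for (double[] row : rows) {
        for (int c = 0; c < numColumns; c++) {
            if (!Double.isNaN(row[c])) { // NaN plays the role of a missing cell
                sums[c] += row[c];
                counts[c]++;
            }
        }
    }
    for (int c = 0; c < numColumns; c++) {
        sums[c] = counts[c] > 0 ? sums[c] / counts[c] : Double.NaN;
    }
    return sums;
}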

Example 59 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class CAIMDiscretizationNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // measure the time
    long startTime = System.currentTimeMillis();
    // empty model
    if (m_includedColumnNames.getIncludeList() == null || m_includedColumnNames.getIncludeList().size() == 0) {
        return new PortObject[] { inData[0], new DiscretizationModel() };
    }
    LOGGER.debug("Start discretizing.");
    // the algorithm is designed for binary class problems only (positive,
    // negative), so it is run once per class value, treating that value as
    // the positive class and all remaining values as negative
    exec.setProgress(0.0, "Preparing...");
    // check input data
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // get class column index
    m_classifyColumnIndex = data.getDataTableSpec().findColumnIndex(m_classColumnName.getStringValue());
    assert m_classifyColumnIndex > -1;
    // create the class - index mapping
    createClassFromToIndexMaps(data.getDataTableSpec());
    // create the array with the result discretization schemes for
    // each included column
    DiscretizationScheme[] resultSchemes = new DiscretizationScheme[m_includedColumnNames.getIncludeList().size()];
    // for all included columns do the discretization
    int currentColumn = 0;
    for (String includedColumnName : m_includedColumnNames.getIncludeList()) {
        LOGGER.debug("Process column: " + includedColumnName);
        exec.setProgress("Discretizing column '" + includedColumnName + "'");
        ExecutionContext subExecPerColumn = exec.createSubExecutionContext(1.0D / m_includedColumnNames.getIncludeList().size());
        subExecPerColumn.checkCanceled();
        // never discretize the class column itself (should never happen)
        if (m_classColumnName.getStringValue().equals(includedColumnName)) {
            continue;
        }
        // determine the column index of the current column
        int columnIndex = data.getDataTableSpec().findColumnIndex(includedColumnName);
        DataColumnDomain domain = data.getDataTableSpec().getColumnSpec(columnIndex).getDomain();
        double minValue = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
        double maxValue = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
        // find all distinct values of the column and create
        // a table with all possible interval boundaries (midpoint value of
        // adjacent values)
        subExecPerColumn.setProgress("Find possible boundaries.");
        BoundaryScheme boundaryScheme = null;
        // create subExec for sorting
        ExecutionContext subExecSort = subExecPerColumn.createSubExecutionContext(0.1);
        // long t1 = System.currentTimeMillis();
        if (m_classOptimizedVersion) {
            boundaryScheme = createAllIntervalBoundaries(data, columnIndex, subExecSort);
        } else {
            boundaryScheme = createAllIntervalBoundaries2(data, columnIndex, subExecSort);
        }
        subExecSort.setProgress(1.0D);
        // long t2 = System.currentTimeMillis() - t1;
        // LOGGER.error("Create boundaries time: " + (t2 / 1000.0)
        // + " optimized: " + m_classOptimizedVersion);
        // LOGGER.error("Boundaries: " + boundaryScheme.getHead());
        LinkedDouble allIntervalBoundaries = boundaryScheme.getHead();
        // create the initial discretization scheme
        DiscretizationScheme discretizationScheme = new DiscretizationScheme(new Interval(minValue, maxValue, true, true));
        double globalCAIM = 0;
        // perform the iterative search for the best intervals
        int numInsertedBounds = 0;
        double currentCAIM = 0;
        // create subExec for inserted bounds
        ExecutionContext subExecBounds = subExecPerColumn.createSubExecutionContext(0.9);
        while (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length - 1) {
            subExecPerColumn.checkCanceled();
            // create subExec for counting
            ExecutionContext subExecCount = subExecBounds.createSubExecutionContext(1.0D / m_classValues.length);
            // LOGGER.debug("Inserted bounds: " + numInsertedBounds);
            // LOGGER.debug("intervall boundaries: " +
            // allIntervalBoundaries);
            // for all possible interval boundaries
            // insert each one, calculate the caim value and add
            // the one with the biggest caim
            LinkedDouble intervalBoundary = allIntervalBoundaries.m_next;
            currentCAIM = 0;
            LinkedDouble bestBoundary = null;
            long currentCountedBoundaries = 0;
            while (intervalBoundary != null) {
                subExecPerColumn.checkCanceled();
                // set progress
                currentCountedBoundaries++;
                subExecCount.setProgress((double) currentCountedBoundaries / (double) boundaryScheme.getNumBoundaries(), "Count for possible boundary " + currentCountedBoundaries + " of " + boundaryScheme.getNumBoundaries());
                // LOGGER.debug("current caim: " + currentCAIM);
                DiscretizationScheme tentativeDS = new DiscretizationScheme(discretizationScheme);
                tentativeDS.insertBound(intervalBoundary.m_value);
                // create the quanta matrix
                QuantaMatrix2D quantaMatrix = new QuantaMatrix2D(tentativeDS, m_classValueToIndexMap);
                // pass the data for filling the matrix
                quantaMatrix.countData(data, columnIndex, m_classifyColumnIndex);
                // calculate the caim
                double caim = quantaMatrix.calculateCaim();
                if (caim > currentCAIM) {
                    currentCAIM = caim;
                    bestBoundary = intervalBoundary;
                }
                intervalBoundary = intervalBoundary.m_next;
            }
            // if there is no best boundary, break the first while loop
            if (bestBoundary == null) {
                break;
            }
            // in this case accept the best discretization scheme
            if (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length) {
                int numIntervals = discretizationScheme.getNumIntervals();
                discretizationScheme.insertBound(bestBoundary.m_value);
                // remove the linked list element from the list
                bestBoundary.remove();
                globalCAIM = currentCAIM;
                if (numIntervals < discretizationScheme.getNumIntervals()) {
                    numInsertedBounds++;
                    subExecPerColumn.setProgress("Inserted bound " + numInsertedBounds);
                // LOGGER.debug("Inserted boundary: "
                // + bestBoundary.m_value);
                } else {
                    throw new IllegalStateException("Only useful bounds should be inserted: " + bestBoundary.m_value);
                }
            }
            subExecCount.setProgress(1.0D);
        }
        resultSchemes[currentColumn] = discretizationScheme;
        subExecBounds.setProgress(1.0D);
        // ensure the full progress is set for this iteration
        subExecPerColumn.setProgress(1.0D);
        currentColumn++;
    }
    // set the model
    DataTableSpec modelSpec = createModelSpec(m_includedColumnNames, data.getDataTableSpec());
    m_discretizationModel = new DiscretizationModel(resultSchemes, modelSpec);
    // create an output table that replaces the included columns by
    // interval values
    BufferedDataTable resultTable = createResultTable(exec, data, m_discretizationModel);
    // log the runtime of the execute method
    long runtime = System.currentTimeMillis() - startTime;
    LOGGER.debug("Binning runtime: " + (runtime / 1000.0) + " sec.");
    return new PortObject[] { resultTable, m_discretizationModel };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) DiscretizationModel(org.knime.base.node.preproc.discretization.caim2.DiscretizationModel) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) Interval(org.knime.base.node.preproc.discretization.caim2.Interval)
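
The inner loop greedily tries every remaining candidate boundary, keeps the one with the highest CAIM value, and stops once no insertion improves the global CAIM (subject to reaching at least one interval per class). The CAIM criterion (Kurgan and Cios, 2004) for a quanta matrix with n intervals is (1/n) * sum over intervals r of max_r^2 / M_r, where max_r is the largest single-class count in interval r and M_r is that interval's total count. A sketch of that computation, standing in for QuantaMatrix2D.calculateCaim(), whose source is not shown here:

// CAIM criterion for a quanta matrix: quanta[class][interval] holds counts.
// caim = (1/n) * sum over intervals of (max class count)^2 / (interval total)
static double caim(long[][] quanta) {
    int numIntervals = quanta[0].length;
    double sum = 0;
    for (int r = 0; r < numIntervals; r++) {
        long max = 0;
        long total = 0;
        for (long[] classCounts : quanta) {
            max = Math.max(max, classCounts[r]);
            total += classCounts[r];
        }
        if (total > 0) {
            sum += (double) (max * max) / total; // empty intervals contribute nothing
        }
    }
    return sum / numIntervals;
}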

Example 60 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class CAIMDiscretizationNodeModel method createAllIntervalBoundaries2.

/**
 * Sorts the data table in ascending order on the given column, determines
 * all distinct values, and builds a linked list holding the midpoints of
 * all adjacent distinct values. These midpoints represent all possible
 * interval boundaries.
 *
 * @param table the table with the data
 * @param columnIndex the column of interest
 * @param exec the execution context to report progress to
 * @return the boundary scheme with the list head and the number of boundaries
 * @throws Exception if the sorting fails or the execution is canceled
 */
private BoundaryScheme createAllIntervalBoundaries2(final BufferedDataTable table, final int columnIndex, final ExecutionContext exec) throws Exception {
    // sort the data according to the column index
    List<String> sortColumn = new ArrayList<String>();
    sortColumn.add(table.getDataTableSpec().getColumnSpec(columnIndex).getName());
    boolean[] sortOrder = new boolean[1];
    // in ascending order
    sortOrder[0] = true;
    SortedTable sortedTable = new SortedTable(table, sortColumn, sortOrder, true, exec);
    // the first different value is the minimum value of the sorted list
    RowIterator rowIterator = sortedTable.iterator();
    double lastDifferentValue = ((DoubleValue) rowIterator.next().getCell(columnIndex)).getDoubleValue();
    // create the head of the linked double list,
    // marked by negative infinity
    LinkedDouble head = new LinkedDouble(Double.NEGATIVE_INFINITY);
    // set the last added element
    LinkedDouble lastAdded = head;
    // count the number of boundaries
    int numBoundaries = 0;
    while (rowIterator.hasNext()) {
        DataRow row = rowIterator.next();
        DataCell cell = row.getCell(columnIndex);
        double value = ((DoubleValue) cell).getDoubleValue();
        if (value != lastDifferentValue) {
            // a new boundary is the midpoint
            double newBoundary = (value + lastDifferentValue) / 2.0D;
            lastDifferentValue = value;
            // add the new midpoint boundary to the linked list
            lastAdded.m_next = new LinkedDouble(newBoundary);
            numBoundaries++;
            lastAdded.m_next.m_previous = lastAdded;
            lastAdded = lastAdded.m_next;
        }
    }
    return new BoundaryScheme(head, numBoundaries);
}
Also used : ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell)
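
The same construction without KNIME types: sort the values, then emit the midpoint of each pair of adjacent distinct values. A minimal sketch on a plain double array:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// All candidate interval boundaries: midpoints between adjacent distinct values.
static List<Double> intervalBoundaries(double[] values) {
    double[] sorted = values.clone();
    Arrays.sort(sorted); // ascending, like the SortedTable above
    List<Double> boundaries = new ArrayList<>();
    double lastDifferentValue = sorted[0];
    for (double v : sorted) {
        if (v != lastDifferentValue) {
            boundaries.add((v + lastDifferentValue) / 2.0); // midpoint of adjacent distinct values
            lastDifferentValue = v;
        }
    }
    return boundaries;
}

For example, intervalBoundaries(new double[] {1, 1, 2, 4}) yields [1.5, 3.0].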

Aggregations

DoubleValue (org.knime.core.data.DoubleValue) 154
DataCell (org.knime.core.data.DataCell) 103
DataRow (org.knime.core.data.DataRow) 71
DataColumnSpec (org.knime.core.data.DataColumnSpec) 38
DataTableSpec (org.knime.core.data.DataTableSpec) 38
DoubleCell (org.knime.core.data.def.DoubleCell) 32
ArrayList (java.util.ArrayList) 26
BufferedDataTable (org.knime.core.node.BufferedDataTable) 26
DataType (org.knime.core.data.DataType) 23
InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 21
LinkedHashMap (java.util.LinkedHashMap) 18
IntValue (org.knime.core.data.IntValue) 15
HashMap (java.util.HashMap) 14
RowIterator (org.knime.core.data.RowIterator) 14
RowKey (org.knime.core.data.RowKey) 13
DefaultRow (org.knime.core.data.def.DefaultRow) 13
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator) 12
LongValue (org.knime.core.data.LongValue) 10
StringValue (org.knime.core.data.StringValue) 10
DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue) 10