Search in sources :

Example 21 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class MDSProjectionNodeModel method execute.

/** {@inheritDoc} */
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Trains an MDSProjectionManager on (a filtered, row-limited copy of) inData[IN_DATA_INDEX],
    // using inData[FIXED_DATA_INDEX] as the fixed reference table, and returns one table built
    // by a column rearranger around an MDSCellFactory holding the projected data points.
    // NOTE(review): this listing lost its closing braces (and possibly short statements) during
    // extraction; indentation is the only remaining hint of the original block structure.
    final DataTableSpec inSpecData = inData[IN_DATA_INDEX].getSpec();
    // Restrict the working table to the configured include list, if one is set.
    final ColumnRearranger colFilter = new ColumnRearranger(inSpecData);
    if (m_includeList != null) {
        colFilter.keepOnly(m_includeList.toArray(new String[m_includeList.size()]));
    BufferedDataTable rowCutDataTable = exec.createColumnRearrangeTable(inData[IN_DATA_INDEX], colFilter, exec.createSilentSubProgress(0.0));
    // Row limit: the configured value, overridden by the full input row count when
    // m_useRowsModel is enabled.
    int rowsToUse = m_rowsModel.getIntValue();
    if (m_useRowsModel.getBooleanValue()) {
        rowsToUse = inData[IN_DATA_INDEX].getRowCount();
    // Warn if number of rows is greater than chosen number of rows
    if (inData[IN_DATA_INDEX].getRowCount() > rowsToUse) {
        setWarningMessage("Maximal number of rows to report is less than number of rows in input data table !");
    // use only specified rows
    // NOTE(review): arguments are presumably (table, first row, max rows) — confirm against
    // the DefaultDataArray constructor documentation.
    DataTable dataContainer = new DefaultDataArray(rowCutDataTable, 1, rowsToUse);
    // create BufferedDataTable
    rowCutDataTable = exec.createBufferedDataTable(dataContainer, exec);
    // get the indices of the fixed mds columns
    // The indices are resolved against the spec of the fixed-data input, not the data input.
    List<String> fixedCols = m_fixedMdsColModel.getIncludeList();
    int[] fixedMdsColsIndicies = new int[fixedCols.size()];
    DataTableSpec spec = inData[FIXED_DATA_INDEX].getSpec();
    for (int i = 0; i < fixedCols.size(); i++) {
        fixedMdsColsIndicies[i] = spec.findColumnIndex(fixedCols.get(i));
    // create MDS manager, init and train stuff
    m_manager = new MDSProjectionManager(m_outputDimModel.getIntValue(), m_distModel.getStringValue(), m_fuzzy, rowCutDataTable, inData[FIXED_DATA_INDEX], fixedMdsColsIndicies, exec);
    m_manager.train(m_epochsModel.getIntValue(), m_learnrateModel.getDoubleValue());
    // create BufferedDataTable out of mapped data.
    // Note that the output is rearranged from the original input table and its spec
    // (inSpecData), not from the filtered/row-limited rowCutDataTable.
    ColumnRearranger rearranger = createColumnRearranger(inSpecData, new MDSCellFactory(m_manager.getDataPoints(), m_manager.getDimension()));
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(inData[IN_DATA_INDEX], rearranger, exec.createSubProgress(0.1)) };
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BufferedDataTable(org.knime.core.node.BufferedDataTable) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) MDSCellFactory(org.knime.base.node.mine.mds.MDSCellFactory)

Example 22 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class HierarchicalClusterNodeModel method loadInternals.

/** {@inheritDoc} */
protected void loadInternals(final File nodeInternDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    // Restores the node's internal state from three files inside nodeInternDir: the fusion
    // (distance) table, the clustered data rows, and the XML-serialized cluster tree.
    // NOTE(review): closing braces (and possibly trailing statements) were lost in extraction.
    // distances
    File distFile = new File(nodeInternDir, CFG_DIST_DATA);
    ContainerTable table1 = DataContainer.readFromZip(distFile);
    m_fusionTable = new DefaultDataArray(table1, 1, table1.getRowCount());
    // data rows
    File dataFile = new File(nodeInternDir, CFG_H_CLUST_DATA);
    ContainerTable table2 = DataContainer.readFromZip(dataFile);
    m_dataArray = new DefaultDataArray(table2, 1, table2.getRowCount());
    // Cluster tree settings are read from an XML file.
    // NOTE(review): no close() of fis is visible in this listing — confirm whether
    // NodeSettings.loadFromXML closes the stream or whether it leaks.
    File f = new File(nodeInternDir, CFG_HCLUST);
    FileInputStream fis = new FileInputStream(f);
    NodeSettingsRO settings = NodeSettings.loadFromXML(fis);
    // if we had some data...
    if (m_dataArray.size() > 0) {
        // we also have some clustering nodes
        try {
            m_rootNode = ClusterNode.loadFromXML(settings, m_dataArray);
        } catch (InvalidSettingsException e) {
            // Converted to IOException to fit the method's throws clause.
            // NOTE(review): only the message is propagated; the original cause is dropped.
            throw new IOException(e.getMessage());
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) IOException(java.io.IOException) File(java.io.File) ContainerTable(org.knime.core.data.container.ContainerTable) FileInputStream(java.io.FileInputStream)

Example 23 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class SotaNodeModel method execute.

/** {@inheritDoc} */
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    // Initializes the SOTA tree (m_sota) from the table at inData[SotaNodeModel.INPORT] and,
    // when m_withOutPort is set, returns it wrapped in a SotaPortObject; otherwise returns an
    // empty port-object array.
    // NOTE(review): closing braces (and possibly short statements) were lost in extraction.
    // Guard: the input port object must be a BufferedDataTable before it is cast below.
    if (!(inData[SotaNodeModel.INPORT] instanceof BufferedDataTable)) {
        throw new IllegalArgumentException("Given indata port object is " + " no BufferedDataTable!");
    BufferedDataTable bdt = (BufferedDataTable) inData[SotaNodeModel.INPORT];
    // Wrap the input as a DataArray; it is handed to initializeTree as the "original" rows.
    // NOTE(review): Integer.MAX_VALUE is presumably the max-row argument, i.e. "all rows" —
    // confirm against the DefaultDataArray constructor.
    final DataArray origRowContainer = new DefaultDataArray(bdt, 1, Integer.MAX_VALUE);
    DataTable dataTableToUse = bdt;
    int indexOfClassCol = -1;
    // get index of column containing class information
    indexOfClassCol = dataTableToUse.getDataTableSpec().findColumnIndex(m_classCol.getStringValue());
    m_sota.initializeTree(dataTableToUse, origRowContainer, exec, indexOfClassCol);
    if (m_withOutPort) {
        return new PortObject[] { new SotaPortObject(m_sota, dataTableToUse.getDataTableSpec(), indexOfClassCol) };
    return new PortObject[] {};
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DataArray(org.knime.base.node.util.DataArray)

Example 24 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class SotaManager method initializeTree.

/**
 * Initializes the tree by creating the root node and two child cells of
 * the root node. The nodes' data are the mean values of the input data
 * rows.
 *
 * @param inData the table with the input data
 * @param originalData the original data
 * @param exec the execution monitor to set
 * @param indexOfClassColumn The index of the column containing the class
 * information. If value is -1 class values are ignored.
 * @throws CanceledExecutionException if user canceled the process
 */
public void initializeTree(final DataTable inData, final DataArray originalData, final ExecutionMonitor exec, final int indexOfClassColumn) throws CanceledExecutionException {
    // Stores the arguments in fields, copies inData into a DefaultDataArray, decides whether
    // the data is fuzzy, creates the distance manager and matching helper, and lets the helper
    // build the root cell before rows are assigned to it.
    // NOTE(review): this listing is truncated — closing braces, the body of the innermost
    // "no missing values" branch and everything after the final comment were lost in extraction.
    this.m_indexOfClassColumn = indexOfClassColumn;
    this.m_origData = originalData;
    this.m_exec = exec;
    this.m_inDataContainer = new DefaultDataArray(inData, 1, Integer.MAX_VALUE);
    m_state += 0.01;
    m_exec.setProgress(m_state, "Preparing data");
    // / Check for Fuzzy DataCells
    // The data counts as fuzzy as soon as one column type satisfies
    // SotaUtil.isFuzzyIntervalType.
    this.m_isFuzzy = false;
    for (int i = 0; i < m_inDataContainer.getDataTableSpec().getNumColumns(); i++) {
        DataType type = m_inDataContainer.getDataTableSpec().getColumnSpec(i).getType();
        if (SotaUtil.isFuzzyIntervalType(type)) {
            this.m_isFuzzy = true;
    // Hierarchical fuzzy mode forces m_isFuzzy and wraps the container in a filter for the
    // current hierarchy level; the filter also supplies the maximum level.
    if (m_useHierarchicalFuzzyData) {
        this.m_isFuzzy = true;
        this.m_inDataContainer = new FuzzyHierarchyFilterRowContainer(m_inDataContainer, m_currentHierarchyLevel);
        this.m_maxHierarchicalLevel = ((FuzzyHierarchyFilterRowContainer) m_inDataContainer).getMaxLevel();
    // / Create distance metric
    double offset = 1;
    m_distanceManager = DistanceManagerFactory.createDistanceManager(m_distance, m_isFuzzy, offset);
    // Pick the helper implementation that matches the data kind.
    if (this.m_isFuzzy) {
        m_helper = new SotaFuzzyHelper(m_inDataContainer, m_exec);
    } else {
        m_helper = new SotaNumberHelper(m_inDataContainer, m_exec);
    // Count all number cells in rows of row container
    m_dimension = m_helper.initializeDimension();
    // initialize root and children node/cells
    m_root = m_helper.initializeTree();
    // assign all Data to the root cell which have no missing values
    for (int i = 0; i < m_inDataContainer.size(); i++) {
        // Only rows whose index is not yet among the root's data ids are considered.
        if (m_root.getDataIds().indexOf(i) == -1) {
            DataRow row = m_inDataContainer.getRow(i);
            if (!SotaUtil.hasMissingValues(row)) {
        // Progress advances by a total of 0.1 spread evenly over all rows.
        m_state += 0.1 / m_inDataContainer.size();
        m_exec.setProgress(m_state, "Assigning data");
    // assign the data to the children of the root cell
Also used : DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DataType(org.knime.core.data.DataType) DataRow(org.knime.core.data.DataRow)

Example 25 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class LinRegLearnerNodeModel method execute.

/** {@inheritDoc} */
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // Learns a linear regression on the table at inData[0]: accumulates A^T x A over all rows
    // without missing values, inverts it, derives the weight vector in a second scan,
    // optionally sums the squared error in a third scan, and returns the resulting model as a
    // port object (combined with the optional PMML input at inData[1]).
    // NOTE(review): this listing is damaged by extraction — closing braces are missing, the
    // right-hand side of each "DataRow row =;" is lost (presumably it.next()), and short
    // statements such as the "continue" for skipped rows and any increment of myProgress are
    // not visible. Confirm against the upstream source before relying on details.
    // NOTE(review): the next four lines are the interior of a block comment whose delimiters
    // were lost.
         * What comes next is the matrix calculation, solving A \times w = b
         * where A is the matrix having the training data (as many rows as there
         * are rows in inData[0], w is the vector of weights to learn (number of
         * variables) and b is the target output
    // reset was called, must be cleared
    final BufferedDataTable data = (BufferedDataTable) inData[0];
    final DataTableSpec spec = data.getDataTableSpec();
    final String[] includes = computeIncludes(spec);
    // One unknown per included column plus one more (the constant column, see below).
    final int nrUnknown = includes.length + 1;
    double[] means = new double[includes.length];
    // indices of the columns in m_includes
    final int[] colIndizes = new int[includes.length];
    for (int i = 0; i < includes.length; i++) {
        colIndizes[i] = spec.findColumnIndex(includes[i]);
    // index of m_target
    final int target = spec.findColumnIndex(m_target);
    // this is the matrix (A^T x A) where A is the training data including
    // one column fixed to one.
    // (we do it here manually in order to avoid to get all the data in
    // double[][])
    double[][] ata = new double[nrUnknown][nrUnknown];
    double[] buffer = new double[nrUnknown];
    // we memorize for each row if it contains missing values.
    BitSet missingSet = new BitSet();
    m_nrRows = data.getRowCount();
    int myProgress = 0;
    // we need 2 or 3 scans on the data (first run was done already)
    final double totalProgress = (2 + (m_isCalcError ? 1 : 0)) * m_nrRows;
    int rowCount = 0;
    boolean hasPrintedWarning = false;
    // Scan 1: flag rows with missing values and accumulate A^T x A from the remaining rows.
    for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
        DataRow row =;
        exec.setProgress(myProgress / totalProgress, "Calculating matrix " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        DataCell targetValue = row.getCell(target);
        // read data from row into buffer, skip missing value rows
        // readIntoBuffer's return value is treated as "row contains a missing value".
        boolean containsMissing = targetValue.isMissing() || readIntoBuffer(row, buffer, colIndizes);
        missingSet.set(rowCount, containsMissing);
        if (containsMissing) {
            String errorMessage = "Row \"" + row.getKey().getString() + "\" contains missing values, skipping it.";
            // Only the first such row is logged at warn level.
            if (!hasPrintedWarning) {
                LOGGER.warn(errorMessage + " Suppress further warnings.");
                hasPrintedWarning = true;
            } else {
            // with next row
        updateMean(buffer, means);
        // the matrix is symmetric
        for (int i = 0; i < nrUnknown; i++) {
            for (int j = 0; j < nrUnknown; j++) {
                ata[i][j] += buffer[i] * buffer[j];
    assert (m_nrRows == rowCount);
    // no unique solution when there are less rows than unknown variables
    // rowCount is advanced in the loop header, so it counts every iterated row.
    if (rowCount <= nrUnknown) {
        throw new Exception("Too few rows to perform regression (" + rowCount + " rows, but degree of freedom of " + nrUnknown + ")");
    exec.setMessage("Calculating pseudo inverse...");
    double[][] ataInverse = MathUtils.inverse(ata);
    // multiply with A^T and b, i.e. (A^T x A)^-1 x A^T x b
    double[] multipliers = new double[nrUnknown];
    rowCount = 0;
    // Scan 2: accumulate the weight vector row by row.
    for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
        DataRow row =;
        exec.setMessage("Determining output " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        exec.setProgress(myProgress / totalProgress);
        // does row containing missing values?
        if (missingSet.get(rowCount)) {
            // error has printed above, silently ignore here.
        boolean containsMissing = readIntoBuffer(row, buffer, colIndizes);
        assert !containsMissing;
        DataCell targetValue = row.getCell(target);
        double b = ((DoubleValue) targetValue).getDoubleValue();
        for (int i = 0; i < nrUnknown; i++) {
            double buf = 0.0;
            for (int j = 0; j < nrUnknown; j++) {
                buf += ataInverse[i][j] * buffer[j];
            multipliers[i] += buf * b;
    // Optional scan 3: m_error accumulates the squared difference between each target value
    // and the model output.
    if (m_isCalcError) {
        assert m_error == 0.0;
        rowCount = 0;
        for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
            DataRow row =;
            exec.setMessage("Calculating error " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
            exec.setProgress(myProgress / totalProgress);
            // does row containing missing values?
            if (missingSet.get(rowCount)) {
                // error has printed above, silently ignore here.
            boolean hasMissing = readIntoBuffer(row, buffer, colIndizes);
            assert !hasMissing;
            DataCell targetValue = row.getCell(target);
            double b = ((DoubleValue) targetValue).getDoubleValue();
            double out = 0.0;
            for (int i = 0; i < nrUnknown; i++) {
                out += multipliers[i] * buffer[i];
            m_error += (b - out) * (b - out);
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
    DataTableSpec outSpec = getLearningSpec(spec);
    // The first weight is split off as the offset; the remaining weights are passed on as
    // the multipliers.
    double offset = multipliers[0];
    multipliers = Arrays.copyOfRange(multipliers, 1, multipliers.length);
    m_params = new LinearRegressionContent(outSpec, offset, multipliers, means);
    // cache the entire table as otherwise the color information
    // may be lost (filtering out the "colored" column)
    m_rowContainer = new DefaultDataArray(data, m_firstRowPaint, m_rowCountPaint);
    m_actualUsedColumns = includes;
    return new PortObject[] { m_params.createPortObject(inPMMLPort, spec, outSpec) };
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BitSet(java.util.BitSet) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) LinearRegressionContent(org.knime.base.node.mine.regression.linear.LinearRegressionContent) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)


DefaultDataArray (org.knime.base.node.util.DefaultDataArray)32 BufferedDataTable (org.knime.core.node.BufferedDataTable)16 File ( ContainerTable ( DataTableSpec ( FileInputStream ( IOException ( InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 DataRow ( DataColumnSpec ( HashSet (java.util.HashSet)6 DataArray (org.knime.base.node.util.DataArray)6 DataTable ( Map (java.util.Map)5 RowKey ( NodeSettingsRO (org.knime.core.node.NodeSettingsRO)5 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)5 PortObject (org.knime.core.node.port.PortObject)5 BufferedInputStream ( ArrayList (java.util.ArrayList)4