Search in sources :

Example 21 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class MDSProjectionNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * Filters the input table down to the included columns, limits it to the
 * configured number of rows, runs the MDS projection against the fixed
 * data table and finally appends the mapped coordinates to the input table.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = inData[IN_DATA_INDEX];
    final BufferedDataTable fixedTable = inData[FIXED_DATA_INDEX];
    final DataTableSpec inSpecData = inputTable.getSpec();

    // Column filter: keep only the included columns (if an include list is set).
    final ColumnRearranger columnFilter = new ColumnRearranger(inSpecData);
    if (m_includeList != null) {
        columnFilter.keepOnly(m_includeList.toArray(new String[0]));
    }
    final BufferedDataTable columnFiltered =
        exec.createColumnRearrangeTable(inputTable, columnFilter, exec.createSilentSubProgress(0.0));

    // Row limit: either the configured value or the full row count of the input.
    final int configuredRows = m_rowsModel.getIntValue();
    final int availableRows = inputTable.getRowCount();
    final int rowsToUse = m_useRowsModel.getBooleanValue() ? availableRows : configuredRows;
    if (availableRows > rowsToUse) {
        // The input holds more rows than will be taken into account.
        setWarningMessage("Maximal number of rows to report is less than number of rows in input data table !");
    }

    // Cut the filtered table down to the selected rows and buffer the result again.
    final DataTable rowLimited = new DefaultDataArray(columnFiltered, 1, rowsToUse);
    final BufferedDataTable trainingTable = exec.createBufferedDataTable(rowLimited, exec);

    // Resolve the positions of the fixed MDS columns inside the fixed data table.
    final List<String> fixedColumnNames = m_fixedMdsColModel.getIncludeList();
    final int fixedColumnCount = fixedColumnNames.size();
    final int[] fixedColumnIndices = new int[fixedColumnCount];
    final DataTableSpec fixedSpec = fixedTable.getSpec();
    int position = 0;
    while (position < fixedColumnCount) {
        fixedColumnIndices[position] = fixedSpec.findColumnIndex(fixedColumnNames.get(position));
        position++;
    }

    // Set up the projection manager, then initialize and train it.
    m_manager = new MDSProjectionManager(m_outputDimModel.getIntValue(), m_distModel.getStringValue(), m_fuzzy,
        trainingTable, fixedTable, fixedColumnIndices, exec);
    m_manager.setProjectOnly(m_projectOnly.getBooleanValue());
    m_manager.init(m_seedModel.getIntValue());
    m_manager.train(m_epochsModel.getIntValue(), m_learnrateModel.getDoubleValue());

    // Build the output table from the original input plus the mapped data points.
    final MDSCellFactory cellFactory = new MDSCellFactory(m_manager.getDataPoints(), m_manager.getDimension());
    final ColumnRearranger outputRearranger = createColumnRearranger(inSpecData, cellFactory);
    final BufferedDataTable result =
        exec.createColumnRearrangeTable(inputTable, outputRearranger, exec.createSubProgress(0.1));
    return new BufferedDataTable[] { result };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BufferedDataTable(org.knime.core.node.BufferedDataTable) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) MDSCellFactory(org.knime.base.node.mine.mds.MDSCellFactory)

Example 22 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class HierarchicalClusterNodeModel method loadInternals.

/**
 * {@inheritDoc}
 * <p>
 * Restores the distance (fusion) table, the cached data rows and, if any
 * data rows were stored, the cluster tree from the files located in the
 * given internals directory.
 *
 * @throws IOException if one of the internal files cannot be read, or if
 *             the stored cluster settings are invalid (the original
 *             {@link InvalidSettingsException} is attached as cause)
 */
@Override
protected void loadInternals(final File nodeInternDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    // distances
    File distFile = new File(nodeInternDir, CFG_DIST_DATA);
    ContainerTable table1 = DataContainer.readFromZip(distFile);
    m_fusionTable = new DefaultDataArray(table1, 1, table1.getRowCount());
    // data rows
    File dataFile = new File(nodeInternDir, CFG_H_CLUST_DATA);
    ContainerTable table2 = DataContainer.readFromZip(dataFile);
    m_dataArray = new DefaultDataArray(table2, 1, table2.getRowCount());
    // cluster tree settings; the stream is closed here even when parsing
    // fails (closing an already closed FileInputStream is a no-op, so this
    // is safe should loadFromXML close the stream on its own)
    File f = new File(nodeInternDir, CFG_HCLUST);
    final NodeSettingsRO settings;
    try (FileInputStream fis = new FileInputStream(f)) {
        settings = NodeSettings.loadFromXML(fis);
    }
    // if we had some data...
    if (m_dataArray.size() > 0) {
        // we also have some clustering nodes
        try {
            m_rootNode = ClusterNode.loadFromXML(settings, m_dataArray);
        } catch (InvalidSettingsException e) {
            // keep the original exception as cause so the stack trace is not lost
            throw new IOException(e.getMessage(), e);
        }
    }
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) IOException(java.io.IOException) File(java.io.File) ContainerTable(org.knime.core.data.container.ContainerTable) FileInputStream(java.io.FileInputStream)

Example 23 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class SotaNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * Expects a {@link BufferedDataTable} at the input port, initializes the
 * SOTA tree with it and runs the training. The trained model is returned
 * as port object only if the node has an out port.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    final PortObject inPortObject = inData[SotaNodeModel.INPORT];
    if (!(inPortObject instanceof BufferedDataTable)) {
        throw new IllegalArgumentException("Given indata port object is " + " no BufferedDataTable!");
    }
    final BufferedDataTable inputTable = (BufferedDataTable) inPortObject;

    // Keep an in-memory copy of the original rows alongside the table itself.
    final DataArray originalRows = new DefaultDataArray(inputTable, 1, Integer.MAX_VALUE);
    final DataTable trainingTable = inputTable;

    // Position of the column holding the class information.
    final int classColumnIndex =
        trainingTable.getDataTableSpec().findColumnIndex(m_classCol.getStringValue());

    m_sota.initializeTree(trainingTable, originalRows, exec, classColumnIndex);
    m_sota.doTraining();

    final PortObject[] result;
    if (m_withOutPort) {
        result = new PortObject[] {
            new SotaPortObject(m_sota, trainingTable.getDataTableSpec(), classColumnIndex) };
    } else {
        result = new PortObject[] {};
    }
    return result;
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DataArray(org.knime.base.node.util.DataArray)

Example 24 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class SotaManager method initializeTree.

/**
 * Sets up the tree: creates the root node together with its two children
 * cells. The data of these nodes are the mean values of the input data
 * rows.
 *
 * @param inData the table with the input data
 * @param originalData the original data
 * @param exec the execution monitor to set
 * @param indexOfClassColumn The index of the column containing the class
 * information. If value is -1 class values are ignored.
 * @throws CanceledExecutionException if user canceled the process
 */
public void initializeTree(final DataTable inData, final DataArray originalData, final ExecutionMonitor exec, final int indexOfClassColumn) throws CanceledExecutionException {
    m_indexOfClassColumn = indexOfClassColumn;
    m_origData = originalData;
    m_exec = exec;
    m_inDataContainer = new DefaultDataArray(inData, 1, Integer.MAX_VALUE);

    m_exec.checkCanceled();
    m_state += 0.01;
    m_exec.setProgress(m_state, "Preparing data");

    // Fuzzy detection: a single fuzzy interval column marks the data as fuzzy.
    m_isFuzzy = false;
    for (int col = 0; col < m_inDataContainer.getDataTableSpec().getNumColumns(); col++) {
        final DataType columnType = m_inDataContainer.getDataTableSpec().getColumnSpec(col).getType();
        m_isFuzzy |= SotaUtil.isFuzzyIntervalType(columnType);
    }

    // Hierarchical fuzzy data is always fuzzy and is accessed through a
    // hierarchy filter wrapped around the row container.
    if (m_useHierarchicalFuzzyData) {
        m_isFuzzy = true;
        final FuzzyHierarchyFilterRowContainer hierarchyFilter =
            new FuzzyHierarchyFilterRowContainer(m_inDataContainer, m_currentHierarchyLevel);
        m_inDataContainer = hierarchyFilter;
        m_maxHierarchicalLevel = hierarchyFilter.getMaxLevel();
    }

    // Distance metric.
    final double distanceOffset = 1;
    m_distanceManager = DistanceManagerFactory.createDistanceManager(m_distance, m_isFuzzy, distanceOffset);

    // Helper matching the kind of data.
    if (!m_isFuzzy) {
        m_helper = new SotaNumberHelper(m_inDataContainer, m_exec);
    } else {
        m_helper = new SotaFuzzyHelper(m_inDataContainer, m_exec);
    }
    m_exec.checkCanceled();

    // Dimension is determined by the helper from the rows of the container.
    m_dimension = m_helper.initializeDimension();

    // Root and children nodes/cells.
    m_root = m_helper.initializeTree();
    m_root.setLevel(1);
    m_exec.checkCanceled();

    // Hand every row without missing values to the root cell, unless the
    // root already knows its id.
    final int totalRows = m_inDataContainer.size();
    int rowId = 0;
    while (rowId < totalRows) {
        if (m_root.getDataIds().indexOf(rowId) == -1
                && !SotaUtil.hasMissingValues(m_inDataContainer.getRow(rowId))) {
            m_root.getDataIds().add(rowId);
        }
        m_exec.checkCanceled();
        m_state += 0.1 / totalRows;
        m_exec.setProgress(m_state, "Assigning data");
        rowId++;
    }

    // Distribute the root's data among its children.
    assignDataToChildren(m_root);
}
Also used : DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DataType(org.knime.core.data.DataType) DataRow(org.knime.core.data.DataRow)

Example 25 with DefaultDataArray

use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

the class LinRegLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * Learns a linear regression model by solving A * w = b in the least
 * squares sense, where A is the training data (one row per input row), w
 * the weight vector to learn and b the target column. The solution is
 * computed as w = (A^T A)^-1 A^T b using several scans over the input.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable data = (BufferedDataTable) inData[0];
    final DataTableSpec spec = data.getDataTableSpec();
    final String[] includes = computeIncludes(spec);
    // one unknown per included column plus one more (the matrix below
    // includes a column fixed to one)
    final int nrUnknown = includes.length + 1;
    double[] means = new double[includes.length];

    // positions of the included columns and of the target column
    final int[] includedColumns = new int[includes.length];
    for (int c = 0; c < includes.length; c++) {
        includedColumns[c] = spec.findColumnIndex(includes[c]);
    }
    final int targetColumn = spec.findColumnIndex(m_target);

    // (A^T x A), with A being the training data including one column fixed
    // to one; accumulated row by row so the data never has to be held as a
    // double[][]
    double[][] gram = new double[nrUnknown][nrUnknown];
    double[] buffer = new double[nrUnknown];
    // remembers per row whether it contains missing values
    BitSet rowsWithMissing = new BitSet();
    m_nrRows = data.getRowCount();

    // progress is counted per visited row over 2 scans (3 with error calculation)
    int visitedRows = 0;
    final double totalProgress = (m_isCalcError ? 3 : 2) * m_nrRows;

    // ---- scan 1: accumulate means and (A^T x A) ----
    int rowCount = 0;
    boolean warnedAboutMissing = false;
    for (RowIterator rows = data.iterator(); rows.hasNext(); rowCount++) {
        final DataRow row = rows.next();
        visitedRows++;
        exec.setProgress(visitedRows / totalProgress, "Calculating matrix " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        exec.checkCanceled();
        final DataCell targetCell = row.getCell(targetColumn);
        // the buffer is only filled when the target itself is present
        final boolean rowHasMissing = targetCell.isMissing() || readIntoBuffer(row, buffer, includedColumns);
        rowsWithMissing.set(rowCount, rowHasMissing);
        if (rowHasMissing) {
            final String skipMessage = "Row \"" + row.getKey().getString() + "\" contains missing values, skipping it.";
            if (warnedAboutMissing) {
                LOGGER.debug(skipMessage);
            } else {
                // only the first skipped row is reported on warn level
                LOGGER.warn(skipMessage + " Suppress further warnings.");
                warnedAboutMissing = true;
            }
            m_nrRowsSkipped++;
        } else {
            updateMean(buffer, means);
            for (int r = 0; r < nrUnknown; r++) {
                for (int c = 0; c < nrUnknown; c++) {
                    gram[r][c] += buffer[r] * buffer[c];
                }
            }
        }
    }
    assert (m_nrRows == rowCount);
    normalizeMean(means);
    // no unique solution when there are less rows than unknown variables
    if (rowCount <= nrUnknown) {
        throw new Exception("Too few rows to perform regression (" + rowCount + " rows, but degree of freedom of " + nrUnknown + ")");
    }

    exec.setMessage("Calculating pseudo inverse...");
    final double[][] gramInverse = MathUtils.inverse(gram);
    checkForNaN(gramInverse);

    // ---- scan 2: (A^T x A)^-1 x A^T x b ----
    double[] solution = new double[nrUnknown];
    rowCount = 0;
    for (RowIterator rows = data.iterator(); rows.hasNext(); rowCount++) {
        final DataRow row = rows.next();
        exec.setMessage("Determining output " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        visitedRows++;
        exec.setProgress(visitedRows / totalProgress);
        exec.checkCanceled();
        if (rowsWithMissing.get(rowCount)) {
            // already reported during the first scan
            continue;
        }
        final boolean stillMissing = readIntoBuffer(row, buffer, includedColumns);
        assert !stillMissing;
        final DataCell targetCell = row.getCell(targetColumn);
        final double targetValue = ((DoubleValue) targetCell).getDoubleValue();
        for (int r = 0; r < nrUnknown; r++) {
            double projected = 0.0;
            for (int c = 0; c < nrUnknown; c++) {
                projected += gramInverse[r][c] * buffer[c];
            }
            solution[r] += projected * targetValue;
        }
    }

    // ---- optional scan 3: summed squared error of the fitted model ----
    if (m_isCalcError) {
        assert m_error == 0.0;
        rowCount = 0;
        for (RowIterator rows = data.iterator(); rows.hasNext(); rowCount++) {
            final DataRow row = rows.next();
            exec.setMessage("Calculating error " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
            visitedRows++;
            exec.setProgress(visitedRows / totalProgress);
            exec.checkCanceled();
            if (rowsWithMissing.get(rowCount)) {
                // already reported during the first scan
                continue;
            }
            final boolean stillMissing = readIntoBuffer(row, buffer, includedColumns);
            assert !stillMissing;
            final DataCell targetCell = row.getCell(targetColumn);
            final double targetValue = ((DoubleValue) targetCell).getDoubleValue();
            double predicted = 0.0;
            for (int k = 0; k < nrUnknown; k++) {
                predicted += solution[k] * buffer[k];
            }
            m_error += (targetValue - predicted) * (targetValue - predicted);
        }
    }

    // handle the optional PMML input
    final PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
    final DataTableSpec outSpec = getLearningSpec(spec);
    // first entry of the solution is the offset, the rest are the column weights
    final double offset = solution[0];
    final double[] weights = Arrays.copyOfRange(solution, 1, solution.length);
    m_params = new LinearRegressionContent(outSpec, offset, weights, means);
    // cache the table rows as otherwise the color information may be lost
    // (filtering out the "colored" column)
    m_rowContainer = new DefaultDataArray(data, m_firstRowPaint, m_rowCountPaint);
    m_actualUsedColumns = includes;
    return new PortObject[] { m_params.createPortObject(inPMMLPort, spec, outSpec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BitSet(java.util.BitSet) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) LinearRegressionContent(org.knime.base.node.mine.regression.linear.LinearRegressionContent) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Aggregations

DefaultDataArray (org.knime.base.node.util.DefaultDataArray)32 BufferedDataTable (org.knime.core.node.BufferedDataTable)16 File (java.io.File)14 ContainerTable (org.knime.core.data.container.ContainerTable)13 DataTableSpec (org.knime.core.data.DataTableSpec)12 FileInputStream (java.io.FileInputStream)10 IOException (java.io.IOException)9 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 DataRow (org.knime.core.data.DataRow)8 DataColumnSpec (org.knime.core.data.DataColumnSpec)7 HashSet (java.util.HashSet)6 DataArray (org.knime.base.node.util.DataArray)6 DataTable (org.knime.core.data.DataTable)6 Map (java.util.Map)5 RowKey (org.knime.core.data.RowKey)5 NodeSettingsRO (org.knime.core.node.NodeSettingsRO)5 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)5 PortObject (org.knime.core.node.port.PortObject)5 BufferedInputStream (java.io.BufferedInputStream)4 ArrayList (java.util.ArrayList)4