Search in sources :

Example 21 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class SmoteNodeModel method execute.

/**
 * Hands the first input table to a {@link Smoter}, asks it for additional rows per class
 * value and returns the table the smoter produced.
 * <p>
 * With {@code METHOD_ALL} every class value is asked for {@code (int) (count * m_rate)} new
 * rows. With {@code METHOD_MAJORITY} every class value is asked for as many rows as it is
 * short of the majority class. For any other method value no rows are requested.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    final BufferedDataTable input = inData[0];
    // use the configured seed when there is one, otherwise let Random pick its own
    final Random random = (m_seed != null) ? new Random(m_seed) : new Random();
    final Smoter smoter = new Smoter(input, m_class, exec, random);
    if (m_method.equals(METHOD_ALL)) {
        // first pass: total number of rows to add, used to weight the sub-progress below
        int totalToAdd = 0;
        Iterator<DataCell> classValues = smoter.getClassValues();
        while (classValues.hasNext()) {
            totalToAdd += (int) (smoter.getCount(classValues.next()) * m_rate);
        }
        // second pass: request the rows class by class
        classValues = smoter.getClassValues();
        while (classValues.hasNext()) {
            final DataCell classValue = classValues.next();
            final int toAdd = (int) (smoter.getCount(classValue) * m_rate);
            exec.setMessage("Smoting '" + classValue.toString() + "'");
            final double share = toAdd / (double) totalToAdd;
            final ExecutionMonitor classProgress = exec.createSubProgress(share);
            smoter.smote(classValue, toAdd, m_kNN, classProgress);
        }
    } else if (m_method.equals(METHOD_MAJORITY)) {
        final DataCell majorityClass = smoter.getMajorityClass();
        final int majoritySize = smoter.getCount(majorityClass);
        // first pass: sum of the gaps between each class and the majority class
        int totalToAdd = 0;
        for (Iterator<DataCell> classValues = smoter.getClassValues(); classValues.hasNext(); ) {
            totalToAdd += (majoritySize - smoter.getCount(classValues.next()));
        }
        // second pass: fill every class up to the majority size
        for (Iterator<DataCell> classValues = smoter.getClassValues(); classValues.hasNext(); ) {
            final DataCell classValue = classValues.next();
            final int toAdd = majoritySize - smoter.getCount(classValue);
            exec.setMessage("Smoting '" + classValue.toString() + "'");
            final double share = toAdd / (double) totalToAdd;
            final ExecutionMonitor classProgress = exec.createSubProgress(share);
            smoter.smote(classValue, toAdd, m_kNN, classProgress);
        }
    }
    smoter.close();
    final DataTable smoted = smoter.getSmotedTable();
    return new BufferedDataTable[] { exec.createBufferedDataTable(smoted, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) Random(java.util.Random) BufferedDataTable(org.knime.core.node.BufferedDataTable) Iterator(java.util.Iterator) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 22 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class SotaNodeModel method loadInternals.

/**
 * Restores the node's internal state from {@code internDir}: the settings and the tree are
 * read from {@code TREE_FILE}, the two data arrays from {@code IN_DATA_FILE} and
 * {@code ORIG_DATA_FILE}.
 * <p>
 * The stream on {@code TREE_FILE} is opened in a try-with-resources statement so that it is
 * closed on every exit path, including failures while reading the zipped tables.
 *
 * {@inheritDoc}
 *
 * @throws IOException if a file cannot be read, or (with the {@link InvalidSettingsException}
 *             as cause) if the model content lacks an expected setting or tree cell
 */
@Override
protected void loadInternals(final File internDir, final ExecutionMonitor exec) throws IOException {
    File file = new File(internDir, TREE_FILE);
    try (FileInputStream fis = new FileInputStream(file)) {
        ModelContentRO modelContent = ModelContent.loadFromXML(fis);
        // Load settings
        int inDataSize = 0;
        int origDataSize = 0;
        try {
            m_sota.setUseHierarchicalFuzzyData(modelContent.getBoolean(SotaPortObject.CFG_KEY_USE_FUZZY_HIERARCHY));
            m_sota.setMaxHierarchicalLevel(modelContent.getInt(SotaPortObject.CFG_KEY_MAX_FUZZY_LEVEL));
            inDataSize = modelContent.getInt(SotaPortObject.CFG_KEY_INDATA_SIZE);
            origDataSize = modelContent.getInt(SotaPortObject.CFG_KEY_ORIGDATA_SIZE);
        } catch (InvalidSettingsException e1) {
            // surface as IOException (the only checked type allowed here) but keep the cause
            throw new IOException("Could not load settings," + "due to invalid settings in model content !", e1);
        }
        // Load in data
        DataTable table = DataContainer.readFromZip(new File(internDir, IN_DATA_FILE));
        final DataArray inData = new DefaultDataArray(table, 1, inDataSize);
        m_sota.setInData(inData);
        // Load orig data
        table = DataContainer.readFromZip(new File(internDir, ORIG_DATA_FILE));
        final DataArray origData = new DefaultDataArray(table, 1, origDataSize);
        m_sota.setOriginalData(origData);
        // Load tree
        SotaTreeCell root = new SotaTreeCell(0, false);
        try {
            root.loadFrom(modelContent, 0, null, false);
        } catch (InvalidSettingsException e) {
            throw new IOException("Could not load tree cells," + "due to invalid settings in model content !", e);
        }
        m_sota.setRoot(root);
    }
}
Also used : SotaTreeCell(org.knime.base.node.mine.sota.logic.SotaTreeCell) DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) ModelContentRO(org.knime.core.node.ModelContentRO) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DataArray(org.knime.base.node.util.DataArray)

Example 23 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class PCAReverseNodeModel method execute.

/**
 * Appends the columns produced by {@code convertInputRow} to the data input table, using the
 * sorted eigenvectors and the center of the PCA model port object, and optionally removes
 * the configured input columns.
 * <p>
 * If failing on missing values is enabled, the whole data table is scanned first and an
 * {@link IllegalArgumentException} is thrown for the first missing cell found in one of the
 * input columns.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PCAModelPortObject model = (PCAModelPortObject) inData[MODEL_INPORT];
    final Matrix eigenvectors = EigenValue.getSortedEigenVectors(model.getEigenVectors(), model.getEigenvalues(), m_inputColumnIndices.length);
    if (m_failOnMissingValues.getBooleanValue()) {
        final DataTable dataTable = (DataTable) inData[DATA_INPORT];
        for (final DataRow row : dataTable) {
            for (final int columnIndex : m_inputColumnIndices) {
                if (row.getCell(columnIndex).isMissing()) {
                    throw new IllegalArgumentException("data table contains missing values");
                }
            }
        }
    }
    final PCAModelPortObjectSpec modelSpec = (PCAModelPortObjectSpec) model.getSpec();
    final String[] origColumnNames = modelSpec.getColumnNames();
    final DataTableSpec inSpec = (DataTableSpec) inData[DATA_INPORT].getSpec();
    final DataColumnSpec[] specs = createAddTableSpec(inSpec, origColumnNames);
    final CellFactory fac = new CellFactory() {

        /** Computes the appended cells for one input row. */
        @Override
        public DataCell[] getCells(final DataRow row) {
            return convertInputRow(eigenvectors, row, model.getCenter(), m_inputColumnIndices, origColumnNames.length);
        }

        /** Specs of the appended columns, created once up front. */
        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return specs;
        }

        /** Reports the fraction of processed rows. */
        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor texec) {
            final double fraction = curRowNr / (double) rowCount;
            texec.setProgress(fraction);
        }
    };
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.append(fac);
    if (m_removePCACols.getBooleanValue()) {
        rearranger.remove(m_inputColumnIndices);
    }
    final BufferedDataTable inTable = (BufferedDataTable) inData[DATA_INPORT];
    final BufferedDataTable result = exec.createColumnRearrangeTable(inTable, rearranger, exec);
    return new PortObject[] { result };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) RowKey(org.knime.core.data.RowKey) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) Matrix(Jama.Matrix) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CellFactory(org.knime.core.data.container.CellFactory) PortObject(org.knime.core.node.port.PortObject)

Example 24 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class PolyRegLineScatterPlotter method modelChanged.

/**
 * Must be called whenever the model has changed. Re-reads the view data from the model and,
 * if a row container is available, re-derives the x/y column specs, axis coordinates and the
 * filtered table spec from it before resetting and repainting the plotter.
 */
public void modelChanged() {
    m_viewData = m_model.getViewData();
    final DataTable data = m_viewData.getRowContainer();
    if (data == null) {
        // nothing to show yet
        return;
    }
    final DataTableSpec origSpec = data.getDataTableSpec();
    final MyProperties props = (MyProperties) getProperties();
    // NOTE(review): sized for "all columns but one" - assumes exactly one column matches
    // the target column name and at least one other column exists; confirm with the model.
    final DataColumnSpec[] inputSpecs = new DataColumnSpec[origSpec.getNumColumns() - 1];
    int next = 0;
    for (DataColumnSpec columnSpec : origSpec) {
        if (m_viewData.targetColumn.equals(columnSpec.getName())) {
            // the target column goes on the y axis
            m_yColumnSpec = columnSpec;
            getYAxis().setCoordinate(Coordinate.createCoordinate(columnSpec));
        } else {
            inputSpecs[next] = columnSpec;
            next++;
        }
    }
    // the first non-target column is the initial x column
    final DataColumnSpec firstInput = inputSpecs[0];
    m_xColumnSpec = firstInput;
    getXAxis().setCoordinate(Coordinate.createCoordinate(firstInput));
    m_filteredSpec = new DataTableSpec(inputSpecs);
    try {
        props.m_xColumn.update(m_filteredSpec, firstInput.getName());
    } catch (NotConfigurableException ex) {
        // cannot happen
        assert false : ex.getMessage();
    }
    reset();
    updatePaintModel();
}
Also used : NotConfigurableException(org.knime.core.node.NotConfigurableException) DataTable(org.knime.core.data.DataTable) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec)

Example 25 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class HierarchicalClusterNodeModel method execute.

/**
 * Clusters the rows of the first input table by repeatedly merging the two closest clusters
 * until a single cluster is left.
 * <p>
 * Starting from the clusters returned by {@code initClusters}, every pass of the main loop
 * computes the distance between all pairs of remaining clusters (single, average or complete
 * linkage, chosen by {@code m_linkageType}), merges the closest pair into a new
 * {@code ClusterNode} and appends one row (cluster count, merge distance) to the fusion
 * table. When the number of remaining clusters equals {@code m_numClustersForOutput}, the
 * result table is created from the clusters at that point.
 * <p>
 * Side effects: sets {@code m_distFunction}, {@code m_rootNode}, {@code m_dataArray} and
 * {@code m_fusionTable}.
 *
 * {@inheritDoc}
 *
 * @throws RuntimeException if the input table has more than 65,500 rows
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> inlcludedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[inlcludedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(inlcludedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    // hard upper bound on the number of rows; the reason for this value is not visible here
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    // stays null until the requested number of clusters is reached inside the loop
    DataTable outputData = null;
    // Manhattan if configured by name, Euclidean for every other setting
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    // optional distance cache, handed to the linkage calculations; null disables caching
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        // pre-fill with NaN (presumably the "not yet computed" marker - confirm in the
        // linkage helpers)
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    // denominator for the iteration-based progress below
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    // each pass merges exactly two clusters into one, so the list shrinks by one per pass
    while (clusters.size() > 1) {
        // checks if number clusters to generate output table is reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        // NOTE(review): progress is set twice in a row on the same monitor; the second call
        // below immediately replaces the value and message set here
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        // NOTE(review): subexec is not passed to the pair loop; it is only set to 1.0 after
        // the loop has finished
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        // visit every unordered pair (i < j) of the remaining clusters once
        for (int i = 0; i < clusters.size(); i++) {
            // cancellation is checked once per outer iteration
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // single and average linkage are matched by name; any other setting
                // falls through to complete linkage
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                // strict comparison: on ties the pair found first is kept
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance per each fusion step
        fusionCont.addRowToTable(new DefaultRow(// row key
        Integer.toString(clusters.size()), // x-axis scatter plotter
        new IntCell(clusters.size()), // y-axis scatter plotter
        new DoubleCell(newNode.getDist())));
    // // print number clusters and their data points
    // LOGGER.debug("Iteration " + iterationStep + ":");
    // LOGGER.debug(" Number Clusters: " + clusters.size());
    // printClustersDataRows(clusters);
    }
    // the last remaining cluster (if any) becomes the root node
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if there was no input data create an empty output data
    // (also reached whenever the requested cluster count was never matched inside the loop,
    // which only runs while more than one cluster remains)
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    // keep the input rows and the fusion table (one row per merge step) in member fields
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) BufferedDataTable(org.knime.core.node.BufferedDataTable) HalfFloatMatrix(org.knime.base.util.HalfFloatMatrix) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DataTable (org.knime.core.data.DataTable)64 BufferedDataTable (org.knime.core.node.BufferedDataTable)33 DataRow (org.knime.core.data.DataRow)20 DataTableSpec (org.knime.core.data.DataTableSpec)19 RowKey (org.knime.core.data.RowKey)14 DataCell (org.knime.core.data.DataCell)12 DataColumnSpec (org.knime.core.data.DataColumnSpec)12 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)11 DefaultTable (org.knime.core.data.def.DefaultTable)10 DefaultRow (org.knime.core.data.def.DefaultRow)8 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)7 DefaultDataArray (org.knime.base.node.util.DefaultDataArray)6 DataType (org.knime.core.data.DataType)6 PortObject (org.knime.core.node.port.PortObject)6 RowIterator (org.knime.core.data.RowIterator)5 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)5 ContainerTable (org.knime.core.data.container.ContainerTable)5 DataContainer (org.knime.core.data.container.DataContainer)5 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)5 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5