Search in sources :

Example 1 with DataTableSorter

use of org.knime.core.data.sort.DataTableSorter in project knime-core by knime.

In the class TreeDataCreator, method readData.

/**
 * Reads the data from <b>learnData</b> into memory.
 * Each column is represented by a TreeColumnData object corresponding to its type
 * and whether it is an attribute or target column.
 * <p>
 * The table is first sorted by the target column (to enable equal size sampling) and then read row by row.
 * A row is skipped if its target cell is missing, or if it has a missing value in an attribute column whose
 * creator does not accept missing values. If any rows were skipped, a warning message is recorded.
 *
 * @param learnData the table to read; must contain at least 2 rows
 * @param configuration the configuration passed to the attribute column creators when the column data is built
 * @param exec used for progress reporting and cancelation checks; one half is assigned to sorting, the other
 *            half to reading
 * @return the TreeData object that holds all data in memory
 * @throws CanceledExecutionException if the execution is canceled while sorting or reading
 * @throws IllegalArgumentException if the table contains fewer than 2 rows
 */
public TreeData readData(final BufferedDataTable learnData, final TreeEnsembleLearnerConfiguration configuration, final ExecutionMonitor exec) throws CanceledExecutionException {
    final long nrRows = learnData.size();
    if (nrRows <= 1) {
        throw new IllegalArgumentException("The input table must contain at least 2 rows!");
    }
    final int nrLearnCols = m_attrColCreators.length;
    final boolean[] supportMissings = new boolean[nrLearnCols];
    for (int i = 0; i < nrLearnCols; i++) {
        supportMissings[i] = m_attrColCreators[i].acceptsMissing();
    }
    // NOTE(review): hilite count and target column are read from the field m_configuration, while the
    // column data below is created from the 'configuration' parameter -- confirm both are meant to be used.
    final int nrHilitePatterns = m_configuration.getNrHilitePatterns();
    // sort learnData according to the target column to enable equal size sampling
    final int targetColIdx = learnData.getDataTableSpec().findColumnIndex(m_configuration.getTargetColumn());
    final Comparator<DataCell> targetComp = learnData.getDataTableSpec().getColumnSpec(targetColIdx).getType().getComparator();
    final Comparator<DataRow> rowComp =
        (row0, row1) -> targetComp.compare(row0.getCell(targetColIdx), row1.getCell(targetColIdx));
    final DataTableSorter sorter = new DataTableSorter(learnData, nrRows, rowComp);
    final ExecutionMonitor sortExec = exec.createSubProgress(0.5);
    final DataTable sortedTable = sorter.sort(sortExec);
    final ExecutionMonitor readExec = exec.createSubProgress(0.5);
    // number of rows accepted so far
    int index = 0;
    // number of rows read so far, including rejected ones; progress is based on this counter so that it
    // keeps advancing (and reaches the end) even when rows are rejected
    long rowsRead = 0;
    int rejectedMissings = 0;
    for (DataRow r : sortedTable) {
        readExec.setProgress(rowsRead / (double) nrRows, "Row " + rowsRead + " of " + nrRows + " (\"" + r.getKey() + "\")");
        readExec.checkCanceled();
        rowsRead++;
        boolean shouldReject = false;
        for (int i = 0; i < nrLearnCols; i++) {
            if (r.getCell(i).isMissing() && !supportMissings[i]) {
                shouldReject = true;
                break;
            }
        }
        // the target cell is read from the position right after the attribute columns
        // (presumably the caller arranges the table that way -- TODO confirm)
        final DataCell targetCell = r.getCell(nrLearnCols);
        if (targetCell.isMissing()) {
            shouldReject = true;
        }
        if (shouldReject) {
            rejectedMissings += 1;
            continue;
        }
        // only the first nrHilitePatterns accepted rows are kept for hiliting
        if (index < nrHilitePatterns) {
            m_dataRowsForHiliteContainer.addRowToTable(r);
        }
        final RowKey key = r.getKey();
        for (int i = 0; i < nrLearnCols; i++) {
            m_attrColCreators[i].add(key, r.getCell(i));
        }
        m_targetColCreator.add(key, targetCell);
        index++;
    }
    if (nrHilitePatterns > 0 && index > nrHilitePatterns) {
        m_viewMessage = "Hilite (& color graphs) are based on a subset of " + "the data (" + nrHilitePatterns + "/" + index + ")";
    }
    if (rejectedMissings > 0) {
        m_warningMessage = rejectedMissings + "/" + nrRows + " row(s) were ignored because they contain missing values.";
    }
    // fails if every single row was rejected
    CheckUtils.checkArgument(rejectedMissings < nrRows, "No rows left after removing missing values (table has %d row(s))", nrRows);
    // a single column creator may contribute several attributes, so count them first
    int nrLearnAttributes = 0;
    for (TreeAttributeColumnDataCreator creator : m_attrColCreators) {
        nrLearnAttributes += creator.getNrAttributes();
    }
    final TreeAttributeColumnData[] columns = new TreeAttributeColumnData[nrLearnAttributes];
    int learnAttributeIndex = 0;
    for (TreeAttributeColumnDataCreator creator : m_attrColCreators) {
        for (int a = 0; a < creator.getNrAttributes(); a++) {
            final TreeAttributeColumnData columnData = creator.createColumnData(a, configuration);
            // attribute indices are assigned consecutively across all creators
            columnData.getMetaData().setAttributeIndex(learnAttributeIndex);
            columns[learnAttributeIndex++] = columnData;
        }
    }
    final TreeTargetColumnData targetCol = m_targetColCreator.createColumnData();
    return new TreeData(columns, targetCol, m_treeType);
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) DataTableSorter(org.knime.core.data.sort.DataTableSorter) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 2 with DataTableSorter

use of org.knime.core.data.sort.DataTableSorter in project knime-core by knime.

In the class TableSorterWorker, method doInBackground.

/**
 * {@inheritDoc}
 * <p>
 * Sorts the input table by the configured sort columns, forwarding the sorter's progress via
 * {@code publish}, and returns the sorted table.
 */
@Override
protected DataTable doInBackground() throws Exception {
    // the row count is passed to the table sorter for progress; -1 means unknown, no progress
    final long totalRows = m_inputTable instanceof BufferedDataTable
        ? ((BufferedDataTable) m_inputTable).size()
        : m_inputTable instanceof ContainerTable
            ? ((ContainerTable) m_inputTable).size()
            : -1;
    publish(new NodeProgress(0.0, "Starting table sort..."));

    // translate the sort column indices into names; a negative index stands for the row id
    final Collection<String> columnNames = new ArrayList<String>(2);
    final DataTableSpec tableSpec = m_inputTable.getDataTableSpec();
    for (int colIdx : m_sortOrder.getSortColumnIndices()) {
        columnNames.add(colIdx < 0
            ? DataTableSorter.ROWKEY_SORT_SPEC.getName()
            : tableSpec.getColumnSpec(colIdx).getName());
    }

    final long startMillis = System.currentTimeMillis();
    LOGGER.debug("Starting interactive table sorting on column(s) " + columnNames);
    // it DOES NOT respect blobs -- they will be copied (expensive)
    final DataTableSorter tableSorter =
        new DataTableSorter(m_inputTable, totalRows, columnNames, m_sortOrder.getSortColumnOrder(), false);

    // forwards every progress event of the monitor to publish(...)
    final NodeProgressListener forwarder = new NodeProgressListener() {

        @Override
        public void progressChanged(final NodeProgressEvent event) {
            publish(event.getNodeProgress());
        }
    };
    m_nodeProgressMonitor = new DefaultNodeProgressMonitor();
    final ExecutionMonitor monitor = new ExecutionMonitor(m_nodeProgressMonitor);
    m_nodeProgressMonitor.addProgressListener(forwarder);
    try {
        final DataTable sorted = tableSorter.sort(monitor);
        LOGGER.debug("Interactive table sorting finished ("
            + StringFormat.formatElapsedTime(System.currentTimeMillis() - startMillis) + ")");
        return sorted;
    } finally {
        // always detach the listener, also when sorting fails or is canceled
        m_nodeProgressMonitor.removeProgressListener(forwarder);
    }
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) NodeProgressListener(org.knime.core.node.workflow.NodeProgressListener) NodeProgress(org.knime.core.node.workflow.NodeProgress) ArrayList(java.util.ArrayList) ContainerTable(org.knime.core.data.container.ContainerTable) NodeProgressEvent(org.knime.core.node.workflow.NodeProgressEvent) DataTableSorter(org.knime.core.data.sort.DataTableSorter) DefaultNodeProgressMonitor(org.knime.core.node.DefaultNodeProgressMonitor) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Aggregations

DataTable (org.knime.core.data.DataTable)2 DataTableSorter (org.knime.core.data.sort.DataTableSorter)2 BufferedDataTable (org.knime.core.node.BufferedDataTable)2 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)2 ArrayList (java.util.ArrayList)1 DataCell (org.knime.core.data.DataCell)1 DataRow (org.knime.core.data.DataRow)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 RowKey (org.knime.core.data.RowKey)1 ContainerTable (org.knime.core.data.container.ContainerTable)1 DefaultNodeProgressMonitor (org.knime.core.node.DefaultNodeProgressMonitor)1 NodeProgress (org.knime.core.node.workflow.NodeProgress)1 NodeProgressEvent (org.knime.core.node.workflow.NodeProgressEvent)1 NodeProgressListener (org.knime.core.node.workflow.NodeProgressListener)1