Search in sources :

Example 81 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class BitVectorGeneratorNodeModel method createBitVectorsFromStrings.

/**
 * Builds the output table by applying the column rearranger created for the given string column.
 * <p>
 * When the configured type is {@code STRING_TYPES.ID}, the input is first passed to {@code scanMaxPos} and the
 * result is handed to the ID cell factory; in that case the progress is split evenly between that scan and the
 * creation of the output table. Otherwise the whole progress is used for creating the output table.
 *
 * @param data the input table
 * @param stringColIndex index of the string column the rearranger is created for
 * @param exec used for messages, progress and to create the output table
 * @return an array holding the single output table
 * @throws CanceledExecutionException declared for cancellation during the scan or the table creation
 * @throws InvalidSettingsException declared for the helper calls made here
 */
private BufferedDataTable[] createBitVectorsFromStrings(final BufferedDataTable data, final int stringColIndex, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    final ColumnRearranger rearranger = createColumnRearranger(data.getDataTableSpec(), stringColIndex);
    final ExecutionMonitor outputProgress;
    if (m_type.equals(STRING_TYPES.ID)) {
        // two phases: the first half of the progress for the scan, the second half for the output
        final ExecutionMonitor scanProgress = exec.createSubProgress(0.5);
        outputProgress = exec.createSubProgress(0.5);
        exec.setMessage("preparing");
        final int highestPosition = scanMaxPos(data, scanProgress);
        ((IdString2BitVectorCellFactory) m_factory).setMaxPos(highestPosition);
    } else {
        // single phase: report directly on the node's execution context
        outputProgress = exec;
    }
    exec.setMessage("creating output");
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(data, rearranger, outputProgress) };
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) IdString2BitVectorCellFactory(org.knime.base.data.bitvector.IdString2BitVectorCellFactory)

Example 82 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class ColumnToGridNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * Writes the cells of the included columns of up to "column count" consecutive input rows side by side into one
 * output row; unused grid positions hold missing cells. If a group column is configured, the input is reduced to
 * the relevant columns and sorted by the group column first, a new output row is started whenever the group value
 * changes, and the group value is stored in the last cell of each output row.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String[] includes = m_configuration.getIncludes();
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean grouped = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (grouped) {
        exec.setMessage("Sorting input table");
        final BufferedDataTable unsorted = inData[0];
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        final ColumnRearranger sortFilterRearranger = new ColumnRearranger(unsorted.getDataTableSpec());
        // keep the included columns plus the group column (appended as the last entry)
        final String[] relevantCols = Arrays.copyOf(includes, includes.length + 1);
        relevantCols[includes.length] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        // the column filtering is given a zero-weight sub progress; sorting gets the first half
        final BufferedDataTable toBeSortedTable = exec.createColumnRearrangeTable(unsorted, sortFilterRearranger, exec.createSubProgress(0.0));
        final SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        inputTable = inData[0];
        mainExec = exec;
    }
    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);
    // resolve the included column names to indices in the (possibly filtered) input spec
    final int[] includeIndices = new int[includes.length];
    int slot = 0;
    for (final String include : includes) {
        includeIndices[slot++] = spec.findColumnIndex(include);
    }
    final int gridCount = m_configuration.getColCount();
    // one extra cell at the end carries the group value when grouping is enabled
    final int cellCount = includeIndices.length * gridCount + (grouped ? 1 : 0);
    final int groupColIndex = grouped ? spec.findColumnIndex(groupColumn) : -1;
    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long currentRow = 0;
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        if (groupColIndex >= 0) {
            // peek at the next row to learn the group value of the output row being assembled
            final DataRow firstOfGroup = it.next();
            curGroupValue = firstOfGroup.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(firstOfGroup);
        }
        for (int grid = 0; grid < gridCount && it.hasNext(); grid++) {
            final DataRow inRow = it.next();
            final DataCell groupValue = groupColIndex >= 0 ? inRow.getCell(groupColIndex) : null;
            if (!ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                // the row belongs to the next group: hand it back and finish this output row
                it.pushBack(inRow);
                break;
            }
            mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
            currentRow += 1;
            mainExec.checkCanceled();
            final int offset = grid * includeIndices.length;
            for (int i = 0; i < includeIndices.length; i++) {
                cells[offset + i] = inRow.getCell(includeIndices[i]);
            }
        }
        cont.addRowToTable(new DefaultRow(RowKey.createRowKey(currentOutRow), cells));
        currentOutRow++;
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 83 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class MissingValueHandling2Table method createMissingValueHandlingTable.

// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies missing value handling to the given table according to the per-column settings and returns the result
 * as a buffered data table, reporting progress along the way.
 * <p>
 * If any column uses the most-frequent, max, min or mean method and the input is not already a
 * {@link StatisticsTable}, a statistics table is created first; the progress is then split evenly between
 * creating the statistics and writing the rows.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer receives the warning message of the statistics computation, if there is one
 * @return the table written from the missing-value-handling iterator
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        // fall back to "no handling" for every column of the input
        final DataTableSpec spec = table.getDataTableSpec();
        effectiveSettings = new MissingValueHandling2ColSetting[spec.getNumColumns()];
        for (int col = 0; col < spec.getNumColumns(); col++) {
            final MissingValueHandling2ColSetting fallback = new MissingValueHandling2ColSetting(spec.getColumnSpec(col));
            effectiveSettings[col] = fallback;
            fallback.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
        }
    }
    // first pass: find out whether statistics are required and how many columns use "most frequent"
    boolean needStatistics = false;
    int mostFrequentColCount = 0;
    for (final MissingValueHandling2ColSetting setting : effectiveSettings) {
        switch(setting.getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentColCount++;
                needStatistics = true;
                break;
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                needStatistics = true;
                break;
            default:
                break;
        }
    }
    // second pass: collect the indices of the "most frequent" columns (no-op when there are none)
    final int[] mostFrequentCols = new int[mostFrequentColCount];
    int filled = 0;
    for (int col = 0; col < effectiveSettings.length; col++) {
        switch(effectiveSettings[col].getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentCols[filled++] = col;
                break;
            default:
                break;
        }
    }
    final DataTable source;
    final ExecutionMonitor rowProgress;
    if (needStatistics && !(table instanceof StatisticsTable)) {
        // first half of the progress: creating the statistics table
        final ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        final MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        source = statistics;
        // second half of the progress: iterating the rows
        rowProgress = exec.createSubProgress(0.5);
    } else {
        source = table;
        rowProgress = exec;
    }
    final MissingValueHandling2Table mvht = new MissingValueHandling2Table(source, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(mvht.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    int rowNumber = 0;
    try {
        final MissingValueHandling2TableIterator rows = new MissingValueHandling2TableIterator(mvht, rowProgress);
        while (rows.hasNext()) {
            final DataRow row = rows.next();
            rowNumber++;
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandling2TableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancellation in a runtime exception; rethrow its cause
        throw rcee.getCause();
    } finally {
        container.close();
    }
    return container.getTable();
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 84 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class FileAnalyzer method analyze.

/**
 * Tries to guess FileReader settings for the passed data file. It will use the settings in the settings object (if
 * any - but the file location is required), and will read in the first lines from the file. It will first detect
 * comment characters (if the first lines start with '#' or '%'), and then guess the delimiter (',', ';', or space)
 * depending on which cuts a line into (more than one) tokens.
 * <p>
 * The analysis runs in fixed stages - charset, comments, quotes, white spaces, delimiters/column count, row
 * headers, column types and names - and values the user has marked as set are copied instead of guessed. After
 * most stages the monitor is checked for an interrupt.
 *
 * @param userSettings containing the URL of the file to examine and settings that should be used and considered
 *            fixed.
 * @param exec used to check for cancellations and to report progress. Could be null. If a
 *            {@link FileReaderExecutionMonitor} is provided it is distinguished between user cancellations cutting
 *            the analysis short, and interrupts that return immediately and return null as result.
 * @return settings that supposedly provide more or less useful results - they may not contain any settings if
 *         guessing was just too hard. Returns <code>null</code> if the analysis was interrupted.
 * @throws IOException if there was an error reading from the URL
 * @throws IllegalArgumentException if the user settings contain no data file location
 */
public static FileReaderNodeSettings analyze(final FileReaderNodeSettings userSettings, final ExecutionMonitor exec) throws IOException {
    if (userSettings.getDataFileLocation() == null) {
        throw new IllegalArgumentException("Must specify a valid file location for the file analyzer");
    }
    ExecutionMonitor execMon = exec;
    if (execMon == null) {
        // we create a default exec monitor. Doesn't hurt, because that
        // will never be canceled.
        execMon = new FileReaderExecutionMonitor();
    }
    // create the new and empty settings
    FileReaderNodeSettings result = new FileReaderNodeSettings();
    execMon.setProgress(0.0);
    try {
        // take over the values that are copied unchanged from the user settings
        result.setDataFileLocationAndUpdateTableName(userSettings.getDataFileLocation());
        result.setDecimalSeparator(userSettings.getDecimalSeparator());
        result.setThousandsSeparator(userSettings.getThousandsSeparator());
        result.setDecimalSeparatorUserSet(userSettings.decimalSeparatorUserSet());
        result.setUniquifyRowIDs(userSettings.uniquifyRowIDs());
        result.setMaximumNumberOfRowsToRead(userSettings.getMaximumNumberOfRowsToRead());
        result.setSkipFirstLines(userSettings.getSkipFirstLines());
        result.allowLFinQuotes(userSettings.allowLFinQuotes());
        // preliminary value: the charset name is assigned again in the if/else below
        result.setCharsetName(userSettings.getCharsetName());
        result.setAnalyzeUsedAllRows(true);
        result.setMissValuePatternStrCols(userSettings.getMissValuePatternStrCols());
        result.setConnectTimeout(userSettings.getConnectTimeout());
        // if the user didn't provide the charset, identify it by looking at the first bytes of the stream
        if (!userSettings.isCharsetUserSet()) {
            result.setCharsetName(guessCharSet(userSettings));
            result.setCharsetUserSet(false);
        } else {
            result.setCharsetName(userSettings.getCharsetName());
            result.setCharsetUserSet(true);
        }
        // stage: comments
        ExecutionMonitor subExec = execMon.createSubProgress(COMMENT_SUB);
        if (!userSettings.isCommentUserSet()) {
            // only guess comment patterns if user didn't provide any
            addComments(result, subExec);
            result.setCommentUserSet(false);
        } else {
            // take over user settings.
            for (Comment comment : userSettings.getAllComments()) {
                result.addBlockCommentPattern(comment.getBegin(), comment.getEnd(), comment.returnAsSeparateToken(), comment.includeInToken());
            }
            result.setCommentUserSet(true);
        }
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // stage: quotes
        subExec = execMon.createSubProgress(QUOTES_SUB);
        if (!userSettings.isQuoteUserSet()) {
            // only guess quotes if user didn't specify any
            addQuotes(result, subExec);
            result.setQuoteUserSet(false);
        } else {
            // take over user settings.
            for (Quote quote : userSettings.getAllQuotes()) {
                if (quote.hasEscapeChar()) {
                    result.addQuotePattern(quote.getLeft(), quote.getRight(), quote.getEscape());
                } else {
                    result.addQuotePattern(quote.getLeft(), quote.getRight());
                }
            }
            result.setQuoteUserSet(true);
        }
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // if user provided whitespace characters, we need to add them.
        if (userSettings.isWhiteSpaceUserSet()) {
            for (String ws : userSettings.getAllWhiteSpaces()) {
                result.addWhiteSpaceCharacter(ws);
            }
            result.setWhiteSpaceUserSet(true);
        } else {
            // defaults: space and tab
            result.addWhiteSpaceCharacter(" ");
            result.addWhiteSpaceCharacter("\t");
            result.setWhiteSpaceUserSet(false);
        }
        // NOTE(review): no sub progress is created for the white space stage; subExec still refers to the
        // quote stage's monitor, which was already set to 1.0 above.
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // for now we just take over this flag:
        result.setSupportShortLines(userSettings.getSupportShortLines());
        // sets delimiter and column numbers (as many columns as it gets
        // with the delimiters - regardless of any row headers);
        // honors user settings
        subExec = execMon.createSubProgress(DELIMS_SUB);
        setDelimitersAndColNum(userSettings, result, subExec);
        assert result.getNumberOfColumns() > 0;
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // the number of column set as of now does not take into account the
        // skipped columns.
        subExec = execMon.createSubProgress(ROWHDR_SUB);
        if (userSettings.isFileHasRowHeadersUserSet()) {
            result.setFileHasRowHeaders(userSettings.getFileHasRowHeaders());
            result.setFileHasRowHeadersUserSet(true);
        } else {
            boolean hasRowHeaders;
            if (result.getNumberOfColumns() > 1) {
                // if we have at least 2 cols, one of them could be headers
                hasRowHeaders = checkRowHeader(result, subExec);
            } else {
                hasRowHeaders = false;
            }
            result.setFileHasRowHeaders(hasRowHeaders);
            result.setFileHasRowHeadersUserSet(false);
        }
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // we must correct the column number we've guessed: the row header column is not counted
        if (result.getFileHasRowHeaders()) {
            result.setNumberOfColumns(result.getNumberOfColumns() - 1);
        }
        // guesses (or copies) column types and names.
        subExec = execMon.createSubProgress(TYPES_SUB + COLHDR_SUB);
        Vector<ColProperty> columnProps = createColumnProperties(userSettings, result, subExec);
        result.setColumnProperties(columnProps);
        subExec.setProgress(1.0);
        // set a default row header prefix
        if (userSettings.getRowHeaderPrefix() != null) {
            result.setRowHeaderPrefix(userSettings.getRowHeaderPrefix());
        } else {
            result.setRowHeaderPrefix("Row");
        }
        // empty lines are ignored unless the user decided otherwise
        if (userSettings.isIgnoreEmptyLinesUserSet()) {
            result.setIgnoreEmptyLines(userSettings.getIgnoreEmtpyLines());
            result.setIgnoreEmptyLinesUserSet(true);
        } else {
            result.setIgnoreEmptyLines(true);
            result.setIgnoreEmptyLinesUserSet(false);
        }
        execMon.setProgress(1.0);
    } catch (InterruptedExecutionException iee) {
        // the analysis was interrupted: discard the partial result and signal this with null
        return null;
    }
    return result;
}
Also used : Quote(org.knime.core.util.tokenizer.Quote) Comment(org.knime.core.util.tokenizer.Comment) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 85 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class CovarianceMatrixCalculator method calculateCovarianceMatrix.

/**
 * Computes the covariance matrix and puts the result in the given (optional) data container and additionally
 * returns an in-memory representation. The data container is expected to have the data table spec returned at
 * {@link #getResultSpec()}. The implementation traverses the data once.
 * <p>
 * Rows are skipped per column pair: a pair only receives the values of a row if neither of its two cells is
 * missing. If a result container is given, 80% of the progress is used for the computation and 20% for writing
 * the matrix; otherwise the computation uses the whole progress.
 *
 * @param exec the execution container
 * @param inTable input data
 * @param tableSize the data table size, used only to compute the progress fraction
 * @param resultDataContainer optional result data container
 * @return the covariance matrix
 * @throws CanceledExecutionException if the user canceled the execution
 * @throws IllegalArgumentException if there were not enough valid values to compute the covariance of a column
 *             pair (and, presumably, from the {@code checkArgument} calls if a table spec does not match)
 */
public RealMatrix calculateCovarianceMatrix(final ExecutionMonitor exec, final DataTable inTable, final long tableSize, final DataContainer resultDataContainer) throws CanceledExecutionException {
    checkArgument(m_targetSpec.equalStructure(inTable.getDataTableSpec()), "Target tables spec is different from the one given in the constructor!");
    if (resultDataContainer != null) {
        checkArgument(m_resultSpec.equalStructure(resultDataContainer.getTableSpec()), "Result tables spec is invalid!");
    }
    final ExecutionMonitor computingProgress = exec.createSubProgress(resultDataContainer != null ? 0.8 : 1);
    List<StorelessCovariance> covariancesList = new ArrayList<>();
    // create covariance pairs: one bivariate accumulator per (i, j) with j >= i
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            covariancesList.add(new StorelessCovariance(2));
        }
    }
    // compute rest of co-variance matrix
    // the counter is a long like tableSize: an int would overflow for tables with more than
    // Integer.MAX_VALUE rows and turn the reported progress negative
    long rowCount = 0;
    double[] buffer = new double[2];
    for (DataRow dataRow : inTable) {
        for (int i = 0; i < m_indexes.length; i++) {
            final int outerIndex = m_indexes[i];
            final DataCell outerCell = dataRow.getCell(outerIndex);
            if (outerCell.isMissing()) {
                // skip missing values
                continue;
            }
            final double outerDouble = ((DoubleValue) outerCell).getDoubleValue();
            for (int j = i; j < m_indexes.length; j++) {
                final int innerIndex = m_indexes[j];
                final DataCell innerCell = dataRow.getCell(innerIndex);
                if (innerCell.isMissing()) {
                    // skip missing values
                    continue;
                }
                final double innerDouble = ((DoubleValue) innerCell).getDoubleValue();
                buffer[0] = outerDouble;
                buffer[1] = innerDouble;
                int covListIndex = index(m_indexes.length, i, j);
                covariancesList.get(covListIndex).increment(buffer);
            }
        }
        computingProgress.setProgress(rowCount++ / (double) tableSize, "Calculate covariance values, processing row: '" + dataRow.getKey() + "'");
        computingProgress.checkCanceled();
    }
    // Copy the storeless covariances to a real matrix
    RealMatrix covMatrix = new Array2DRowRealMatrix(m_indexes.length, m_indexes.length);
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            int covListIndex = index(m_indexes.length, i, j);
            double covValue;
            try {
                // for i == j both buffer slots were fed the same column, so the (1, 1) entry is read as the
                // diagonal value; for i != j the off-diagonal (0, 1) entry is the covariance of the pair
                covValue = i == j ? covariancesList.get(covListIndex).getCovariance(1, 1) : covariancesList.get(covListIndex).getCovariance(0, 1);
            } catch (NumberIsTooSmallException e) {
                throw new IllegalArgumentException(String.format("There were not enough valid values to " + "compute covariance between columns: '%s' and '%s'.", inTable.getDataTableSpec().getColumnSpec(m_indexes[i]).getName(), inTable.getDataTableSpec().getColumnSpec(m_indexes[j]).getName()), e);
            }
            // the matrix is symmetric: mirror the value across the diagonal
            covMatrix.setEntry(i, j, covValue);
            covMatrix.setEntry(j, i, covValue);
        }
    }
    if (resultDataContainer != null) {
        exec.setProgress("Writing matrix to data table");
        final ExecutionMonitor writingProgress = exec.createSubProgress(0.2);
        for (int i = 0; i < covMatrix.getRowDimension(); i++) {
            // the i-th result column name doubles as the key of the i-th matrix row
            final String columnName = resultDataContainer.getTableSpec().getColumnSpec(i).getName();
            resultDataContainer.addRowToTable(new DefaultRow(RowKey.toRowKeys(columnName)[0], covMatrix.getRow(i)));
            exec.checkCanceled();
            writingProgress.setProgress((double) i / covMatrix.getRowDimension(), "Writing row: " + columnName);
        }
    }
    return covMatrix;
}
Also used : ArrayList(java.util.ArrayList) NumberIsTooSmallException(org.apache.commons.math3.exception.NumberIsTooSmallException) StorelessCovariance(org.apache.commons.math3.stat.correlation.StorelessCovariance) DataRow(org.knime.core.data.DataRow) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

ExecutionMonitor (org.knime.core.node.ExecutionMonitor)160 BufferedDataTable (org.knime.core.node.BufferedDataTable)50 DataTableSpec (org.knime.core.data.DataTableSpec)43 DataRow (org.knime.core.data.DataRow)39 DataCell (org.knime.core.data.DataCell)35 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)35 Test (org.junit.Test)33 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)33 File (java.io.File)29 IOException (java.io.IOException)25 PortObject (org.knime.core.node.port.PortObject)25 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 RowKey (org.knime.core.data.RowKey)20 ArrayList (java.util.ArrayList)19 WorkflowLoadResult (org.knime.core.node.workflow.WorkflowPersistor.WorkflowLoadResult)17 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)16 ExecutionException (java.util.concurrent.ExecutionException)14 ExecutionContext (org.knime.core.node.ExecutionContext)13 FileOutputStream (java.io.FileOutputStream)12