Search in sources :

Example 61 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class CorrelationComputeNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Reduces the first input table to the columns selected by the column filter, lets a
 * {@link CorrelationComputer} calculate the statistics and the correlation values on it, and returns the
 * resulting correlation matrix both as a table and as a {@link PMCCPortObjectAndSpec}. Warnings reported by
 * the computer (missing values, constant column pairs) are combined into one node warning message.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable in = (BufferedDataTable) inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();

    // Project the input onto the included columns; this step reports no progress of its own.
    final ColumnRearranger includeOnlyRearranger = new ColumnRearranger(inSpec);
    final String[] includeNames = m_columnFilterModel.applyTo(inSpec).getIncludes();
    includeOnlyRearranger.keepOnly(includeNames);
    final BufferedDataTable filteredTable =
        exec.createColumnRearrangeTable(in, includeOnlyRearranger, exec.createSilentSubExecutionContext(0.0));
    final DataTableSpec filteredTableSpec = filteredTable.getDataTableSpec();

    // Progress shares of the three phases; the output assembly gets whatever the two passes leave over.
    final double statisticsShare = 0.48;
    final double correlationShare = 0.48;
    final double assemblyShare = 1.0 - statisticsShare - correlationShare;

    final CorrelationComputer calculator =
        new CorrelationComputer(filteredTableSpec, m_maxPossValueCountModel.getIntValue());

    // Phase 1: table statistics.
    exec.setMessage("Calculating table statistics");
    final ExecutionContext statisticsExec = exec.createSubExecutionContext(statisticsShare);
    calculator.calculateStatistics(filteredTable, statisticsExec);
    statisticsExec.setProgress(1.0);

    // Phase 2: correlation values.
    exec.setMessage("Calculating correlation values");
    final ExecutionMonitor correlationExec = exec.createSubExecutionContext(correlationShare);
    final HalfDoubleMatrix correlationMatrix = calculator.calculateOutput(filteredTable, correlationExec);
    correlationExec.setProgress(1.0);

    // Phase 3: wrap the matrix into the model object and materialize it as a table.
    exec.setMessage("Assembling output");
    final ExecutionContext assemblyExec = exec.createSubExecutionContext(assemblyShare);
    final PMCCPortObjectAndSpec pmccModel = new PMCCPortObjectAndSpec(includeNames, correlationMatrix);
    final BufferedDataTable out = pmccModel.createCorrelationMatrix(assemblyExec);
    m_correlationTable = out;

    // Short form (argument 4) goes into the node warning, long form (argument 1000) into the debug log.
    String warningText = calculator.getNumericMissingValueWarning(4);
    if (warningText != null) {
        LOGGER.debug(calculator.getNumericMissingValueWarning(1000));
    }
    final String constantColString = calculator.getNumericConstantColumnPairs(4);
    if (constantColString != null) {
        LOGGER.debug(calculator.getNumericConstantColumnPairs(1000));
        warningText = (warningText == null) ? constantColString : warningText + "\n" + constantColString;
    }
    if (warningText != null) {
        setWarningMessage(warningText);
    }
    return new PortObject[] { out, pmccModel };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMCCPortObjectAndSpec(org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) ExecutionContext(org.knime.core.node.ExecutionContext) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PortObject(org.knime.core.node.port.PortObject)

Example 62 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class ColumnToGrid2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Lays the included columns of consecutive input rows out side by side: each output row holds the values of up
 * to {@code m_configuration.getColCount()} input rows. If a group column is configured, the input is first
 * reduced to the relevant columns and sorted by that column, an output row never mixes rows of different
 * groups, and the group value is stored in the last cell of each output row. Unfilled cells are missing cells.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean grouped = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (grouped) {
        exec.setMessage("Sorting input table");
        final BufferedDataTable unsorted = inData[0];
        // first half of the progress is spent on sorting, second half on building the grid
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        final ColumnRearranger sortFilterRearranger = new ColumnRearranger(unsorted.getDataTableSpec());
        // keep the included columns plus the group column (appended at the end)
        final String[] relevantCols = Arrays.copyOf(m_included, m_included.length + 1);
        relevantCols[m_included.length] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        final BufferedDataTable toBeSortedTable =
            exec.createColumnRearrangeTable(unsorted, sortFilterRearranger, exec.createSubProgress(0.0));
        final SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn),
            new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        inputTable = inData[0];
        mainExec = exec;
    }

    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);

    // positions of the included columns in the (possibly filtered and sorted) table
    final int colsPerGrid = m_included.length;
    final int[] includeIndices = new int[colsPerGrid];
    for (int i = 0; i < colsPerGrid; i++) {
        includeIndices[i] = spec.findColumnIndex(m_included[i]);
    }

    final int gridCount = m_configuration.getColCount();
    // one extra trailing cell carries the group value when grouping is enabled
    final int cellCount = grouped ? colsPerGrid * gridCount + 1 : colsPerGrid * gridCount;
    final int groupColIndex = grouped ? spec.findColumnIndex(groupColumn) : -1;

    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long currentRow = 0;
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        // assign group column (if enabled): peek at the next row without consuming it
        if (groupColIndex >= 0) {
            final DataRow peeked = it.next();
            curGroupValue = peeked.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(peeked);
        }
        for (int grid = 0; grid < gridCount && it.hasNext(); grid++) {
            final DataRow inRow = it.next();
            final DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (!ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                // start new group, i.e. new row
                it.pushBack(inRow);
                break;
            }
            mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
            currentRow += 1;
            mainExec.checkCanceled();
            final int offset = grid * colsPerGrid;
            for (int i = 0; i < colsPerGrid; i++) {
                cells[offset + i] = inRow.getCell(includeIndices[i]);
            }
        }
        final RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 63 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class FileAnalyzer method createColumnProperties.

/**
 * Determines the type and name of each column. It tries to figure out if there are column headers in the file or
 * otherwise generates names for the columns. <br>
 * We read from the first line one token per column (plus one for the row header if we have row headers in the
 * file). Then we do three checks: first, if we have row headers and are missing one token we assume the column
 * header for the "row-header-column" is missing, thus we must have column headers. Second, we check the types of
 * the tokens read. If one of the tokens (except the first if we have row headers) cannot be converted into the
 * column's type, we assume it's a column header. Last, if all tokens (except the first if we have row headers)
 * start with the same prefix followed by an increasing number, then that looks like column headers to us.
 * Otherwise we say we have no column headers.
 * <p>
 * The source stream opened for reading the first line is closed before this method returns or throws.
 *
 * @param userSettings settings user provided. Must be honored!
 * @param result the settings so far, must contain data url, delimiters, comments, quotes, colNumber, and rowHeader
 *            flag
 * @param exec to check for cancellations and report progress to
 * @return a vector of colProperty objects, having the columnSpec set and the useFileHeader flag
 * @throws IOException if an I/O error occurs
 * @throws InterruptedExecutionException if the interrupt check on {@code exec} signals that the analysis should
 *             stop
 */
private static Vector<ColProperty> createColumnProperties(final FileReaderNodeSettings userSettings, final FileReaderNodeSettings result, final ExecutionMonitor exec) throws IOException, InterruptedExecutionException {
    // first detect the type of each column
    ExecutionMonitor subExec = exec.createSubProgress(TYPES_SUB);
    ColProperty[] colProps = createColumnTypes(userSettings, result, subExec);
    // extract the column types and column missing values from the result
    // of the above method call
    DataType[] columnTypes = new DataType[colProps.length];
    String[] missValues = new String[colProps.length];
    String[] formatParameters = new String[colProps.length];
    for (int c = 0; c < colProps.length; c++) {
        columnTypes[c] = colProps[c].getColumnSpec().getType();
        missValues[c] = colProps[c].getMissingValuePattern();
        formatParameters[c] = colProps[c].getFormatParameter().orElse(null);
    }
    subExec.setProgress(1.0);
    checkInterrupt(exec);
    // number of columns must be set accordingly (including skipped cols)
    assert result.getNumberOfColumns() == columnTypes.length;
    // store the first line here to analyze the tokens - depending on the
    // row header flag expect one more token to come.
    String rowHeader = null;
    String scndLineRowHeader = null;
    String[] columnHeaders = new String[result.getNumberOfColumns()];
    BufferedReader reader = result.createNewInputReader();
    Tokenizer tokenizer = new Tokenizer(reader);
    // The finally block guarantees the source stream is released on every exit path (normal completion,
    // interruption, or any exception thrown while tokenizing), not only on the two paths that closed it before.
    try {
        tokenizer.setSettings(result);
        exec.setProgress("Guessing column headers");
        // the first token is supposed to be the header for the "row column"
        if (result.getFileHasRowHeaders()) {
            rowHeader = tokenizer.nextToken();
        }
        // now read the (possible) data column headers
        for (int c = 0; c < columnHeaders.length; c++) {
            String token = tokenizer.nextToken();
            if (token == null) {
                // end of file... already?!?
                break;
            }
            if (result.isRowDelimiter(token, tokenizer.lastTokenWasQuoted())) {
                // end of line - a bit early, huh??
                // NOTE(review): the value stored here is overwritten by the nextToken() call after the loop,
                // so on this path the token AFTER this one ends up in scndLineRowHeader - confirm intent.
                scndLineRowHeader = tokenizer.nextToken();
                break;
            }
            columnHeaders[c] = token;
            checkInterrupt(exec);
        }
        // the next token is the row header in the next row (could be...)
        scndLineRowHeader = tokenizer.nextToken();
    } finally {
        tokenizer.closeSourceStream();
    }
    Vector<ColProperty> userColProps = userSettings.getColumnProperties();
    if (userColProps == null) {
        // that saves us quite some checking later
        userColProps = new Vector<ColProperty>();
    }
    if (!userSettings.isFileHasColumnHeadersUserSet()) {
        // the user did not specify whether the file has column headers: guess.
        // If the file has row headers and the first line was one token short (the last column header slot is
        // still null), we assume the rowHeader is a data column header.
        if (result.getFileHasRowHeaders() && // && (the last token is empty)
        (columnHeaders.length > 0) && (columnHeaders[columnHeaders.length - 1] == null)) {
            result.setFileHasColumnHeaders(true);
            // discard the last (=null) token
            String[] colNames = new String[result.getNumberOfColumns()];
            colNames[0] = rowHeader;
            System.arraycopy(columnHeaders, 0, colNames, 1, colNames.length - 1);
            return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // another indication for a column_headers_must_have is when the
        // first line contains tokens that are not type compliant with all
        // other lines (e.g. all items in the column are integers except in
        // the first line).
        // we create simple cells only
        DataCellFactory cellFactory = new DataCellFactory(null);
        cellFactory.setDecimalSeparator(result.getDecimalSeparator());
        cellFactory.setThousandsSeparator(result.getThousandsSeparator());
        for (int c = 0; c < columnHeaders.length; c++) {
            checkInterrupt(exec);
            if (columnHeaders[c] == null) {
                // the first line ended early - could be anything...
                continue;
            }
            cellFactory.setMissingValuePattern(missValues[c]);
            cellFactory.setFormatParameter(formatParameters[c]);
            DataCell dc = cellFactory.createDataCellOfType(columnTypes[c], columnHeaders[c]);
            if (dc != null) {
                // this column header could be data - try the others...
                continue;
            }
            // header is not data: must be column header
            result.setFileHasColumnHeaders(true);
            return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // next check: does the first line's row header fit the row header of the second line?
        // Only done if the user did not set the row header flag.
        if (!result.isFileHasRowHeadersUserSet()) {
            // without both row headers there is no prefix+index pattern to compare, so we have nothing to
            // test against.
            if (rowHeader != null && scndLineRowHeader != null) {
                HeaderHelper hh = HeaderHelper.extractPrefixAndIndexFromHeader(rowHeader);
                if (hh == null || !hh.testNextHeader(scndLineRowHeader)) {
                    // this first line row header isn't a good row header
                    // all the other lines have nice ones - create col hdrs
                    // also create colHdrs if they don't fit to each other
                    // header is not data: must be column header
                    result.setFileHasColumnHeaders(true);
                    return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
                }
            }
        }
        // last check: the tokens look like headers if consecutiveHeaders() accepts them, i.e. (per the method
        // description) all have the same prefix and a growing index.
        if ((columnHeaders.length > 0) && consecutiveHeaders(columnHeaders, exec)) {
            result.setFileHasColumnHeaders(true);
            return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // otherwise we assume the first line doesn't contain headers.
        // pass an array with null strings and it will create headers for us
        result.setFileHasColumnHeaders(false);
        // null array
        String[] nulls = new String[columnHeaders.length];
        return createColProps(nulls, userColProps, columnTypes, missValues, formatParameters, exec);
    } else {
        // user set fileHasColHeaders - see if it's true or false
        result.setFileHasColumnHeaders(userSettings.getFileHasColumnHeaders());
        result.setFileHasColumnHeadersUserSet(true);
        if (userSettings.getFileHasColumnHeaders()) {
            // use the headers we read in
            if ((columnHeaders.length > 0) && (columnHeaders[columnHeaders.length - 1] == null) && rowHeader != null) {
                // okay, we got one too few, use row header
                String[] colNames = new String[result.getNumberOfColumns()];
                colNames[0] = rowHeader;
                System.arraycopy(columnHeaders, 0, colNames, 1, colNames.length - 1);
                return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
            } else {
                return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
            }
        } else {
            // don't read col headers - create null array to generate names
            String[] colNames = new String[columnHeaders.length];
            return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
        }
    }
}
Also used : BufferedReader(java.io.BufferedReader) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) Tokenizer(org.knime.core.util.tokenizer.Tokenizer)

Example 64 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class FileAnalyzer method setDelimitersAndColNum.

/**
 * Splits the lines of the file (honoring the settings in the settings object), and tries to guess which delimiters
 * create the best results. It'll try out semicolon, comma, tab, or space delimiters; in this order. Whatever
 * produces more than one column (consistently) will be set. If no settings create more than one column no column
 * delimiters will be set. A row delimiter ('\n' and '\r') is always set.
 * <p>
 * A candidate is skipped if the user configured that character as thousands or decimal separator. If the user
 * provided delimiters, they are copied to the result unchanged and only the number of columns is determined.
 */
private static void setDelimitersAndColNum(final FileReaderNodeSettings userSettings, final FileReaderNodeSettings result, final ExecutionMonitor exec) throws IOException, InterruptedExecutionException {
    assert result != null;
    assert userSettings != null;
    assert result.getDataFileLocation() != null;

    if (userSettings.isDelimiterUserSet()) {
        // user provided delimiters copy them
        for (Delimiter delim : userSettings.getAllDelimiters()) {
            final String pattern = delim.getDelimiter();
            if (userSettings.isRowDelimiter(pattern, false)) {
                result.addRowDelimiter(pattern, delim.combineConsecutiveDelims());
            } else {
                result.addDelimiterPattern(pattern, delim.combineConsecutiveDelims(), delim.returnAsToken(), delim.includeInToken());
            }
        }
        result.setDelimiterUserSet(true);
        result.setIgnoreEmptyTokensAtEndOfRow(userSettings.ignoreEmptyTokensAtEndOfRow());
        if (userSettings.ignoreDelimsAtEORUserSet()) {
            result.setIgnoreDelimsAtEndOfRowUserValue(userSettings.ignoreDelimsAtEORUserValue());
        }
        // set the number of cols that we read in with user presets.
        // take the maximum if rows have different num of cols.
        result.setNumberOfColumns(getMaximumNumberOfColumns(result, exec));
        return;
    }

    exec.setProgress("Guessing column separator");
    exec.setProgress(0.0);

    // try semicolon separated columns
    if ((userSettings.getThousandsSeparator() != ';') && (userSettings.getDecimalSeparator() != ';')
        && tryColumnDelimiter(result, exec, ";", false, false)) {
        return;
    }
    // try comma separated columns
    if ((userSettings.getThousandsSeparator() != ',') && (userSettings.getDecimalSeparator() != ',')
        && tryColumnDelimiter(result, exec, ",", false, false)) {
        return;
    }
    // try tab separated columns
    if ((userSettings.getThousandsSeparator() != '\t') && (userSettings.getDecimalSeparator() != '\t')
        && tryColumnDelimiter(result, exec, "\t", false, false)) {
        return;
    }
    // try space separated columns, first ignoring empty tokens at the end of a row, then without that flag
    if ((userSettings.getThousandsSeparator() != ' ') && (userSettings.getDecimalSeparator() != ' ')) {
        if (tryColumnDelimiter(result, exec, " ", true, true)) {
            return;
        }
        // restore it to false
        result.setIgnoreEmptyTokensAtEndOfRow(false);
        if (tryColumnDelimiter(result, exec, " ", true, false)) {
            return;
        }
    }

    // well - none of the above settings made sense - return without
    // delimiter
    result.removeAllDelimiters();
    // but always have one row per line
    result.addRowDelimiter("\n", true);
    result.addRowDelimiter("\r", true);
    result.setNumberOfColumns(1);
}

/**
 * Replaces all delimiters in {@code result} by the row delimiters '\n' and '\r' plus the given column delimiter
 * candidate and tests these settings on a fresh sub progress of {@code exec}.
 *
 * @param result the settings to modify and test
 * @param exec parent monitor from which the sub progress for this attempt is created
 * @param delimiter the column delimiter candidate
 * @param combineConsecutive passed as second argument to {@code addDelimiterPattern}
 * @param ignoreEmptyTokensAtEndOfRow if true, the corresponding flag is switched on in {@code result} before
 *            testing (it is not switched back here)
 * @return the outcome of the delimiter test; false if an {@link IllegalArgumentException} was thrown while
 *         applying or testing the settings
 */
private static boolean tryColumnDelimiter(final FileReaderNodeSettings result, final ExecutionMonitor exec, final String delimiter, final boolean combineConsecutive, final boolean ignoreEmptyTokensAtEndOfRow) throws IOException, InterruptedExecutionException {
    ExecutionMonitor subExec = createSubExecWithRemainder(exec);
    try {
        result.removeAllDelimiters();
        // make sure '\n' and '\r' is a row delimiter. Always.
        result.addRowDelimiter("\n", true);
        result.addRowDelimiter("\r", true);
        result.addDelimiterPattern(delimiter, combineConsecutive, false, false);
        if (ignoreEmptyTokensAtEndOfRow) {
            result.setIgnoreEmptyTokensAtEndOfRow(true);
        }
        return testDelimiterSettingsSetColNum(result, subExec);
    } catch (IllegalArgumentException iae) {
        // seems the candidate was added as comment before - alright then, treat it as "did not work".
        return false;
    }
}
Also used : Delimiter(org.knime.core.util.tokenizer.Delimiter) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 65 with ExecutionMonitor

use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

the class DBAutoBinnerNodeModel method configure.

/**
 * {@inheritDoc}
 *
 * <p>
 * Rejects the configuration if the database utility does not support "CASE" and more than one column is
 * included. Output specs are only produced if the connection settings ask for metadata retrieval during
 * configure; otherwise both output specs are {@code null}.
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DatabasePortObjectSpec dbSpec = (DatabasePortObjectSpec) inSpecs[0];
    final DatabaseQueryConnectionSettings connectionSettings =
        dbSpec.getConnectionSettings(getCredentialsProvider());

    // Without "CASE" support only a single column can be handled.
    final boolean caseSupported = connectionSettings.getUtility().supportsCase();
    if (!caseSupported
        && m_settings.getFilterConfiguration().applyTo(dbSpec.getDataTableSpec()).getIncludes().length > 1) {
        throw new InvalidSettingsException("Database does not support \"CASE\". Please choose only one column.");
    }

    if (!connectionSettings.getRetrieveMetadataInConfigure()) {
        return new PortObjectSpec[] { null, null };
    }

    // Build both port objects up front just to obtain their specs.
    final PMMLPortObject pmmlPortObject = createPMMLPortObject(dbSpec, connectionSettings, new ExecutionMonitor());
    final DatabasePortObject databasePortObject =
        createDatabasePortObject(dbSpec, connectionSettings, pmmlPortObject);
    return new PortObjectSpec[] { databasePortObject.getSpec(), pmmlPortObject.getSpec() };
}
Also used : DatabasePortObject(org.knime.core.node.port.database.DatabasePortObject) DatabaseQueryConnectionSettings(org.knime.core.node.port.database.DatabaseQueryConnectionSettings) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DatabasePortObjectSpec(org.knime.core.node.port.database.DatabasePortObjectSpec) DatabasePortObjectSpec(org.knime.core.node.port.database.DatabasePortObjectSpec) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Aggregations

ExecutionMonitor (org.knime.core.node.ExecutionMonitor)160 BufferedDataTable (org.knime.core.node.BufferedDataTable)50 DataTableSpec (org.knime.core.data.DataTableSpec)43 DataRow (org.knime.core.data.DataRow)39 DataCell (org.knime.core.data.DataCell)35 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)35 Test (org.junit.Test)33 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)33 File (java.io.File)29 IOException (java.io.IOException)25 PortObject (org.knime.core.node.port.PortObject)25 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 RowKey (org.knime.core.data.RowKey)20 ArrayList (java.util.ArrayList)19 WorkflowLoadResult (org.knime.core.node.workflow.WorkflowPersistor.WorkflowLoadResult)17 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)16 ExecutionException (java.util.concurrent.ExecutionException)14 ExecutionContext (org.knime.core.node.ExecutionContext)13 FileOutputStream (java.io.FileOutputStream)12