Use of org.knime.core.node.ExecutionMonitor in project knime-core by KNIME: class CorrelationComputeNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable in = (BufferedDataTable) inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();
    // restrict the input to the user-selected columns before computing anything
    final String[] includeNames = m_columnFilterModel.applyTo(inSpec).getIncludes();
    final ColumnRearranger keepOnlyRearranger = new ColumnRearranger(inSpec);
    keepOnlyRearranger.keepOnly(includeNames);
    final BufferedDataTable filteredTable =
        exec.createColumnRearrangeTable(in, keepOnlyRearranger, exec.createSilentSubExecutionContext(0.0));
    final DataTableSpec filteredTableSpec = filteredTable.getDataTableSpec();
    // progress split: 48% statistics pass, 48% correlation pass, remainder for output assembly
    final double progStep1 = 0.48;
    final double progStep2 = 0.48;
    final double progFinish = 1.0 - progStep1 - progStep2;
    final CorrelationComputer calculator =
        new CorrelationComputer(filteredTableSpec, m_maxPossValueCountModel.getIntValue());
    exec.setMessage("Calculating table statistics");
    final ExecutionContext execStep1 = exec.createSubExecutionContext(progStep1);
    calculator.calculateStatistics(filteredTable, execStep1);
    execStep1.setProgress(1.0);
    exec.setMessage("Calculating correlation values");
    final ExecutionMonitor execStep2 = exec.createSubExecutionContext(progStep2);
    final HalfDoubleMatrix correlationMatrix = calculator.calculateOutput(filteredTable, execStep2);
    execStep2.setProgress(1.0);
    exec.setMessage("Assembling output");
    final ExecutionContext execFinish = exec.createSubExecutionContext(progFinish);
    final PMCCPortObjectAndSpec pmccModel = new PMCCPortObjectAndSpec(includeNames, correlationMatrix);
    final BufferedDataTable out = pmccModel.createCorrelationMatrix(execFinish);
    m_correlationTable = out;
    // collect warnings: the short form (4 items) goes to the user, the long
    // form (1000 items) only into the debug log
    String warning = null;
    final String missValueString = calculator.getNumericMissingValueWarning(4);
    if (missValueString != null) {
        LOGGER.debug(calculator.getNumericMissingValueWarning(1000));
        warning = missValueString;
    }
    final String constantColString = calculator.getNumericConstantColumnPairs(4);
    if (constantColString != null) {
        LOGGER.debug(calculator.getNumericConstantColumnPairs(1000));
        warning = (warning == null) ? constantColString : warning + "\n" + constantColString;
    }
    if (warning != null) {
        setWarningMessage(warning);
    }
    return new PortObject[] { out, pmccModel };
}
Use of org.knime.core.node.ExecutionMonitor in project knime-core by KNIME: class ColumnToGrid2NodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Lays the selected columns of 'gridCount' consecutive input rows next to
    // each other in one output row. If a group column is configured, the input
    // is pre-sorted by that column and a new output row starts whenever the
    // group value changes; the group value occupies the last output cell.
    String groupColumn = m_configuration.getGroupColumn();
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (groupColumn != null) {
        // sort by group column so equal group values are adjacent; sorting
        // gets half of the progress, the assembly loop the other half
        exec.setMessage("Sorting input table");
        BufferedDataTable in = inData[0];
        ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        ColumnRearranger sortFilterRearranger = new ColumnRearranger(in.getDataTableSpec());
        // keep only the included columns plus the group column for the sort
        String[] relevantCols = new String[m_included.length + 1];
        System.arraycopy(m_included, 0, relevantCols, 0, m_included.length);
        relevantCols[relevantCols.length - 1] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        BufferedDataTable toBeSortedTable = exec.createColumnRearrangeTable(in, sortFilterRearranger, exec.createSubProgress(0.0));
        SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        // no grouping: process the input as-is, full progress for the loop
        inputTable = inData[0];
        mainExec = exec;
    }
    exec.setMessage("Assembling output");
    DataTableSpec spec = inputTable.getDataTableSpec();
    DataTableSpec outSpec = createOutputSpec(spec);
    BufferedDataContainer cont = exec.createDataContainer(outSpec);
    // resolve included column names to indices in the (possibly filtered) input
    int[] includeIndices = new int[m_included.length];
    for (int i = 0; i < m_included.length; i++) {
        int index = spec.findColumnIndex(m_included[i]);
        includeIndices[i] = index;
    }
    int gridCount = m_configuration.getColCount();
    // with grouping the output row has one extra trailing cell for the group value
    final int cellCount;
    final int groupColIndex;
    if (groupColumn != null) {
        cellCount = includeIndices.length * gridCount + 1;
        groupColIndex = spec.findColumnIndex(groupColumn);
    } else {
        cellCount = includeIndices.length * gridCount;
        groupColIndex = -1;
    }
    final DataCell[] cells = new DataCell[cellCount];
    // iterator with single-row push-back: a row that begins a new group is
    // pushed back and re-read as the first row of the next output row
    PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    long currentRow = 0;
    long totalRows = inputTable.size();
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        // unfilled grid slots stay missing (e.g. in the last, incomplete row)
        Arrays.fill(cells, DataType.getMissingCell());
        // assign group column (if enabled)
        if (groupColIndex >= 0) {
            // peek at the next row to fix the group value for this output row
            DataRow row = it.next();
            curGroupValue = row.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(row);
        }
        for (int grid = 0; grid < gridCount; grid++) {
            if (!it.hasNext()) {
                break;
            }
            DataRow inRow = it.next();
            DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
                currentRow += 1;
                mainExec.checkCanceled();
                // copy the included cells into this row's grid slot
                for (int i = 0; i < includeIndices.length; i++) {
                    cells[grid * includeIndices.length + i] = inRow.getCell(includeIndices[i]);
                }
            } else {
                // start new group, i.e. new row
                it.pushBack(inRow);
                break;
            }
        }
        RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Use of org.knime.core.node.ExecutionMonitor in project knime-core by KNIME: class FileAnalyzer, method createColumnProperties.
/**
 * Determines the type and name of each column. It tries to figure out if there are column headers in the file or
 * otherwise generates names for the columns. <br>
 * We read from the first line one token per column (plus one for the row header if we have row headers in the
 * file). Then we do three checks: first, if we have row headers and are missing one token we assume the column
 * header for the "row-header-column" is missing, thus we must have column headers. Second, we check the types of
 * the tokens read. If one of the tokens (except the first if we have row headers) cannot be converted into the
 * column's type, we assume it's a column header. Last, if all tokens (except the first if we have row headers)
 * start with the same prefix followed by an increasing number, then that looks like column headers to us.
 * Otherwise we say we have no column headers.
 *
 * @param userSettings settings user provided. Must be honored!
 * @param result the settings so far, must contain data url, delimiters, comments, quotes, colNumber, and rowHeader
 *            flag
 * @param exec to check for cancellations and report progress to
 * @return a vector of colProperty objects, having the columnSpec set and the useFileHeader flag
 * @throws IOException if an I/O error occurs
 * @throws InterruptedExecutionException if the analysis is canceled by the user
 */
private static Vector<ColProperty> createColumnProperties(final FileReaderNodeSettings userSettings, final FileReaderNodeSettings result, final ExecutionMonitor exec) throws IOException, InterruptedExecutionException {
    // first detect the type of each column
    ExecutionMonitor subExec = exec.createSubProgress(TYPES_SUB);
    ColProperty[] colProps = createColumnTypes(userSettings, result, subExec);
    // extract the column types and column missing values from the result
    // of the above method call
    DataType[] columnTypes = new DataType[colProps.length];
    String[] missValues = new String[colProps.length];
    String[] formatParameters = new String[colProps.length];
    for (int c = 0; c < colProps.length; c++) {
        columnTypes[c] = colProps[c].getColumnSpec().getType();
        missValues[c] = colProps[c].getMissingValuePattern();
        formatParameters[c] = colProps[c].getFormatParameter().orElse(null);
    }
    subExec.setProgress(1.0);
    checkInterrupt(exec);
    // number of columns must be set accordingly (including skipped cols)
    assert result.getNumberOfColumns() == columnTypes.length;
    // store the first line here to analyze the tokens - depending on the
    // row header flag expect one more token to come.
    String rowHeader = null;
    String scndLineRowHeader = null;
    String[] columnHeaders = new String[result.getNumberOfColumns()];
    BufferedReader reader = result.createNewInputReader();
    Tokenizer tokenizer = new Tokenizer(reader);
    tokenizer.setSettings(result);
    exec.setProgress("Guessing column headers");
    // the first token is supposed to be the header for the "row column"
    if (result.getFileHasRowHeaders()) {
        rowHeader = tokenizer.nextToken();
    }
    // now read the (possible) data column headers
    for (int c = 0; c < columnHeaders.length; c++) {
        String token = tokenizer.nextToken();
        if (token == null) {
            // end of file... already?!?
            break;
        }
        if (result.isRowDelimiter(token, tokenizer.lastTokenWasQuoted())) {
            // end of line - a bit early, huh??
            scndLineRowHeader = tokenizer.nextToken();
            break;
        }
        columnHeaders[c] = token;
        try {
            checkInterrupt(exec);
        } catch (InterruptedExecutionException iee) {
            // close the stream before propagating the cancellation
            tokenizer.closeSourceStream();
            throw iee;
        }
    }
    // the next token is the row header in the next row (could be...)
    scndLineRowHeader = tokenizer.nextToken();
    tokenizer.closeSourceStream();
    Vector<ColProperty> userColProps = userSettings.getColumnProperties();
    if (userColProps == null) {
        // that saves us quite some checking later
        userColProps = new Vector<ColProperty>();
    }
    if (!userSettings.isFileHasColumnHeadersUserSet()) {
        // user did not decide - apply the heuristics described in the javadoc.
        // Check 1: if we have row headers but read one token too few, we assume
        // the first line contains column headers and the rowHeader token is
        // actually the header of the first data column.
        if (result.getFileHasRowHeaders() && // && (the last token is empty)
            (columnHeaders.length > 0) && (columnHeaders[columnHeaders.length - 1] == null)) {
            result.setFileHasColumnHeaders(true);
            // discard the last (=null) token
            String[] colNames = new String[result.getNumberOfColumns()];
            colNames[0] = rowHeader;
            System.arraycopy(columnHeaders, 0, colNames, 1, colNames.length - 1);
            return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // another indication for a column_headers_must_have is when the
        // first line contains tokens that are not type compliant with all
        // other lines (e.g. all items in the column are integers except in
        // the first line).
        // we create simple cells only
        DataCellFactory cellFactory = new DataCellFactory(null);
        cellFactory.setDecimalSeparator(result.getDecimalSeparator());
        cellFactory.setThousandsSeparator(result.getThousandsSeparator());
        for (int c = 0; c < columnHeaders.length; c++) {
            checkInterrupt(exec);
            if (columnHeaders[c] == null) {
                // the first line ended early - could be anything...
                continue;
            }
            cellFactory.setMissingValuePattern(missValues[c]);
            cellFactory.setFormatParameter(formatParameters[c]);
            DataCell dc = cellFactory.createDataCellOfType(columnTypes[c], columnHeaders[c]);
            if (dc != null) {
                // this column header could be data - try the others...
                continue;
            }
            // header is not data: must be column header
            result.setFileHasColumnHeaders(true);
            return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // Check 3a: the row header of the first line should fit the pattern of
        // the following row headers - if we have row headers in the file
        if (!result.isFileHasRowHeadersUserSet()) {
            // only testable if both the first and second line row headers were
            // read; otherwise there is nothing to test against.
            if (rowHeader != null && scndLineRowHeader != null) {
                HeaderHelper hh = HeaderHelper.extractPrefixAndIndexFromHeader(rowHeader);
                if (hh == null || !hh.testNextHeader(scndLineRowHeader)) {
                    // this first line row header isn't a good row header
                    // all the other lines have nice ones - create col hdrs
                    // also create colHdrs if they don't fit to each other
                    // header is not data: must be column header
                    result.setFileHasColumnHeaders(true);
                    return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
                }
            }
        }
        // Check 3b: the tokens look like generated column headers if they
        // all have the same prefix and a growing index.
        if ((columnHeaders.length > 0) && consecutiveHeaders(columnHeaders, exec)) {
            result.setFileHasColumnHeaders(true);
            return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
        }
        // otherwise we assume the first line doesn't contain headers.
        // pass an array with null strings and it will create headers for us
        result.setFileHasColumnHeaders(false);
        // null array
        String[] nulls = new String[columnHeaders.length];
        return createColProps(nulls, userColProps, columnTypes, missValues, formatParameters, exec);
    } else {
        // user set fileHasColHeaders - see if it's true or false
        result.setFileHasColumnHeaders(userSettings.getFileHasColumnHeaders());
        result.setFileHasColumnHeadersUserSet(true);
        if (userSettings.getFileHasColumnHeaders()) {
            // use the headers we read in
            if ((columnHeaders.length > 0) && (columnHeaders[columnHeaders.length - 1] == null) && rowHeader != null) {
                // okay, we got one too few, use row header
                String[] colNames = new String[result.getNumberOfColumns()];
                colNames[0] = rowHeader;
                System.arraycopy(columnHeaders, 0, colNames, 1, colNames.length - 1);
                return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
            } else {
                return createColProps(columnHeaders, userColProps, columnTypes, missValues, formatParameters, exec);
            }
        } else {
            // don't read col headers - create null array to generate names
            String[] colNames = new String[columnHeaders.length];
            return createColProps(colNames, userColProps, columnTypes, missValues, formatParameters, exec);
        }
    }
}
Use of org.knime.core.node.ExecutionMonitor in project knime-core by KNIME: class FileAnalyzer, method setDelimitersAndColNum.
/**
 * Splits the lines of the file (honoring the settings in the settings object), and tries to guess which delimiters
 * create the best results. It'll try out semicolon, comma, tab, or space delimiters; in this order. Whatever
 * produces more than one column (consistently) will be set. If no settings create more than one column no column
 * delimiters will be set. A row delimiter ('\n' and '\r') is always set.
 *
 * @param userSettings settings the user provided; if the user set delimiters they are honored and copied
 * @param result the settings object the guessed delimiters and column count are stored into
 * @param exec to check for cancellations and report progress to
 * @throws IOException if an I/O error occurs while reading the file
 * @throws InterruptedExecutionException if the analysis is canceled by the user
 */
private static void setDelimitersAndColNum(final FileReaderNodeSettings userSettings, final FileReaderNodeSettings result, final ExecutionMonitor exec) throws IOException, InterruptedExecutionException {
    assert result != null;
    assert userSettings != null;
    assert result.getDataFileLocation() != null;
    if (!userSettings.isDelimiterUserSet()) {
        exec.setProgress("Guessing column separator");
        exec.setProgress(0.0);
        // try semicolon, comma, and tab (in this order); a candidate already in
        // use as thousands or decimal separator is skipped
        if (canTryDelimiter(userSettings, ';') && tryColumnDelimiter(result, exec, ";", false)) {
            return;
        }
        if (canTryDelimiter(userSettings, ',') && tryColumnDelimiter(result, exec, ",", false)) {
            return;
        }
        if (canTryDelimiter(userSettings, '\t') && tryColumnDelimiter(result, exec, "\t", false)) {
            return;
        }
        if (canTryDelimiter(userSettings, ' ')) {
            // first attempt: combine consecutive spaces AND ignore empty tokens
            // at the end of each row
            result.setIgnoreEmptyTokensAtEndOfRow(true);
            if (tryColumnDelimiter(result, exec, " ", true)) {
                return;
            }
            // restore it to false and try plain space separated columns
            result.setIgnoreEmptyTokensAtEndOfRow(false);
            if (tryColumnDelimiter(result, exec, " ", true)) {
                return;
            }
        }
        // well - none of the above settings made sense - return without
        // column delimiter, but always have one row per line
        result.removeAllDelimiters();
        result.addRowDelimiter("\n", true);
        result.addRowDelimiter("\r", true);
        result.setNumberOfColumns(1);
    } else {
        // user provided delimiters - copy them
        for (Delimiter delim : userSettings.getAllDelimiters()) {
            if (userSettings.isRowDelimiter(delim.getDelimiter(), false)) {
                result.addRowDelimiter(delim.getDelimiter(), delim.combineConsecutiveDelims());
            } else {
                result.addDelimiterPattern(delim.getDelimiter(), delim.combineConsecutiveDelims(), delim.returnAsToken(), delim.includeInToken());
            }
        }
        result.setDelimiterUserSet(true);
        result.setIgnoreEmptyTokensAtEndOfRow(userSettings.ignoreEmptyTokensAtEndOfRow());
        if (userSettings.ignoreDelimsAtEORUserSet()) {
            result.setIgnoreDelimsAtEndOfRowUserValue(userSettings.ignoreDelimsAtEORUserValue());
        }
        // set the number of cols that we read in with user presets.
        // take the maximum if rows have different num of cols.
        result.setNumberOfColumns(getMaximumNumberOfColumns(result, exec));
    }
}

/**
 * @param userSettings the user settings holding thousands/decimal separator
 * @param candidate the delimiter character under consideration
 * @return true if the character is neither the thousands nor the decimal separator and may be tried as delimiter
 */
private static boolean canTryDelimiter(final FileReaderNodeSettings userSettings, final char candidate) {
    return (userSettings.getThousandsSeparator() != candidate) && (userSettings.getDecimalSeparator() != candidate);
}

/**
 * Installs the given pattern as the only column delimiter (keeping '\n' and '\r' as row delimiters - always)
 * and tests whether it splits the file into a consistent multi-column table.
 *
 * @param result the settings object to install the delimiter into; the column count is set on success
 * @param exec progress/cancellation monitor; a sub-monitor over the remaining progress is used for the test
 * @param delimiter the delimiter pattern to try out
 * @param combineMultiple whether consecutive delimiters should be combined into one
 * @return true if the delimiter produced a usable column split (column number was set in {@code result})
 * @throws IOException if an I/O error occurs while reading the file
 * @throws InterruptedExecutionException if the analysis is canceled by the user
 */
private static boolean tryColumnDelimiter(final FileReaderNodeSettings result, final ExecutionMonitor exec, final String delimiter, final boolean combineMultiple) throws IOException, InterruptedExecutionException {
    ExecutionMonitor subExec = createSubExecWithRemainder(exec);
    try {
        result.removeAllDelimiters();
        // make sure '\n' and '\r' is a row delimiter. Always.
        result.addRowDelimiter("\n", true);
        result.addRowDelimiter("\r", true);
        result.addDelimiterPattern(delimiter, combineMultiple, false, false);
        return testDelimiterSettingsSetColNum(result, subExec);
    } catch (IllegalArgumentException iae) {
        // seems the character was added as comment/quote before - skip this
        // candidate then (deliberate best-effort, matches previous behavior)
        return false;
    }
}
Use of org.knime.core.node.ExecutionMonitor in project knime-core by KNIME: class DBAutoBinnerNodeModel, method configure.
/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DatabasePortObjectSpec dbSpec = (DatabasePortObjectSpec) inSpecs[0];
    final DatabaseQueryConnectionSettings connectionSettings = dbSpec.getConnectionSettings(getCredentialsProvider());
    // without CASE support the binning statement can only be built for one column
    final boolean suppCase = connectionSettings.getUtility().supportsCase();
    if (!suppCase
        && m_settings.getFilterConfiguration().applyTo(dbSpec.getDataTableSpec()).getIncludes().length > 1) {
        throw new InvalidSettingsException("Database does not support \"CASE\". Please choose only one column.");
    }
    if (connectionSettings.getRetrieveMetadataInConfigure()) {
        // metadata retrieval enabled: build the PMML and database output specs now
        final PMMLPortObject pmmlPortObject = createPMMLPortObject(dbSpec, connectionSettings, new ExecutionMonitor());
        final DatabasePortObject databasePortObject = createDatabasePortObject(dbSpec, connectionSettings, pmmlPortObject);
        return new PortObjectSpec[] { databasePortObject.getSpec(), pmmlPortObject.getSpec() };
    }
    // specs are unknown until execution
    return new PortObjectSpec[] { null, null };
}
Aggregations