use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
the class BitVectorGeneratorNodeModel method createBitVectorsFromStrings.
/**
 * Creates the output table by running the column rearranger obtained from
 * {@code createColumnRearranger} over the input table.
 *
 * <p>If the configured string type is {@code STRING_TYPES.ID}, the progress is split into two
 * halves: one sub-progress is handed to {@code scanMaxPos}, whose result is passed on to the
 * {@link IdString2BitVectorCellFactory} before the table is rearranged, and the other
 * sub-progress is used for the rearranging itself. For any other type the given execution
 * context is used directly as progress monitor.
 *
 * @param data the input table
 * @param stringColIndex index of the string column, passed to {@code createColumnRearranger}
 * @param exec execution context used for progress reporting and for creating the output table
 * @return a single-element array holding the rearranged table
 * @throws CanceledExecutionException if the execution was canceled
 * @throws InvalidSettingsException declared by this method; raised by one of the helpers called
 */
private BufferedDataTable[] createBitVectorsFromStrings(final BufferedDataTable data, final int stringColIndex, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    final ColumnRearranger rearranger = createColumnRearranger(data.getDataTableSpec(), stringColIndex);
    final ExecutionMonitor rearrangeMonitor;
    if (!m_type.equals(STRING_TYPES.ID)) {
        // no preparation pass: the whole progress belongs to the rearranging
        rearrangeMonitor = exec;
    } else {
        // sub-progresses are created in the same order as they are consumed
        final ExecutionMonitor scanMonitor = exec.createSubProgress(0.5);
        rearrangeMonitor = exec.createSubProgress(0.5);
        exec.setMessage("preparing");
        final int maxPos = scanMaxPos(data, scanMonitor);
        ((IdString2BitVectorCellFactory) m_factory).setMaxPos(maxPos);
    }
    exec.setMessage("creating output");
    return new BufferedDataTable[] {exec.createColumnRearrangeTable(data, rearranger, rearrangeMonitor)};
}
use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
the class ColumnToGridNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String[] includes = m_configuration.getIncludes();
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean hasGroupColumn = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (!hasGroupColumn) {
        // no grouping: work on the input as is and use the full progress range
        inputTable = inData[0];
        mainExec = exec;
    } else {
        // grouping: reduce to the relevant columns and sort by the group column (first half
        // of the progress), then assemble the grid (second half)
        exec.setMessage("Sorting input table");
        final BufferedDataTable unsorted = inData[0];
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        final ColumnRearranger sortFilterRearranger = new ColumnRearranger(unsorted.getDataTableSpec());
        // included columns followed by the group column
        final String[] relevantCols = Arrays.copyOf(includes, includes.length + 1);
        relevantCols[includes.length] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        final BufferedDataTable toBeSortedTable =
            exec.createColumnRearrangeTable(unsorted, sortFilterRearranger, exec.createSubProgress(0.0));
        final SortedTable sorted = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn),
            new boolean[] {true}, sortExec);
        inputTable = sorted.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    }

    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);

    final int[] includeIndices = new int[includes.length];
    for (int col = 0; col < includeIndices.length; col++) {
        includeIndices[col] = spec.findColumnIndex(includes[col]);
    }

    final int gridCount = m_configuration.getColCount();
    // one block of included columns per grid position, plus a trailing group cell if grouping
    final int cellCount = includeIndices.length * gridCount + (hasGroupColumn ? 1 : 0);
    final int groupColIndex = hasGroupColumn ? spec.findColumnIndex(groupColumn) : -1;

    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long currentRow = 0;
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        // positions that receive no input row below stay missing
        Arrays.fill(cells, DataType.getMissingCell());
        if (groupColIndex >= 0) {
            // peek at the next row to learn the group value of this output row
            final DataRow firstOfGroup = it.next();
            curGroupValue = firstOfGroup.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(firstOfGroup);
        }
        int grid = 0;
        while (grid < gridCount && it.hasNext()) {
            final DataRow inRow = it.next();
            final DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (!ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                // row belongs to the next group: hand it back and finish this output row
                it.pushBack(inRow);
                break;
            }
            mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
            currentRow += 1;
            mainExec.checkCanceled();
            final int offset = grid * includeIndices.length;
            for (int i = 0; i < includeIndices.length; i++) {
                cells[offset + i] = inRow.getCell(includeIndices[i]);
            }
            grid++;
        }
        cont.addRowToTable(new DefaultRow(RowKey.createRowKey(currentOutRow++), cells));
    }
    cont.close();
    return new BufferedDataTable[] {cont.getTable()};
}
use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
the class MissingValueHandling2Table method createMissingValueHandlingTable.
// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies missing value handling to the given table according to the supplied column settings
 * and reports progress while doing so.
 *
 * <p>If any column uses the most-frequent, max, min or mean method and the table is not already
 * a {@link StatisticsTable}, a statistics table is computed first (first half of the progress)
 * and the rows are written afterwards (second half). Otherwise the rows are written directly
 * using the full progress range.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer receives the warning message of the statistics computation, if any
 * @return the table with missing values handled
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        // fall back to "no handling" for every column of the table
        final DataTableSpec s = table.getDataTableSpec();
        effectiveSettings = new MissingValueHandling2ColSetting[s.getNumColumns()];
        for (int i = 0; i < s.getNumColumns(); i++) {
            final MissingValueHandling2ColSetting noHandling = new MissingValueHandling2ColSetting(s.getColumnSpec(i));
            effectiveSettings[i] = noHandling;
            noHandling.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
        }
    }

    // first pass: find out whether statistics are required and how many columns need the
    // most frequent value
    boolean needStatistics = false;
    int mostFrequentColCount = 0;
    for (final MissingValueHandling2ColSetting setting : effectiveSettings) {
        switch (setting.getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentColCount++;
                // intentional fall-through: most frequent also requires statistics
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                needStatistics = true;
                break;
            default:
        }
    }

    // second pass: collect the indices of the most-frequent columns
    final int[] mostFrequentCols = new int[mostFrequentColCount];
    if (mostFrequentColCount > 0) {
        int next = 0;
        for (int col = 0; col < effectiveSettings.length; col++) {
            switch (effectiveSettings[col].getMethod()) {
                case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                    mostFrequentCols[next++] = col;
                    break;
                default:
            }
        }
    }

    final DataTable source;
    final ExecutionMonitor rowMonitor;
    if (!needStatistics || table instanceof StatisticsTable) {
        source = table;
        rowMonitor = exec;
    } else {
        // first half of the progress: creating the statistics table
        final ExecutionMonitor statisticsMonitor = exec.createSubProgress(0.5);
        final MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsMonitor, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        source = statistics;
        // second half of the progress: iterating the rows
        rowMonitor = exec.createSubProgress(0.5);
    }

    final MissingValueHandling2Table mvht = new MissingValueHandling2Table(source, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(mvht.getDataTableSpec());
    rowMonitor.setMessage("Adding rows...");
    try {
        final MissingValueHandling2TableIterator it = new MissingValueHandling2TableIterator(mvht, rowMonitor);
        for (int rowNumber = 1; it.hasNext(); rowNumber++) {
            final DataRow next = it.next();
            rowMonitor.setMessage("Adding row " + rowNumber + " (\"" + next.getKey() + "\")");
            container.addRowToTable(next);
        }
    } catch (MissingValueHandling2TableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancellation in a runtime exception; unwrap it for the caller
        throw rcee.getCause();
    } finally {
        container.close();
    }
    return container.getTable();
}
use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
the class FileAnalyzer method analyze.
/**
 * Tries to guess FileReader settings for the passed data file. Settings the user has fixed in
 * {@code userSettings} are copied into the result; everything else (character set, comment
 * patterns, quotes, delimiters, row headers, column types and names) is guessed by the
 * corresponding helper, each reporting to its own sub-progress.
 *
 * @param userSettings containing the URL of the file to examine (required) and the settings
 *            that should be used and considered fixed.
 * @param exec used to check for cancellations and to report progress. May be null, in which
 *            case a new {@link FileReaderExecutionMonitor} is used.
 * @return the guessed settings, or {@code null} if the analysis was interrupted (signaled by an
 *         {@code InterruptedExecutionException} from one of the analysis steps)
 * @throws IOException if there was an error reading from the URL
 * @throws IllegalArgumentException if {@code userSettings} has no data file location
 */
public static FileReaderNodeSettings analyze(final FileReaderNodeSettings userSettings, final ExecutionMonitor exec) throws IOException {
    if (userSettings.getDataFileLocation() == null) {
        throw new IllegalArgumentException("Must specify a valid file location for the file analyzer");
    }
    // without a monitor from the caller, use a private one so the code below needs no null checks
    final ExecutionMonitor execMon = exec != null ? exec : new FileReaderExecutionMonitor();

    // start from new and empty settings
    final FileReaderNodeSettings result = new FileReaderNodeSettings();
    execMon.setProgress(0.0);
    try {
        // settings that are simply taken over
        result.setDataFileLocationAndUpdateTableName(userSettings.getDataFileLocation());
        result.setDecimalSeparator(userSettings.getDecimalSeparator());
        result.setThousandsSeparator(userSettings.getThousandsSeparator());
        result.setDecimalSeparatorUserSet(userSettings.decimalSeparatorUserSet());
        result.setUniquifyRowIDs(userSettings.uniquifyRowIDs());
        result.setMaximumNumberOfRowsToRead(userSettings.getMaximumNumberOfRowsToRead());
        result.setSkipFirstLines(userSettings.getSkipFirstLines());
        result.allowLFinQuotes(userSettings.allowLFinQuotes());
        result.setCharsetName(userSettings.getCharsetName());
        result.setAnalyzeUsedAllRows(true);
        result.setMissValuePatternStrCols(userSettings.getMissValuePatternStrCols());
        result.setConnectTimeout(userSettings.getConnectTimeout());

        // character set: keep the user's choice, otherwise guess it
        final boolean charsetFixed = userSettings.isCharsetUserSet();
        result.setCharsetName(charsetFixed ? userSettings.getCharsetName() : guessCharSet(userSettings));
        result.setCharsetUserSet(charsetFixed);

        // comment patterns
        final ExecutionMonitor commentExec = execMon.createSubProgress(COMMENT_SUB);
        if (userSettings.isCommentUserSet()) {
            for (Comment comment : userSettings.getAllComments()) {
                result.addBlockCommentPattern(comment.getBegin(), comment.getEnd(), comment.returnAsSeparateToken(), comment.includeInToken());
            }
            result.setCommentUserSet(true);
        } else {
            addComments(result, commentExec);
            result.setCommentUserSet(false);
        }
        commentExec.setProgress(1.0);
        checkInterrupt(execMon);

        // quote patterns
        final ExecutionMonitor quoteExec = execMon.createSubProgress(QUOTES_SUB);
        if (userSettings.isQuoteUserSet()) {
            for (Quote quote : userSettings.getAllQuotes()) {
                if (!quote.hasEscapeChar()) {
                    result.addQuotePattern(quote.getLeft(), quote.getRight());
                } else {
                    result.addQuotePattern(quote.getLeft(), quote.getRight(), quote.getEscape());
                }
            }
            result.setQuoteUserSet(true);
        } else {
            addQuotes(result, quoteExec);
            result.setQuoteUserSet(false);
        }
        quoteExec.setProgress(1.0);
        checkInterrupt(execMon);

        // whitespace characters: user provided ones, or space and tab
        if (!userSettings.isWhiteSpaceUserSet()) {
            result.addWhiteSpaceCharacter(" ");
            result.addWhiteSpaceCharacter("\t");
            result.setWhiteSpaceUserSet(false);
        } else {
            for (String ws : userSettings.getAllWhiteSpaces()) {
                result.addWhiteSpaceCharacter(ws);
            }
            result.setWhiteSpaceUserSet(true);
        }
        // this step has no sub-progress of its own; the quote monitor is completed once more
        quoteExec.setProgress(1.0);
        checkInterrupt(execMon);

        // for now this flag is just taken over
        result.setSupportShortLines(userSettings.getSupportShortLines());

        // delimiters and number of columns (as many columns as the delimiters produce,
        // regardless of any row headers); honors user settings
        final ExecutionMonitor delimExec = execMon.createSubProgress(DELIMS_SUB);
        setDelimitersAndColNum(userSettings, result, delimExec);
        assert result.getNumberOfColumns() > 0;
        delimExec.setProgress(1.0);
        checkInterrupt(execMon);

        // row headers; the column count set so far does not account for a row header column
        final ExecutionMonitor rowHeaderExec = execMon.createSubProgress(ROWHDR_SUB);
        if (!userSettings.isFileHasRowHeadersUserSet()) {
            // only with at least two columns can one of them hold the row headers
            final boolean hasRowHeaders = result.getNumberOfColumns() > 1 && checkRowHeader(result, rowHeaderExec);
            result.setFileHasRowHeaders(hasRowHeaders);
            result.setFileHasRowHeadersUserSet(false);
        } else {
            result.setFileHasRowHeaders(userSettings.getFileHasRowHeaders());
            result.setFileHasRowHeadersUserSet(true);
        }
        rowHeaderExec.setProgress(1.0);
        checkInterrupt(execMon);

        // correct the guessed column count by the row header column
        if (result.getFileHasRowHeaders()) {
            result.setNumberOfColumns(result.getNumberOfColumns() - 1);
        }

        // column types and names (guessed or copied)
        final ExecutionMonitor columnExec = execMon.createSubProgress(TYPES_SUB + COLHDR_SUB);
        final Vector<ColProperty> columnProps = createColumnProperties(userSettings, result, columnExec);
        result.setColumnProperties(columnProps);
        columnExec.setProgress(1.0);

        // row header prefix, with "Row" as default
        final String userPrefix = userSettings.getRowHeaderPrefix();
        result.setRowHeaderPrefix(userPrefix != null ? userPrefix : "Row");

        // empty lines are ignored unless the user decided otherwise
        final boolean ignoreEmptyFixed = userSettings.isIgnoreEmptyLinesUserSet();
        result.setIgnoreEmptyLines(ignoreEmptyFixed ? userSettings.getIgnoreEmtpyLines() : true);
        result.setIgnoreEmptyLinesUserSet(ignoreEmptyFixed);

        execMon.setProgress(1.0);
    } catch (InterruptedExecutionException iee) {
        // an interrupt aborts the analysis without a result
        return null;
    }
    return result;
}
use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
the class CovarianceMatrixCalculator method calculateCovarianceMatrix.
/**
 * Computes the covariance matrix and puts the result in the given (optional) data container and additionally
 * returns an in-memory representation. The data container is expected to have the data table spec returned at
 * {@link #getResultSpec()}. The implementation traverses the data once.
 *
 * <p>A value pair is skipped when either of its two cells is missing in a row. If a result
 * container is given, 80% of the progress is used for the computation and 20% for writing the
 * matrix rows; otherwise the computation uses the full progress range.
 *
 * @param exec the execution monitor used for progress reporting and cancellation
 * @param inTable input data; its spec is checked against the target spec via {@code checkArgument}
 * @param tableSize the data table size, used as denominator for progress reporting
 * @param resultDataContainer optional result data container, may be {@code null}
 * @return the covariance matrix
 * @throws CanceledExecutionException if the user canceled the execution
 * @throws IllegalArgumentException if there were not enough valid values to compute the covariance of a
 *             column pair
 */
public RealMatrix calculateCovarianceMatrix(final ExecutionMonitor exec, final DataTable inTable, final long tableSize, final DataContainer resultDataContainer) throws CanceledExecutionException {
    checkArgument(m_targetSpec.equalStructure(inTable.getDataTableSpec()), "Target tables spec is different from the one given in the constructor!");
    if (resultDataContainer != null) {
        checkArgument(m_resultSpec.equalStructure(resultDataContainer.getTableSpec()), "Result tables spec is invalid!");
    }
    final ExecutionMonitor computingProgress = exec.createSubProgress(resultDataContainer != null ? 0.8 : 1);
    final int dim = m_indexes.length;
    // one accumulator per column pair (i, j) with i <= j, in the order addressed by index(..)
    final List<StorelessCovariance> covariancesList = new ArrayList<>();
    for (int i = 0; i < dim; i++) {
        for (int j = i; j < dim; j++) {
            covariancesList.add(new StorelessCovariance(2));
        }
    }
    // feed every row into the pair accumulators
    // long counter: tableSize is a long, an int counter would wrap around on tables with more
    // than Integer.MAX_VALUE rows and report negative progress
    long rowCount = 0;
    final double[] buffer = new double[2];
    for (DataRow dataRow : inTable) {
        for (int i = 0; i < dim; i++) {
            final DataCell outerCell = dataRow.getCell(m_indexes[i]);
            if (outerCell.isMissing()) {
                // skip missing values
                continue;
            }
            final double outerDouble = ((DoubleValue) outerCell).getDoubleValue();
            for (int j = i; j < dim; j++) {
                final DataCell innerCell = dataRow.getCell(m_indexes[j]);
                if (innerCell.isMissing()) {
                    // skip missing values
                    continue;
                }
                buffer[0] = outerDouble;
                buffer[1] = ((DoubleValue) innerCell).getDoubleValue();
                covariancesList.get(index(dim, i, j)).increment(buffer);
            }
        }
        computingProgress.setProgress(rowCount++ / (double) tableSize, "Calculate covariance values, processing row: '" + dataRow.getKey() + "'");
        computingProgress.checkCanceled();
    }
    // Copy the storeless covariances to a real (symmetric) matrix
    final RealMatrix covMatrix = new Array2DRowRealMatrix(dim, dim);
    for (int i = 0; i < dim; i++) {
        for (int j = i; j < dim; j++) {
            final StorelessCovariance covariance = covariancesList.get(index(dim, i, j));
            final double covValue;
            try {
                // on the diagonal both buffer entries hold the same column, so entry (1, 1) is used
                covValue = i == j ? covariance.getCovariance(1, 1) : covariance.getCovariance(0, 1);
            } catch (NumberIsTooSmallException e) {
                throw new IllegalArgumentException(String.format("There were not enough valid values to " + "compute covariance between columns: '%s' and '%s'.", inTable.getDataTableSpec().getColumnSpec(m_indexes[i]).getName(), inTable.getDataTableSpec().getColumnSpec(m_indexes[j]).getName()), e);
            }
            covMatrix.setEntry(i, j, covValue);
            covMatrix.setEntry(j, i, covValue);
        }
    }
    if (resultDataContainer != null) {
        exec.setProgress("Writing matrix to data table");
        final ExecutionMonitor writingProgress = exec.createSubProgress(0.2);
        final int rows = covMatrix.getRowDimension();
        for (int i = 0; i < rows; i++) {
            // each matrix row is keyed by the name of the i-th column of the result spec
            final String columnName = resultDataContainer.getTableSpec().getColumnSpec(i).getName();
            resultDataContainer.addRowToTable(new DefaultRow(RowKey.toRowKeys(columnName)[0], covMatrix.getRow(i)));
            exec.checkCanceled();
            writingProgress.setProgress((double) i / rows, "Writing row: " + columnName);
        }
    }
    return covMatrix;
}
Aggregations