Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
The class ColumnToGrid2NodeModel, method execute().
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    String groupColumn = m_configuration.getGroupColumn();
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (groupColumn != null) {
        exec.setMessage("Sorting input table");
        BufferedDataTable in = inData[0];
        ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        ColumnRearranger sortFilterRearranger = new ColumnRearranger(in.getDataTableSpec());
        String[] relevantCols = new String[m_included.length + 1];
        System.arraycopy(m_included, 0, relevantCols, 0, m_included.length);
        relevantCols[relevantCols.length - 1] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        BufferedDataTable toBeSortedTable = exec.createColumnRearrangeTable(in, sortFilterRearranger, exec.createSubProgress(0.0));
        SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        inputTable = inData[0];
        mainExec = exec;
    }
    exec.setMessage("Assembling output");
    DataTableSpec spec = inputTable.getDataTableSpec();
    DataTableSpec outSpec = createOutputSpec(spec);
    BufferedDataContainer cont = exec.createDataContainer(outSpec);
    int[] includeIndices = new int[m_included.length];
    for (int i = 0; i < m_included.length; i++) {
        int index = spec.findColumnIndex(m_included[i]);
        includeIndices[i] = index;
    }
    int gridCount = m_configuration.getColCount();
    final int cellCount;
    final int groupColIndex;
    if (groupColumn != null) {
        cellCount = includeIndices.length * gridCount + 1;
        groupColIndex = spec.findColumnIndex(groupColumn);
    } else {
        cellCount = includeIndices.length * gridCount;
        groupColIndex = -1;
    }
    final DataCell[] cells = new DataCell[cellCount];
    PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    long currentRow = 0;
    long totalRows = inputTable.size();
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        // assign group column (if enabled)
        if (groupColIndex >= 0) {
            DataRow row = it.next();
            curGroupValue = row.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(row);
        }
        for (int grid = 0; grid < gridCount; grid++) {
            if (!it.hasNext()) {
                break;
            }
            DataRow inRow = it.next();
            DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
                currentRow += 1;
                mainExec.checkCanceled();
                for (int i = 0; i < includeIndices.length; i++) {
                    cells[grid * includeIndices.length + i] = inRow.getCell(includeIndices[i]);
                }
            } else {
                // start new group, i.e. new row
                it.pushBack(inRow);
                break;
            }
        }
        RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
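The DefaultRow usage here is the grid-assembly loop: a fixed-size DataCell[] is pre-filled with missing cells, populated from as many consecutive input rows as fit into one grid row, and then wrapped in a DefaultRow keyed by RowKey.createRowKey(). Below is a minimal sketch of that pattern, reusing exec and inputTable from the method above; the two-column spec and the "Value (1)"/"Value (2)" column names are assumptions for illustration, not the spec produced by createOutputSpec().

// Sketch only: a hypothetical two-column grid built from the first input column.
DataType type = inputTable.getDataTableSpec().getColumnSpec(0).getType();
DataTableSpec gridSpec = new DataTableSpec(
        new DataColumnSpecCreator("Value (1)", type).createSpec(),
        new DataColumnSpecCreator("Value (2)", type).createSpec());
BufferedDataContainer cont = exec.createDataContainer(gridSpec);
DataCell[] cells = new DataCell[2];
long outRow = 0;
for (RowIterator it = inputTable.iterator(); it.hasNext();) {
    // unfilled grid positions stay missing cells
    Arrays.fill(cells, DataType.getMissingCell());
    for (int grid = 0; grid < cells.length && it.hasNext(); grid++) {
        cells[grid] = it.next().getCell(0);
    }
    // RowKey.createRowKey(0L), (1L), ... produce the usual "Row0", "Row1", ... keys
    cont.addRowToTable(new DefaultRow(RowKey.createRowKey(outRow++), cells));
}
cont.close();
BufferedDataTable gridTable = cont.getTable();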
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
The class FileRowIterator, method next().
/**
* {@inheritDoc}
*/
@Override
public DataRow next() {
    int rowLength = m_tableSpec.getNumColumns();
    int colsToRead = m_skipColumns.length;
    assert rowLength <= colsToRead;
    String token = null;
    boolean isMissingCell;
    String rowHeader;
    DataCell[] row = new DataCell[rowLength];
    // make sure there is another row; hasNext() reads ahead and may skip
    // empty lines (if we are supposed to).
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_frSettings.getDataFileLocation().toString() + "'.");
    }
    // counts the columns (tokens) read from the file
    int readCols = 0;
    // counts the number of columns we've created (excl. skipped columns)
    int createdCols = 0;
    // create the row header. This will also read it from the file, if supposed to.
    try {
        rowHeader = createRowHeader(m_rowNumber - 1);
    } catch (TokenizerException fte) {
        throw prepareForException(fte.getMessage() + " (line: " + m_tokenizer.getLineNumber() + " source: '" + m_frSettings.getDataFileLocation() + "')", m_tokenizer.getLineNumber(), "ERR", row);
    }
    // we made sure before that there is at least one token in the stream
    assert rowHeader != null;
    // true if the last token ended with the delimiter (and not a LF)
    boolean lastTokenWasDelimited = false;
    // Now, read the columns until we have enough or see a row delimiter
    while (readCols < colsToRead) {
        try {
            token = m_tokenizer.nextToken();
        } catch (TokenizerException fte) {
            throw prepareForException(fte.getMessage() + " (line: " + m_tokenizer.getLineNumber() + " (" + rowHeader + ") source: '" + m_frSettings.getDataFileLocation() + "')", m_tokenizer.getLineNumber(), rowHeader, row);
        }
        if (token != null) {
            // remember the delimiter of the last token before the EOF
            lastTokenWasDelimited = m_tokenizer.lastTokenWasDelimited();
        }
        // row delimiters are returned as tokens
        if ((token == null) || m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted())) {
            // line ended early.
            m_tokenizer.pushBack();
            // we need the row delimiter in the stream for the check after the loop
            break;
        }
        // an empty, unquoted token is a missing cell (e.g. two consecutive
        // column delimiters).
        if (token.equals("") && (!m_tokenizer.lastTokenWasQuoted())) {
            isMissingCell = true;
        } else if (token.equals(m_frSettings.getMissingValueOfColumn(readCols))) {
            // the column's missing value pattern equals(null) if it was not
            // specified - which is fine.
            isMissingCell = true;
        } else {
            isMissingCell = false;
        }
        if (!m_skipColumns[readCols]) {
            DataColumnSpec cSpec = m_tableSpec.getColumnSpec(createdCols);
            // now get that new cell
            // (it throws an exception at us if it couldn't)
            row[createdCols] = createNewDataCellOfType(cSpec.getType(), token, isMissingCell, m_frSettings.getFormatParameterForColumn(readCols).orElse(null), rowHeader, row);
            createdCols++;
        }
        readCols++;
    }
    // if the stream ended one column short, add a missing cell for the swallowed last token -
    // but only if the last token was actually delimited (with a swallowed delimiter - not LF)
    if (token == null && readCols == colsToRead - 1 && lastTokenWasDelimited) {
        if (!m_skipColumns[readCols]) {
            row[createdCols++] = DataType.getMissingCell();
        }
        // we consumed this last delimiter:
        lastTokenWasDelimited = false;
    }
    int lineNr = m_tokenizer.getLineNumber();
    if ((lineNr > 0) && (token != null) && (token.equals("\n"))) {
        lineNr--;
    }
    // if we didn't get enough data elements for this row: puke and die - unless we are told otherwise
    if (m_frSettings.getSupportShortLines()) {
        // pad the row with missing values
        while (createdCols < rowLength) {
            row[createdCols++] = DataType.getMissingCell();
        }
    } else {
        if (createdCols < rowLength) {
            FileReaderException ex = prepareForException("Too few data elements " + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
            if (m_frSettings.getColumnNumDeterminingLineNumber() >= 0) {
                ex.setDetailsMessage("The number of columns was " + "determined by the entries above line no." + m_frSettings.getColumnNumDeterminingLineNumber());
            }
            throw ex;
        }
    }
    token = m_tokenizer.nextToken();
    if (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted())) {
        // the delimited flag is only tracked for real data tokens
        lastTokenWasDelimited = m_tokenizer.lastTokenWasDelimited();
    }
    // eat all empty tokens til the end of the row, if we're supposed to
    if (m_frSettings.ignoreEmptyTokensAtEndOfRow()) {
        lastTokenWasDelimited = false;
        while (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted()) && token.equals("") && (!m_tokenizer.lastTokenWasQuoted())) {
            try {
                token = m_tokenizer.nextToken();
            } catch (TokenizerException fte) {
                throw prepareForException(fte.getMessage() + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
            }
        }
    }
    // if there are more data items in the file than we needed for one row: barf and die.
    if (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted()) || lastTokenWasDelimited) {
        FileReaderException ex = prepareForException("Too many data elements " + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
        if (m_frSettings.getColumnNumDeterminingLineNumber() >= 0) {
            ex.setDetailsMessage("The number of columns was " + "determined by line no." + m_frSettings.getColumnNumDeterminingLineNumber());
        }
        throw ex;
    }
    m_rowNumber++;
    // report progress, but only if an execution context exists and if the
    // underlying URL is a file whose size can be determined
    double readBytes = m_source.getNumberOfBytesRead();
    if (m_exec != null && m_source.getFileSize() > 0 && readBytes / PROGRESS_JUNK_SIZE > m_lastReport) {
        // assert readBytes <= m_frSettings.getDataFileSize();
        m_exec.setProgress(readBytes / m_source.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowHeader, row);
}
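The returned row is keyed by the String from createRowHeader(); DefaultRow offers both a String and a RowKey constructor, so either works. A small sketch with made-up header and cell values (illustrative only, not taken from the reader):

// Hypothetical cells; in the iterator they come from the tokenizer and cell factory.
String rowHeader = "Row0";
DataCell[] cells = new DataCell[]{
        new StringCell("abc"), new DoubleCell(1.5), DataType.getMissingCell()};
DataRow viaString = new DefaultRow(rowHeader, cells);             // String row ID
DataRow viaRowKey = new DefaultRow(new RowKey(rowHeader), cells); // equivalent RowKey variant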
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
The class FileRowIterator, method prepareForException().
/*
 * Creates the exception object (storing the last read items in the row of
 * the exception), sets the global "exception thrown" flag, and closes the
 * input stream.
 */
private FileReaderException prepareForException(final String msg, final int lineNumber, final String rowHeader, final DataCell[] cellsRead) {
    /*
     * indicate we have thrown (actually will throw...) an exception, and
     * close the stream as we will not read anymore from the stream after
     * the exception.
     */
    m_exceptionThrown = true;
    m_tokenizer.closeSourceStream();
    DataCell[] errCells = new DataCell[cellsRead.length];
    System.arraycopy(cellsRead, 0, errCells, 0, errCells.length);
    for (int c = 0; c < errCells.length; c++) {
        if (errCells[c] == null) {
            errCells[c] = DataType.getMissingCell();
        }
    }
    String errRowHeader = "ERROR_ROW (" + rowHeader.toString() + ")";
    DataRow errRow = new DefaultRow(errRowHeader, errCells);
    return new FileReaderException(msg, errRow, lineNumber);
}
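The null check before building the error row is needed because a DataRow must not contain null cells; positions that were never read have to be filled with missing cells first. A compact sketch of that padding (the three-cell array and the "Row7" header are made-up examples):

// Hypothetical partially read row: the last cell was never created.
DataCell[] cellsRead = new DataCell[]{new StringCell("a"), new IntCell(1), null};
DataCell[] padded = new DataCell[cellsRead.length];
for (int c = 0; c < padded.length; c++) {
    // replace unread (null) positions with the missing cell
    padded[c] = (cellsRead[c] == null) ? DataType.getMissingCell() : cellsRead[c];
}
DataRow errRow = new DefaultRow("ERROR_ROW (Row7)", padded);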
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
The class ListFiles, method addLocationToContainer().
/**
 * Adds a file location (given as a URL) to the table.
 *
 * @param url the URL to add
 */
private void addLocationToContainer(final URL url) throws UnsupportedEncodingException, URISyntaxException {
    DataCell[] row = new DataCell[2];
    if ("file".equalsIgnoreCase(url.getProtocol())) {
        row[0] = new StringCell(Paths.get(url.toURI()).toString());
    } else {
        row[0] = new MissingCell("URL is remote and does not have a local location");
    }
    row[1] = new StringCell(url.toString());
    m_dc.addRowToTable(new DefaultRow(RowKey.createRowKey(m_currentRowID), row));
    m_currentRowID++;
}
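For context, here is a sketch of how the two-column container filled by this method could be set up and what the resulting DefaultRows look like; the "Location" and "URL" column names are assumptions, not taken from the node:

// Sketch only: a container (m_dc in the method above) with an assumed two-column spec.
DataTableSpec spec = new DataTableSpec(
        new DataColumnSpecCreator("Location", StringCell.TYPE).createSpec(),
        new DataColumnSpecCreator("URL", StringCell.TYPE).createSpec());
BufferedDataContainer dc = exec.createDataContainer(spec);
// local URL: path cell plus URL string
dc.addRowToTable(new DefaultRow(RowKey.createRowKey(0L),
        new StringCell("/tmp/data.csv"), new StringCell("file:/tmp/data.csv")));
// remote URL: missing cell (with a reason) plus URL string
dc.addRowToTable(new DefaultRow(RowKey.createRowKey(1L),
        new MissingCell("URL is remote and does not have a local location"),
        new StringCell("https://example.com/data.csv")));
dc.close();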
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
The class FixedWidthRowIterator, method next().
/**
* {@inheritDoc}
*/
@Override
public DataRow next() {
    int rowLength = m_tableSpec.getNumColumns();
    int createdCols = 0;
    String token = null;
    String rowHeader;
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }
    DataCell[] row;
    if (!m_nodeSettings.getHasRowHeader()) {
        rowHeader = "Row" + m_lineNumber++;
        row = new DataCell[rowLength];
    } else {
        rowHeader = m_tokenizer.nextToken();
        row = new DataCell[rowLength];
    }
    DataColumnSpec cSpec = null;
    while (createdCols < rowLength) {
        m_dataCellFactory.setMissingValuePattern(m_missingValuePatterns[createdCols]);
        m_dataCellFactory.setFormatParameter(m_formatParameters[createdCols]);
        token = m_tokenizer.nextToken();
        if (!m_tokenizer.getReachedEndOfLine()) {
            cSpec = m_tableSpec.getColumnSpec(createdCols);
            DataCell result = m_dataCellFactory.createDataCellOfType(cSpec.getType(), token);
            if (result != null) {
                row[createdCols] = result;
            } else {
                // something went wrong during cell creation.
                // figure out which column we were trying to read
                int errCol = 0;
                while (errCol < row.length && row[errCol] != null) {
                    errCol++;
                }
                // create an error message
                String errorMsg = m_dataCellFactory.getErrorMessage();
                errorMsg += " In line " + m_tokenizer.getLineNumber() + " (" + rowHeader + ") at column #" + errCol + " ('" + m_tableSpec.getColumnSpec(errCol).getName() + "').";
                assert rowHeader != null;
                // create the exception (telling it what went wrong) and close the stream
                throw prepareForException(errorMsg, m_tokenizer.getLineNumber(), rowHeader, row);
            }
        } else {
            // no more characters in this line but we need more columns:
            // just add missing cells
            row[createdCols] = new MissingCell(null);
        }
        createdCols++;
    }
    double readBytes = m_inputStream.getNumberOfBytesRead();
    if (m_exec != null && m_inputStream.getFileSize() > 0 && readBytes / PROGRESS_JUNK_SIZE > m_lastReport) {
        // assert readBytes <= m_frSettings.getDataFileSize();
        m_exec.setProgress(readBytes / m_inputStream.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowHeader, row);
}
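Note the two flavors of missing cells used by these iterators: DataType.getMissingCell() returns the shared missing cell with no explanation, while new MissingCell(reason) carries a reason string (null is allowed, as above). Both end up as missing values in the resulting DefaultRow; a tiny sketch with made-up values:

DataCell plain = DataType.getMissingCell();                          // shared, no reason attached
DataCell explained = new MissingCell("line ended before column 3");  // hypothetical reason text
DataRow row = new DefaultRow("Row42", new StringCell("abc"), plain, explained);
// both missing cells report isMissing() == true when read back from the row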