Search in sources :

Example 86 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class ColumnToGrid2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Arranges the cells of the included columns into a grid: up to
 * {@code m_configuration.getColCount()} consecutive input rows are placed side by side in one
 * output row, unused slots are filled with missing cells.
 *
 * <p>If a group column is configured, the input is first reduced to the included columns plus
 * the group column and sorted by the group column. An output row then never mixes rows of
 * different group values, and the group value is stored in the last cell of the output row.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean isGrouped = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (isGrouped) {
        exec.setMessage("Sorting input table");
        final BufferedDataTable rawInput = inData[0];
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        final ColumnRearranger keepRelevant = new ColumnRearranger(rawInput.getDataTableSpec());
        // included columns followed by the group column in the last slot
        final String[] relevantCols = Arrays.copyOf(m_included, m_included.length + 1);
        relevantCols[m_included.length] = groupColumn;
        keepRelevant.keepOnly(relevantCols);
        final BufferedDataTable filteredInput =
            exec.createColumnRearrangeTable(rawInput, keepRelevant, exec.createSubProgress(0.0));
        final SortedTable sortedInput =
            new SortedTable(filteredInput, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sortedInput.getBufferedDataTable();
        // the sorting took the first half of the progress, assembling gets the rest
        mainExec = exec.createSubProgress(0.5);
    } else {
        inputTable = inData[0];
        mainExec = exec;
    }

    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);

    // positions of the included columns within the (possibly filtered) input
    final int[] includeIndices = new int[m_included.length];
    for (int col = 0; col < includeIndices.length; col++) {
        includeIndices[col] = spec.findColumnIndex(m_included[col]);
    }

    final int gridCount = m_configuration.getColCount();
    // one extra trailing cell carries the group value when grouping is enabled
    final int cellCount = includeIndices.length * gridCount + (isGrouped ? 1 : 0);
    final int groupColIndex = isGrouped ? spec.findColumnIndex(groupColumn) : -1;

    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator rows = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long processedRows = 0;
    long outRowIndex = 0;
    DataCell activeGroup = null;
    while (rows.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        if (groupColIndex >= 0) {
            // peek at the upcoming row to learn the group value of this output row
            final DataRow upcoming = rows.next();
            activeGroup = upcoming.getCell(groupColIndex);
            cells[cells.length - 1] = activeGroup;
            rows.pushBack(upcoming);
        }
        for (int grid = 0; grid < gridCount && rows.hasNext(); grid++) {
            final DataRow inRow = rows.next();
            final DataCell rowGroup = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (!ConvenienceMethods.areEqual(activeGroup, rowGroup)) {
                // a different group begins here: leave the row for the next output row
                rows.pushBack(inRow);
                break;
            }
            mainExec.setProgress(processedRows / (double) totalRows,
                "Processing row " + processedRows + "/" + totalRows + ": " + inRow.getKey());
            processedRows += 1;
            mainExec.checkCanceled();
            final int offset = grid * includeIndices.length;
            for (int i = 0; i < includeIndices.length; i++) {
                cells[offset + i] = inRow.getCell(includeIndices[i]);
            }
        }
        cont.addRowToTable(new DefaultRow(RowKey.createRowKey(outRowIndex), cells));
        outRowIndex++;
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 87 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class FileRowIterator method next.

/**
 * {@inheritDoc}
 *
 * <p>Reads the tokens of one row from the tokenizer, converts every non-skipped column into a
 * {@link DataCell} of the type given by the table spec and returns them as a {@link DefaultRow}
 * keyed by the row header.
 *
 * <p>Failures (tokenizer errors, too few or too many data elements in the line) are reported
 * through the exception created by {@code prepareForException}.
 *
 * @throws NoSuchElementException if {@code hasNext()} reports that no further row is available
 */
@Override
public DataRow next() {
    int rowLength = m_tableSpec.getNumColumns();
    int colsToRead = m_skipColumns.length;
    assert rowLength <= colsToRead;
    String token = null;
    boolean isMissingCell;
    String rowHeader;
    DataCell[] row = new DataCell[rowLength];
    // Refuse to read beyond the end of the input.
    // NOTE(review): the original (truncated) comment hints that hasNext() also skips empty
    // lines if configured to do so - confirm against hasNext().
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_frSettings.getDataFileLocation().toString() + "'.");
    }
    // counts the columns (tokens) read from the file
    int readCols = 0;
    // counts the number of columns we've created (excl. skipped columns)
    int createdCols = 0;
    // Create the row header first. This will also read it from file, if supposed to.
    try {
        rowHeader = createRowHeader(m_rowNumber - 1);
    } catch (TokenizerException fte) {
        // no row header available yet, hence the placeholder "ERR" for the error row
        throw prepareForException(fte.getMessage() + " (line: " + m_tokenizer.getLineNumber() + " source: '" + m_frSettings.getDataFileLocation() + "')", m_tokenizer.getLineNumber(), "ERR", row);
    }
    // we made sure before that there is at least one token in the stream
    assert rowHeader != null;
    // if the last token ended with the delimiter (and not a LF)
    boolean lastTokenWasDelimited = false;
    // Now, read the columns until we have enough or see a row delimiter
    while (readCols < colsToRead) {
        try {
            token = m_tokenizer.nextToken();
        } catch (TokenizerException fte) {
            throw prepareForException(fte.getMessage() + " (line: " + m_tokenizer.getLineNumber() + " (" + rowHeader + ") source: '" + m_frSettings.getDataFileLocation() + "')", m_tokenizer.getLineNumber(), rowHeader, row);
        }
        if (token != null) {
            // remember the delimiter of the last token before the EOF
            lastTokenWasDelimited = m_tokenizer.lastTokenWasDelimited();
        }
        // row delims are returned as token
        if ((token == null) || m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted())) {
            // line ended early.
            m_tokenizer.pushBack();
            // we need the row delim in the file, for after the loop
            break;
        }
        // An empty token that was not quoted is a missing value (the truncated original
        // comment mentions column delimiters - presumably two consecutive delimiters).
        if (token.equals("") && (!m_tokenizer.lastTokenWasQuoted())) {
            isMissingCell = true;
        } else if (token.equals(m_frSettings.getMissingValueOfColumn(readCols))) {
            // equals(null) if it was not specified - which is fine.
            isMissingCell = true;
        } else {
            isMissingCell = false;
        }
        if (!m_skipColumns[readCols]) {
            DataColumnSpec cSpec = m_tableSpec.getColumnSpec(createdCols);
            // now get that new cell
            // (it throws an exception at us if it couldn't)
            row[createdCols] = createNewDataCellOfType(cSpec.getType(), token, isMissingCell, m_frSettings.getFormatParameterForColumn(readCols).orElse(null), rowHeader, row);
            createdCols++;
        }
        readCols++;
    }
    // If the input ended (token == null) exactly one column short, the last column is empty
    // and becomes a missing cell (unless it is skipped) -
    // but only if the last token was actually delimited (with a swallowed delimiter - not LF)
    if (token == null && readCols == colsToRead - 1 && lastTokenWasDelimited) {
        if (!m_skipColumns[readCols]) {
            row[createdCols++] = DataType.getMissingCell();
        }
        // we consumed this last delimiter:
        lastTokenWasDelimited = false;
    }
    // Line number used in error messages. If the last token read was a line feed the number
    // is decremented (presumably because the tokenizer already counted that line break -
    // confirm).
    int lineNr = m_tokenizer.getLineNumber();
    if ((lineNr > 0) && (token != null) && (token.equals("\n"))) {
        lineNr--;
    }
    // Fewer cells than columns in the spec: fail - unless we are told to support short lines
    if (m_frSettings.getSupportShortLines()) {
        // pad the row with missing values
        while (createdCols < rowLength) {
            row[createdCols++] = DataType.getMissingCell();
        }
    } else {
        if (createdCols < rowLength) {
            FileReaderException ex = prepareForException("Too few data elements " + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
            if (m_frSettings.getColumnNumDeterminingLineNumber() >= 0) {
                ex.setDetailsMessage("The number of columns was " + "determined by the entries above line no." + m_frSettings.getColumnNumDeterminingLineNumber());
            }
            throw ex;
        }
    }
    // Read the token that follows the data of this row; it is expected to be the row delimiter.
    // NOTE(review): unlike the other nextToken() calls in this method, this one is not wrapped
    // in a try/catch for TokenizerException - confirm whether that is intended.
    token = m_tokenizer.nextToken();
    if (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted())) {
        // flag for real data tokens
        lastTokenWasDelimited = m_tokenizer.lastTokenWasDelimited();
    }
    // eat all empty tokens til the end of the row, if we're supposed to
    if (m_frSettings.ignoreEmptyTokensAtEndOfRow()) {
        lastTokenWasDelimited = false;
        while (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted()) && token.equals("") && (!m_tokenizer.lastTokenWasQuoted())) {
            try {
                token = m_tokenizer.nextToken();
            } catch (TokenizerException fte) {
                throw prepareForException(fte.getMessage() + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
            }
        }
    }
    // If the current token is not the row delimiter, or the last data token ended with a
    // delimiter that was not consumed above, there are more data items in the file than we
    // needed for one row: fail.
    if (!m_frSettings.isRowDelimiter(token, m_tokenizer.lastTokenWasQuoted()) || lastTokenWasDelimited) {
        FileReaderException ex = prepareForException("Too many data elements " + "(line: " + lineNr + " (" + rowHeader + "), source: '" + m_frSettings.getDataFileLocation() + "')", lineNr, rowHeader, row);
        if (m_frSettings.getColumnNumDeterminingLineNumber() >= 0) {
            ex.setDetailsMessage("The number of columns was " + "determined by line no." + m_frSettings.getColumnNumDeterminingLineNumber());
        }
        throw ex;
    }
    m_rowNumber++;
    // report progress
    // only if an execution context exists and if the size of the underlying
    // source can be determined; at most one report per PROGRESS_JUNK_SIZE bytes read
    double readBytes = m_source.getNumberOfBytesRead();
    if (m_exec != null && m_source.getFileSize() > 0 && readBytes / PROGRESS_JUNK_SIZE > m_lastReport) {
        // assert readBytes <= m_frSettings.getDataFileSize();
        m_exec.setProgress(readBytes / m_source.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowHeader, row);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) TokenizerException(org.knime.core.util.tokenizer.TokenizerException) DefaultRow(org.knime.core.data.def.DefaultRow) NoSuchElementException(java.util.NoSuchElementException)

Example 88 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class FileRowIterator method prepareForException.

/*
 * Builds the exception to be thrown after a read error. Before the exception is
 * created this method sets the "exception thrown" flag and closes the source
 * stream of the tokenizer, as nothing will be read from it anymore.
 *
 * The cells read so far are attached to the exception as an error row; cells
 * that were not read yet (null entries) are replaced by missing cells. The
 * passed array itself is not modified.
 */
private FileReaderException prepareForException(final String msg, final int lineNumber, final String rowHeader, final DataCell[] cellsRead) {
    // flag the failure and release the stream first - the caller is about to throw
    m_exceptionThrown = true;
    m_tokenizer.closeSourceStream();

    // copy the cells and fill the gaps in one pass
    final DataCell[] errCells = new DataCell[cellsRead.length];
    for (int i = 0; i < errCells.length; i++) {
        final DataCell cell = cellsRead[i];
        errCells[i] = (cell != null) ? cell : DataType.getMissingCell();
    }

    final DataRow errRow = new DefaultRow("ERROR_ROW (" + rowHeader.toString() + ")", errCells);
    return new FileReaderException(msg, errRow, lineNumber);
}
Also used : DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow)

Example 89 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class ListFiles method addLocationToContainer.

/**
 * Appends one row describing the given location to the output container.
 *
 * <p>The first cell holds the local file system path if the URL uses the {@code file}
 * protocol, otherwise a missing cell with an explanatory message. The second cell holds the
 * URL as string. The row ID counter is advanced afterwards.
 *
 * @param url the location to add
 * @throws UnsupportedEncodingException declared for callers; not raised directly by the
 *             statements visible here
 * @throws URISyntaxException if the URL cannot be converted into a URI
 */
private void addLocationToContainer(final URL url) throws UnsupportedEncodingException, URISyntaxException {
    final DataCell location;
    if (!"file".equalsIgnoreCase(url.getProtocol())) {
        location = new MissingCell("URL is remote and does not have a local location");
    } else {
        location = new StringCell(Paths.get(url.toURI()).toString());
    }
    final DataCell[] cells = { location, new StringCell(url.toString()) };
    m_dc.addRowToTable(new DefaultRow(RowKey.createRowKey(m_currentRowID), cells));
    m_currentRowID++;
}
Also used : StringCell(org.knime.core.data.def.StringCell) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 90 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class FixedWidthRowIterator method next.

/**
 * {@inheritDoc}
 *
 * <p>Reads one token per column of the table spec and converts it into a cell of the column's
 * type. Columns for which the tokenizer has already reached the end of the line are filled
 * with missing cells. The row is keyed by the row header read from the input or, if the
 * settings declare no row header, by a generated {@code "Row<n>"} ID.
 *
 * @throws NoSuchElementException if {@code hasNext()} reports that no further row is available
 */
@Override
public DataRow next() {
    final int rowLength = m_tableSpec.getNumColumns();
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }

    // either the first token of the line or a generated ID
    final String rowHeader = m_nodeSettings.getHasRowHeader() ? m_tokenizer.nextToken() : "Row" + m_lineNumber++;
    final DataCell[] row = new DataCell[rowLength];

    for (int col = 0; col < rowLength; col++) {
        m_dataCellFactory.setMissingValuePattern(m_missingValuePatterns[col]);
        m_dataCellFactory.setFormatParameter(m_formatParameters[col]);
        final String token = m_tokenizer.nextToken();
        if (m_tokenizer.getReachedEndOfLine()) {
            // no more characters in this line but we need more columns
            // just add missing cells
            row[col] = new MissingCell(null);
            continue;
        }
        final DataColumnSpec colSpec = m_tableSpec.getColumnSpec(col);
        final DataCell cell = m_dataCellFactory.createDataCellOfType(colSpec.getType(), token);
        if (cell == null) {
            // something went wrong during cell creation.
            // the first unset cell marks the column we were trying to read
            int errCol;
            for (errCol = 0; errCol < row.length && row[errCol] != null; errCol++) {
                // just searching
            }
            // create an error message
            final String errorMsg = m_dataCellFactory.getErrorMessage() + " In line " + m_tokenizer.getLineNumber() + " (" + rowHeader + ") at column #" + errCol + " ('" + m_tableSpec.getColumnSpec(errCol).getName() + "').";
            assert rowHeader != null;
            // flag the failure, close the stream and report the problem
            throw prepareForException(errorMsg, m_tokenizer.getLineNumber(), rowHeader, row);
        }
        row[col] = cell;
    }

    // report progress only with an execution context and a known source size
    final double readBytes = m_inputStream.getNumberOfBytesRead();
    if (m_exec != null && m_inputStream.getFileSize() > 0 && readBytes / PROGRESS_JUNK_SIZE > m_lastReport) {
        // assert readBytes <= m_frSettings.getDataFileSize();
        m_exec.setProgress(readBytes / m_inputStream.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowHeader, row);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) NoSuchElementException(java.util.NoSuchElementException)

Aggregations

DefaultRow (org.knime.core.data.def.DefaultRow)207 DataCell (org.knime.core.data.DataCell)165 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)94 DataTableSpec (org.knime.core.data.DataTableSpec)92 DataRow (org.knime.core.data.DataRow)88 RowKey (org.knime.core.data.RowKey)80 DoubleCell (org.knime.core.data.def.DoubleCell)66 StringCell (org.knime.core.data.def.StringCell)65 BufferedDataTable (org.knime.core.node.BufferedDataTable)56 IntCell (org.knime.core.data.def.IntCell)46 ArrayList (java.util.ArrayList)26 DataType (org.knime.core.data.DataType)26 DataColumnSpec (org.knime.core.data.DataColumnSpec)22 DataContainer (org.knime.core.data.container.DataContainer)21 HashSet (java.util.HashSet)18 LinkedHashMap (java.util.LinkedHashMap)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 LinkedHashSet (java.util.LinkedHashSet)14 DoubleValue (org.knime.core.data.DoubleValue)14 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)14