Search in sources :

Example 1 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

From the class VariableFileReaderNodeModel, the method execute():

/**
 * {@inheritDoc}
 *
 * Resolves the reader settings against the selected flow variable, then reads
 * the whole file into a {@link BufferedDataContainer} so that parse errors and
 * the column domains are produced during node execution (not later, when a
 * downstream node pulls rows through an iterator).
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    Map<String, FlowVariable> stack = createStack(m_frSettings.getVariableName());
    VariableFileReaderNodeSettings settings = m_frSettings.createSettingsFrom(stack);
    LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
    // check again the settings - especially file existence (under Linux
    // files could be deleted/renamed since last config-call...
    SettingsStatus status = settings.getStatusOfSettings(true, null);
    if (status.getNumOfErrors() > 0) {
        throw new InvalidSettingsException(status.getAllErrorMessages(10));
    }
    DataTableSpec tSpec = settings.createDataTableSpec();
    FileTable fTable = new FileTable(tSpec, settings, settings.getSkippedColumns(), exec);
    try {
        // create a DataContainer and fill it with the rows read. It is faster
        // then reading the file every time (for each row iterator), and it
        // collects the domain for each column for us. Also, if things fail,
        // the error message is printed during file reader execution (were it
        // belongs to) and not some time later when a node uses the row
        // iterator from the file table.
        BufferedDataContainer c = exec.createDataContainer(fTable.getDataTableSpec(), /* initDomain= */
        true);
        int row = 0;
        FileRowIterator it = fTable.iterator();
        try {
            if (it.getZipEntryName() != null) {
                // seems we are reading a ZIP archive.
                LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
            }
            while (it.hasNext()) {
                row++;
                DataRow next = it.next();
                String message = "Caching row #" + row + " (\"" + next.getKey() + "\")";
                exec.setMessage(message);
                exec.checkCanceled();
                c.addRowToTable(next);
            }
            if (it.zippedSourceHasMoreEntries()) {
                // after reading til the end of the file this returns a valid
                // result
                setWarningMessage("Source is a ZIP archive with multiple " + "entries. Only reading first entry!");
            }
        } catch (DuplicateKeyException dke) {
            // rethrow with a hint how to resolve the duplicates; keep the
            // original exception as cause so the stack trace is preserved
            String msg = dke.getMessage();
            if (msg == null) {
                msg = "Duplicate row IDs";
            }
            msg += ". Consider making IDs unique in the advanced settings.";
            DuplicateKeyException newDKE = new DuplicateKeyException(msg);
            newDKE.initCause(dke);
            throw newDKE;
        } finally {
            c.close();
        }
        // user settings allow for truncating the table
        if (it.iteratorEndedEarly()) {
            setWarningMessage("Data was truncated due to user settings.");
        }
        BufferedDataTable out = c.getTable();
        return new BufferedDataTable[] { out };
    } finally {
        // closes all sources - also when execution failed or was canceled
        // (previously the sources leaked on the exception path)
        fTable.dispose();
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) SettingsStatus(org.knime.core.util.tokenizer.SettingsStatus) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 2 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

From the class DataContainerTest, the method testDuplicateKey():

/**
 * method being tested: addRowToTable().
 */
public final void testDuplicateKey() {
    final DataTableSpec tableSpec = new DataTableSpec(new String[] { "Column 1", "Column 2" },
            new DataType[] { StringCell.TYPE, IntCell.TYPE });
    final DataContainer container = new DataContainer(tableSpec);
    // two rows with distinct keys - these must be accepted
    final DataRow firstRow = new DefaultRow(new RowKey("row 1"),
            new DataCell[] { new StringCell("Row 1, Cell 1"), new IntCell(12) });
    final DataRow secondRow = new DefaultRow(new RowKey("row 2"),
            new DataCell[] { new StringCell("Row 2, Cell 1"), new IntCell(22) });
    container.addRowToTable(firstRow);
    container.addRowToTable(secondRow);
    // adding the first row a second time must trigger the duplicate check
    try {
        container.addRowToTable(firstRow);
        container.close();
        // reaching this point means the duplicate key went undetected
        fail("Expected " + DuplicateKeyException.class + " not thrown");
    } catch (DuplicateKeyException e) {
        NodeLogger.getLogger(getClass()).debug("Got expected exception: " + e.getClass(), e);
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 3 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

From the class GroupLoopStartNodeModel, the method execute():

/**
 * {@inheritDoc}
 *
 * Loop-start execution: on the first iteration the input table is (optionally)
 * sorted by the group columns; each subsequent call emits the rows of the next
 * group. Group identifiers are fed to a {@link DuplicateChecker} to detect an
 * unsorted input (a group identifier re-appearing after its group ended).
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // /////////////////////////
    // 
    // / DATA TABLES (SORTING)
    // 
    // /////////////////////////
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    if (table.size() <= 0) {
        m_endLoop = true;
    }
    // parameters
    m_includedColIndices = getIncludedColIndices(table.getDataTableSpec());
    boolean checkDuplicates = m_sortedInputTableModel.getBooleanValue();
    // remember table and sort table if necessary
    if (m_iteration == 0) {
        assert getLoopEndNode() == null : "1st iteration but end node set";
        m_table = table;
        m_spec = m_table.getDataTableSpec();
        // sort if not already sorted
        if (!m_sortedInputTableModel.getBooleanValue()) {
            // asc
            final String[] includes = m_filterGroupColModel.applyTo(spec).getIncludes();
            boolean[] sortAsc = new boolean[includes.length];
            Arrays.fill(sortAsc, true);
            BufferedDataTableSorter tableSorter = new BufferedDataTableSorter(table, Arrays.asList(includes), sortAsc, false);
            m_sortedTable = tableSorter.sort(exec);
        } else {
            // no sort necessary
            m_sortedTable = table;
        }
        m_iterator = m_sortedTable.iterator();
    } else {
        assert getLoopEndNode() != null : "No end node set";
        assert table == m_table : "Input tables differ between iterations";
    }
    // /////////////////////////
    // 
    // / INIT
    // 
    // /////////////////////////
    BufferedDataContainer cont = exec.createDataContainer(table.getSpec());
    // create new duplicate checker if null
    if (m_duplicateChecker == null) {
        m_duplicateChecker = new DuplicateChecker();
    }
    // initialize grouping states if null
    if (m_currentGroupingState == null) {
        m_currentGroupingState = new GroupingState("", false, null);
    }
    m_lastGroupingState = m_currentGroupingState;
    // add now to new group
    if (m_lastRow != null) {
        cont.addRowToTable(m_lastRow);
    }
    // if the final row has been reached and added set end loop flag
    if (m_isFinalRow) {
        m_endLoop = true;
    }
    // walk trough input table and group data
    // as long as new row fits into the current group or there are no more
    // rows left.
    boolean groupEnd = false;
    while (!groupEnd && m_iterator.hasNext()) {
        DataRow row = m_iterator.next();
        // get grouping state according to new row
        m_currentGroupingState = getGroupingState(row);
        groupEnd = m_currentGroupingState.isGroupEnd();
        // to duplicate checker.
        if (m_lastRow == null) {
            m_lastGroupingState = m_currentGroupingState;
            if (checkDuplicates) {
                m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
            }
        }
        m_lastRow = row;
        // if group end has not been reached add row
        if (!groupEnd) {
            cont.addRowToTable(row);
            m_lastGroupingState = m_currentGroupingState;
        // if group end has been reached add identifier of new group to
        // duplicate checker
        } else {
            if (checkDuplicates) {
                try {
                    m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
                } catch (DuplicateKeyException e) {
                    // chain the original exception so the stack trace is kept
                    DuplicateKeyException dke = new DuplicateKeyException("Input table was " + "not sorted, found duplicate (group identifier:" + m_currentGroupingState.getGroupIdentifier() + ")");
                    dke.initCause(e);
                    throw dke;
                }
            }
        }
        // which row will be added.
        if (!m_iterator.hasNext() && !m_isFinalRow) {
            m_isFinalRow = true;
            // thus end loop
            if (!groupEnd) {
                m_endLoop = true;
            }
        }
    }
    cont.close();
    if (m_endLoop) {
        // check for duplicates and throw exception if duplicate exist
        try {
            m_duplicateChecker.checkForDuplicates();
        } catch (DuplicateKeyException e) {
            // chain the original exception so the stack trace is kept
            DuplicateKeyException dke = new DuplicateKeyException("Input table was not sorted, found duplicate group identifier " + e.getKey());
            dke.initCause(e);
            throw dke;
        } finally {
            m_duplicateChecker.clear();
            m_duplicateChecker = null;
        }
    }
    // push variables
    pushFlowVariableInt("currentIteration", m_iteration);
    pushGroupColumnValuesAsFlowVariables(m_lastGroupingState);
    pushFlowVariableString("groupIdentifier", m_lastGroupingState.getGroupIdentifier());
    m_iteration++;
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter) DuplicateChecker(org.knime.core.util.DuplicateChecker) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Example 4 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

From the class FileReaderNodeModel, the method createStreamableOperator():

/**
 * Creates the streamable variant of the file reader: rows are pushed to the
 * output port as they are parsed instead of being cached in a container.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            assert inputs.length == 0;
            LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
            // check again the settings - especially file existence (under Linux
            // files could be deleted/renamed since last config-call...
            SettingsStatus status = m_frSettings.getStatusOfSettings(true, null);
            if (status.getNumOfErrors() > 0) {
                throw new InvalidSettingsException(status.getAllErrorMessages(10));
            }
            DataTableSpec tSpec = m_frSettings.createDataTableSpec();
            FileTable fTable = new FileTable(tSpec, m_frSettings, m_frSettings.getSkippedColumns(), exec);
            try {
                // data output port
                RowOutput rowOutput = (RowOutput) outputs[0];
                int row = 0;
                FileRowIterator it = fTable.iterator();
                try {
                    if (it.getZipEntryName() != null) {
                        // seems we are reading a ZIP archive.
                        LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
                    }
                    while (it.hasNext()) {
                        row++;
                        DataRow next = it.next();
                        final int finalRow = row;
                        exec.setMessage(() -> "Reading row #" + finalRow + " (\"" + next.getKey() + "\")");
                        exec.checkCanceled();
                        rowOutput.push(next);
                    }
                    rowOutput.close();
                    if (it.zippedSourceHasMoreEntries()) {
                        // after reading til the end of the file this returns a valid
                        // result
                        setWarningMessage("Source is a ZIP archive with multiple " + "entries. Only reading first entry!");
                    }
                } catch (DuplicateKeyException dke) {
                    // rethrow with a hint how to resolve the duplicates; keep the
                    // original exception as cause so the stack trace is preserved
                    String msg = dke.getMessage();
                    if (msg == null) {
                        msg = "Duplicate row IDs";
                    }
                    msg += ". Consider making IDs unique in the advanced settings.";
                    DuplicateKeyException newDKE = new DuplicateKeyException(msg);
                    newDKE.initCause(dke);
                    throw newDKE;
                }
                // user settings allow for truncating the table
                if (it.iteratorEndedEarly()) {
                    setWarningMessage("Data was truncated due to user settings.");
                }
            } finally {
                // closes all sources - also when execution failed or was
                // canceled (previously the sources leaked on the exception path)
                fTable.dispose();
            }
        }
    };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataTableRowOutput(org.knime.core.node.streamable.BufferedDataTableRowOutput) RowOutput(org.knime.core.node.streamable.RowOutput) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) SettingsStatus(org.knime.core.util.tokenizer.SettingsStatus) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Example 5 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

From the class DataContainer, the method close():

/**
 * Closes container and creates table that can be accessed by <code>getTable()</code>. Successive calls of
 * <code>addRowToTable</code> will fail with an exception.
 *
 * @throws IllegalStateException If container is not open.
 * @throws DuplicateKeyException If the final check for duplicate row keys fails.
 * @throws DataContainerException If the duplicate check fails for an unknown IO problem
 */
public void close() {
    // closing twice is a no-op
    if (isClosed()) {
        return;
    }
    // no row was ever added: create the (empty) buffer lazily now
    if (m_buffer == null) {
        m_buffer = m_bufferCreator.createBuffer(m_spec, m_maxRowsInMemory, createInternalBufferID(), getGlobalTableRepository(), getLocalTableRepository(), getFileStoreHandler());
    }
    if (!m_isSynchronousWrite) {
        try {
            offerToAsynchronousQueue(CONTAINER_CLOSE);
            m_asyncAddFuture.get();
            checkAsyncWriteThrowable();
        } catch (InterruptedException e) {
            throw new DataContainerException("Adding rows to table was interrupted", e);
        } catch (ExecutionException e) {
            throw new DataContainerException("Adding rows to table threw exception", e);
        }
    }
    // create table spec _after_ all_ rows have been added (i.e. wait for
    // asynchronous write thread to finish)
    DataTableSpec finalSpec = m_domainCreator.createSpec();
    m_buffer.close(finalSpec);
    try {
        m_duplicateChecker.checkForDuplicates();
    } catch (IOException ioe) {
        throw new DataContainerException("Failed to check for duplicate row IDs", ioe);
    } catch (DuplicateKeyException dke) {
        // chain the original exception so the stack trace is kept
        String key = dke.getKey();
        DuplicateKeyException newDKE = new DuplicateKeyException("Found duplicate row ID \"" + key + "\" (at unknown position)", key);
        newDKE.initCause(dke);
        throw newDKE;
    }
    m_table = new ContainerTable(m_buffer);
    getLocalTableRepository().put(m_table.getBufferID(), m_table);
    // release per-container state; the container is now read-only
    m_buffer = null;
    m_spec = null;
    m_duplicateChecker.clear();
    m_duplicateChecker = null;
    m_domainCreator = null;
    m_size = -1;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) IOException(java.io.IOException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Aggregations

DuplicateKeyException (org.knime.core.util.DuplicateKeyException)7 DataRow (org.knime.core.data.DataRow)6 DataTableSpec (org.knime.core.data.DataTableSpec)6 RowKey (org.knime.core.data.RowKey)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3 BufferedDataTable (org.knime.core.node.BufferedDataTable)3 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)3 IOException (java.io.IOException)2 DataCell (org.knime.core.data.DataCell)2 DefaultRow (org.knime.core.data.def.DefaultRow)2 DuplicateChecker (org.knime.core.util.DuplicateChecker)2 SettingsStatus (org.knime.core.util.tokenizer.SettingsStatus)2 Comparator (java.util.Comparator)1 ExecutionException (java.util.concurrent.ExecutionException)1 SortedTable (org.knime.base.data.sort.SortedTable)1 DataType (org.knime.core.data.DataType)1 IntCell (org.knime.core.data.def.IntCell)1 StringCell (org.knime.core.data.def.StringCell)1 BufferedDataTableSorter (org.knime.core.data.sort.BufferedDataTableSorter)1 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)1