Search in sources :

Example 1 with Quote

use of org.knime.core.util.tokenizer.Quote in project knime-core by knime.

the class FileAnalyzerTest method testQuote.

/**
 * Makes sure double quotes and single quotes are only reported as quote
 * characters by the {@code FileAnalyzer} when they appear in even numbers.
 * Four small comma separated files are analyzed:
 * <ol>
 * <li>both quote characters are used properly - both must be reported,</li>
 * <li>the tick shows up an odd number of times - only the double quote must
 * be reported,</li>
 * <li>both characters show up an odd number of times - no quote must be
 * reported,</li>
 * <li>a quoted value contains an escaped double quote - the double quote
 * must still be reported.</li>
 * </ol>
 * All assertions follow the JUnit {@code (expected, actual)} argument order
 * so that failure messages name the values correctly.
 */
public void testQuote() {
    URL url;
    FileReaderNodeSettings settings;
    FileReaderNodeSettings analSettings;
    try {
        /*
         * nice quoting
         */
        url = initTempFile("\"col1\",'col2',col3,col4\n" + "\"foo\",poo,\"moo\",zoo\n" + "oof,'oo p',oom,' ooz '");
        settings = new FileReaderNodeSettings();
        settings.setDataFileLocationAndUpdateTableName(url);
        analSettings = FileAnalyzer.analyze(settings, null);
        assertTrue(analSettings.getFileHasColumnHeaders());
        assertEquals(4, analSettings.getNumberOfColumns());
        Vector<Quote> quotes = analSettings.getAllQuotes();
        // we support '"' and '
        assertEquals(2, quotes.size());
        assertEquals("\"", quotes.get(0).getLeft());
        assertEquals("\"", quotes.get(0).getRight());
        assertTrue(quotes.get(0).hasEscapeChar());
        assertEquals('\\', quotes.get(0).getEscape());
        assertEquals("'", quotes.get(1).getLeft());
        assertEquals("'", quotes.get(1).getRight());
        assertTrue(quotes.get(1).hasEscapeChar());
        assertEquals('\\', quotes.get(1).getEscape());
        /*
         * the tick (') is part of the data - don't consider it a quote (it
         * must show up an odd number of times...)
         */
        url = initTempFile("\"col1\",col2,col3,col4\n" + "\"foo\",poo,\"moo\",zoo\n" + "oo'f,o'op,o'om,ooz");
        settings = new FileReaderNodeSettings();
        settings.setDataFileLocationAndUpdateTableName(url);
        analSettings = FileAnalyzer.analyze(settings, null);
        assertTrue(analSettings.getFileHasColumnHeaders());
        assertEquals(4, analSettings.getNumberOfColumns());
        quotes = analSettings.getAllQuotes();
        // we support '"' still
        assertEquals(1, quotes.size());
        assertEquals("\"", quotes.get(0).getLeft());
        assertEquals("\"", quotes.get(0).getRight());
        assertTrue(quotes.get(0).hasEscapeChar());
        assertEquals('\\', quotes.get(0).getEscape());
        /*
         * there is also a single double quote in the data
         */
        url = initTempFile("\"col1,col2,col3,col4\n" + "fo\"o,poo,moo,zoo\n" + "oo'f,o'op,o'om,ooz");
        settings = new FileReaderNodeSettings();
        settings.setDataFileLocationAndUpdateTableName(url);
        analSettings = FileAnalyzer.analyze(settings, null);
        assertEquals(4, analSettings.getNumberOfColumns());
        assertEquals(0, analSettings.getAllQuotes().size());
        /*
         * don't stumble over escaped quotes
         */
        url = initTempFile("col1,col2,col3,col4\n" + "\"foo\",\"po\\\"o\",moo,zoo\n" + "oo'f,o'op,o'om,ooz");
        settings = new FileReaderNodeSettings();
        settings.setDataFileLocationAndUpdateTableName(url);
        analSettings = FileAnalyzer.analyze(settings, null);
        assertEquals(4, analSettings.getNumberOfColumns());
        // we must support the double quotes with the escape char
        quotes = analSettings.getAllQuotes();
        assertEquals(1, quotes.size());
        assertEquals("\"", quotes.get(0).getLeft());
        assertEquals("\"", quotes.get(0).getRight());
        assertTrue(quotes.get(0).hasEscapeChar());
        assertEquals('\\', quotes.get(0).getEscape());
    } catch (IOException ioe) {
        // if this goes off the temp file couldn't be created (or read).
        // Fail with the cause in the message instead of a bare
        // assertTrue(false), which would hide what went wrong.
        assertTrue("I/O error while preparing or analyzing the test file: " + ioe.getMessage(), false);
    }
}
Also used : Quote(org.knime.core.util.tokenizer.Quote) IOException(java.io.IOException) URL(java.net.URL)

Example 2 with Quote

use of org.knime.core.util.tokenizer.Quote in project knime-core by knime.

the class QuotePanel method overrideSettings.

/**
 * Replaces the quotes stored in the passed settings object with the quotes
 * currently listed in the panel: all existing quotes are removed, then one
 * quote pattern is added per list entry (the same pattern is used as left
 * and right quote). Afterwards the quotes are flagged as user set and the
 * state of the "allow line feed" checkbox is transferred.
 *
 * @param settings the settings object whose quotes get replaced by the
 *            quotes currently defined in the panel
 * @return true if the number of quotes changed or if at least one of the
 *         new quotes was not contained in the old ones; the line feed
 *         setting is not part of this comparison.
 */
boolean overrideSettings(final FileReaderNodeSettings settings) {
    // remember what was set before, it is compared to the new quotes below
    final Vector<Quote> previousQuotes = settings.getAllQuotes();
    settings.removeAllQuotes();
    for (int idx = 0; idx < m_currQuotes.getModel().getSize(); idx++) {
        final String listEntry = (String) m_currQuotes.getModel().getElementAt(idx);
        final String pattern = getQuotePattern(listEntry);
        final int escape = getEscCharacter(listEntry);
        if (escape == -1) {
            // -1 is handled as "entry has no escape character"
            settings.addQuotePattern(pattern, pattern);
        } else {
            settings.addQuotePattern(pattern, pattern, (char) escape);
        }
    }
    // from now on the quotes count as specified by the user
    settings.setQuoteUserSet(true);
    // take over the line feed support from the checkbox
    settings.allowLFinQuotes(m_allowLFCheckbox.isSelected());
    // a re-analysis of the file is needed if the quotes are different now:
    // either their number changed or a new quote is unknown to the old set
    final Vector<Quote> currentQuotes = settings.getAllQuotes();
    return currentQuotes.size() != previousQuotes.size() || !previousQuotes.containsAll(currentQuotes);
}
Also used : Quote(org.knime.core.util.tokenizer.Quote)

Example 3 with Quote

use of org.knime.core.util.tokenizer.Quote in project knime-core by knime.

the class FileAnalyzer method analyze.

/**
 * Tries to guess FileReader settings for the passed data file. It will use the settings in the settings object (if
 * any - but the file location is required), and will read in the first lines from the file. It will first detect
 * comment characters (if the first lines start with '#' or '%'), and then guess the delimiter (',', ';', or space)
 * depending on which cuts a line into (more than one) tokens.
 * <p>
 * The analysis works on a fresh settings object and runs in a fixed order: plain values are copied from the user
 * settings, then character set, comments, quotes, whitespace characters, delimiters (together with the number of
 * columns), row headers, column properties, the row header prefix and the handling of empty lines are determined.
 * For each of these, a value flagged as "user set" in the passed settings is taken over instead of being guessed.
 *
 * @param userSettings containing the URL of the file to examine and settings that should be used and considered
 *            fixed.
 * @param exec used to check for cancellations and to report progress. Could be null. If a
 *            {@link FileReaderExecutionMonitor} is provided it is distinguished between user cancellations cutting
 *            the analysis short, and interrupts that return immediately and return null as result.
 * @return settings that supposedly provide more or less useful results. They may not contain any useful settings if
 *         guessing was just too hard. The result is <code>null</code> if an {@link InterruptedExecutionException}
 *         was caught while analyzing.
 * @throws IOException if there was an error reading from the URL
 * @throws IllegalArgumentException if the passed settings contain no data file location
 */
public static FileReaderNodeSettings analyze(final FileReaderNodeSettings userSettings, final ExecutionMonitor exec) throws IOException {
    // without a file location there is nothing to analyze
    if (userSettings.getDataFileLocation() == null) {
        throw new IllegalArgumentException("Must specify a valid file location for the file analyzer");
    }
    ExecutionMonitor execMon = exec;
    if (execMon == null) {
        // we create a default exec monitor. Doesn't hurt, because that
        // will never be canceled.
        execMon = new FileReaderExecutionMonitor();
    }
    // create the new and empty settings
    FileReaderNodeSettings result = new FileReaderNodeSettings();
    execMon.setProgress(0.0);
    try {
        // values that are copied from the user settings without any guessing
        result.setDataFileLocationAndUpdateTableName(userSettings.getDataFileLocation());
        result.setDecimalSeparator(userSettings.getDecimalSeparator());
        result.setThousandsSeparator(userSettings.getThousandsSeparator());
        result.setDecimalSeparatorUserSet(userSettings.decimalSeparatorUserSet());
        result.setUniquifyRowIDs(userSettings.uniquifyRowIDs());
        result.setMaximumNumberOfRowsToRead(userSettings.getMaximumNumberOfRowsToRead());
        result.setSkipFirstLines(userSettings.getSkipFirstLines());
        result.allowLFinQuotes(userSettings.allowLFinQuotes());
        // the charset name is set again below, depending on whether the user specified it
        result.setCharsetName(userSettings.getCharsetName());
        // NOTE(review): presumably reset by the helpers below if not all rows were looked at - confirm
        result.setAnalyzeUsedAllRows(true);
        result.setMissValuePatternStrCols(userSettings.getMissValuePatternStrCols());
        result.setConnectTimeout(userSettings.getConnectTimeout());
        // if the user didn't provide the charset, identify it by looking at the first bytes of the stream
        if (!userSettings.isCharsetUserSet()) {
            result.setCharsetName(guessCharSet(userSettings));
            result.setCharsetUserSet(false);
        } else {
            result.setCharsetName(userSettings.getCharsetName());
            result.setCharsetUserSet(true);
        }
        // step: comments
        ExecutionMonitor subExec = execMon.createSubProgress(COMMENT_SUB);
        if (!userSettings.isCommentUserSet()) {
            // only guess comment patterns if user didn't provide any
            addComments(result, subExec);
            result.setCommentUserSet(false);
        } else {
            // take over user settings.
            for (Comment comment : userSettings.getAllComments()) {
                result.addBlockCommentPattern(comment.getBegin(), comment.getEnd(), comment.returnAsSeparateToken(), comment.includeInToken());
            }
            result.setCommentUserSet(true);
        }
        // the comment step is done
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // step: quotes
        subExec = execMon.createSubProgress(QUOTES_SUB);
        if (!userSettings.isQuoteUserSet()) {
            // only guess quotes if user didn't specify any
            addQuotes(result, subExec);
            result.setQuoteUserSet(false);
        } else {
            // take over user settings.
            for (Quote quote : userSettings.getAllQuotes()) {
                // keep the escape character only for quotes that have one
                if (quote.hasEscapeChar()) {
                    result.addQuotePattern(quote.getLeft(), quote.getRight(), quote.getEscape());
                } else {
                    result.addQuotePattern(quote.getLeft(), quote.getRight());
                }
            }
            result.setQuoteUserSet(true);
        }
        // the quote step is done
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // if user provided whitespace characters, we need to add them.
        if (userSettings.isWhiteSpaceUserSet()) {
            for (String ws : userSettings.getAllWhiteSpaces()) {
                result.addWhiteSpaceCharacter(ws);
            }
            result.setWhiteSpaceUserSet(true);
        } else {
            // defaults: space and tab
            result.addWhiteSpaceCharacter(" ");
            result.addWhiteSpaceCharacter("\t");
            result.setWhiteSpaceUserSet(false);
        }
        // no sub progress was created for the whitespace step: subExec is
        // still the one of the quote step, which is completed already
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // for now we just take over this flag:
        result.setSupportShortLines(userSettings.getSupportShortLines());
        // sets delimiter and column numbers (as many columns as it gets
        // with the delimiters - regardless of any row headers);
        // honors user settings
        subExec = execMon.createSubProgress(DELIMS_SUB);
        setDelimitersAndColNum(userSettings, result, subExec);
        // only checked if assertions are enabled
        assert result.getNumberOfColumns() > 0;
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // the number of column set as of now does not take into account the
        // skipped columns.
        subExec = execMon.createSubProgress(ROWHDR_SUB);
        if (userSettings.isFileHasRowHeadersUserSet()) {
            // take over the user's decision
            result.setFileHasRowHeaders(userSettings.getFileHasRowHeaders());
            result.setFileHasRowHeadersUserSet(true);
        } else {
            boolean hasRowHeaders;
            if (result.getNumberOfColumns() > 1) {
                // if we have at least 2 cols, one of them could be headers
                hasRowHeaders = checkRowHeader(result, subExec);
            } else {
                // a single column is never considered a row header column
                hasRowHeaders = false;
            }
            result.setFileHasRowHeaders(hasRowHeaders);
            result.setFileHasRowHeadersUserSet(false);
        }
        subExec.setProgress(1.0);
        checkInterrupt(execMon);
        // we must correct the column number we've guessed
        // (the row header column is not counted as a column)
        if (result.getFileHasRowHeaders()) {
            result.setNumberOfColumns(result.getNumberOfColumns() - 1);
        }
        // guesses (or copies) column types and names.
        subExec = execMon.createSubProgress(TYPES_SUB + COLHDR_SUB);
        Vector<ColProperty> columnProps = createColumnProperties(userSettings, result, subExec);
        result.setColumnProperties(columnProps);
        subExec.setProgress(1.0);
        // set a default row header prefix
        if (userSettings.getRowHeaderPrefix() != null) {
            result.setRowHeaderPrefix(userSettings.getRowHeaderPrefix());
        } else {
            result.setRowHeaderPrefix("Row");
        }
        // empty lines are ignored unless the user specified something else
        if (userSettings.isIgnoreEmptyLinesUserSet()) {
            result.setIgnoreEmptyLines(userSettings.getIgnoreEmtpyLines());
            result.setIgnoreEmptyLinesUserSet(true);
        } else {
            result.setIgnoreEmptyLines(true);
            result.setIgnoreEmptyLinesUserSet(false);
        }
        execMon.setProgress(1.0);
    } catch (InterruptedExecutionException iee) {
        // the analysis was interrupted: no (partial) result is returned
        return null;
    }
    return result;
}
Also used : Quote(org.knime.core.util.tokenizer.Quote) Comment(org.knime.core.util.tokenizer.Comment) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 4 with Quote

use of org.knime.core.util.tokenizer.Quote in project knime-core by knime.

the class QuotePanel method loadSettings.

/**
 * Displays the passed settings in the panel: the quote list shows one entry
 * per quote defined in the settings, the edit field and the error text are
 * cleared, and the "allow line feed" checkbox reflects the corresponding
 * flag of the settings.
 *
 * @param settings the object containing the settings to display
 */
private void loadSettings(final FileReaderNodeSettings settings) {
    // build the list entries for all quotes defined in the settings object
    final Vector<Quote> definedQuotes = settings.getAllQuotes();
    final Vector<String> listEntries = new Vector<String>();
    for (Quote quote : definedQuotes) {
        listEntries.add(getListEntry(quote));
    }
    // list model backed by the entries collected above
    final AbstractListModel quoteModel = new AbstractListModel() {

        public int getSize() {
            return listEntries.size();
        }

        public Object getElementAt(final int index) {
            return listEntries.get(index);
        }
    };
    m_currQuotes.setModel(quoteModel);
    // start with an empty edit field and without an error message
    getQEditField().setText("");
    clearErrorText();
    m_allowLFCheckbox.setSelected(settings.allowLFinQuotes());
}
Also used : Quote(org.knime.core.util.tokenizer.Quote) AbstractListModel(javax.swing.AbstractListModel) Vector(java.util.Vector)

Aggregations

Quote (org.knime.core.util.tokenizer.Quote): 4, IOException (java.io.IOException): 1, URL (java.net.URL): 1, Vector (java.util.Vector): 1, AbstractListModel (javax.swing.AbstractListModel): 1, ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 1, Comment (org.knime.core.util.tokenizer.Comment): 1