Search in sources :

Example 1 with Comment

use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.

In the class FileAnalyzer, method analyze.

/**
 * Guesses FileReader settings for the data file referenced by the passed settings. Everything the user has
 * marked as fixed in {@code userSettings} is copied into the result unchanged; the remaining pieces are
 * determined step by step, in this order: character set, comment patterns, quotes, whitespace characters,
 * delimiters and number of columns, row headers, and finally column types and names. Progress is reported
 * through a sub-progress per step, and an interrupt check follows most steps.
 *
 * @param userSettings containing the location of the file to examine (required) and the settings that should
 *            be used and considered fixed.
 * @param exec used to check for cancellations and to report progress. May be null, in which case a new
 *            {@link FileReaderExecutionMonitor} is created and used internally. If a
 *            {@link FileReaderExecutionMonitor} is provided it is distinguished between user cancellations
 *            cutting the analysis short, and interrupts that return immediately and return null as result.
 * @return the guessed settings, or {@code null} if the analysis was interrupted (i.e. an
 *         {@link InterruptedExecutionException} was raised by one of the steps).
 * @throws IOException if there was an error reading from the URL
 * @throws IllegalArgumentException if {@code userSettings} does not specify a data file location
 */
public static FileReaderNodeSettings analyze(final FileReaderNodeSettings userSettings, final ExecutionMonitor exec) throws IOException {
    if (userSettings.getDataFileLocation() == null) {
        throw new IllegalArgumentException("Must specify a valid file location for the file analyzer");
    }
    // Without a caller-supplied monitor we work on a private one; nobody else holds a reference to it.
    final ExecutionMonitor monitor = (exec != null) ? exec : new FileReaderExecutionMonitor();
    // the settings object that gets filled during the analysis
    final FileReaderNodeSettings guessed = new FileReaderNodeSettings();
    monitor.setProgress(0.0);
    try {
        // --- values that are always taken over from the user settings ---
        guessed.setDataFileLocationAndUpdateTableName(userSettings.getDataFileLocation());
        guessed.setDecimalSeparator(userSettings.getDecimalSeparator());
        guessed.setThousandsSeparator(userSettings.getThousandsSeparator());
        guessed.setDecimalSeparatorUserSet(userSettings.decimalSeparatorUserSet());
        guessed.setUniquifyRowIDs(userSettings.uniquifyRowIDs());
        guessed.setMaximumNumberOfRowsToRead(userSettings.getMaximumNumberOfRowsToRead());
        guessed.setSkipFirstLines(userSettings.getSkipFirstLines());
        guessed.allowLFinQuotes(userSettings.allowLFinQuotes());
        // NOTE(review): the charset name is assigned again a few lines below in either branch.
        guessed.setCharsetName(userSettings.getCharsetName());
        guessed.setAnalyzeUsedAllRows(true);
        guessed.setMissValuePatternStrCols(userSettings.getMissValuePatternStrCols());
        guessed.setConnectTimeout(userSettings.getConnectTimeout());

        // --- character set: keep the user's choice, otherwise guess it ---
        if (userSettings.isCharsetUserSet()) {
            guessed.setCharsetName(userSettings.getCharsetName());
            guessed.setCharsetUserSet(true);
        } else {
            guessed.setCharsetName(guessCharSet(userSettings));
            guessed.setCharsetUserSet(false);
        }

        // --- comment patterns ---
        ExecutionMonitor stage = monitor.createSubProgress(COMMENT_SUB);
        if (userSettings.isCommentUserSet()) {
            // copy every user-defined comment pattern
            for (Comment userComment : userSettings.getAllComments()) {
                guessed.addBlockCommentPattern(userComment.getBegin(), userComment.getEnd(),
                    userComment.returnAsSeparateToken(), userComment.includeInToken());
            }
            guessed.setCommentUserSet(true);
        } else {
            // nothing provided by the user: guess the comment patterns
            addComments(guessed, stage);
            guessed.setCommentUserSet(false);
        }
        stage.setProgress(1.0);
        checkInterrupt(monitor);

        // --- quotes ---
        stage = monitor.createSubProgress(QUOTES_SUB);
        if (userSettings.isQuoteUserSet()) {
            // copy every user-defined quote, with its escape character if it has one
            for (Quote userQuote : userSettings.getAllQuotes()) {
                if (userQuote.hasEscapeChar()) {
                    guessed.addQuotePattern(userQuote.getLeft(), userQuote.getRight(), userQuote.getEscape());
                } else {
                    guessed.addQuotePattern(userQuote.getLeft(), userQuote.getRight());
                }
            }
            guessed.setQuoteUserSet(true);
        } else {
            // nothing provided by the user: guess the quotes
            addQuotes(guessed, stage);
            guessed.setQuoteUserSet(false);
        }
        stage.setProgress(1.0);
        checkInterrupt(monitor);

        // --- whitespace characters: user-defined ones, or space and tab as default ---
        if (!userSettings.isWhiteSpaceUserSet()) {
            guessed.addWhiteSpaceCharacter(" ");
            guessed.addWhiteSpaceCharacter("\t");
            guessed.setWhiteSpaceUserSet(false);
        } else {
            for (String whiteSpace : userSettings.getAllWhiteSpaces()) {
                guessed.addWhiteSpaceCharacter(whiteSpace);
            }
            guessed.setWhiteSpaceUserSet(true);
        }
        // NOTE(review): 'stage' still refers to the quotes sub-progress here; no new one was created.
        stage.setProgress(1.0);
        checkInterrupt(monitor);

        // this flag is simply taken over for now
        guessed.setSupportShortLines(userSettings.getSupportShortLines());

        // --- delimiters and number of columns (row headers not yet considered); honors user settings ---
        stage = monitor.createSubProgress(DELIMS_SUB);
        setDelimitersAndColNum(userSettings, guessed, stage);
        assert guessed.getNumberOfColumns() > 0;
        stage.setProgress(1.0);
        checkInterrupt(monitor);

        // --- row headers; the column count so far does not take skipped columns into account ---
        stage = monitor.createSubProgress(ROWHDR_SUB);
        if (!userSettings.isFileHasRowHeadersUserSet()) {
            // only with two or more columns can one of them hold the row headers
            final boolean rowHeadersDetected = guessed.getNumberOfColumns() > 1 && checkRowHeader(guessed, stage);
            guessed.setFileHasRowHeaders(rowHeadersDetected);
            guessed.setFileHasRowHeadersUserSet(false);
        } else {
            guessed.setFileHasRowHeaders(userSettings.getFileHasRowHeaders());
            guessed.setFileHasRowHeadersUserSet(true);
        }
        stage.setProgress(1.0);
        checkInterrupt(monitor);

        // the row header column must not be counted as a data column
        if (guessed.getFileHasRowHeaders()) {
            guessed.setNumberOfColumns(guessed.getNumberOfColumns() - 1);
        }

        // --- column types and names (guessed or copied) ---
        stage = monitor.createSubProgress(TYPES_SUB + COLHDR_SUB);
        guessed.setColumnProperties(createColumnProperties(userSettings, guessed, stage));
        stage.setProgress(1.0);

        // --- row header prefix, "Row" unless the user specified one ---
        if (userSettings.getRowHeaderPrefix() == null) {
            guessed.setRowHeaderPrefix("Row");
        } else {
            guessed.setRowHeaderPrefix(userSettings.getRowHeaderPrefix());
        }

        // --- empty lines are ignored unless the user decided otherwise ---
        if (!userSettings.isIgnoreEmptyLinesUserSet()) {
            guessed.setIgnoreEmptyLines(true);
            guessed.setIgnoreEmptyLinesUserSet(false);
        } else {
            guessed.setIgnoreEmptyLines(userSettings.getIgnoreEmtpyLines());
            guessed.setIgnoreEmptyLinesUserSet(true);
        }
        monitor.setProgress(1.0);
        return guessed;
    } catch (InterruptedExecutionException iee) {
        // an interrupt aborts the analysis without any result
        return null;
    }
}
Also used : Quote(org.knime.core.util.tokenizer.Quote) Comment(org.knime.core.util.tokenizer.Comment) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 2 with Comment

use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.

In the class FileReaderNodeDialog, method loadCommentSettings.

/*
 * Updates the comment controls from the current settings object: the Java-style comment check box is
 * selected only if the settings contain both the block comment pattern and the "//" single line pattern;
 * the single line comment text field shows the begin pattern of the last other single line comment
 * found (or is emptied if there is none).
 *
 * Does nothing while m_insideCommentChange is set. m_insideLoadComment is set for the duration of the
 * update and is always cleared again, even if the update fails with an exception.
 */
private void loadCommentSettings() {
    if (m_insideCommentChange) {
        return;
    }
    m_insideLoadComment = true;
    try {
        boolean jBlockFound = false;
        boolean jSingleLineFound = false;
        // there might be an extra sl comment
        Comment singleLine = null;
        for (Comment comment : m_frSettings.getAllComments()) {
            if (comment.getEnd().equals("\n")) {
                // its a single line comment
                if (comment.getBegin().equals("//")) {
                    jSingleLineFound = true;
                } else {
                    // if there are several, the last one wins
                    singleLine = comment;
                }
            } else {
                // its a block comment
                if (comment.getBegin().equals("/*") && comment.getEnd().equals("*/")) {
                    jBlockFound = true;
                }
                // all other block comments we ignore - but the analyzer doesnt
                // add them - and the user cant (without expert settings!)
            }
        }
        m_cStyleComment.setSelected(jBlockFound && jSingleLineFound);
        String singlePattern = "";
        if (singleLine != null) {
            singlePattern = singleLine.getBegin();
        }
        m_singleLineComment.setText(singlePattern);
    } finally {
        // reset in any case - otherwise an exception above would leave the flag set for good.
        // NOTE(review): presumably the flag is checked by the comment change handlers - confirm.
        m_insideLoadComment = false;
    }
}
Also used : Comment(org.knime.core.util.tokenizer.Comment)

Example 3 with Comment

use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.

In the class VariableFileReaderNodeDialog, method loadCommentSettings.

/*
 * Updates the comment controls from the current settings object: the Java-style comment check box is
 * selected only if the settings contain both the block comment pattern and the "//" single line pattern;
 * the single line comment text field shows the begin pattern of the last other single line comment
 * found (or is emptied if there is none).
 *
 * Does nothing while m_insideCommentChange is set. m_insideLoadComment is set for the duration of the
 * update and is always cleared again, even if the update fails with an exception.
 */
private void loadCommentSettings() {
    if (m_insideCommentChange) {
        return;
    }
    m_insideLoadComment = true;
    try {
        boolean jBlockFound = false;
        boolean jSingleLineFound = false;
        // there might be an extra sl comment
        Comment singleLine = null;
        for (Comment comment : m_frSettings.getAllComments()) {
            if (comment.getEnd().equals("\n")) {
                // its a single line comment
                if (comment.getBegin().equals("//")) {
                    jSingleLineFound = true;
                } else {
                    // if there are several, the last one wins
                    singleLine = comment;
                }
            } else {
                // its a block comment
                if (comment.getBegin().equals("/*") && comment.getEnd().equals("*/")) {
                    jBlockFound = true;
                }
                // all other block comments we ignore - but the analyzer doesnt
                // add them - and the user cant (without expert settings!)
            }
        }
        m_cStyleComment.setSelected(jBlockFound && jSingleLineFound);
        String singlePattern = "";
        if (singleLine != null) {
            singlePattern = singleLine.getBegin();
        }
        m_singleLineComment.setText(singlePattern);
    } finally {
        // reset in any case - otherwise an exception above would leave the flag set for good.
        // NOTE(review): presumably the flag is checked by the comment change handlers - confirm.
        m_insideLoadComment = false;
    }
}
Also used : Comment(org.knime.core.util.tokenizer.Comment)

Aggregations

Comment (org.knime.core.util.tokenizer.Comment)3 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)1 Quote (org.knime.core.util.tokenizer.Quote)1