Use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.
Class FileAnalyzer, method analyze.
/**
 * Derives a best-guess set of FileReader settings for the data file referenced by the passed settings object.
 * Everything the user has already fixed in {@code userSettings} is carried over unchanged (the file location is
 * mandatory). Whatever was left open is determined in consecutive stages: character set, comment patterns, quote
 * patterns, whitespace characters, delimiters and column count, row headers, and finally column types and names.
 *
 * @param userSettings holds the location of the file to examine plus all settings that must be treated as fixed.
 * @param exec used to report progress and to check for cancellation. May be null, in which case a fresh
 *            {@link FileReaderExecutionMonitor} is used. If a {@link FileReaderExecutionMonitor} is provided, user
 *            cancellations cut the analysis short, whereas interrupts end it immediately with a null result.
 * @return the guessed settings (possibly containing little beyond the copied user settings if guessing was too
 *         hard), or null if the analysis was ended by an {@link InterruptedExecutionException}.
 * @throws IOException if there was an error reading from the URL
 * @throws IllegalArgumentException if {@code userSettings} carries no data file location
 */
public static FileReaderNodeSettings analyze(final FileReaderNodeSettings userSettings, final ExecutionMonitor exec) throws IOException {
    if (userSettings.getDataFileLocation() == null) {
        throw new IllegalArgumentException("Must specify a valid file location for the file analyzer");
    }
    // without a caller-supplied monitor we work on a private one - harmless, as nobody can cancel it
    final ExecutionMonitor monitor = (exec != null) ? exec : new FileReaderExecutionMonitor();
    // the settings object that is filled stage by stage and eventually returned
    final FileReaderNodeSettings result = new FileReaderNodeSettings();
    monitor.setProgress(0.0);
    try {
        // --- values that are copied over as they are ---
        result.setDataFileLocationAndUpdateTableName(userSettings.getDataFileLocation());
        result.setDecimalSeparator(userSettings.getDecimalSeparator());
        result.setThousandsSeparator(userSettings.getThousandsSeparator());
        result.setDecimalSeparatorUserSet(userSettings.decimalSeparatorUserSet());
        result.setUniquifyRowIDs(userSettings.uniquifyRowIDs());
        result.setMaximumNumberOfRowsToRead(userSettings.getMaximumNumberOfRowsToRead());
        result.setSkipFirstLines(userSettings.getSkipFirstLines());
        result.allowLFinQuotes(userSettings.allowLFinQuotes());
        // provisional value; both branches of the charset stage below set the name again
        result.setCharsetName(userSettings.getCharsetName());
        result.setAnalyzeUsedAllRows(true);
        result.setMissValuePatternStrCols(userSettings.getMissValuePatternStrCols());
        result.setConnectTimeout(userSettings.getConnectTimeout());

        // --- character set: keep the user's choice, otherwise guess it from the file ---
        if (userSettings.isCharsetUserSet()) {
            result.setCharsetName(userSettings.getCharsetName());
            result.setCharsetUserSet(true);
        } else {
            result.setCharsetName(guessCharSet(userSettings));
            result.setCharsetUserSet(false);
        }

        // --- comment patterns: copy the user's patterns, guess only if there are none ---
        ExecutionMonitor stageMonitor = monitor.createSubProgress(COMMENT_SUB);
        if (userSettings.isCommentUserSet()) {
            for (Comment userComment : userSettings.getAllComments()) {
                result.addBlockCommentPattern(userComment.getBegin(), userComment.getEnd(), userComment.returnAsSeparateToken(), userComment.includeInToken());
            }
            result.setCommentUserSet(true);
        } else {
            addComments(result, stageMonitor);
            result.setCommentUserSet(false);
        }
        stageMonitor.setProgress(1.0);
        checkInterrupt(monitor);

        // --- quote patterns: copy the user's patterns, guess only if there are none ---
        stageMonitor = monitor.createSubProgress(QUOTES_SUB);
        if (userSettings.isQuoteUserSet()) {
            for (Quote userQuote : userSettings.getAllQuotes()) {
                if (!userQuote.hasEscapeChar()) {
                    result.addQuotePattern(userQuote.getLeft(), userQuote.getRight());
                } else {
                    result.addQuotePattern(userQuote.getLeft(), userQuote.getRight(), userQuote.getEscape());
                }
            }
            result.setQuoteUserSet(true);
        } else {
            addQuotes(result, stageMonitor);
            result.setQuoteUserSet(false);
        }
        stageMonitor.setProgress(1.0);
        checkInterrupt(monitor);

        // --- whitespace: user-defined characters win, the default is space and tab ---
        if (!userSettings.isWhiteSpaceUserSet()) {
            result.addWhiteSpaceCharacter(" ");
            result.addWhiteSpaceCharacter("\t");
            result.setWhiteSpaceUserSet(false);
        } else {
            for (String whiteSpace : userSettings.getAllWhiteSpaces()) {
                result.addWhiteSpaceCharacter(whiteSpace);
            }
            result.setWhiteSpaceUserSet(true);
        }
        // no sub progress was created for this stage: the quote stage's monitor is marked complete once more
        stageMonitor.setProgress(1.0);
        checkInterrupt(monitor);

        // the short-lines flag is simply taken over for now
        result.setSupportShortLines(userSettings.getSupportShortLines());

        // --- delimiters and column count (as many columns as the delimiters produce, regardless of any
        // row headers); user settings are honored ---
        stageMonitor = monitor.createSubProgress(DELIMS_SUB);
        setDelimitersAndColNum(userSettings, result, stageMonitor);
        assert result.getNumberOfColumns() > 0;
        stageMonitor.setProgress(1.0);
        checkInterrupt(monitor);

        // --- row headers; the column count set so far does not take skipped columns into account ---
        stageMonitor = monitor.createSubProgress(ROWHDR_SUB);
        if (!userSettings.isFileHasRowHeadersUserSet()) {
            // only with at least two columns can one of them hold the row headers
            final boolean rowHeadersDetected = result.getNumberOfColumns() > 1 && checkRowHeader(result, stageMonitor);
            result.setFileHasRowHeaders(rowHeadersDetected);
            result.setFileHasRowHeadersUserSet(false);
        } else {
            result.setFileHasRowHeaders(userSettings.getFileHasRowHeaders());
            result.setFileHasRowHeadersUserSet(true);
        }
        stageMonitor.setProgress(1.0);
        checkInterrupt(monitor);

        // a row header column does not count as data column - correct the guessed number
        if (result.getFileHasRowHeaders()) {
            result.setNumberOfColumns(result.getNumberOfColumns() - 1);
        }

        // --- column types and names (guessed or copied) ---
        stageMonitor = monitor.createSubProgress(TYPES_SUB + COLHDR_SUB);
        final Vector<ColProperty> guessedColProps = createColumnProperties(userSettings, result, stageMonitor);
        result.setColumnProperties(guessedColProps);
        stageMonitor.setProgress(1.0);

        // --- row header prefix, "Row" unless the user specified one ---
        if (userSettings.getRowHeaderPrefix() == null) {
            result.setRowHeaderPrefix("Row");
        } else {
            result.setRowHeaderPrefix(userSettings.getRowHeaderPrefix());
        }

        // --- empty lines are ignored unless the user decided otherwise ---
        if (!userSettings.isIgnoreEmptyLinesUserSet()) {
            result.setIgnoreEmptyLines(true);
            result.setIgnoreEmptyLinesUserSet(false);
        } else {
            result.setIgnoreEmptyLines(userSettings.getIgnoreEmtpyLines());
            result.setIgnoreEmptyLinesUserSet(true);
        }

        monitor.setProgress(1.0);
        return result;
    } catch (InterruptedExecutionException interrupted) {
        // an interrupt ends the analysis without any result
        return null;
    }
}
Use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.
Class FileReaderNodeDialog, method loadCommentSettings.
/*
 * Updates the comment controls from the current settings object: the
 * Java-style comment check box is selected only if both the "//" single line
 * comment and the Java block comment are defined, and the single line comment
 * field shows the begin pattern of any other single line comment (empty if
 * there is none). Does nothing while a comment change is being processed.
 */
private void loadCommentSettings() {
    if (m_insideCommentChange) {
        return;
    }
    // guard flag, presumably checked by the comment change handling to ignore
    // the control updates triggered from here - TODO confirm against the listeners
    m_insideLoadComment = true;
    try {
        boolean jBlockFound = false;
        boolean jSingleLineFound = false;
        // there might be an extra single line comment (the last one found wins)
        Comment singleLine = null;
        for (Comment comment : m_frSettings.getAllComments()) {
            if (comment.getEnd().equals("\n")) {
                // it's a single line comment
                if (comment.getBegin().equals("//")) {
                    jSingleLineFound = true;
                } else {
                    singleLine = comment;
                }
            } else {
                // it's a block comment
                if (comment.getBegin().equals("/*") && comment.getEnd().equals("*/")) {
                    jBlockFound = true;
                }
                // all other block comments are ignored - the analyzer doesn't
                // add them and the user can't (without expert settings!)
            }
        }
        m_cStyleComment.setSelected(jBlockFound && jSingleLineFound);
        String singlePattern = "";
        if (singleLine != null) {
            singlePattern = singleLine.getBegin();
        }
        m_singleLineComment.setText(singlePattern);
    } finally {
        // always release the guard - otherwise an exception thrown above would
        // leave the flag set for good
        m_insideLoadComment = false;
    }
}
Use of org.knime.core.util.tokenizer.Comment in project knime-core by knime.
Class VariableFileReaderNodeDialog, method loadCommentSettings.
/*
 * Updates the comment controls from the current settings object: the
 * Java-style comment check box is selected only if both the "//" single line
 * comment and the Java block comment are defined, and the single line comment
 * field shows the begin pattern of any other single line comment (empty if
 * there is none). Does nothing while a comment change is being processed.
 */
private void loadCommentSettings() {
    if (m_insideCommentChange) {
        return;
    }
    // guard flag, presumably checked by the comment change handling to ignore
    // the control updates triggered from here - TODO confirm against the listeners
    m_insideLoadComment = true;
    try {
        boolean jBlockFound = false;
        boolean jSingleLineFound = false;
        // there might be an extra single line comment (the last one found wins)
        Comment singleLine = null;
        for (Comment comment : m_frSettings.getAllComments()) {
            if (comment.getEnd().equals("\n")) {
                // it's a single line comment
                if (comment.getBegin().equals("//")) {
                    jSingleLineFound = true;
                } else {
                    singleLine = comment;
                }
            } else {
                // it's a block comment
                if (comment.getBegin().equals("/*") && comment.getEnd().equals("*/")) {
                    jBlockFound = true;
                }
                // all other block comments are ignored - the analyzer doesn't
                // add them and the user can't (without expert settings!)
            }
        }
        m_cStyleComment.setSelected(jBlockFound && jSingleLineFound);
        String singlePattern = "";
        if (singleLine != null) {
            singlePattern = singleLine.getBegin();
        }
        m_singleLineComment.setText(singlePattern);
    } finally {
        // always release the guard - otherwise an exception thrown above would
        // leave the flag set for good
        m_insideLoadComment = false;
    }
}
Aggregations