use of org.knime.core.util.tokenizer.Delimiter in project knime-core by knime.
the class FileAnalyzer method setDelimitersAndColNum.
/**
 * Determines the column delimiter(s) for the data file and stores them, together with the resulting number of
 * columns, in the result settings.
 * <p>
 * If the user settings do not carry user-set delimiters, the candidates semicolon, comma, tab and space are tried in
 * this order (space is tried twice: first with empty tokens at the end of a row being ignored, then without that
 * option). A candidate is skipped if it is the thousands or decimal separator of the user settings. The first
 * candidate accepted by {@code testDelimiterSettingsSetColNum} is kept. If no candidate is accepted, no column
 * delimiter is set and the number of columns is set to one. In this guessing branch '\n' and '\r' are always set
 * as row delimiters.
 * <p>
 * If the user settings carry user-set delimiters, these are copied into the result, and the number of columns is
 * set to the value returned by {@code getMaximumNumberOfColumns}.
 *
 * @param userSettings the settings provided by the user; must not be null
 * @param result the settings object that is modified; must not be null and must have a data file location
 * @param exec monitor used for progress reporting (sub monitors are created for each candidate)
 * @throws IOException presumably if reading the data file fails (raised by the called helpers)
 * @throws InterruptedExecutionException presumably if the analysis is canceled (raised by the called helpers)
 */
private static void setDelimitersAndColNum(final FileReaderNodeSettings userSettings, final FileReaderNodeSettings result, final ExecutionMonitor exec) throws IOException, InterruptedExecutionException {
    assert result != null;
    assert userSettings != null;
    assert result.getDataFileLocation() != null;

    if (userSettings.isDelimiterUserSet()) {
        // user provided delimiters: copy them
        for (Delimiter delim : userSettings.getAllDelimiters()) {
            if (userSettings.isRowDelimiter(delim.getDelimiter(), false)) {
                result.addRowDelimiter(delim.getDelimiter(), delim.combineConsecutiveDelims());
            } else {
                result.addDelimiterPattern(delim.getDelimiter(), delim.combineConsecutiveDelims(), delim.returnAsToken(), delim.includeInToken());
            }
        }
        result.setDelimiterUserSet(true);
        result.setIgnoreEmptyTokensAtEndOfRow(userSettings.ignoreEmptyTokensAtEndOfRow());
        if (userSettings.ignoreDelimsAtEORUserSet()) {
            result.setIgnoreDelimsAtEndOfRowUserValue(userSettings.ignoreDelimsAtEORUserValue());
        }
        // set the number of cols that we read in with user presets.
        // take the maximum if rows have different num of cols.
        result.setNumberOfColumns(getMaximumNumberOfColumns(result, exec));
        return;
    }

    exec.setProgress("Guessing column separator");
    exec.setProgress(0.0);

    // try semicolon, comma and tab separated columns (single delimiters, not combined)
    if (!isNumberSeparator(userSettings, ';') && tryColumnDelimiter(result, exec, ";", false, false)) {
        return;
    }
    if (!isNumberSeparator(userSettings, ',') && tryColumnDelimiter(result, exec, ",", false, false)) {
        return;
    }
    if (!isNumberSeparator(userSettings, '\t') && tryColumnDelimiter(result, exec, "\t", false, false)) {
        return;
    }
    if (!isNumberSeparator(userSettings, ' ')) {
        // try (combined) spaces, ignoring empty tokens at the end of each row
        if (tryColumnDelimiter(result, exec, " ", true, true)) {
            return;
        }
        // restore it to false
        result.setIgnoreEmptyTokensAtEndOfRow(false);
        // try (combined) spaces without ignoring empty tokens at the end of the row
        if (tryColumnDelimiter(result, exec, " ", true, false)) {
            return;
        }
    }

    // well - none of the above settings made sense - return without column
    // delimiter, but always have one row per line
    resetToRowDelimitersOnly(result);
    result.setNumberOfColumns(1);
}

/** Returns true if the character is the thousands or the decimal separator of the specified settings. */
private static boolean isNumberSeparator(final FileReaderNodeSettings settings, final char c) {
    return (settings.getThousandsSeparator() == c) || (settings.getDecimalSeparator() == c);
}

/** Removes all delimiters from the settings and then re-adds '\n' and '\r' as (combining) row delimiters. */
private static void resetToRowDelimitersOnly(final FileReaderNodeSettings settings) {
    settings.removeAllDelimiters();
    // make sure '\n' and '\r' is a row delimiter. Always.
    settings.addRowDelimiter("\n", true);
    settings.addRowDelimiter("\r", true);
}

/**
 * Installs the candidate as the only column delimiter in the result settings and tests it with
 * {@code testDelimiterSettingsSetColNum}; returns that test's result, or false if the settings rejected the
 * candidate with an IllegalArgumentException.
 */
private static boolean tryColumnDelimiter(final FileReaderNodeSettings result, final ExecutionMonitor exec, final String delimiter, final boolean combineConsecutive, final boolean ignoreEmptyTokensAtEndOfRow) throws IOException, InterruptedExecutionException {
    final ExecutionMonitor subExec = createSubExecWithRemainder(exec);
    try {
        resetToRowDelimitersOnly(result);
        result.addDelimiterPattern(delimiter, combineConsecutive, false, false);
        if (ignoreEmptyTokensAtEndOfRow) {
            // only ever switched on here; the caller is responsible for restoring it
            result.setIgnoreEmptyTokensAtEndOfRow(true);
        }
        return testDelimiterSettingsSetColNum(result, subExec);
    } catch (IllegalArgumentException iae) {
        // deliberately ignored: the candidate could not be used with the current settings (according to the
        // original notes, e.g. because the character was already added as comment pattern) - try the next one.
        return false;
    }
}
use of org.knime.core.util.tokenizer.Delimiter in project knime-core by knime.
the class FileReaderNodeDialog method loadDelimSettings.
/**
 * Loads the column delimiters from the global settings object into the delimiter combo box.
 * <p>
 * The box is reset to the default delimiter list first (which selects the first entry, the &lt;none&gt;
 * placeholder). Then every delimiter of the settings that is not a row delimiter is added to the box (if not
 * already contained), selected, and remembered in {@code m_delimApplied}; with several such delimiters the last one
 * wins.
 * <p>
 * Does nothing when called while {@code delimSettingsChanged()} is running. While this method runs,
 * {@code m_insideLoadDelim} is set, so that {@code delimSettingsChanged()} ignores the events caused by modifying the
 * combo box.
 */
private void loadDelimSettings() {
    if (m_insideDelimChange) {
        // the box is being changed by the user right now - don't overwrite it
        return;
    }
    m_insideLoadDelim = true;
    try {
        m_delimField.removeAllItems();
        m_delimField.setModel(new DefaultComboBoxModel(DEFAULT_DELIMS));
        // the above selects the first in the list - which is the <none>.
        m_delimApplied = DEFAULT_DELIMS[0].getDelimiter();
        for (Delimiter delim : m_frSettings.getAllDelimiters()) {
            if (m_frSettings.isRowDelimiter(delim.getDelimiter(), false)) {
                // row delimiters are not shown in the column delimiter box
                continue;
            }
            if (((DefaultComboBoxModel) m_delimField.getModel()).getIndexOf(delim) < 0) {
                // add all delimiters to the selection list of the combo box
                m_delimField.addItem(delim);
            }
            m_delimField.setSelectedItem(delim);
            m_delimApplied = delim.getDelimiter();
        }
    } finally {
        // always release the guard - otherwise an unexpected exception would leave
        // delimSettingsChanged() disabled for the rest of the dialog's lifetime
        m_insideLoadDelim = false;
    }
}
use of org.knime.core.util.tokenizer.Delimiter in project knime-core by knime.
the class FileReaderNodeDialog method createSettingsPanel.
/**
 * Builds the "Basic Settings" panel of the dialog: a 3x3 grid with the row/column header check boxes, the column
 * delimiter box, the "Advanced..." button, the whitespace and comment options, followed by two rows for the error
 * label and the error detail label. Also creates the corresponding member components and registers their
 * listeners.
 *
 * @return the panel containing the grid and the two error rows, stacked vertically
 */
private JPanel createSettingsPanel() {
    // the height of the button is used as height for the one-line input fields
    final JButton advancedButton = new JButton("Advanced...");
    final int fieldHeight = advancedButton.getPreferredSize().height;

    // --- create the components ---
    m_hasRowHeaders = new JCheckBox("read row IDs");
    m_hasRowHeaders.setToolTipText("Check if the file contains row IDs in the first column");

    m_hasColHeaders = new JCheckBox("read column headers");
    m_hasColHeaders.setToolTipText("Check if the file contains column headers in the first line");

    final JLabel delimLabel = new JLabel("Column delimiter:");
    m_delimField = new JComboBox();
    applyFixedSize(m_delimField, 70, fieldHeight);
    m_delimField.setEditable(true);
    m_delimField.setModel(new DefaultComboBoxModel(DEFAULT_DELIMS));
    final String delimToolTip = "Specify the data delimiter character(s)";
    delimLabel.setToolTipText(delimToolTip);
    m_delimField.setToolTipText(delimToolTip);

    m_cStyleComment = new JCheckBox("Java-style comments");
    m_cStyleComment.setToolTipText("Check to add support for '//' and \"'/*' and '*/'\" comment");

    m_singleLineComment = new JTextField(2);
    applyFixedSize(m_singleLineComment, 55, fieldHeight);
    final JLabel singleLineCommentLabel = new JLabel("Single line comment:");

    m_ignoreWS = new JCheckBox("ignore spaces and tabs");
    m_ignoreWS.setToolTipText("If checked, whitespaces (spaces and tabs) will be discarded (if not quoted)");

    // --- arrange them in a 3x3 grid ---
    final JPanel grid = new JPanel();
    grid.setLayout(new GridLayout(3, 3));
    grid.setBorder(BorderFactory.createTitledBorder(BorderFactory.createEtchedBorder(), "Basic Settings"));

    // the button is centered in its cell
    final Box advancedBox = Box.createHorizontalBox();
    advancedBox.add(Box.createGlue());
    advancedBox.add(advancedButton);
    advancedBox.add(Box.createGlue());

    // first row
    grid.add(createLeftAlignedBox(m_hasRowHeaders));
    grid.add(createLeftAlignedBox(Box.createHorizontalStrut(4), delimLabel, Box.createHorizontalStrut(3), m_delimField));
    grid.add(advancedBox);
    // second row (the last cell stays empty)
    grid.add(createLeftAlignedBox(m_hasColHeaders));
    grid.add(createLeftAlignedBox(m_ignoreWS));
    grid.add(new JLabel(""));
    // third row (the first cell only holds a placeholder)
    grid.add(createLeftAlignedBox(new JLabel("")));
    grid.add(createLeftAlignedBox(m_cStyleComment));
    grid.add(createLeftAlignedBox(singleLineCommentLabel, Box.createHorizontalStrut(3), m_singleLineComment));

    grid.setMaximumSize(new Dimension(PANEL_WIDTH, (2 * COMP_HEIGHT) + 30 + fieldHeight));

    // --- wire the listeners ---
    advancedButton.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(final ActionEvent e) {
            advancedSettings();
        }
    });
    m_delimField.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(final ActionEvent e) {
            delimSettingsChanged();
        }
    });
    m_hasRowHeaders.addItemListener(new ItemListener() {
        @Override
        public void itemStateChanged(final ItemEvent e) {
            rowHeadersSettingsChanged();
        }
    });
    m_hasColHeaders.addItemListener(new ItemListener() {
        @Override
        public void itemStateChanged(final ItemEvent e) {
            colHeadersSettingsChanged();
        }
    });
    m_ignoreWS.addItemListener(new ItemListener() {
        @Override
        public void itemStateChanged(final ItemEvent e) {
            ignoreWSChanged();
        }
    });
    m_cStyleComment.addItemListener(new ItemListener() {
        @Override
        public void itemStateChanged(final ItemEvent e) {
            commentSettingsChanged();
        }
    });
    // every kind of edit in the single line comment field is a comment settings change
    m_singleLineComment.getDocument().addDocumentListener(new DocumentListener() {
        @Override
        public void insertUpdate(final DocumentEvent e) {
            commentSettingsChanged();
        }

        @Override
        public void removeUpdate(final DocumentEvent e) {
            commentSettingsChanged();
        }

        @Override
        public void changedUpdate(final DocumentEvent e) {
            commentSettingsChanged();
        }
    });

    // --- the (initially empty) error labels ---
    m_errorLabel = new JLabel("");
    m_errorLabel.setForeground(Color.red);
    m_errorDetail = new JLabel("");
    m_errorDetail.setForeground(Color.red);

    final JPanel settingsPanel = new JPanel();
    settingsPanel.setLayout(new BoxLayout(settingsPanel, BoxLayout.Y_AXIS));
    settingsPanel.add(grid);
    settingsPanel.add(createCenteredLabelRow(m_errorLabel));
    settingsPanel.add(createCenteredLabelRow(m_errorDetail));
    return settingsPanel;
}

/** Sets maximum, minimum and preferred size of the component to the specified width and height. */
private static void applyFixedSize(final javax.swing.JComponent comp, final int width, final int height) {
    comp.setMaximumSize(new Dimension(width, height));
    comp.setMinimumSize(new Dimension(width, height));
    comp.setPreferredSize(new Dimension(width, height));
}

/** Creates a horizontal box containing the components in the given order, followed by glue. */
private static Box createLeftAlignedBox(final java.awt.Component... components) {
    final Box box = Box.createHorizontalBox();
    for (java.awt.Component comp : components) {
        box.add(comp);
    }
    box.add(Box.createGlue());
    return box;
}

/** Creates a panel showing the label between horizontal glue, with a vertical strut of 17 reserving its height. */
private static JPanel createCenteredLabelRow(final JLabel label) {
    final JPanel row = new JPanel();
    row.setLayout(new BoxLayout(row, BoxLayout.X_AXIS));
    row.add(Box.createHorizontalGlue());
    row.add(label);
    // reserve a certain height for the (in the beginning invisible) label
    row.add(Box.createVerticalStrut(17));
    row.add(Box.createHorizontalGlue());
    return row;
}
use of org.knime.core.util.tokenizer.Delimiter in project knime-core by knime.
the class FileReaderNodeDialog method delimSettingsChanged.
/**
 * Reads the settings of the column delimiter box and transfers them into
 * the internal settings object.
 * <p>
 * If the delimiter in the box equals the one already applied, nothing happens. Otherwise all column delimiters
 * (not the row delimiters) are removed from the settings, the new delimiter - if it is not the &lt;none&gt;
 * placeholder and not empty - is added, and the file is re-analyzed. If the settings reject the new delimiter with
 * an IllegalArgumentException, its message is shown in the error label and the file is not re-analyzed.
 * <p>
 * Does nothing when called while {@code loadDelimSettings()} is running. While this method runs,
 * {@code m_insideDelimChange} is set (which makes {@code loadDelimSettings()} return immediately).
 */
protected void delimSettingsChanged() {
    if (m_insideLoadDelim) {
        // we are called because loadDelimSettings() is modifying the combo box.
        // We are not triggering any action then.
        return;
    }
    m_insideDelimChange = true;
    try {
        // to avoid unnecessary re-analyzing of the file, find out if the
        // delimiter actually changed.
        final String newDelim;
        Object o = m_delimField.getEditor().getItem();
        if (o instanceof Delimiter) {
            newDelim = ((Delimiter) o).getDelimiter();
        } else {
            newDelim = TokenizerSettings.unescapeString((String) o);
        }
        if (newDelim.equals(m_delimApplied)) {
            // m_delimApplied is the delimiter stored in the settings or <none>
            // if none is selected.
            return;
        }
        m_frSettings.setDelimiterUserSet(true);
        // clear it in case things go wrong
        m_delimApplied = null;
        // remove all delimiters except row delimiters
        for (Delimiter delim : m_frSettings.getAllDelimiters()) {
            if (m_frSettings.isRowDelimiter(delim.getDelimiter(), false)) {
                continue;
            }
            m_frSettings.removeDelimiterPattern(delim.getDelimiter());
        }
        m_frSettings.setIgnoreEmptyTokensAtEndOfRow(false);
        // index 0 is the <none> placeholder
        if (o != DEFAULT_DELIMS[0]) {
            String delimStr = null;
            if (o instanceof Delimiter) {
                // user selected one from the list (didn't edit a new one)
                try {
                    // add that delimiter:
                    Delimiter selDelim = (Delimiter) o;
                    delimStr = selDelim.getDelimiter();
                    m_frSettings.addDelimiterPattern(delimStr, selDelim.combineConsecutiveDelims(), selDelim.returnAsToken(), selDelim.includeInToken());
                    m_delimApplied = delimStr;
                } catch (IllegalArgumentException iae) {
                    setErrorLabelText(iae.getMessage());
                    return;
                }
            } else {
                // user typed in a new delimiter
                delimStr = (String) o;
                delimStr = TokenizerSettings.unescapeString(delimStr);
                if ((delimStr != null) && (!delimStr.equals(""))) {
                    try {
                        m_frSettings.addDelimiterPattern(delimStr, false, false, false);
                        m_delimApplied = delimStr;
                    } catch (IllegalArgumentException iae) {
                        setErrorLabelText(iae.getMessage());
                        return;
                    }
                }
            }
            if ((delimStr != null) && (delimStr.equals(" ") || delimStr.equals("\t"))) {
                // with whitespaces we ignore (by default) extra delims at EOR
                if (m_frSettings.ignoreDelimsAtEORUserSet()) {
                    m_frSettings.setIgnoreEmptyTokensAtEndOfRow(m_frSettings.ignoreDelimsAtEORUserValue());
                } else {
                    m_frSettings.setIgnoreEmptyTokensAtEndOfRow(true);
                }
            }
        } else {
            m_delimApplied = DEFAULT_DELIMS[0].getDelimiter();
        }
        // make sure \n is always a row delimiter
        if (!m_frSettings.isRowDelimiter("\n", false)) {
            m_frSettings.addRowDelimiter("\n", true);
        }
        // force re-analyze
        analyzeDataFileAndUpdatePreview(true);
    } finally {
        // always release the guard (on every return path and on unexpected exceptions) -
        // otherwise loadDelimSettings() would stay disabled
        m_insideDelimChange = false;
    }
}
use of org.knime.core.util.tokenizer.Delimiter in project knime-core by knime.
the class FileReaderSettings method readRowDelimitersFromConfig.
/*
 * Reads the row delimiters from the specified config object and adds them to
 * the list of row delimiter patterns. The crux with the row delimiters is,
 * that they are ordinary delimiters for the file tokenizer (just returned as
 * separate token). Thus they are expected to be defined as delimiters
 * already when this method is called - each row delimiter read here is
 * checked against that existing delimiter definition.
 *
 * Entries are read for consecutive indices, starting at zero, until the
 * first missing row delimiter key. An entry that is not of type string is
 * logged and skipped. A missing or unreadable "combine" flag counts as false.
 *
 * Throws an InvalidSettingsException, if a row delimiter is not defined as
 * delimiter, is not returned as token, or if its "combine" flag differs from
 * the one of the delimiter definition.
 */
private void readRowDelimitersFromConfig(final NodeSettingsRO rowDelims) throws InvalidSettingsException {
    for (int idx = 0;; idx++) {
        final String delimKey = CFGKEY_ROWDELIM + idx;
        if (!rowDelims.containsKey(delimKey)) {
            // no more row delimiters stored
            break;
        }

        final String rowDelim;
        try {
            rowDelim = rowDelims.getString(delimKey);
        } catch (InvalidSettingsException ise) {
            LOGGER.warn("Invalid configuration for row delimiter '" + delimKey + "' (must be of type string). Ignoring it!");
            continue;
        }

        // the "combine consecutive delimiters" flag is optional
        boolean combine = false;
        final String combineKey = CFGKEY_RDCOMB + idx;
        if (rowDelims.containsKey(combineKey)) {
            try {
                combine = rowDelims.getBoolean(combineKey);
            } catch (InvalidSettingsException ise) {
                // shouldn't happen anyway
                combine = false;
            }
        }

        // the row delimiter should already be set as delimiter (as the
        // super reads its settings first and all row delims are also
        // token delims).
        final Delimiter tokenDelim = getDelimiterPattern(rowDelim);
        if (tokenDelim == null) {
            throw new InvalidSettingsException("Row delimiter must be defined as delimiter.");
        }
        if (!tokenDelim.returnAsToken()) {
            throw new InvalidSettingsException("Row delimiter must be returned as token.");
        }
        if (tokenDelim.combineConsecutiveDelims() != combine) {
            throw new InvalidSettingsException("Delimiter definition doesn't match row delim definition.");
        }

        // we just add the pattern to the list of row delim patterns
        m_rowDelimiters.add(rowDelim);
    }
}
Aggregations