use of org.knime.core.util.tokenizer.Tokenizer in project knime-core by knime.
the class BatchExecutor method splitWorkflowVariableArg.
/**
 * Splits the argument to -workflow.variable into its sub-components (name, value, type) and returns them as an
 * array.
 *
 * @param arg The string to split
 * @return The components of the string; no validation is done.
 * @since 2.11
 */
public static String[] splitWorkflowVariableArg(final String arg) {
    Tokenizer tokenizer = new Tokenizer(new StringReader(arg));
    TokenizerSettings settings = new TokenizerSettings();
    settings.addQuotePattern("\"", "\"", '\\');
    settings.addQuotePattern("'", "'", '\\');
    settings.addDelimiterPattern(",", /* combine multiple= */ false,
        /* return as token= */ false, /* include in token= */ false);
    tokenizer.setSettings(settings);
    ArrayList<String> tokenList = new ArrayList<String>();
    String token;
    while ((token = tokenizer.nextToken()) != null) {
        tokenList.add(token);
    }
    return tokenList.toArray(new String[tokenList.size()]);
}
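For illustration, here is a standalone sketch that applies the same tokenizer configuration outside the batch executor. The class name, the sample argument string, and the expected output are assumptions made for this example, not taken from the KNIME sources; it shows why a value wrapped in quotes can itself contain the comma delimiter.

import java.io.StringReader;

import org.knime.core.util.tokenizer.Tokenizer;
import org.knime.core.util.tokenizer.TokenizerSettings;

public final class WorkflowVariableSplitDemo {

    public static void main(final String[] args) {
        // Same configuration as splitWorkflowVariableArg: double and single quotes
        // with backslash escapes, and comma as a delimiter that is neither returned
        // as a token nor included in tokens.
        Tokenizer tokenizer = new Tokenizer(new StringReader("threshold,\"0,5\",double"));
        TokenizerSettings settings = new TokenizerSettings();
        settings.addQuotePattern("\"", "\"", '\\');
        settings.addQuotePattern("'", "'", '\\');
        settings.addDelimiterPattern(",", false, false, false);
        tokenizer.setSettings(settings);

        String token;
        while ((token = tokenizer.nextToken()) != null) {
            // Expected (assuming default quote handling strips the quote characters):
            // threshold / 0,5 / double - the quoted comma does not split the value.
            System.out.println(token);
        }
    }
}

As the Javadoc states, splitWorkflowVariableArg itself performs no validation; it only returns the raw name, value, and type components.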
use of org.knime.core.util.tokenizer.Tokenizer in project knime-core by knime.
the class CellSplitterCellFactory method prepareTokenizer.
/**
 * Creates a tokenizer on the given string reader, applies the configured
 * tokenizer settings, and returns it.
 * @param inputReader The string reader to create a tokenizer on.
 * @return The tokenizer created on the string reader.
 * @since 2.6
 */
private Tokenizer prepareTokenizer(final StringReader inputReader) {
    assert inputReader.markSupported();
    Tokenizer tokenizer = new Tokenizer(inputReader);
    tokenizer.setSettings(m_tokenizerSettings);
    return tokenizer;
}
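The assertion on markSupported() is not incidental: the caller shown next (tokenizeAndCreateCells) marks and resets this same reader to recover the untokenized rest of the input for the last output cell, and StringReader always supports mark/reset.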
use of org.knime.core.util.tokenizer.Tokenizer in project knime-core by knime.
the class CellSplitterCellFactory method tokenizeAndCreateCells.
/**
 * Tokenizes the string representation of the given data cell and returns
 * an array of data cells containing the tokens.
 * @param inputCell the cell to tokenize (its string representation)
 * @param numOfCells The number of cells to create, containing the tokens
 * @return An array of cells containing the tokens. The length of the array
 *     is specified by <code>numOfCells</code>.
 * @since 2.6
 */
private DataCell[] tokenizeAndCreateCells(final DataCell inputCell, final int numOfCells) {
    DataCell[] result = new DataCell[numOfCells];
    if (inputCell.isMissing()) {
        Arrays.fill(result, DataType.getMissingCell());
        if (m_settings.isUseEmptyString()) {
            // replace cells for string columns with empty string cells
            for (int c = 0; c < result.length; c++) {
                if (m_settings.getTypeOfColumn(c).equals(StringCell.TYPE)) {
                    result[c] = EMPTY_STRINGCELL;
                }
            }
        }
        return result;
    }
    final String inputString = getInputString(inputCell);
    // init the tokenizer
    StringReader inputReader = new StringReader(inputString);
    Tokenizer tokenizer = prepareTokenizer(inputReader);
    // tokenize the column value and create new output cells
    for (int col = 0; col < result.length; col++) {
        String token = null;
        if (col == result.length - 1) {
            // mark the stream in case we need to read the rest of it
            try {
                inputReader.mark(0);
                token = tokenizer.nextToken();
                // see if there is more in the stream
                if (inputReader.read() != -1) {
                    // go back to before the token
                    inputReader.reset();
                    token = readAll(inputReader);
                }
            } catch (IOException ioe) {
                // reading a string won't cause an IOException.
            }
        } else {
            token = tokenizer.nextToken();
        }
        if (token == null) {
            if (m_settings.isUseEmptyString() && m_settings.getTypeOfColumn(col).equals(StringCell.TYPE)) {
                // create empty string cells - not missing cells.
                result[col] = EMPTY_STRINGCELL;
            } else {
                result[col] = DataType.getMissingCell();
            }
        } else {
            if (m_settings.isTrim()) {
                token = token.trim();
            }
            result[col] = createDataCell(token, m_settings.getTypeOfColumn(col));
        }
    }
    return result;
}
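The branch for the last column is the notable part: the reader is marked before the final token is read, and if any input remains after that token, the reader is reset and the untokenized remainder (delimiters included) becomes the last cell. Below is a minimal standalone sketch of that mark/reset pattern with the private readAll helper inlined as a plain read loop; the three-column split, the comma delimiter, and the sample input are illustrative assumptions rather than the factory's actual configuration.

import java.io.IOException;
import java.io.StringReader;

import org.knime.core.util.tokenizer.Tokenizer;
import org.knime.core.util.tokenizer.TokenizerSettings;

public final class LastCellRemainderDemo {

    public static void main(final String[] args) throws IOException {
        // Split "a,b,c,d" into at most three parts; the last part keeps the rest verbatim.
        StringReader reader = new StringReader("a,b,c,d");
        Tokenizer tokenizer = new Tokenizer(reader);
        TokenizerSettings settings = new TokenizerSettings();
        settings.addDelimiterPattern(",", false, false, false);
        tokenizer.setSettings(settings);

        String[] parts = new String[3];
        for (int col = 0; col < parts.length; col++) {
            if (col == parts.length - 1) {
                // mark before the final token so we can rewind if more input follows
                reader.mark(0);
                String token = tokenizer.nextToken();
                if (reader.read() != -1) {
                    // more input remains: hand back the untokenized remainder instead
                    reader.reset();
                    StringBuilder rest = new StringBuilder();
                    int ch;
                    while ((ch = reader.read()) != -1) {
                        rest.append((char)ch);
                    }
                    token = rest.toString();
                }
                parts[col] = token;
            } else {
                parts[col] = tokenizer.nextToken();
            }
        }
        // Expected under these assumptions: "a | b | c,d"
        System.out.println(String.join(" | ", parts));
    }
}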