Use of org.embulk.config.ConfigSource in project embulk by embulk.
The class GuessExecutor, method guessParserConfig.
/**
 * Iteratively guesses the parser configuration for a sample buffer.
 *
 * <p>Each round merges the configuration guessed so far into a copy of {@code config},
 * overrides {@code parser.type} with {@code "system_guess"}, and runs a
 * {@link FileInputRunner} over the sample. The guess result is delivered by a
 * {@link GuessedNoticeError} thrown out of the transaction; its guessed config is merged
 * into the accumulated result. Guessing stops as soon as a round leaves the accumulated
 * result unchanged, or after 10 rounds.
 *
 * @param sample the sample data fed to the guess run through a {@code BufferFileInputPlugin}
 * @param config the original input configuration; it is deep-copied, never modified
 * @param guessPlugins the guess plugins, passed on as {@code parser.guess_plugins}
 * @param guessParserSampleBufferBytes passed on as {@code parser.guess_parser_sample_buffer_bytes}
 * @return the accumulated guessed configuration
 */
private ConfigDiff guessParserConfig(Buffer sample, ConfigSource config, List<PluginType> guessPlugins, final int guessParserSampleBufferBytes) {
    // Repeat guessing up to 10 times.
    final int maxGuessRounds = 10;
    ConfigDiff lastGuessed = Exec.newConfigDiff();
    for (int round = 0; round < maxGuessRounds; round++) {
        // Include the last-guessed config when running the guess input.
        ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
        ConfigSource guessInputConfig = originalConfig.deepCopy();
        guessInputConfig.getNestedOrSetEmpty("parser")
                // Override in.parser.type so that FileInputRunner.run uses GuessParserPlugin.
                .set("type", "system_guess")
                .set("guess_plugins", guessPlugins)
                .set("orig_config", originalConfig)
                .set("guess_parser_sample_buffer_bytes", guessParserSampleBufferBytes);

        // Run the FileInputPlugin over the sample buffer.
        final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
        ConfigDiff guessed;
        try {
            input.transaction(guessInputConfig, new InputPlugin.Control() {
                @Override
                public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount) {
                    if (taskCount == 0) {
                        throw new NoSampleException("No input files to guess");
                    }
                    input.run(inputTaskSource, null, 0, new PageOutput() {
                        @Override
                        public void add(Page page) {
                            // TODO exception class
                            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                        }

                        @Override
                        public void finish() {
                        }

                        @Override
                        public void close() {
                        }
                    });
                    // The guess run is expected to exit by throwing, never by returning.
                    throw new AssertionError("Guess executor must throw GuessedNoticeError");
                }
            });
            throw new AssertionError("Guess executor must throw GuessedNoticeError");
        } catch (GuessedNoticeError error) {
            // Merge this round's guess into a copy of the last-guessed config.
            guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
        }

        if (lastGuessed.equals(guessed)) {
            // Not changed: this round added nothing, so further rounds are skipped.
            return lastGuessed;
        }
        lastGuessed = guessed;
    }
    return lastGuessed;
}
Use of org.embulk.config.ConfigSource in project embulk by embulk.
The class TestCsvParserPlugin, method checkDefaultValues.
/**
 * Loads a CSV parser task from a config that sets only {@code columns} and checks the
 * values the task reports for every other option inspected here.
 */
@Test
public void checkDefaultValues() {
    final ImmutableMap<String, String> dateCodeColumn = ImmutableMap.of("name", "date_code", "type", "string");
    final ConfigSource columnsOnly = Exec.newConfigSource().set("columns", ImmutableList.of(dateCodeColumn));

    final CsvParserPlugin.PluginTask loaded = columnsOnly.loadConfig(CsvParserPlugin.PluginTask.class);

    // Text decoding and line handling.
    assertEquals(Charset.forName("utf-8"), loaded.getCharset());
    assertEquals(Newline.CRLF, loaded.getNewline());
    assertEquals(false, loaded.getHeaderLine().or(false));

    // Field splitting and quoting.
    assertEquals(",", loaded.getDelimiter());
    assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('"')), loaded.getQuoteChar());
    assertEquals(false, loaded.getAllowOptionalColumns());

    // Timestamp handling.
    assertEquals("UTC", loaded.getDefaultTimeZoneId());
    assertEquals("%Y-%m-%d %H:%M:%S.%N %z", loaded.getDefaultTimestampFormat());
}
Use of org.embulk.config.ConfigSource in project embulk by embulk.
The class TestCsvParserPlugin, method checkColumnsRequired.
/**
 * Loading the CSV parser task from an empty config (no {@code columns}) is expected to
 * raise a {@link ConfigException}.
 */
@Test(expected = ConfigException.class)
public void checkColumnsRequired() {
    // Nothing is set on the config source before loading.
    Exec.newConfigSource().loadConfig(CsvParserPlugin.PluginTask.class);
}
Use of org.embulk.config.ConfigSource in project embulk by embulk.
The class TestJsonParserPlugin, method useUnEscapeInvalidEscapeString.
/**
 * With {@code invalid_string_escapes} set to {@code "UNESCAPE"}, parsing input whose key
 * contains the escape {@code \a} is expected to yield one record mapping "a" to "b".
 */
@Test
public void useUnEscapeInvalidEscapeString() throws Exception {
    final ConfigSource unescapeConfig = this.config.deepCopy().set("invalid_string_escapes", "UNESCAPE");

    // The input is the text {"\a":"b"} followed by a single trailing backslash.
    final String rawJson = "{\"\\a\":\"b\"}\\";
    transaction(unescapeConfig, fileInput(rawJson));

    final List<Object[]> parsed = Pages.toObjects(plugin.newSchema(), output.pages);
    assertEquals(1, parsed.size());

    // The first column of the only record holds the parsed JSON object.
    final Value firstColumn = (Value) parsed.get(0)[0];
    final Map<Value, Value> entries = firstColumn.asMapValue().map();
    assertEquals(newString("b"), entries.get(newString("a")));
}
Use of org.embulk.config.ConfigSource in project embulk by embulk.
The class TestJsonParserPlugin, method usePassthroughInvalidEscapeStringFunction.
/**
 * With {@code invalid_string_escapes} set to {@code "PASSTHROUGH"}, parsing input whose
 * key contains the escape {@code \a} is expected to throw a {@link DataException}.
 *
 * <p>Only {@code DataException} is caught. Any other throwable propagates with its own
 * stack trace, and a run that completes normally fails with an explicit message, so a
 * failure report always shows the real cause.
 */
@Test
public void usePassthroughInvalidEscapeStringFunction() throws Exception {
    try {
        ConfigSource config = this.config.deepCopy().set("invalid_string_escapes", "PASSTHROUGH");
        // The input is the text {"\a":"b"} followed by a single trailing backslash.
        transaction(config, fileInput("{\"\\a\":\"b\"}\\"));
    } catch (DataException expected) {
        // This is the outcome under test.
        return;
    }
    fail("Expected DataException for an invalid string escape with PASSTHROUGH");
}
Aggregations