use of org.embulk.config.ConfigException in project embulk by embulk.
the class RemoveColumnsFilterPlugin method getExistentColumns.
/**
 * Returns the subset of the specified column names that can be looked up in the schema.
 *
 * @param schema schema in which every name is looked up
 * @param specifiedColumns column names to check
 * @param acceptUnmatch when {@code true}, names whose lookup fails are skipped;
 *     when {@code false}, the first such name aborts the call
 * @return the names whose lookup succeeded, in the order they were specified
 * @throws ConfigException if a lookup fails and {@code acceptUnmatch} is {@code false}
 */
private List<String> getExistentColumns(Schema schema, List<String> specifiedColumns, boolean acceptUnmatch) {
    ImmutableList.Builder<String> existentColumns = ImmutableList.builder();
    for (String column : specifiedColumns) {
        try {
            // Called only for its failure mode: the returned column itself is not needed.
            schema.lookupColumn(column);
            existentColumns.add(column);
        } catch (SchemaConfigException e) {
            if (!acceptUnmatch) {
                // Chain the lookup failure as the cause so its stack trace is not lost.
                throw new ConfigException(
                        String.format(ENGLISH, "Column '%s' doesn't exist in the schema", column), e);
            }
            // Unmatched names are tolerated here: the column is deliberately left out of the result.
        }
    }
    return existentColumns.build();
}
use of org.embulk.config.ConfigException in project embulk by embulk.
the class RemoveColumnsFilterPlugin method transaction.
/**
 * Derives the output schema from the {@code remove:} or {@code keep:} option and runs the control with it.
 *
 * <p>Exactly one of the two options must be present. Surviving columns are re-numbered from 0 in
 * input order, and the task receives an index mapping in which each input column index points to
 * its output index, or to -1 when the column is dropped.
 *
 * @param config plugin configuration, loaded as a {@code PluginTask}
 * @param inputSchema schema of the incoming records
 * @param control callback that receives the dumped task and the output schema
 * @throws ConfigException if both or neither of {@code remove:} and {@code keep:} are given
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    final PluginTask task = config.loadConfig(PluginTask.class);

    // validate remove: and keep:
    final boolean removeMode = task.getRemove().isPresent();
    final boolean keepMode = task.getKeep().isPresent();
    if (removeMode && keepMode) {
        throw new ConfigException("remove: and keep: must not be multi-select");
    }
    if (!(removeMode || keepMode)) {
        throw new ConfigException("Must require remove: or keep:");
    }

    final boolean acceptUnmatchedColumns = task.getAcceptUnmatchedColumns();

    // Every input column starts out as "dropped" (-1) until it is assigned an output slot.
    final int[] indexMapping = new int[inputSchema.size()];
    for (int slot = 0; slot < indexMapping.length; slot++) {
        indexMapping[slot] = -1;
    }

    // The listed names come from whichever option is present.
    final List<String> listedColumns = getExistentColumns(
            inputSchema,
            removeMode ? task.getRemove().get() : task.getKeep().get(),
            acceptUnmatchedColumns);

    final ImmutableList.Builder<Column> outputColumns = ImmutableList.builder();
    int nextIndex = 0;
    for (Column inputColumn : inputSchema.getColumns()) {
        final boolean listed = listedColumns.contains(inputColumn.getName());
        // remove: drops listed columns, keep: drops unlisted ones.
        if (listed == removeMode) {
            continue;
        }
        outputColumns.add(new Column(nextIndex, inputColumn.getName(), inputColumn.getType()));
        indexMapping[inputColumn.getIndex()] = nextIndex;
        nextIndex++;
    }

    task.setIndexMapping(indexMapping);
    control.run(task.dump(), new Schema(outputColumns.build()));
}
use of org.embulk.config.ConfigException in project embulk by embulk.
the class RenameFilterPlugin method applyFirstCharacterTypesRule.
/**
 * Renames columns whose first character is outside the allowed set.
 *
 * <p>The allowed set is the union of the character classes named in the rule's pass types and the
 * literal pass characters. A column name that starts with any other character either has that
 * character replaced by {@code replace}, or gets {@code prefix} prepended, depending on which of
 * the two is configured.
 *
 * @param inputSchema schema whose column names are checked
 * @param rule the {@code first_character_types} rule supplying replace, prefix, pass types and pass characters
 * @return a schema with the same column types and the possibly renamed columns, in input order
 * @throws ConfigException if {@code replace} or {@code prefix} is not exactly one character long,
 *     if both or neither are given, if the pass characters contain {@code \E}, or if a pass type
 *     is not a known character type keyword
 */
private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
    final Optional<String> replace = rule.getReplace();
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final Optional<String> prefix = rule.getPrefix();
    if (replace.isPresent() && replace.get().length() != 1) {
        throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && prefix.get().length() != 1) {
        throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && replace.isPresent()) {
        throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
    }
    if ((!prefix.isPresent()) && (!replace.isPresent())) {
        throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
    }

    // Build "^[^<allowed>].*": matches a whole name whose first character is NOT allowed.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("^[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("].*");

    // Compile once here instead of recompiling the same expression for every column.
    // Fully qualified so that no additional import is required.
    final java.util.regex.Pattern disallowedFirstCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());

    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        String name = column.getName();
        if (disallowedFirstCharacter.matcher(name).matches()) {
            if (replace.isPresent()) {
                // A match guarantees at least one character, so substring(1) is safe.
                name = replace.get() + name.substring(1);
            } else if (prefix.isPresent()) {
                name = prefix.get() + name;
            }
        }
        schemaBuilder.add(name, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.config.ConfigException in project embulk by embulk.
the class RenameFilterPlugin method applyCharacterTypesRule.
/**
 * Replaces every character outside the allowed set in each column name.
 *
 * <p>The allowed set is the union of the character classes named in the rule's pass types and the
 * literal pass characters. Each other character in a column name is replaced by the single
 * character given as {@code replace}.
 *
 * @param inputSchema schema whose column names are rewritten
 * @param rule the {@code character_types} rule supplying pass types, pass characters and replace
 * @return a schema with the same column types and the rewritten column names, in input order
 * @throws ConfigException if {@code replace} is not exactly one character long, if the pass
 *     characters contain {@code \E}, or if a pass type is not a known character type keyword
 */
private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final String replace = rule.getReplace();
    if (replace.isEmpty()) {
        throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
    }
    if (replace.length() != 1) {
        throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quotation used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
    }

    // Build "[^<allowed>]": matches any single character that is NOT allowed.
    final StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");

    // Compile once here instead of recompiling the same expression for every column.
    // Fully qualified so that no additional import is required.
    final java.util.regex.Pattern disallowedCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());
    // replaceAll treats '$' and '\' in the replacement as group references / escapes; quote it so
    // that a configured "$" or "\" is inserted literally instead of raising IllegalArgumentException.
    final String literalReplacement = java.util.regex.Matcher.quoteReplacement(replace);

    final Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        final String renamed = disallowedCharacter.matcher(column.getName()).replaceAll(literalReplacement);
        schemaBuilder.add(renamed, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.config.ConfigException in project embulk by embulk.
the class DynamicColumnSetterFactory method newColumnSetter.
/**
 * Creates the column setter that matches the type of the given column.
 *
 * <p>String and JSON columns get a setter with a timestamp formatter; timestamp columns get a
 * setter with a timestamp parser. Every setter also receives {@code defaultValue}.
 *
 * @param pageBuilder page builder handed to the created setter
 * @param column column whose type selects the setter implementation
 * @return a setter for the column's type
 * @throws ConfigException if the column's type is none of the handled types
 */
public DynamicColumnSetter newColumnSetter(PageBuilder pageBuilder, Column column) {
    final Type type = column.getType();

    if (type instanceof BooleanType) {
        return new BooleanColumnSetter(pageBuilder, column, defaultValue);
    }
    if (type instanceof LongType) {
        return new LongColumnSetter(pageBuilder, column, defaultValue);
    }
    if (type instanceof DoubleType) {
        return new DoubleColumnSetter(pageBuilder, column, defaultValue);
    }
    if (type instanceof StringType) {
        return new StringColumnSetter(
                pageBuilder,
                column,
                defaultValue,
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column)));
    }
    if (type instanceof TimestampType) {
        // TODO use flexible time format like Ruby's Time.parse
        // The format comes from the column's type itself when useColumnForTimestampMetadata is set,
        // see https://github.com/embulk/embulk/issues/935
        final TimestampParser parser = this.useColumnForTimestampMetadata
                ? TimestampParser.of(
                        getFormatFromTimestampTypeWithDepracationSuppressed((TimestampType) type),
                        getTimeZoneId(column))
                : TimestampParser.of(getTimestampFormatForParser(column), getTimeZoneId(column));
        return new TimestampColumnSetter(pageBuilder, column, defaultValue, parser);
    }
    if (type instanceof JsonType) {
        return new JsonColumnSetter(
                pageBuilder,
                column,
                defaultValue,
                TimestampFormatter.of(getTimestampFormatForFormatter(column), getTimeZoneId(column)));
    }

    throw new ConfigException("Unknown column type: " + type);
}
Aggregations