use of org.embulk.spi.Column in project embulk by embulk.
the class RemoveColumnsFilterPlugin method transaction.
/**
 * Builds the output schema by dropping (remove:) or retaining (keep:) columns of the
 * input schema, then runs the control with the dumped task and the new schema.
 *
 * <p>Exactly one of remove: and keep: must be configured. Surviving columns are
 * re-indexed from 0 in input order. An index mapping (input column index to output
 * column index, -1 for a column that is not emitted) is stored on the task.
 *
 * @param config plugin configuration, loaded as a {@code PluginTask}
 * @param inputSchema schema of the incoming records
 * @param control callback run with the dumped task and the output schema
 * @throws ConfigException if both, or neither, of remove: and keep: are given
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    final PluginTask task = config.loadConfig(PluginTask.class);

    // validate remove: and keep:
    final boolean removing = task.getRemove().isPresent();
    final boolean keeping = task.getKeep().isPresent();
    if (removing && keeping) {
        throw new ConfigException("remove: and keep: must not be multi-select");
    }
    if (!(removing || keeping)) {
        throw new ConfigException("Must require remove: or keep:");
    }

    final boolean acceptUnmatched = task.getAcceptUnmatchedColumns();

    // -1 marks an input column that has no counterpart in the output schema.
    final int[] mapping = new int[inputSchema.size()];
    for (int slot = 0; slot < mapping.length; slot++) {
        mapping[slot] = -1;
    }

    // Column names listed under whichever option was specified.
    final List<String> listed;
    if (removing) {
        listed = getExistentColumns(inputSchema, task.getRemove().get(), acceptUnmatched);
    } else {
        listed = getExistentColumns(inputSchema, task.getKeep().get(), acceptUnmatched);
    }

    final ImmutableList.Builder<Column> survivors = ImmutableList.builder();
    int nextIndex = 0;
    for (Column source : inputSchema.getColumns()) {
        // remove: drops the listed columns; keep: drops the unlisted ones.
        if (listed.contains(source.getName()) == removing) {
            continue;
        }
        survivors.add(new Column(nextIndex, source.getName(), source.getType()));
        mapping[source.getIndex()] = nextIndex;
        nextIndex++;
    }

    task.setIndexMapping(mapping);
    control.run(task.dump(), new Schema(survivors.build()));
}
use of org.embulk.spi.Column in project embulk by embulk.
the class CsvFormatterPlugin method open.
/**
 * Opens a page output that writes every record of the added pages as one CSV line.
 *
 * <p>A new file is started on the line encoder right away and, when the task's header
 * line option is on, a header is written before any record. Within a record, every
 * column except the one at index 0 is preceded by the delimiter. Null values are
 * emitted as the configured null string; all other values are passed through
 * {@code setEscapeAndQuoteValue} before being written.
 *
 * @param taskSource serialized task, loaded as a {@code PluginTask}
 * @param schema schema of the pages that will be added
 * @param output destination handed to the line encoder
 * @return a page output whose {@code finish} and {@code close} delegate to the line encoder
 */
@Override
public PageOutput open(TaskSource taskSource, final Schema schema, FileOutput output) {
    final PluginTask task = taskSource.loadTask(PluginTask.class);
    final LineEncoder lineEncoder = new LineEncoder(output, task);
    final TimestampFormatter[] formatters =
            Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
    final char delimiter = task.getDelimiterChar();
    final QuotePolicy quotePolicy = task.getQuotePolicy();

    // A NUL quote character falls back to the double quote.
    final char quote;
    if (task.getQuoteChar() != '\0') {
        quote = task.getQuoteChar();
    } else {
        quote = '"';
    }

    // Without an explicit escape character: backslash when the quote policy is NONE,
    // otherwise the quote character itself.
    final char fallbackEscape = (quotePolicy == QuotePolicy.NONE) ? '\\' : quote;
    final char escape = task.getEscapeChar().or(fallbackEscape);

    final String newlineInField = task.getNewlineInField().getString();
    final String nullString = task.getNullString();

    // create a file
    lineEncoder.nextFile();

    // write header
    if (task.getHeaderLine()) {
        writeHeader(schema, lineEncoder, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
    }

    return new PageOutput() {
        private final PageReader reader = new PageReader(schema);
        private final String separator = String.valueOf(delimiter);

        public void add(Page page) {
            reader.setPage(page);
            while (reader.nextRecord()) {
                schema.visitColumns(new ColumnVisitor() {
                    public void booleanColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        writeValue(Boolean.toString(reader.getBoolean(column)));
                    }

                    public void longColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        writeValue(Long.toString(reader.getLong(column)));
                    }

                    public void doubleColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        writeValue(Double.toString(reader.getDouble(column)));
                    }

                    public void stringColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        writeValue(reader.getString(column));
                    }

                    public void timestampColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        // The formatter array is indexed by the column's schema index.
                        Timestamp timestamp = reader.getTimestamp(column);
                        writeValue(formatters[column.getIndex()].format(timestamp));
                    }

                    public void jsonColumn(Column column) {
                        writeSeparator(column);
                        if (reader.isNull(column)) {
                            writeNull();
                            return;
                        }
                        Value json = reader.getJson(column);
                        writeValue(json.toJson());
                    }

                    // Emits the delimiter before every column except the one at index 0.
                    private void writeSeparator(Column column) {
                        if (column.getIndex() == 0) {
                            return;
                        }
                        lineEncoder.addText(separator);
                    }

                    // Emits a non-null value after escaping/quoting it.
                    private void writeValue(String text) {
                        String encoded = setEscapeAndQuoteValue(
                                text, delimiter, quotePolicy, quote, escape, newlineInField, nullString);
                        lineEncoder.addText(encoded);
                    }

                    // Emits the configured null string as-is.
                    private void writeNull() {
                        lineEncoder.addText(nullString);
                    }
                });
                lineEncoder.addNewLine();
            }
        }

        public void finish() {
            lineEncoder.finish();
        }

        public void close() {
            lineEncoder.close();
        }
    };
}
use of org.embulk.spi.Column in project embulk by embulk.
the class RenameFilterPlugin method applyFirstCharacterTypesRule.
/**
 * Applies a "first_character_types" rule: renames every column whose name starts with
 * a character outside the allowed set.
 *
 * <p>The allowed set is the union of the character-class fragments looked up in
 * {@code CHARACTER_TYPE_KEYWORDS} for each pass type, plus the literal pass characters.
 * For a matching name, either its first character is replaced by "replace" or
 * "prefix" is prepended, whichever of the two is configured.
 *
 * <p>NOTE: the pattern ends in {@code .*} and is matched against the whole name, so a
 * name containing a line terminator does not match and is left unchanged.
 *
 * @param inputSchema schema whose column names are checked
 * @param rule rule configuration (replace, prefix, pass types, pass characters)
 * @return a new schema with the same column types and the possibly renamed columns
 * @throws ConfigException if "replace"/"prefix" are not exactly one character, are both
 *     given or both missing, if "pass_characters" contains {@code \E}, or if a pass
 *     type keyword is unknown
 */
private Schema applyFirstCharacterTypesRule(Schema inputSchema, FirstCharacterTypesRule rule) {
    final Optional<String> replace = rule.getReplace();
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final Optional<String> prefix = rule.getPrefix();

    if (replace.isPresent() && replace.get().length() != 1) {
        throw new ConfigException("\"replace\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && prefix.get().length() != 1) {
        throw new ConfigException("\"prefix\" in \"first_character_types\" must contain just 1 character if specified");
    }
    if (prefix.isPresent() && replace.isPresent()) {
        throw new ConfigException("\"replace\" and \"prefix\" in \"first_character_types\" must not be specified together");
    }
    if ((!prefix.isPresent()) && (!replace.isPresent())) {
        throw new ConfigException("Either of \"replace\" or \"prefix\" must be specified in \"first_character_types\"");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quoting used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"first_character_types\" must not contain \"\\E\"");
    }

    // Build "^[^<allowed>].*": a name whose first character is NOT in the allowed set.
    StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("^[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("].*");

    // Compile once here instead of once per column (String.matches recompiles every call).
    final java.util.regex.Pattern disallowedFirstCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());

    Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        String name = column.getName();
        if (disallowedFirstCharacter.matcher(name).matches()) {
            if (replace.isPresent()) {
                name = replace.get() + name.substring(1);
            } else if (prefix.isPresent()) {
                name = prefix.get() + name;
            }
        }
        schemaBuilder.add(name, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.spi.Column in project embulk by embulk.
the class RenameFilterPlugin method applyCharacterTypesRule.
/**
 * Applies a "character_types" rule: in every column name, replaces each character
 * outside the allowed set with the configured single replacement character.
 *
 * <p>The allowed set is the union of the character-class fragments looked up in
 * {@code CHARACTER_TYPE_KEYWORDS} for each pass type, plus the literal pass characters.
 *
 * @param inputSchema schema whose column names are rewritten
 * @param rule rule configuration (replace, pass types, pass characters)
 * @return a new schema with the same column types and the rewritten column names
 * @throws ConfigException if "replace" is empty or longer than one character, if
 *     "pass_characters" contains {@code \E}, or if a pass type keyword is unknown
 */
private Schema applyCharacterTypesRule(Schema inputSchema, CharacterTypesRule rule) {
    final List<String> passTypes = rule.getPassTypes();
    final String passCharacters = rule.getPassCharacters();
    final String replace = rule.getReplace();

    if (replace.isEmpty()) {
        throw new ConfigException("\"replace\" in \"character_types\" must not be explicitly empty");
    }
    if (replace.length() != 1) {
        throw new ConfigException("\"replace\" in \"character_types\" must contain just 1 character");
    }
    // TODO(dmikurube): Revisit this for better escaping.
    // "\E" would terminate the \Q...\E quoting used below, so it is rejected up front.
    if (passCharacters.contains("\\E")) {
        throw new ConfigException("\"pass_characters\" in \"character_types\" must not contain \"\\E\"");
    }

    // Build "[^<allowed>]": any single character that is NOT in the allowed set.
    StringBuilder regexBuilder = new StringBuilder();
    regexBuilder.append("[^");
    for (String target : passTypes) {
        if (CHARACTER_TYPE_KEYWORDS.containsKey(target)) {
            regexBuilder.append(CHARACTER_TYPE_KEYWORDS.get(target));
        } else {
            throw new ConfigException("\"" + target + "\" is an unknown character type keyword");
        }
    }
    if (!passCharacters.isEmpty()) {
        regexBuilder.append("\\Q");
        regexBuilder.append(passCharacters);
        regexBuilder.append("\\E");
    }
    regexBuilder.append("]");

    // Compile once here instead of once per column (String.replaceAll recompiles every call).
    final java.util.regex.Pattern disallowedCharacter =
            java.util.regex.Pattern.compile(regexBuilder.toString());
    // '$' and '\' are special in regex replacement strings; quote the configured
    // character so that e.g. replace: "$" is inserted literally instead of throwing.
    final String literalReplacement = java.util.regex.Matcher.quoteReplacement(replace);

    Schema.Builder schemaBuilder = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        String renamed = disallowedCharacter.matcher(column.getName()).replaceAll(literalReplacement);
        schemaBuilder.add(renamed, column.getType());
    }
    return schemaBuilder.build();
}
use of org.embulk.spi.Column in project embulk by embulk.
the class RenameFilterPlugin method transaction.
/**
 * Computes the renamed output schema and runs the control with it.
 *
 * <p>Renaming happens in two stages: first the explicit "columns" rename map is
 * applied, then each entry of the rules list is applied in order, each one to the
 * schema produced by the previous step.
 *
 * @param config plugin configuration, loaded as a {@code PluginTask}
 * @param inputSchema schema of the incoming records
 * @param control callback run with the dumped task and the final schema
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    final PluginTask task = config.loadConfig(PluginTask.class);
    final Map<String, String> renames = task.getRenameMap();
    final List<ConfigSource> rules = task.getRulesList();

    // Every key of "columns" must name an existing input column;
    // lookupColumn throws SchemaConfigException otherwise.
    for (String sourceName : renames.keySet()) {
        inputSchema.lookupColumn(sourceName);
    }

    // Stage 1: rename by "columns", applied before "rules".
    final Schema.Builder renamed = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        final String original = column.getName();
        final String target = renames.containsKey(original) ? renames.get(original) : original;
        renamed.add(target, column.getType());
    }

    // Stage 2: rename by "rules", each applied to the previous result.
    Schema current = renamed.build();
    for (ConfigSource rule : rules) {
        current = applyRule(rule, current);
    }

    control.run(task.dump(), current);
}
Aggregations