Example use of org.embulk.spi.Schema in the project embulk by embulk.
Class SamplingParserPlugin, method runFileInputSampling.
/**
 * Runs the given file input with its parser replaced by the sampling parser and
 * returns the sample that parser captured.
 *
 * <p>The input config is deep-copied; on the copy, {@code parser.type} is set to
 * {@code "system_sampling"}, {@code parser.sample_buffer_bytes} is taken from
 * {@code sampleBufferConfig}, and {@code decoders} is set to {@code null}. The
 * sample itself is delivered by a {@code SampledNoticeError} that is caught here.
 *
 * <p>Every task is run once. Tasks that raise {@code NotEnoughSampleError} are
 * skipped, remembering the one that reported the largest size. If control reaches
 * the end of that pass, the largest task is run again with {@code force} set on
 * its {@code ParserTaskSource}.
 *
 * @param runner the file input runner used for both the transaction and the task runs
 * @param inputConfig the original input configuration; not modified (a deep copy is used)
 * @param sampleBufferConfig configuration loaded as {@code SampleBufferTask}
 * @return the sample carried by the caught {@code SampledNoticeError}
 * @throws NoSampleException raised inside the transaction callback when there are no
 *         tasks, when no task reported a positive size, or when the forced run did
 *         not produce a sample (presumably propagated unchanged by
 *         {@code runner.transaction} -- confirm)
 * @throws AssertionError if the transaction returns normally instead of throwing
 */
public static Buffer runFileInputSampling(final FileInputRunner runner, ConfigSource inputConfig, ConfigSource sampleBufferConfig) {
    final SampleBufferTask sampleBufferTask = sampleBufferConfig.loadConfig(SampleBufferTask.class);

    // Override in.parser.type so that FileInputRunner creates SamplingParserPlugin.
    final ConfigSource samplingInputConfig = inputConfig.deepCopy();
    samplingInputConfig.getNestedOrSetEmpty("parser")
            .set("type", "system_sampling")
            .set("sample_buffer_bytes", sampleBufferTask.getSampleBufferBytes());
    samplingInputConfig.set("decoders", null);

    try {
        runner.transaction(samplingInputConfig, new InputPlugin.Control() {
            @Override
            public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount) {
                if (taskCount == 0) {
                    throw new NoSampleException("No input files to read sample data");
                }

                // First pass: try every task, tracking the largest one that was still too small.
                int maxSize = -1;
                int maxSizeTaskIndex = -1;
                for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
                    try {
                        runner.run(taskSource, schema, taskIndex, newRejectingPageOutput());
                    } catch (NotEnoughSampleError ex) {
                        if (maxSize < ex.getSize()) {
                            maxSize = ex.getSize();
                            maxSizeTaskIndex = taskIndex;
                        }
                    }
                }
                if (maxSize <= 0) {
                    throw new NoSampleException("All input files are empty");
                }

                // Second pass: re-run the largest task with "force" enabled.
                taskSource.getNested("ParserTaskSource").set("force", true);
                try {
                    runner.run(taskSource, schema, maxSizeTaskIndex, newRejectingPageOutput());
                } catch (NotEnoughSampleError ignored) {
                    // Still not enough data even when forced; reported by the throw below.
                }
                // Reached whether the forced run returned normally or was still too small.
                throw new NoSampleException("All input files are smaller than minimum sampling size");
            }
        });
        throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
    } catch (SampledNoticeError error) {
        return error.getSample();
    }
}

/**
 * Creates a PageOutput that rejects any page: {@code add} always throws, while
 * {@code finish} and {@code close} do nothing.
 */
private static PageOutput newRejectingPageOutput() {
    return new PageOutput() {
        @Override
        public void add(Page page) {
            // TODO exception class
            throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
        }

        @Override
        public void finish() {
        }

        @Override
        public void close() {
        }
    };
}
Example use of org.embulk.spi.Schema in the project MiscellaneousStudy by mikoto2000.
Class MyPageOutput, method transaction.
/**
 * Prints the received arguments to standard output, then builds the output schema
 * and hands it to the control.
 *
 * <p>The output schema is the input schema's columns (same names and types, same
 * order) with a {@code lineNumber} LONG column added first and an
 * {@code additional} STRING column added last.
 *
 * @param config plugin configuration, loaded as {@code PluginTask}
 * @param inputSchema schema of the incoming records
 * @param control callback that receives the dumped task and the output schema
 */
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) {
    // Debug trace of everything this call was given.
    System.out.println("transaction!");

    System.out.print("config: ");
    System.out.println(config);

    System.out.print("inputSchema: ");
    System.out.println(inputSchema);

    System.out.print("control: ");
    System.out.println(control);

    PluginTask pluginTask = config.loadConfig(PluginTask.class);

    Schema.Builder schemaBuilder = Schema.builder();

    // Add the sequence-number column.
    schemaBuilder.add("lineNumber", Types.LONG);

    // Carry over every input column unchanged.
    for (Column inputColumn : inputSchema.getColumns()) {
        schemaBuilder.add(inputColumn.getName(), inputColumn.getType());
    }

    // Add the additional string column.
    schemaBuilder.add("additional", Types.STRING);

    control.run(pluginTask.dump(), schemaBuilder.build());
}
Aggregations