Use of org.embulk.spi.PageOutput in the embulk project (by embulk).
From class SamplingParserPlugin, method runFileInputSampling.
/**
 * Runs a file input with its parser type overridden to the sampling parser and
 * returns the sample buffer that comes back from that run.
 *
 * <p>The overrides ({@code parser.type = "system_sampling"},
 * {@code parser.sample_buffer_bytes}, {@code decoders = null}) are applied to a
 * deep copy of {@code inputConfig}. Tasks are then run in index order inside
 * {@code runner.transaction(...)}; the sample is obtained from a
 * {@code SampledNoticeError} caught around that transaction.
 *
 * @param runner runner used to open the transaction and to run each task
 * @param inputConfig input configuration that is deep-copied before being overridden
 * @param sampleBufferConfig configuration loaded as {@code SampleBufferTask} to read the sample buffer size
 * @return the sample carried by the caught {@code SampledNoticeError}
 * @throws NoSampleException raised inside the transaction callback when there are no tasks,
 *         when no task reported a positive size, or when the final forced run yields no sample
 * @throws AssertionError if the transaction returns normally instead of throwing
 */
public static Buffer runFileInputSampling(final FileInputRunner runner, ConfigSource inputConfig, ConfigSource sampleBufferConfig) {
    final SampleBufferTask sampleBufferTask = sampleBufferConfig.loadConfig(SampleBufferTask.class);

    // Override in.parser.type so that FileInputRunner creates SamplingParserPlugin.
    ConfigSource samplingInputConfig = inputConfig.deepCopy();
    samplingInputConfig
            .getNestedOrSetEmpty("parser")
            .set("type", "system_sampling")
            .set("sample_buffer_bytes", sampleBufferTask.getSampleBufferBytes());
    samplingInputConfig.set("decoders", null);

    try {
        runner.transaction(samplingInputConfig, new InputPlugin.Control() {
            @Override
            public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount) {
                // Page sink that refuses pages: receiving one means the input
                // did not go through the file-input/parser path.
                final class RejectingPageOutput implements PageOutput {
                    @Override
                    public void add(Page page) {
                        // TODO exception class
                        throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration");
                    }

                    @Override
                    public void finish() {
                    }

                    @Override
                    public void close() {
                    }
                }

                final String tooSmallMessage = "All input files are smaller than minimum sampling size";

                if (taskCount == 0) {
                    throw new NoSampleException("No input files to read sample data");
                }

                // First pass: run every task and remember which one reported
                // the largest size through NotEnoughSampleError.
                int largestSize = -1;
                int largestSizeTask = -1;
                for (int index = 0; index < taskCount; index++) {
                    try {
                        runner.run(taskSource, schema, index, new RejectingPageOutput());
                    } catch (NotEnoughSampleError shortfall) {
                        final int reportedSize = shortfall.getSize();
                        if (reportedSize > largestSize) {
                            largestSize = reportedSize;
                            largestSizeTask = index;
                        }
                    }
                }

                if (largestSize <= 0) {
                    throw new NoSampleException("All input files are empty");
                }

                // Second pass: re-run only the largest task with "force" set.
                // NOTE(review): presumably "force" makes the sampling parser accept
                // an undersized sample -- confirm against SamplingParserPlugin.
                taskSource.getNested("ParserTaskSource").set("force", true);
                try {
                    runner.run(taskSource, schema, largestSizeTask, new RejectingPageOutput());
                } catch (NotEnoughSampleError shortfall) {
                    throw new NoSampleException(tooSmallMessage);
                }
                // The forced run returned without throwing, so there is still no sample.
                throw new NoSampleException(tooSmallMessage);
            }
        });
        throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
    } catch (SampledNoticeError notice) {
        return notice.getSample();
    }
}
Aggregations