use of org.apache.tika.eval.db.MimeBuffer in project tika by apache.
the class EvalConsumerBuilder method init.
/**
 * One-time initialization of this consumer builder: stores the shared queue,
 * attributes and database utility, creates the tables, builds the
 * {@link MimeBuffer} and populates the reference tables.
 *
 * @param queue      queue of file resources, stored on this builder
 * @param localAttrs attributes stored on this builder and passed to
 *                   {@code updateTableInfosWithPrefixes}
 * @param dbUtil     database utility used to create the tables and to obtain
 *                   the connection handed to the {@link MimeBuffer}
 * @param forceDrop  if {@code true}, both table groups are created with
 *                   {@code DROP_IF_EXISTS}; otherwise the non-reference tables
 *                   use {@code THROW_EX_IF_EXISTS} and the reference tables
 *                   use {@code SKIP_IF_EXISTS}
 * @return the mime buffer created during initialization
 * @throws IOException      propagated from the initialization steps
 * @throws SQLException     propagated from the initialization steps
 * @throws RuntimeException if this method has already been called on this builder
 */
public MimeBuffer init(ArrayBlockingQueue<FileResource> queue, Map<String, String> localAttrs, JDBCUtil dbUtil, boolean forceDrop) throws IOException, SQLException {
    // The counter is bumped on every call; any call after the first is rejected.
    final boolean alreadyInitialized = initialized.getAndIncrement() > 0;
    if (alreadyInitialized) {
        throw new RuntimeException("Can only init a consumer builder once!");
    }

    this.queue = queue;
    this.localAttrs = localAttrs;
    this.dbUtil = dbUtil;

    // NOTE: the order of the steps below is critical -- do not reorder.

    // Step 1: update the table names with prefixes.
    updateTableInfosWithPrefixes(localAttrs);

    // Pick the creation mode for each table group.
    final JDBCUtil.CREATE_TABLE regularTableMode;
    final JDBCUtil.CREATE_TABLE refTableMode;
    if (forceDrop) {
        regularTableMode = JDBCUtil.CREATE_TABLE.DROP_IF_EXISTS;
        refTableMode = JDBCUtil.CREATE_TABLE.DROP_IF_EXISTS;
    } else {
        regularTableMode = JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS;
        refTableMode = JDBCUtil.CREATE_TABLE.SKIP_IF_EXISTS;
    }

    // Step 2: create the tables (non-reference tables first, then reference tables).
    dbUtil.createTables(getNonRefTableInfos(), regularTableMode);
    dbUtil.createTables(getRefTableInfos(), refTableMode);

    // Step 3: create the mime buffer.
    this.mimeBuffer = new MimeBuffer(dbUtil.getConnection(), TikaConfig.getDefaultConfig());

    // Step 4: populate the reference tables.
    populateRefTables();

    return mimeBuffer;
}
use of org.apache.tika.eval.db.MimeBuffer in project tika by apache.
the class EvalConsumersBuilder method build.
/**
 * Builds the consumers manager for an eval run.
 *
 * <p>Steps, in order: resolve the local attributes from the config node and
 * the runtime attributes; load the language-id models; load the common
 * tokens; create the database utility from either the {@code db} path or the
 * {@code jdbc} connection string; instantiate and initialize the configured
 * {@link EvalConsumerBuilder}; build the requested number of consumers; and
 * wrap everything in a {@link DBConsumersManager}.
 *
 * @param node              config node whose attributes are merged with the runtime attributes
 * @param runtimeAttributes runtime attributes; also used to determine the number of consumers
 * @param queue             queue of file resources handed to the consumer builder
 * @return the manager wrapping the database utility, the mime buffer and the consumers
 * @throws RuntimeException if neither {@code db} nor {@code jdbc} is specified, if
 *                          {@code consumerBuilderClass} is missing or cannot be built,
 *                          or wrapping any {@link IOException}/{@link SQLException}
 *                          raised while loading resources or building consumers
 */
@Override
public ConsumersManager build(Node node, Map<String, String> runtimeAttributes, ArrayBlockingQueue<FileResource> queue) {
    List<FileResourceConsumer> consumers = new LinkedList<>();
    int numConsumers = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
    Map<String, String> localAttrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
    Path db = getPath(localAttrs, "db");
    String jdbcConnectionString = localAttrs.get("jdbc");

    // Load language models: the built-in ones unless a model directory was configured.
    Path langModelDir = getPath(localAttrs, "langModelDir");
    try {
        if (langModelDir == null) {
            LanguageIDWrapper.loadBuiltInModels();
        } else {
            LanguageIDWrapper.loadModels(langModelDir);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // Fall back to "en" when no default language code is configured.
    String defaultLangCode = localAttrs.get("defaultLangCode");
    if (defaultLangCode == null || defaultLangCode.isEmpty()) {
        defaultLangCode = "en";
    }

    // commonTokens can be null, in which case the tokens will load from memory.
    Path commonTokens = getPath(localAttrs, "commonTokens");
    try {
        AbstractProfiler.loadCommonTokens(commonTokens, defaultLangCode);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // A "db" path takes precedence over a "jdbc" connection string.
    JDBCUtil jdbcUtil;
    if (db != null) {
        jdbcUtil = new H2Util(db);
    } else if (jdbcConnectionString != null) {
        jdbcUtil = new JDBCUtil(jdbcConnectionString, localAttrs.get("jdbcDriver"));
    } else {
        throw new RuntimeException("Must specify: -db or -jdbc");
    }

    // Validate the class name BEFORE handing it to the class loader utility, so a
    // missing attribute yields the intended message rather than whatever the
    // loader does with a null class name.
    String consumerBuilderClass = PropsUtil.getString(localAttrs.get("consumerBuilderClass"), null);
    if (consumerBuilderClass == null) {
        throw new RuntimeException("Must specify consumerBuilderClass in config file");
    }
    EvalConsumerBuilder consumerBuilder = ClassLoaderUtil.buildClass(EvalConsumerBuilder.class, consumerBuilderClass);
    if (consumerBuilder == null) {
        throw new RuntimeException("Must specify consumerBuilderClass in config file");
    }

    boolean forceDrop = PropsUtil.getBoolean(localAttrs.get("drop"), false);
    MimeBuffer mimeBuffer;
    try {
        mimeBuffer = consumerBuilder.init(queue, localAttrs, jdbcUtil, forceDrop);
    } catch (IOException | SQLException e) {
        throw new RuntimeException(e);
    }

    for (int i = 0; i < numConsumers; i++) {
        try {
            consumers.add(consumerBuilder.build());
        } catch (IOException | SQLException e) {
            throw new RuntimeException(e);
        }
    }

    DBConsumersManager manager;
    try {
        manager = new DBConsumersManager(jdbcUtil, mimeBuffer, consumers);
    } catch (SQLException e) {
        throw new RuntimeException(e);
    }
    consumerBuilder.addErrorLogTablePairs(manager);
    return manager;
}
Aggregations