use of org.apache.tika.eval.db.H2Util in project tika by apache.
the class ProfilerBatchTest method setUp.
/**
 * Prepares the shared database connection for the profiler batch tests.
 *
 * <p>Resolves the {@code /test-dirs/extractsA} test resource, creates a temporary
 * directory (prefix {@code tika-test-db-dir-}) inside it, and opens a connection
 * to an H2 database named {@code profiler_test} in that directory. The directory
 * and connection are stored in the static {@code dbDir} and {@code conn} fields.
 *
 * @throws Exception if the resource URI cannot be converted, the temporary
 *                   directory cannot be created, or the connection cannot be opened
 */
@BeforeClass
public static void setUp() throws Exception {
    // Use the class literal: there is no need to construct a test instance
    // just to reach the class's resource loader.
    Path inputRoot = Paths.get(ComparerBatchTest.class.getResource("/test-dirs/extractsA").toURI());
    dbDir = Files.createTempDirectory(inputRoot, "tika-test-db-dir-");
    // NOTE: args is only consumed by the disabled executor invocation below.
    Map<String, String> args = new HashMap<>();
    Path db = dbDir.resolve("profiler_test");
    args.put("-db", db.toString());
    //for debugging, you can use this to select only one file pair to load
    //args.put("-includeFilePat", "file8.*");
    /* BatchProcessTestExecutor ex = new BatchProcessTestExecutor(COMPARER_PROCESS_CLASS, args,
    "/single-file-profiler-crawl-input-config.xml");
    StreamStrings streamStrings = ex.execute();
    System.out.println(streamStrings.getErrString());
    System.out.println(streamStrings.getOutString());*/
    H2Util dbUtil = new H2Util(db);
    conn = dbUtil.getConnection();
}
use of org.apache.tika.eval.db.H2Util in project tika by apache.
the class ResultsReporterTest method setUp.
/**
 * Builds the fixture for each report test: locates {@code /reports.xml},
 * creates a temporary directory, opens an H2 connection to a database inside it,
 * and populates a two-row {@code test_table}.
 *
 * @throws Exception if the config resource cannot be resolved, the temporary
 *                   directory cannot be created, or any SQL statement fails
 */
@Before
public void setUp() throws Exception {
    configFile = Paths.get(this.getClass().getResource("/reports.xml").toURI());
    tmpDir = Files.createTempDirectory("tika-eval-report-test-");
    connection = new H2Util(tmpDir.resolve(dbName)).getConnection();
    // The Statement is needed only while creating the fixture; try-with-resources
    // releases it even when one of the statements fails. The connection itself
    // stays open because it is kept in a field for later use.
    try (Statement st = connection.createStatement()) {
        st.execute("CREATE TABLE test_table (ID LONG PRIMARY KEY, STRING VARCHAR(32))");
        st.execute("INSERT into test_table values ( 100000, 'the quick brown')");
        st.execute("INSERT into test_table values (123456789, 'fox jumped over')");
        connection.commit();
    }
}
use of org.apache.tika.eval.db.H2Util in project tika by apache.
the class EvalConsumersBuilder method build.
/**
 * Assembles the consumers manager for an eval batch run.
 *
 * <p>Steps, in order: merge the node's attributes with the runtime attributes;
 * load language models (from {@code langModelDir} when given, otherwise the
 * built-in ones); load common tokens using {@code defaultLangCode} (falling back
 * to {@code "en"} when absent or empty); pick the database access object from
 * either {@code db} or {@code jdbc}; instantiate and initialise the configured
 * {@code consumerBuilderClass}; build one consumer per configured consumer slot;
 * and wrap everything in a {@link DBConsumersManager}.
 *
 * @param node              configuration node whose attributes are read
 * @param runtimeAttributes attributes supplied at runtime
 * @param queue             queue handed to the consumer builder
 * @return the manager holding the built consumers
 * @throws RuntimeException if neither {@code db} nor {@code jdbc} is specified,
 *                          if no consumer builder could be created, or wrapping
 *                          any {@link IOException} / {@link SQLException} raised
 *                          along the way
 */
@Override
public ConsumersManager build(Node node, Map<String, String> runtimeAttributes, ArrayBlockingQueue<FileResource> queue) {
    List<FileResourceConsumer> consumerList = new LinkedList<>();
    int consumerCount = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
    Map<String, String> attrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
    Path h2Path = getPath(attrs, "db");
    String jdbcUrl = attrs.get("jdbc");
    Path modelDir = getPath(attrs, "langModelDir");

    try {
        if (modelDir != null) {
            LanguageIDWrapper.loadModels(modelDir);
        } else {
            LanguageIDWrapper.loadBuiltInModels();
        }

        // commonTokensPath may be null, in which case the tokens are loaded from memory
        Path commonTokensPath = getPath(attrs, "commonTokens");
        String langCode = attrs.get("defaultLangCode");
        if (langCode == null || langCode.isEmpty()) {
            langCode = "en";
        }
        AbstractProfiler.loadCommonTokens(commonTokensPath, langCode);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // An H2 path takes precedence over a raw JDBC connection string.
    if (h2Path == null && jdbcUrl == null) {
        throw new RuntimeException("Must specify: -db or -jdbc");
    }
    JDBCUtil dbAccess = (h2Path != null)
            ? new H2Util(h2Path)
            : new JDBCUtil(jdbcUrl, attrs.get("jdbcDriver"));

    String builderClassName = PropsUtil.getString(attrs.get("consumerBuilderClass"), null);
    EvalConsumerBuilder builder = ClassLoaderUtil.buildClass(EvalConsumerBuilder.class, builderClassName);
    if (builder == null) {
        throw new RuntimeException("Must specify consumerBuilderClass in config file");
    }
    boolean dropTables = PropsUtil.getBoolean(attrs.get("drop"), false);

    DBConsumersManager consumersManager;
    try {
        MimeBuffer buffer = builder.init(queue, attrs, dbAccess, dropTables);
        for (int remaining = consumerCount; remaining > 0; remaining--) {
            consumerList.add(builder.build());
        }
        consumersManager = new DBConsumersManager(dbAccess, buffer, consumerList);
    } catch (IOException | SQLException e) {
        throw new RuntimeException(e);
    }

    builder.addErrorLogTablePairs(consumersManager);
    return consumersManager;
}
use of org.apache.tika.eval.db.H2Util in project tika by apache.
the class ResultsReporter method main.
/**
 * Command-line entry point: runs the configured reports against a database.
 *
 * <p>The database is selected with {@code -db} (an H2 database path, which must
 * already exist) or {@code -jdbc} (a connection string, optionally with
 * {@code -jdbcdriver}). The reports config comes from {@code -rf}; when that is
 * absent, a default config is obtained via {@code getDefaultReportsConfig} and
 * deleted once reporting finishes. Output goes to the directory given by
 * {@code -rd}, defaulting to {@code reports}.
 *
 * @param args command-line arguments
 * @throws Exception if the H2 database is missing, or if connecting, building
 *                   the reporter, or executing the reports fails
 */
public static void main(String[] args) throws Exception {
    CommandLine cli;
    try {
        cli = new DefaultParser().parse(OPTIONS, args);
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        USAGE();
        return;
    }

    JDBCUtil jdbc;
    if (cli.hasOption("db")) {
        Path h2Path = Paths.get(cli.getOptionValue("db"));
        if (!H2Util.databaseExists(h2Path)) {
            throw new RuntimeException("I'm sorry, but I couldn't find this h2 database: " + h2Path + "\nMake sure not to include the .mv.db at the end.");
        }
        jdbc = new H2Util(h2Path);
    } else if (cli.hasOption("jdbc")) {
        String driver = cli.hasOption("jdbcdriver") ? cli.getOptionValue("jdbcdriver") : null;
        jdbc = new JDBCUtil(cli.getOptionValue("jdbc"), driver);
    } else {
        System.err.println("Must specify either -db for the default in-memory h2 database\n" + "or -jdbc for a full jdbc connection string");
        USAGE();
        return;
    }

    try (Connection conn = jdbc.getConnection()) {
        // Non-null only when a default config file was generated and must be cleaned up.
        Path generatedConfig = null;
        try {
            String configArg = cli.getOptionValue("rf");
            ResultsReporter reporter;
            if (configArg != null) {
                reporter = ResultsReporter.build(Paths.get(configArg));
            } else {
                generatedConfig = getDefaultReportsConfig(conn);
                reporter = ResultsReporter.build(generatedConfig);
            }

            Path outputRoot = Paths.get(cli.getOptionValue("rd", "reports"));
            if (Files.isDirectory(outputRoot)) {
                LOG.warn("'Reports' directory exists. Will overwrite existing reports.");
            }
            reporter.execute(conn, outputRoot);
        } finally {
            if (generatedConfig != null) {
                Files.delete(generatedConfig);
            }
        }
    }
}
use of org.apache.tika.eval.db.H2Util in project tika by apache.
the class XMLErrorLogUpdater method main.
/**
 * Command-line entry point: applies two XML error logs to the extract-exception
 * tables of an H2 database and commits the result.
 *
 * <p>Expected arguments, in order: the XML log file for table A, the XML log
 * file for table B, and the path of the H2 database.
 *
 * @param args {@code <xmlLogFileA> <xmlLogFileB> <db>}
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws Exception if connecting, updating, or committing fails
 */
public static void main(String[] args) throws Exception {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 3) {
        throw new IllegalArgumentException(
                "Usage: XMLErrorLogUpdater <xmlLogFileA> <xmlLogFileB> <db>");
    }
    XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
    Path xmlLogFileA = Paths.get(args[0]);
    Path xmlLogFileB = Paths.get(args[1]);
    Path db = Paths.get(args[2]);
    JDBCUtil dbUtil = new H2Util(db);
    // try-with-resources closes the connection even when an update or the commit throws.
    try (Connection connection = dbUtil.getConnection()) {
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A, xmlLogFileA);
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B, xmlLogFileB);
        connection.commit();
    }
}
Aggregations