Search in sources:

Example 1 with JDBCUtil

use of org.apache.tika.eval.db.JDBCUtil in project tika by apache.

The method init of the class EvalConsumerBuilder.

/**
 * One-time initialization of this consumer builder: stores the collaborators,
 * creates the database tables, builds the {@link MimeBuffer} and populates the
 * reference tables.
 *
 * @param queue      queue of file resources, stored on this builder
 * @param localAttrs attributes stored on this builder and passed to
 *                   {@code updateTableInfosWithPrefixes}
 * @param dbUtil     database utility used to create tables and obtain the connection
 * @param forceDrop  if {@code true}, both regular and reference tables are created with
 *                   {@code DROP_IF_EXISTS}; otherwise regular tables use
 *                   {@code THROW_EX_IF_EXISTS} and reference tables use {@code SKIP_IF_EXISTS}
 * @return the newly created mime buffer
 * @throws RuntimeException if this method has already been called on this builder
 * @throws IOException      propagated from the calls made during initialization
 * @throws SQLException     propagated from the calls made during initialization
 */
public MimeBuffer init(ArrayBlockingQueue<FileResource> queue, Map<String, String> localAttrs, JDBCUtil dbUtil, boolean forceDrop) throws IOException, SQLException {
    final boolean alreadyInitialized = initialized.getAndIncrement() > 0;
    if (alreadyInitialized) {
        throw new RuntimeException("Can only init a consumer builder once!");
    }
    this.queue = queue;
    this.localAttrs = localAttrs;
    this.dbUtil = dbUtil;

    // NOTE: the steps below must run in exactly this order.

    // Step 1: update the table names with prefixes.
    updateTableInfosWithPrefixes(localAttrs);

    // Pick the creation mode for each kind of table.
    final JDBCUtil.CREATE_TABLE regularTableMode;
    final JDBCUtil.CREATE_TABLE refTableMode;
    if (forceDrop) {
        regularTableMode = JDBCUtil.CREATE_TABLE.DROP_IF_EXISTS;
        refTableMode = JDBCUtil.CREATE_TABLE.DROP_IF_EXISTS;
    } else {
        regularTableMode = JDBCUtil.CREATE_TABLE.THROW_EX_IF_EXISTS;
        refTableMode = JDBCUtil.CREATE_TABLE.SKIP_IF_EXISTS;
    }

    // Step 2: create the tables (non-reference tables first, then reference tables).
    dbUtil.createTables(getNonRefTableInfos(), regularTableMode);
    dbUtil.createTables(getRefTableInfos(), refTableMode);

    // Step 3: create the mime buffer.
    this.mimeBuffer = new MimeBuffer(dbUtil.getConnection(), TikaConfig.getDefaultConfig());

    // Step 4: populate the reference tables.
    populateRefTables();

    return this.mimeBuffer;
}
Also used : JDBCUtil(org.apache.tika.eval.db.JDBCUtil) MimeBuffer(org.apache.tika.eval.db.MimeBuffer)

Example 2 with JDBCUtil

use of org.apache.tika.eval.db.JDBCUtil in project tika by apache.

The method build of the class EvalConsumersBuilder.

/**
 * Builds the consumers manager for an eval run from the configuration node and
 * runtime attributes.
 *
 * <p>In order, this loads the language models, loads the common tokens, selects the
 * database utility ({@code db} attribute takes precedence over {@code jdbc}),
 * instantiates and initializes the configured consumer builder, builds the
 * consumers and wraps everything in a {@link DBConsumersManager}.
 *
 * @param node              configuration node whose attributes are merged with the runtime attributes
 * @param runtimeAttributes attributes supplied at runtime
 * @param queue             queue handed to the consumer builder
 * @return the manager for the created consumers
 * @throws RuntimeException if neither {@code db} nor {@code jdbc} is specified, if no consumer
 *                          builder could be created, or wrapping any {@link IOException} /
 *                          {@link SQLException} raised along the way
 */
@Override
public ConsumersManager build(Node node, Map<String, String> runtimeAttributes, ArrayBlockingQueue<FileResource> queue) {
    final int consumerCount = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
    final Map<String, String> localAttrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
    final Path db = getPath(localAttrs, "db");
    final String jdbcConnectionString = localAttrs.get("jdbc");

    // Language models: built-in ones unless a model directory was configured.
    final Path langModelDir = getPath(localAttrs, "langModelDir");
    try {
        if (langModelDir != null) {
            LanguageIDWrapper.loadModels(langModelDir);
        } else {
            LanguageIDWrapper.loadBuiltInModels();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // commonTokens can be null, in which case will load from memory
    final Path commonTokens = getPath(localAttrs, "commonTokens");
    String defaultLangCode = localAttrs.get("defaultLangCode");
    if (defaultLangCode == null || defaultLangCode.isEmpty()) {
        defaultLangCode = "en";
    }
    try {
        AbstractProfiler.loadCommonTokens(commonTokens, defaultLangCode);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // Database access: a "db" path wins over a "jdbc" connection string.
    if (db == null && jdbcConnectionString == null) {
        throw new RuntimeException("Must specify: -db or -jdbc");
    }
    final JDBCUtil jdbcUtil = (db != null)
            ? new H2Util(db)
            : new JDBCUtil(jdbcConnectionString, localAttrs.get("jdbcDriver"));

    final String consumerBuilderClassName = PropsUtil.getString(localAttrs.get("consumerBuilderClass"), null);
    final EvalConsumerBuilder consumerBuilder = ClassLoaderUtil.buildClass(EvalConsumerBuilder.class, consumerBuilderClassName);
    if (consumerBuilder == null) {
        throw new RuntimeException("Must specify consumerBuilderClass in config file");
    }

    final boolean forceDrop = PropsUtil.getBoolean(localAttrs.get("drop"), false);
    final MimeBuffer mimeBuffer;
    try {
        mimeBuffer = consumerBuilder.init(queue, localAttrs, jdbcUtil, forceDrop);
    } catch (IOException | SQLException e) {
        throw new RuntimeException(e);
    }

    // Build one consumer per requested slot; any failure aborts the whole build.
    final List<FileResourceConsumer> consumers = new LinkedList<>();
    try {
        int built = 0;
        while (built < consumerCount) {
            consumers.add(consumerBuilder.build());
            built++;
        }
    } catch (IOException | SQLException e) {
        throw new RuntimeException(e);
    }

    final DBConsumersManager manager;
    try {
        manager = new DBConsumersManager(jdbcUtil, mimeBuffer, consumers);
    } catch (SQLException e) {
        throw new RuntimeException(e);
    }
    consumerBuilder.addErrorLogTablePairs(manager);
    return manager;
}
Also used : Path(java.nio.file.Path) SQLException(java.sql.SQLException) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil) MimeBuffer(org.apache.tika.eval.db.MimeBuffer) IOException(java.io.IOException) LinkedList(java.util.LinkedList) FileResourceConsumer(org.apache.tika.batch.FileResourceConsumer)

Example 3 with JDBCUtil

use of org.apache.tika.eval.db.JDBCUtil in project tika by apache.

The method main of the class ResultsReporter.

/**
 * Command-line entry point: parses the options, opens a database connection and
 * executes the reports into the reports root directory.
 *
 * <p>Options read here: {@code db} (path of an existing H2 database) or {@code jdbc}
 * (connection string, with optional {@code jdbcdriver}), {@code rf} (reports file) and
 * {@code rd} (reports root directory, defaulting to {@code "reports"}). When {@code rf}
 * is absent, the file returned by {@code getDefaultReportsConfig} is used and deleted
 * afterwards.
 *
 * @param args command-line arguments
 * @throws RuntimeException if {@code db} is given but the H2 database cannot be found
 * @throws Exception        propagated from connecting, building or executing the reports
 */
public static void main(String[] args) throws Exception {
    final CommandLine commandLine;
    try {
        commandLine = new DefaultParser().parse(OPTIONS, args);
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        USAGE();
        return;
    }

    final JDBCUtil dbUtil;
    if (commandLine.hasOption("db")) {
        final Path db = Paths.get(commandLine.getOptionValue("db"));
        if (!H2Util.databaseExists(db)) {
            throw new RuntimeException("I'm sorry, but I couldn't find this h2 database: " + db + "\nMake sure not to include the .mv.db at the end.");
        }
        dbUtil = new H2Util(db);
    } else if (commandLine.hasOption("jdbc")) {
        // The driver class is optional; null is passed through when it is not given.
        final String driverClass = commandLine.hasOption("jdbcdriver")
                ? commandLine.getOptionValue("jdbcdriver")
                : null;
        dbUtil = new JDBCUtil(commandLine.getOptionValue("jdbc"), driverClass);
    } else {
        System.err.println("Must specify either -db for the default in-memory h2 database\n" + "or -jdbc for a full jdbc connection string");
        USAGE();
        return;
    }

    try (Connection c = dbUtil.getConnection()) {
        // Only set when the default reports config is used; removed in the finally block.
        Path tmpReportsFile = null;
        try {
            final String reportsFile = commandLine.getOptionValue("rf");
            final Path reportsConfig;
            if (reportsFile != null) {
                reportsConfig = Paths.get(reportsFile);
            } else {
                tmpReportsFile = getDefaultReportsConfig(c);
                reportsConfig = tmpReportsFile;
            }
            final ResultsReporter resultsReporter = ResultsReporter.build(reportsConfig);

            final Path reportsRootDirectory = Paths.get(commandLine.getOptionValue("rd", "reports"));
            if (Files.isDirectory(reportsRootDirectory)) {
                LOG.warn("'Reports' directory exists.  Will overwrite existing reports.");
            }
            resultsReporter.execute(c, reportsRootDirectory);
        } finally {
            if (tmpReportsFile != null) {
                Files.delete(tmpReportsFile);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) CommandLine(org.apache.commons.cli.CommandLine) Connection(java.sql.Connection) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil) ParseException(org.apache.commons.cli.ParseException) DefaultParser(org.apache.commons.cli.DefaultParser)

Example 4 with JDBCUtil

use of org.apache.tika.eval.db.JDBCUtil in project tika by apache.

The method main of the class XMLErrorLogUpdater.

/**
 * Command-line entry point: applies two XML log files to the extract-exception
 * tables A and B of an H2 database and commits the result.
 *
 * <p>Expected arguments, in order:
 * <ol>
 *   <li>path of the XML log file applied to {@code ExtractComparer.EXTRACT_EXCEPTION_TABLE_A}</li>
 *   <li>path of the XML log file applied to {@code ExtractComparer.EXTRACT_EXCEPTION_TABLE_B}</li>
 *   <li>path of the H2 database</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws Exception                propagated from opening the connection, updating or committing
 */
public static void main(String[] args) throws Exception {
    // Fail with a usable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 3) {
        throw new IllegalArgumentException("Expected 3 arguments: <xmlLogFileA> <xmlLogFileB> <db>, but got " + args.length);
    }
    XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
    Path xmlLogFileA = Paths.get(args[0]);
    Path xmlLogFileB = Paths.get(args[1]);
    Path db = Paths.get(args[2]);
    JDBCUtil dbUtil = new H2Util(db);
    // try-with-resources guarantees the connection is closed even when an
    // update or the commit throws; previously it leaked on any failure.
    try (Connection connection = dbUtil.getConnection()) {
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A, xmlLogFileA);
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B, xmlLogFileB);
        connection.commit();
    }
}
Also used : Path(java.nio.file.Path) Connection(java.sql.Connection) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil)

Aggregations

JDBCUtil (org.apache.tika.eval.db.JDBCUtil)4 Path (java.nio.file.Path)3 H2Util (org.apache.tika.eval.db.H2Util)3 Connection (java.sql.Connection)2 MimeBuffer (org.apache.tika.eval.db.MimeBuffer)2 IOException (java.io.IOException)1 SQLException (java.sql.SQLException)1 LinkedList (java.util.LinkedList)1 CommandLine (org.apache.commons.cli.CommandLine)1 DefaultParser (org.apache.commons.cli.DefaultParser)1 ParseException (org.apache.commons.cli.ParseException)1 FileResourceConsumer (org.apache.tika.batch.FileResourceConsumer)1