Search in sources:

Example 1 with H2Util

Use of org.apache.tika.eval.db.H2Util in the Apache Tika project.

From the class ProfilerBatchTest, method setUp.

/**
 * Opens the H2 connection used by this test class.
 *
 * <p>A temporary directory is created underneath the {@code /test-dirs/extractsA}
 * test resource directory and stored in {@code dbDir}; the database path
 * {@code profiler_test} is resolved inside it and the resulting connection is
 * stored in {@code conn}. The batch run that would consume the argument map is
 * currently disabled (see the commented-out code below).
 *
 * @throws Exception if the resource cannot be resolved, the temporary directory
 *                   cannot be created, or the connection cannot be opened
 */
@BeforeClass
public static void setUp() throws Exception {
    Class<?> resourceAnchor = new ComparerBatchTest().getClass();
    Path extractsRoot = Paths.get(resourceAnchor.getResource("/test-dirs/extractsA").toURI());
    dbDir = Files.createTempDirectory(extractsRoot, "tika-test-db-dir-");
    Path dbPath = dbDir.resolve("profiler_test");

    // Arguments for the (currently disabled) batch run below.
    Map<String, String> batchArgs = new HashMap<>();
    batchArgs.put("-db", dbPath.toString());
    // For debugging, this restricts the load to a single file pair:
    // batchArgs.put("-includeFilePat", "file8.*");

    /* Disabled batch run:
        BatchProcessTestExecutor ex = new BatchProcessTestExecutor(COMPARER_PROCESS_CLASS, batchArgs,
                "/single-file-profiler-crawl-input-config.xml");
        StreamStrings streamStrings = ex.execute();
        System.out.println(streamStrings.getErrString());
        System.out.println(streamStrings.getOutString());*/

    conn = new H2Util(dbPath).getConnection();
}
Also used : Path(java.nio.file.Path) HashMap(java.util.HashMap) H2Util(org.apache.tika.eval.db.H2Util) BeforeClass(org.junit.BeforeClass)

Example 2 with H2Util

Use of org.apache.tika.eval.db.H2Util in the Apache Tika project.

From the class ResultsReporterTest, method setUp.

/**
 * Prepares a fresh H2 database for each test.
 *
 * <p>Resolves the {@code /reports.xml} resource into {@code configFile}, creates a
 * temporary directory, opens a connection to an H2 database located in that
 * directory, and seeds {@code test_table} with two rows before committing.
 *
 * @throws Exception if the resource cannot be resolved, the temporary directory
 *                   cannot be created, or any SQL statement fails
 */
@Before
public void setUp() throws Exception {
    configFile = Paths.get(this.getClass().getResource("/reports.xml").toURI());
    tmpDir = Files.createTempDirectory("tika-eval-report-test-");
    connection = new H2Util(tmpDir.resolve(dbName)).getConnection();
    // try-with-resources: the Statement is released even if one of the
    // executions below throws, instead of being leaked.
    try (Statement st = connection.createStatement()) {
        st.execute("CREATE TABLE test_table (ID LONG PRIMARY KEY, STRING VARCHAR(32))");
        st.execute("INSERT into test_table values ( 100000, 'the quick brown')");
        st.execute("INSERT into test_table values (123456789, 'fox jumped over')");
    }
    connection.commit();
}
Also used : Statement(java.sql.Statement) H2Util(org.apache.tika.eval.db.H2Util) Before(org.junit.Before)

Example 3 with H2Util

Use of org.apache.tika.eval.db.H2Util in the Apache Tika project.

From the class EvalConsumersBuilder, method build.

/**
 * Builds the consumers manager for an eval batch run.
 *
 * <p>Attributes are read from the merged node/runtime attribute map:
 * {@code db} or {@code jdbc} (one is required; {@code db} wins when both are
 * present), {@code jdbcDriver}, {@code langModelDir}, {@code commonTokens},
 * {@code defaultLangCode} (falls back to {@code "en"} when null or empty),
 * {@code consumerBuilderClass} and {@code drop}.
 *
 * @param node              configuration node whose attributes are merged with the runtime attributes
 * @param runtimeAttributes attributes supplied at runtime
 * @param queue             queue handed to the consumer builder's {@code init}
 * @return a {@code DBConsumersManager} wrapping the built consumers
 * @throws RuntimeException if neither {@code db} nor {@code jdbc} is given, if no
 *                          consumer builder could be created, or wrapping any
 *                          {@link IOException}/{@link SQLException} raised along the way
 */
@Override
public ConsumersManager build(Node node, Map<String, String> runtimeAttributes, ArrayBlockingQueue<FileResource> queue) {
    List<FileResourceConsumer> consumers = new LinkedList<>();
    int numConsumers = BatchProcessBuilder.getNumConsumers(runtimeAttributes);
    Map<String, String> localAttrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);

    Path db = getPath(localAttrs, "db");
    String jdbcConnectionString = localAttrs.get("jdbc");

    // Language models: built-in ones unless a directory was configured.
    Path langModelDir = getPath(localAttrs, "langModelDir");
    try {
        if (langModelDir != null) {
            LanguageIDWrapper.loadModels(langModelDir);
        } else {
            LanguageIDWrapper.loadBuiltInModels();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // commonTokens may be null, in which case the tokens are loaded from memory.
    Path commonTokens = getPath(localAttrs, "commonTokens");
    String defaultLangCode = localAttrs.get("defaultLangCode");
    boolean langCodeMissing = defaultLangCode == null || defaultLangCode.isEmpty();
    if (langCodeMissing) {
        defaultLangCode = "en";
    }
    try {
        AbstractProfiler.loadCommonTokens(commonTokens, defaultLangCode);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // An H2 file database takes precedence over a raw JDBC connection string.
    if (db == null && jdbcConnectionString == null) {
        throw new RuntimeException("Must specify: -db or -jdbc");
    }
    JDBCUtil jdbcUtil = (db != null)
            ? new H2Util(db)
            : new JDBCUtil(jdbcConnectionString, localAttrs.get("jdbcDriver"));

    String builderClassName = PropsUtil.getString(localAttrs.get("consumerBuilderClass"), null);
    EvalConsumerBuilder consumerBuilder = ClassLoaderUtil.buildClass(EvalConsumerBuilder.class, builderClassName);
    if (consumerBuilder == null) {
        throw new RuntimeException("Must specify consumerBuilderClass in config file");
    }

    boolean forceDrop = PropsUtil.getBoolean(localAttrs.get("drop"), false);
    MimeBuffer mimeBuffer;
    try {
        // Initialise the builder once, then create one consumer per requested slot.
        mimeBuffer = consumerBuilder.init(queue, localAttrs, jdbcUtil, forceDrop);
        for (int remaining = numConsumers; remaining > 0; remaining--) {
            consumers.add(consumerBuilder.build());
        }
    } catch (IOException | SQLException e) {
        throw new RuntimeException(e);
    }

    DBConsumersManager manager;
    try {
        manager = new DBConsumersManager(jdbcUtil, mimeBuffer, consumers);
    } catch (SQLException e) {
        throw new RuntimeException(e);
    }
    consumerBuilder.addErrorLogTablePairs(manager);
    return manager;
}
Also used : Path(java.nio.file.Path) SQLException(java.sql.SQLException) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil) MimeBuffer(org.apache.tika.eval.db.MimeBuffer) IOException(java.io.IOException) LinkedList(java.util.LinkedList) FileResourceConsumer(org.apache.tika.batch.FileResourceConsumer)

Example 4 with H2Util

Use of org.apache.tika.eval.db.H2Util in the Apache Tika project.

From the class ResultsReporter, method main.

/**
 * Command-line entry point: runs the configured reports against a database.
 *
 * <p>The database is selected with {@code -db} (an H2 database path, which must
 * already exist) or {@code -jdbc} (a connection string, optionally with
 * {@code -jdbcdriver}). The reports configuration comes from {@code -rf}; when
 * it is absent a default configuration file is obtained via
 * {@code getDefaultReportsConfig} and deleted afterwards. Reports are written
 * under {@code -rd}, defaulting to {@code reports}.
 *
 * @param args command-line arguments
 * @throws Exception if the H2 database does not exist, or on any failure while
 *                   connecting or running the reports
 */
public static void main(String[] args) throws Exception {
    CommandLine cli;
    try {
        cli = new DefaultParser().parse(OPTIONS, args);
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        USAGE();
        return;
    }

    final JDBCUtil jdbc;
    if (cli.hasOption("db")) {
        Path db = Paths.get(cli.getOptionValue("db"));
        if (H2Util.databaseExists(db)) {
            jdbc = new H2Util(db);
        } else {
            throw new RuntimeException("I'm sorry, but I couldn't find this h2 database: " + db + "\nMake sure not to include the .mv.db at the end.");
        }
    } else if (cli.hasOption("jdbc")) {
        // The driver class is optional; null is passed through when it is not given.
        String driverClass = cli.hasOption("jdbcdriver") ? cli.getOptionValue("jdbcdriver") : null;
        jdbc = new JDBCUtil(cli.getOptionValue("jdbc"), driverClass);
    } else {
        System.err.println("Must specify either -db for the default in-memory h2 database\n" + "or -jdbc for a full jdbc connection string");
        USAGE();
        return;
    }

    try (Connection c = jdbc.getConnection()) {
        // Only set when a default config file was generated; it is removed in the finally block.
        Path generatedConfig = null;
        try {
            String reportsFile = cli.getOptionValue("rf");
            ResultsReporter reporter;
            if (reportsFile != null) {
                reporter = ResultsReporter.build(Paths.get(reportsFile));
            } else {
                generatedConfig = getDefaultReportsConfig(c);
                reporter = ResultsReporter.build(generatedConfig);
            }

            Path reportsRoot = Paths.get(cli.getOptionValue("rd", "reports"));
            if (Files.isDirectory(reportsRoot)) {
                LOG.warn("'Reports' directory exists.  Will overwrite existing reports.");
            }
            reporter.execute(c, reportsRoot);
        } finally {
            if (generatedConfig != null) {
                Files.delete(generatedConfig);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) CommandLine(org.apache.commons.cli.CommandLine) Connection(java.sql.Connection) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil) ParseException(org.apache.commons.cli.ParseException) DefaultParser(org.apache.commons.cli.DefaultParser)

Example 5 with H2Util

Use of org.apache.tika.eval.db.H2Util in the Apache Tika project.

From the class XMLErrorLogUpdater, method main.

/**
 * Command-line entry point: applies two XML error logs to an H2 database.
 *
 * <p>The first log is applied to {@code ExtractComparer.EXTRACT_EXCEPTION_TABLE_A}
 * and the second to {@code ExtractComparer.EXTRACT_EXCEPTION_TABLE_B}; the
 * connection is committed after both updates succeed.
 *
 * @param args {@code args[0]}: path to XML log file A; {@code args[1]}: path to
 *             XML log file B; {@code args[2]}: path to the H2 database
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws Exception                on any failure while connecting, updating or committing
 */
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException(
                "Expected 3 arguments: <xmlLogFileA> <xmlLogFileB> <h2 db path>, but got "
                        + (args == null ? 0 : args.length));
    }
    XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
    Path xmlLogFileA = Paths.get(args[0]);
    Path xmlLogFileB = Paths.get(args[1]);
    Path db = Paths.get(args[2]);
    JDBCUtil dbUtil = new H2Util(db);
    // try-with-resources: the connection is closed even if an update or the
    // commit throws, instead of being leaked.
    try (Connection connection = dbUtil.getConnection()) {
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A, xmlLogFileA);
        writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B, xmlLogFileB);
        connection.commit();
    }
}
Also used : Path(java.nio.file.Path) Connection(java.sql.Connection) H2Util(org.apache.tika.eval.db.H2Util) JDBCUtil(org.apache.tika.eval.db.JDBCUtil)

Aggregations

H2Util (org.apache.tika.eval.db.H2Util)5 Path (java.nio.file.Path)4 JDBCUtil (org.apache.tika.eval.db.JDBCUtil)3 Connection (java.sql.Connection)2 IOException (java.io.IOException)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 CommandLine (org.apache.commons.cli.CommandLine)1 DefaultParser (org.apache.commons.cli.DefaultParser)1 ParseException (org.apache.commons.cli.ParseException)1 FileResourceConsumer (org.apache.tika.batch.FileResourceConsumer)1 MimeBuffer (org.apache.tika.eval.db.MimeBuffer)1 Before (org.junit.Before)1 BeforeClass (org.junit.BeforeClass)1