Search in sources :

Example 1 with CorpusInfo

Use of org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusInfo in the Apache Stanbol project.

From the class FstLinkingEngineTest, method setup.

/**
 * One-time fixture initialisation for this test class.
 * <p>
 * Points the managed Solr directory at the test index, obtains the embedded
 * {@link SolrServer} and its SolrCore, wraps them in a {@link SolrYard},
 * configures and activates the FST {@link IndexConfiguration}, validates the
 * test index and finally submits a {@link CorpusCreationTask} for every corpus
 * for which {@link CorpusInfo#isFstFile()} returns {@code false}. Each
 * submitted task is awaited for at most {@code FST_CREATION_WAIT_TIME} seconds.
 *
 * @throws Exception if any initialisation step fails; this includes the
 *         {@code InterruptedException} / {@code ExecutionException} raised by
 *         {@link Future#get(long, TimeUnit)} while waiting for FST creation
 */
@BeforeClass
public static void setup() throws Exception {
    // get the working directory
    // use property substitution to test this feature!
    String prefix = System.getProperty("basedir") == null ? "." : "${basedir}";
    String solrServerDir = prefix + TEST_INDEX_REL_PATH;
    log.info("Test Solr Server Directory: {}", solrServerDir);
    System.setProperty(ManagedSolrServer.MANAGED_SOLR_DIR_PROPERTY, solrServerDir);

    SolrYardConfig config = new SolrYardConfig(TEST_YARD_ID, TEST_SOLR_CORE_NAME);
    // NOTE(review): this 'false' is overridden by setAllowInitialisation(true)
    // two statements below; kept only to preserve the original call sequence.
    config.setAllowInitialisation(false);
    // the dbpedia default data
    config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION);
    // init from datafile provider
    config.setAllowInitialisation(true);
    config.setName("DBpedia.org default data");
    config.setDescription("Data used for FstLinkingEngie tests");

    // create the Yard used for the tests
    IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation());
    SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer(solrIndexRef, config.getIndexConfigurationName());
    Assert.assertNotNull("Unable to initialise SolrServer for testing", server);
    core = ((EmbeddedSolrServer) server).getCoreContainer().getCore(solrIndexRef.getIndex());
    // report the same name that was used for the lookup above
    Assert.assertNotNull("Unable to get SolrCore '" + solrIndexRef.getIndex() + "' from SolrServer " + server, core);
    yard = new SolrYard(server, config, null);

    // setup the index configuration
    LanguageConfiguration langConf = new LanguageConfiguration("not.used", new String[] { "en;field=dbpedia-ont:surfaceForm;generate=true" });
    fstConfig = new IndexConfiguration(langConf, core, FieldEncodingEnum.SolrYard, "");
    fstConfig.setExecutorService(Executors.newFixedThreadPool(1));
    fstConfig.setTypeField("rdf:type");
    fstConfig.setRankingField("entityhub:entityRank");
    //fstConfig.setEntityCacheManager(new FastLRUCacheManager(2048));
    fstConfig.setOrigin(new PlainLiteralImpl(TEST_ORIGIN));
    // activate the FST configuration
    fstConfig.activate();

    // validate that the index contains the expected entities
    validateTestIndex();

    // now create the FST models
    List<Future<?>> creationTasks = new ArrayList<Future<?>>();
    for (CorpusInfo corpus : fstConfig.getCorpora()) {
        Assert.assertTrue("Failure in UnitTest - all FST models need to be generate=true", corpus.allowCreation);
        if (!corpus.isFstFile()) {
            // create a task on the FST corpus creation service
            creationTasks.add(fstConfig.getExecutorService().submit(new CorpusCreationTask(fstConfig, corpus)));
        }
    }

    // wait for every submitted creation task, at most FST_CREATION_WAIT_TIME seconds each
    for (Future<?> future : creationTasks) {
        try {
            future.get(FST_CREATION_WAIT_TIME, TimeUnit.SECONDS);
        } catch (TimeoutException ignored) {
            // deliberately ignored: the isDone() assertion below reports the timeout
        }
        Assert.assertTrue("FST Model creation not finished after " + FST_CREATION_WAIT_TIME + " seconds", future.isDone());
    }
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrYard(org.apache.stanbol.entityhub.yard.solr.impl.SolrYard) SolrYardConfig(org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig) IndexConfiguration(org.apache.stanbol.enhancer.engines.lucenefstlinking.IndexConfiguration) CorpusInfo(org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusInfo) Future(java.util.concurrent.Future) CorpusCreationTask(org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusCreationTask) LanguageConfiguration(org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration) IndexReference(org.apache.stanbol.commons.solr.IndexReference) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) TimeoutException(java.util.concurrent.TimeoutException) BeforeClass(org.junit.BeforeClass)

Aggregations

ArrayList (java.util.ArrayList)1 Future (java.util.concurrent.Future)1 TimeoutException (java.util.concurrent.TimeoutException)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 SolrServer (org.apache.solr.client.solrj.SolrServer)1 EmbeddedSolrServer (org.apache.solr.client.solrj.embedded.EmbeddedSolrServer)1 IndexReference (org.apache.stanbol.commons.solr.IndexReference)1 ManagedSolrServer (org.apache.stanbol.commons.solr.managed.ManagedSolrServer)1 CorpusCreationTask (org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusCreationTask)1 CorpusInfo (org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusInfo)1 IndexConfiguration (org.apache.stanbol.enhancer.engines.lucenefstlinking.IndexConfiguration)1 LanguageConfiguration (org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration)1 SolrYard (org.apache.stanbol.entityhub.yard.solr.impl.SolrYard)1 SolrYardConfig (org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig)1 BeforeClass (org.junit.BeforeClass)1