Search in sources :

Example 1 with ScoreNormaliser

use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.

Found in class ConfigTest, method loadSimpleConfigDir.

/**
 * Loads a simple but non-functional configuration to test the loading and
 * parsing of configuration files.
 * <p>
 * The test checks, in this order:
 * <ul>
 * <li>the "indexing" folder below {@code testRoot/<name>} exists and is the
 * indexing folder reported by the configuration,</li>
 * <li>the config, source, destination and distribution folders are
 * directories,</li>
 * <li>the name and the description of the configuration,</li>
 * <li>{@code range.properties} and {@code minscore.properties} are present
 * in the config folder,</li>
 * <li>the normaliser chain is RangeNormaliser, NaturalLogNormaliser,
 * MinScoreNormalizer,</li>
 * <li>the entity id iterator is a LineBasedEntityIterator and every entity
 * it returns has the expected normalised score,</li>
 * <li>the list of entity processors is not {@code null}.</li>
 * </ul>
 *
 * @throws IOException declared because {@code File.getCanonicalPath()} is
 *         used to compare the indexing folders
 */
@Test
public void loadSimpleConfigDir() throws IOException {
    String name = CONFIG_ROOT + "simple";
    IndexingConfig config = new IndexingConfig(name, name) {
    };
    // assert that this directory exists (is created)
    File expectedRoot = new File(testRoot, name);
    expectedRoot = new File(expectedRoot, "indexing");
    assertTrue("Root Dir not created", expectedRoot.isDirectory());
    assertEquals("Root dir other the expected ", expectedRoot.getCanonicalPath(), config.getIndexingFolder().getCanonicalPath());
    assertTrue(config.getConfigFolder().isDirectory());
    assertTrue(config.getSourceFolder().isDirectory());
    assertTrue(config.getDestinationFolder().isDirectory());
    assertTrue(config.getDistributionFolder().isDirectory());
    // test the name and the description
    // (JUnit convention: expected value first, actual value second, so that
    // a failure message reports the two values the right way round)
    assertEquals("simple", config.getName());
    assertEquals("Simple Configuration", config.getDescription());
    // test if the normaliser configuration was parsed correctly!
    final ScoreNormaliser normaliser = config.getNormaliser();
    // test if the config files were copied from the classpath to the
    // config directory.
    assertTrue("Config File for the RangeNormaliser not copied", new File(config.getConfigFolder(), "range.properties").isFile());
    assertTrue("Config File for the MinScoreNormalizer not copied", new File(config.getConfigFolder(), "minscore.properties").isFile());
    // now walk the chain of normalisers and check the type of each element
    ScoreNormaliser testNormaliser = normaliser;
    assertNotNull(testNormaliser);
    assertEquals(RangeNormaliser.class, testNormaliser.getClass());
    testNormaliser = testNormaliser.getChained();
    assertNotNull(testNormaliser);
    assertEquals(NaturalLogNormaliser.class, testNormaliser.getClass());
    testNormaliser = testNormaliser.getChained();
    assertNotNull(testNormaliser);
    assertEquals(MinScoreNormalizer.class, testNormaliser.getClass());
    EntityIterator entityIterator = config.getEntityIdIterator();
    assertNotNull(entityIterator);
    assertEquals(LineBasedEntityIterator.class, entityIterator.getClass());
    if (entityIterator.needsInitialisation()) {
        entityIterator.initialise();
    }
    Map<String, Float> entityIds = new HashMap<String, Float>();
    // the values test if the normaliser configuration was read correctly,
    // the keys test if the configured entity score file was read correctly
    float boost = 10f / (float) Math.log1p(100);
    entityIds.put("http://www.example.org/entity/test", Float.valueOf(10));
    entityIds.put("http://www.example.org/entity/test2", Float.valueOf((float) (Math.log1p(10) * boost)));
    entityIds.put("http://www.example.org/entity/test3", Float.valueOf(-1));
    while (entityIterator.hasNext()) {
        EntityIterator.EntityScore entityScore = entityIterator.next();
        // remove(..) so that unexpected and duplicate ids are detected here
        // and missing ids by the isEmpty() check after the loop
        Float expectedScore = entityIds.remove(entityScore.id);
        assertNotNull("Entity with ID " + entityScore.id + " not found!", expectedScore);
        Float score = normaliser.normalise(entityScore.score);
        assertTrue("Entity score " + score + " is not the expected " + expectedScore, expectedScore.compareTo(score) == 0);
    }
    assertTrue(entityIds.isEmpty());
    List<EntityProcessor> processors = config.getEntityProcessors();
    assertNotNull(processors);
}
Also used : ScoreNormaliser(org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) HashMap(java.util.HashMap) LineBasedEntityIterator(org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator) File(java.io.File) Test(org.junit.Test)

Example 2 with ScoreNormaliser

use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.

Found in class ConfigTest, method testEntityIdIteratorConfig.

/**
 * Iterates over the entities of a default {@code IndexingConfig} and checks
 * each entity id, its score and the score returned by the configured
 * normaliser. The test ends at the first entity with a score below 2.
 */
@Test
public void testEntityIdIteratorConfig() {
    final IndexingConfig indexingConfig = new IndexingConfig();
    final EntityIterator entities = indexingConfig.getEntityIdIterator();
    final ScoreNormaliser scoreNormaliser = indexingConfig.getNormaliser();
    if (entities.needsInitialisation()) {
        entities.initialise();
    }
    // NOTE(review): this bound is never changed inside the loop, so the
    // normalised scores are only checked to be <= 1 and not to be
    // non-increasing - confirm whether that is intended
    final float normalisedBound = 1f;
    float previousScore = Float.MAX_VALUE;
    while (entities.hasNext()) {
        final EntityScore current = entities.next();
        assertNotNull(current);
        assertNotNull(current.id);
        assertNotNull(current.score);
        assertTrue(current.id.startsWith("http://dbpedia.org/resource/"));
        // raw scores have to be positive and must not increase
        final float rawScore = current.score.floatValue();
        assertTrue(rawScore > 0);
        assertTrue(rawScore <= previousScore);
        previousScore = rawScore;
        final Float normalised = scoreNormaliser.normalise(current.score);
        assertNotNull(normalised);
        final float normalisedValue = normalised.floatValue();
        assertTrue(normalisedValue <= normalisedBound);
        if (rawScore < 2) {
            // 2 is the value of "min-score" in minincoming: scores below
            // it need to be normalised to a negative value; the test stops
            // at the first such entity
            log.info("score=" + rawScore + " nScore=" + normalisedValue);
            assertTrue(normalisedValue < 0);
            return;
        }
        assertTrue(normalisedValue > 0);
    }
}
Also used : ScoreNormaliser(org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 3 with ScoreNormaliser

use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.

Found in class IndexingConfig, method initNormaliser.

/**
 * Initialises {@code scoreNormaliser} based on the value configured for
 * {@code IndexingConstants.KEY_SCORE_NORMALIZER}.
 * <p>
 * If no value is configured a {@code DefaultNormaliser} is used. Otherwise
 * the value is parsed into config entries and one normaliser is created per
 * entry. The entries are processed from the last to the first so that each
 * normaliser gets the normaliser of the following entry passed as
 * {@code ScoreNormaliser.CHAINED_SCORE_NORMALISER}; the normaliser of the
 * first entry becomes {@code scoreNormaliser}.
 *
 * @throws IllegalArgumentException if the class of a configured normaliser
 *         can not be loaded, instantiated or cast to {@code ScoreNormaliser}
 */
private void initNormaliser() {
    Object value = configuration.get(IndexingConstants.KEY_SCORE_NORMALIZER);
    if (value == null) {
        this.scoreNormaliser = new DefaultNormaliser();
        return;
    }
    List<ConfigEntry> configs = parseConfigEntries(value.toString());
    // head of the chain built so far (null until the first instance exists)
    ScoreNormaliser normaliser = null;
    for (int i = configs.size() - 1; i >= 0; i--) {
        ConfigEntry config = configs.get(i);
        // the normaliser created in the previous iteration gets chained
        // to the one created now
        ScoreNormaliser chained = normaliser;
        try {
            // Class#newInstance() is deprecated; going through the no-arg
            // constructor wraps exceptions thrown by the constructor in an
            // InvocationTargetException instead of rethrowing them as is
            normaliser = (ScoreNormaliser) Class.forName(config.getClassName()).getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new IllegalArgumentException("Invalid Normaliser configuration '" + config.getConfigString() + "'!", e);
        }
        Map<String, Object> normaliserConfig = getComponentConfig(config, normaliser.getClass().getSimpleName(), config.getParams().containsKey(CONFIG_PARAM));
        // add also the directly provided parameters
        normaliserConfig.putAll(config.getParams());
        if (chained != null) {
            normaliserConfig.put(ScoreNormaliser.CHAINED_SCORE_NORMALISER, chained);
        }
        normaliser.setConfiguration(normaliserConfig);
    }
    // set the normaliser! If no config entry was parsed the loop did not
    // create any instance; use the default (as for a missing configuration)
    // rather than leaving the field null
    this.scoreNormaliser = normaliser != null ? normaliser : new DefaultNormaliser();
}
Also used : ScoreNormaliser(org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser) DefaultNormaliser(org.apache.stanbol.entityhub.indexing.core.normaliser.DefaultNormaliser) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException)

Aggregations

ScoreNormaliser (org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser)3 IndexingConfig (org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig)2 Test (org.junit.Test)2 File (java.io.File)1 IOException (java.io.IOException)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 HashMap (java.util.HashMap)1 EntityIterator (org.apache.stanbol.entityhub.indexing.core.EntityIterator)1 EntityScore (org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore)1 DefaultNormaliser (org.apache.stanbol.entityhub.indexing.core.normaliser.DefaultNormaliser)1 LineBasedEntityIterator (org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator)1