use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.
the class ConfigTest method loadSimpleConfigDir.
/**
 * Loads a simple but non-functional configuration to test the loading and
 * parsing of configuration files.
 * <p>
 * The test checks, in order: that the indexing folder structure exists,
 * that name and description are read, that the two normaliser property
 * files are present in the config folder, that the normaliser chain is
 * {@code RangeNormaliser -> NaturalLogNormaliser -> MinScoreNormalizer},
 * that the entity id iterator is a {@code LineBasedEntityIterator} and that
 * the normalised score of every iterated entity matches the expected value.
 *
 * @throws IOException if resolving the canonical path of a folder fails
 */
@Test
public void loadSimpleConfigDir() throws IOException {
    String name = CONFIG_ROOT + "simple";
    // NOTE(review): instantiated through an anonymous subclass; presumably
    // because this constructor is not public - confirm against IndexingConfig
    IndexingConfig config = new IndexingConfig(name, name) {
    };
    // assert that this directory exists (is created)
    File expectedRoot = new File(testRoot, name);
    expectedRoot = new File(expectedRoot, "indexing");
    assertTrue("Root Dir not created", expectedRoot.isDirectory());
    assertEquals("Root dir other the expected ", expectedRoot.getCanonicalPath(), config.getIndexingFolder().getCanonicalPath());
    assertTrue(config.getConfigFolder().isDirectory());
    assertTrue(config.getSourceFolder().isDirectory());
    assertTrue(config.getDestinationFolder().isDirectory());
    assertTrue(config.getDistributionFolder().isDirectory());
    // test the name and description (JUnit order: expected first, then actual)
    assertEquals("simple", config.getName());
    assertEquals("Simple Configuration", config.getDescription());
    // test if the normaliser configuration was parsed correctly!
    final ScoreNormaliser normaliser = config.getNormaliser();
    // test if the config files were copied from the classpath to the
    // config directory.
    assertTrue("Config File for the RangeNormaliser not copied", new File(config.getConfigFolder(), "range.properties").isFile());
    assertTrue("Config File for the MinScoreNormalizer not copied", new File(config.getConfigFolder(), "minscore.properties").isFile());
    // now walk the chain of normalisers and check the type of each element
    ScoreNormaliser testNormaliser = normaliser;
    assertNotNull(testNormaliser);
    assertEquals(RangeNormaliser.class, testNormaliser.getClass());
    testNormaliser = testNormaliser.getChained();
    assertNotNull(testNormaliser);
    assertEquals(NaturalLogNormaliser.class, testNormaliser.getClass());
    testNormaliser = testNormaliser.getChained();
    assertNotNull(testNormaliser);
    assertEquals(MinScoreNormalizer.class, testNormaliser.getClass());
    EntityIterator entityIterator = config.getEntityIdIterator();
    assertNotNull(entityIterator);
    assertEquals(LineBasedEntityIterator.class, entityIterator.getClass());
    if (entityIterator.needsInitialisation()) {
        entityIterator.initialise();
    }
    Map<String, Float> entityIds = new HashMap<String, Float>();
    // the values test if the normaliser configuration was read correctly,
    // the keys if the configured entity score file was configured correctly
    // NOTE(review): the factor presumably maps a raw score of 100 to the
    // upper bound 10 - confirm against the test configuration and data
    float boost = 10f / (float) Math.log1p(100);
    entityIds.put("http://www.example.org/entity/test", Float.valueOf(10));
    entityIds.put("http://www.example.org/entity/test2", Float.valueOf((float) (Math.log1p(10) * boost)));
    entityIds.put("http://www.example.org/entity/test3", Float.valueOf(-1));
    while (entityIterator.hasNext()) {
        EntityIterator.EntityScore entityScore = entityIterator.next();
        // remove(..) so that unexpected duplicates and leftovers are both detected
        Float expectedScore = entityIds.remove(entityScore.id);
        assertNotNull("Entity with ID " + entityScore.id + " not found!", expectedScore);
        Float score = normaliser.normalise(entityScore.score);
        assertTrue("Entity score " + score + " is not the expected " + expectedScore, expectedScore.compareTo(score) == 0);
    }
    // every expected entity must have been returned by the iterator
    assertTrue(entityIds.isEmpty());
    List<EntityProcessor> processors = config.getEntityProcessors();
    assertNotNull(processors);
}
use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.
the class ConfigTest method testEntityIdIteratorConfig.
/**
 * Iterates the entities of the default {@code IndexingConfig} and checks
 * that ids use the DBpedia resource namespace, that raw scores are positive
 * and non-increasing, and that normalised scores are non-increasing as well.
 * The test stops at the first entity with a raw score below 2, whose
 * normalised score is required to be negative.
 */
@Test
public void testEntityIdIteratorConfig() {
    IndexingConfig config = new IndexingConfig();
    EntityIterator iterator = config.getEntityIdIterator();
    ScoreNormaliser normaliser = config.getNormaliser();
    if (iterator.needsInitialisation()) {
        iterator.initialise();
    }
    float lastScore = Float.MAX_VALUE;
    float lastNormalisedScore = 1f;
    while (iterator.hasNext()) {
        EntityScore entity = iterator.next();
        assertNotNull(entity);
        assertNotNull(entity.id);
        assertNotNull(entity.score);
        // log.info("Entity: {}",entity);
        assertTrue(entity.id.startsWith("http://dbpedia.org/resource/"));
        float score = entity.score.floatValue();
        assertTrue(score > 0);
        assertTrue(score <= lastScore);
        lastScore = score;
        Float normalisedScore = normaliser.normalise(entity.score);
        assertNotNull(normalisedScore);
        float nScore = normalisedScore.floatValue();
        assertTrue(nScore <= lastNormalisedScore);
        // remember the value so the next iteration is compared against it;
        // without this update the check above only verified nScore <= 1
        // NOTE(review): assumes the configured normaliser is monotonic - confirm
        lastNormalisedScore = nScore;
        if (score < 2) {
            // the value of "min-score" in minincoming
            log.info("score=" + score + " nScore=" + nScore);
            assertTrue(nScore < 0);
            // first entity below the minimum score reached: nothing more to check
            return;
        } else {
            assertTrue(nScore > 0);
        }
    }
}
use of org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser in project stanbol by apache.
the class IndexingConfig method initNormaliser.
/**
 * Initialises {@code scoreNormaliser} from the configuration value stored
 * under {@code IndexingConstants.KEY_SCORE_NORMALIZER}.
 * <p>
 * If no value is configured a {@code DefaultNormaliser} is used. Otherwise
 * the value is parsed into config entries, one normaliser is instantiated
 * per entry and each normaliser receives the one created for the following
 * entry as {@code ScoreNormaliser.CHAINED_SCORE_NORMALISER}, so the first
 * entry becomes the head of the chain. A value that parses to no entries
 * also results in a {@code DefaultNormaliser}.
 *
 * @throws IllegalArgumentException if the class named by an entry cannot be
 *         loaded, instantiated or cast to {@code ScoreNormaliser}
 */
private void initNormaliser() {
    Object value = configuration.get(IndexingConstants.KEY_SCORE_NORMALIZER);
    if (value == null) {
        this.scoreNormaliser = new DefaultNormaliser();
        return;
    }
    ScoreNormaliser normaliser = null;
    List<ConfigEntry> configs = parseConfigEntries(value.toString());
    // iterate backwards so every normaliser can be handed the already
    // configured normaliser of the entry that follows it
    for (int i = configs.size() - 1; i >= 0; i--) {
        ScoreNormaliser chained = normaliser;
        ConfigEntry config = configs.get(i);
        try {
            // Class#newInstance() is deprecated; going through the no-arg
            // constructor wraps constructor failures in an
            // InvocationTargetException, which is still caught below
            normaliser = (ScoreNormaliser) Class.forName(config.getClassName())
                    .getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new IllegalArgumentException("Invalid Normaliser configuration '" + config.getConfigString() + "'!", e);
        }
        Map<String, Object> normaliserConfig = getComponentConfig(config, normaliser.getClass().getSimpleName(), config.getParams().containsKey(CONFIG_PARAM));
        // add also the directly provided parameters
        normaliserConfig.putAll(config.getParams());
        if (chained != null) {
            normaliserConfig.put(ScoreNormaliser.CHAINED_SCORE_NORMALISER, chained);
        }
        normaliser.setConfiguration(normaliserConfig);
    }
    // set the normaliser! An empty entry list is treated like a missing
    // configuration so that the field is never left null.
    this.scoreNormaliser = normaliser != null ? normaliser : new DefaultNormaliser();
}
Aggregations