use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.
the class EntityIdBasedIndexingDaemon method run.
/**
 * Consumes the entity id iterator until it is exhausted. For every entity the
 * score is normalised (when a normaliser is configured) and, unless the entity
 * is filtered out by its score, its data are requested from the data provider
 * and handed to {@code produce}. {@code setFinished()} is called after the loop.
 */
@Override
public void run() {
    while (entityIdIterator.hasNext()) {
        Long start = Long.valueOf(System.currentTimeMillis());
        EntityScore entityScore = entityIdIterator.next();
        // start with the original score and replace it by the normalised one if possible
        Float score = entityScore.score;
        if (normaliser != null) {
            score = normaliser.normalise(entityScore.score);
        }
        // an entity is only skipped if not all entities are indexed AND a score
        // is available AND that score is lower than ZERO
        boolean ignored = !indexAllEntitiesState
                && score != null
                && score.compareTo(ScoreNormaliser.ZERO) < 0;
        if (ignored) {
            continue;
        }
        Representation rep = dataProvider.getEntityData(entityScore.id);
        if (rep == null) {
            log.debug("unable to get Data for Entity {} (score=norm:{}|orig:{})", new Object[] { entityScore.id, score, entityScore.score });
        }
        // NOTE(review): rep may be null here and is still passed on - presumably
        // produce(..) deals with missing representations; confirm against its implementation
        produce(rep, score, start);
    }
    setFinished();
}
use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.
the class RdfIndexingSourceTest method testEntityDataProvider.
/**
 * Iterates over all entities of the "provider" test configuration and validates
 * the representation returned by the configured {@link EntityDataProvider} for
 * each of them. The number of processed entities must match
 * {@code NUMBER_OF_ENTITIES_EXPECTED}.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    // checked BEFORE entering the try block: with a null provider the finally
    // clause would throw a NullPointerException that hides this assertion failure
    assertNotNull(dataProvider);
    try {
        if (dataProvider.needsInitialisation()) {
            dataProvider.initialise();
        }
        // expected value first, so that a failure message reads correctly
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all entities were found
        assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
    } finally {
        //we need to ensure close is called as otherwise other tests might fail
        dataProvider.close();
    }
}
use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.
the class RdfIndexingSourceTest method testEntityDataProvider.
/**
 * Iterates over all entities of the "provider" test configuration and validates
 * the representation returned by the configured {@link EntityDataProvider} for
 * each of them. The number of processed entities must match
 * {@code NUMBER_OF_ENTITIES_EXPECTED}.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    try {
        //there are test data to load
        assertTrue(dataProvider.needsInitialisation());
        dataProvider.initialise();
        // expected value first, so that a failure message reads correctly
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all entities were found
        assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
    } finally {
        // release the provider even if an assertion fails, so that resources
        // held by it are not leaked into tests running afterwards
        dataProvider.close();
    }
}
use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.
the class RdfIndexingSourceTest method testQuadsImport.
/**
 * Tests support for Quads (STANBOL-764): iterates over all entities of the
 * "quads" test configuration, validates the representation returned for each
 * of them and expects exactly 9 entities to be processed.
 */
@Test
public void testQuadsImport() {
    log.info(" --- testQuadsImport ---");
    String testName = "quads";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    try {
        //there are test data to load
        assertTrue(dataProvider.needsInitialisation());
        dataProvider.initialise();
        // expected value first, so that a failure message reads correctly
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all 9 entities were imported to the default dataset
        // (and not named graphs)
        assertEquals(String.format("%s Entities expected but %s processed!", 9, count), 9, count);
    } finally {
        // release the provider even if an assertion fails, so that resources
        // held by it are not leaked into tests running afterwards
        dataProvider.close();
    }
}
use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.
the class ConfigTest method testEntityIdIteratorConfig.
/**
 * Walks the entity id iterator of the default {@link IndexingConfig} and checks
 * for every entity that the id and score are present, that the id starts with
 * the DBpedia resource prefix, that scores do not increase from one entity to
 * the next and that the normalised scores do not increase either. The test
 * stops at the first entity with a score lower than 2, for which a negative
 * normalised score is expected.
 */
@Test
public void testEntityIdIteratorConfig() {
    IndexingConfig config = new IndexingConfig();
    EntityIterator iterator = config.getEntityIdIterator();
    ScoreNormaliser normaliser = config.getNormaliser();
    if (iterator.needsInitialisation()) {
        iterator.initialise();
    }
    float lastScore = Float.MAX_VALUE;
    float lastNormalisedScore = 1f;
    while (iterator.hasNext()) {
        EntityScore entity = iterator.next();
        assertNotNull(entity);
        assertNotNull(entity.id);
        assertNotNull(entity.score);
        //log.info("Entity: {}",entity);
        assertTrue(entity.id.startsWith("http://dbpedia.org/resource/"));
        float score = entity.score.floatValue();
        assertTrue(score > 0);
        assertTrue(score <= lastScore);
        lastScore = score;
        Float normalisedScore = normaliser.normalise(entity.score);
        assertNotNull(normalisedScore);
        float nScore = normalisedScore.floatValue();
        assertTrue(nScore <= lastNormalisedScore);
        // remember the value for the next iteration; without this the check
        // above would only ever compare against the initial upper bound of 1
        lastNormalisedScore = nScore;
        if (score < 2) {
            //the value of "min-score" in minincoming
            log.info("score={} nScore={}", score, nScore);
            assertTrue(nScore < 0);
            return;
        } else {
            assertTrue(nScore > 0);
        }
    }
}
Aggregations