Search in sources :

Example 1 with EntityScore

use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.

the class EntityIdBasedIndexingDaemon method run.

/**
 * Walks over all entities provided by the entity id iterator, looks up the
 * data for every entity that qualifies for indexing and passes the result
 * to {@code produce(..)}. Calls {@code setFinished()} once the iterator has
 * no more elements.
 */
@Override
public void run() {
    while (entityIdIterator.hasNext()) {
        // captured before any work is done for this entity; forwarded to produce(..)
        Long started = Long.valueOf(System.currentTimeMillis());
        EntityScore current = entityIdIterator.next();
        // start with the original score and replace it only if a normaliser is present
        Float score = current.score;
        if (normaliser != null) {
            score = normaliser.normalise(current.score);
        }
        // An entity is skipped only if not all entities are indexed AND a
        // score is available AND that score is below ZERO.
        boolean skip = !indexAllEntitiesState && score != null && score.compareTo(ScoreNormaliser.ZERO) < 0;
        if (skip) {
            continue;
        }
        Representation data = dataProvider.getEntityData(current.id);
        if (data == null) {
            log.debug("unable to get Data for Entity {} (score=norm:{}|orig:{})", new Object[] { current.id, score, current.score });
        }
        // NOTE(review): produce(..) is also called when no data were found;
        // presumably it deals with null itself - confirm against its implementation.
        produce(data, score, started);
    }
    setFinished();
}
Also used : EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation)

Example 2 with EntityScore

use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.

the class RdfIndexingSourceTest method testEntityDataProvider.

/**
 * Loads the "provider" test configuration, iterates over all entity ids
 * and validates the representation the {@link EntityDataProvider} returns
 * for each of them. Finally checks that exactly
 * {@code NUMBER_OF_ENTITIES_EXPECTED} entities were processed.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    // Checked BEFORE entering the try block: with the check inside, a missing
    // provider would make close() in the finally block throw a
    // NullPointerException that hides the actual assertion failure.
    assertNotNull(dataProvider);
    try {
        if (dataProvider.needsInitialisation()) {
            dataProvider.initialise();
        }
        // JUnit convention: expected value first, actual value second
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all entities were found
        assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
    } finally {
        //we need to ensure close is called as otherwise other tests might fail
        dataProvider.close();
    }
}
Also used : EntityDataProvider(org.apache.stanbol.entityhub.indexing.core.EntityDataProvider) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 3 with EntityScore

use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.

the class RdfIndexingSourceTest method testEntityDataProvider.

/**
 * Loads the "provider" test configuration, initialises the
 * {@link EntityDataProvider} with the test data and validates the
 * representation returned for every entity id. Finally checks that exactly
 * {@code NUMBER_OF_ENTITIES_EXPECTED} entities were processed.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    try {
        //there are test data to load
        assertTrue(dataProvider.needsInitialisation());
        dataProvider.initialise();
        // JUnit convention: expected value first, actual value second
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all entities were found
        assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
    } finally {
        // release the data provider even if an assertion fails, so that a
        // provider left open by this test can not affect other tests
        dataProvider.close();
    }
}
Also used : EntityDataProvider(org.apache.stanbol.entityhub.indexing.core.EntityDataProvider) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 4 with EntityScore

use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.

the class RdfIndexingSourceTest method testQuadsImport.

/**
 * Tests support for Quads (STANBOL-764).
 * <p>
 * Loads the "quads" test configuration, validates the representation
 * returned for every entity id and checks that exactly 9 entities were
 * processed.
 */
@Test
public void testQuadsImport() {
    log.info(" --- testQuadsImport ---");
    String testName = "quads";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    try {
        //there are test data to load
        assertTrue(dataProvider.needsInitialisation());
        dataProvider.initialise();
        // JUnit convention: expected value first, actual value second
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        //check if all 9 entities were imported to the default dataset
        // (and not named graphs)
        assertEquals(String.format("%s Entities expected but %s processed!", 9, count), 9, count);
    } finally {
        // release the data provider even if an assertion fails, so that a
        // provider left open by this test can not affect other tests
        dataProvider.close();
    }
}
Also used : EntityDataProvider(org.apache.stanbol.entityhub.indexing.core.EntityDataProvider) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 5 with EntityScore

use of org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore in project stanbol by apache.

the class ConfigTest method testEntityIdIteratorConfig.

/**
 * Iterates over the entities of the default {@link IndexingConfig} and
 * checks for each of them that
 * <ul>
 * <li>id and score are present and the id is a DBpedia resource URI,
 * <li>the scores are positive and do not increase from one entity to the next,
 * <li>the normalised scores do not increase either, and
 * <li>the normalised score is negative for scores below 2 and positive
 *     otherwise.
 * </ul>
 * The test stops at the first entity with a score below 2.
 */
@Test
public void testEntityIdIteratorConfig() {
    IndexingConfig config = new IndexingConfig();
    EntityIterator iterator = config.getEntityIdIterator();
    ScoreNormaliser normaliser = config.getNormaliser();
    // fail with an assertion instead of a NullPointerException further down
    assertNotNull(iterator);
    assertNotNull(normaliser);
    if (iterator.needsInitialisation()) {
        iterator.initialise();
    }
    float lastScore = Float.MAX_VALUE;
    float lastNormalisedScore = 1f;
    while (iterator.hasNext()) {
        EntityScore entity = iterator.next();
        assertNotNull(entity);
        assertNotNull(entity.id);
        assertNotNull(entity.score);
        //log.info("Entity: {}",entity);
        assertTrue(entity.id.startsWith("http://dbpedia.org/resource/"));
        float score = entity.score.floatValue();
        assertTrue(score > 0);
        assertTrue(score <= lastScore);
        lastScore = score;
        Float normalisedScore = normaliser.normalise(entity.score);
        assertNotNull(normalisedScore);
        float nScore = normalisedScore.floatValue();
        assertTrue(nScore <= lastNormalisedScore);
        // remember the value so that the next iteration compares against the
        // previous entity (as done for lastScore) and not always against 1
        lastNormalisedScore = nScore;
        if (score < 2) {
            //the value of "min-score" in minincoming
            log.info("score=" + score + " nScore=" + nScore);
            assertTrue(nScore < 0);
            return;
        } else {
            assertTrue(nScore > 0);
        }
    }
}
Also used : ScoreNormaliser(org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Aggregations

EntityScore (org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore): 6, EntityIterator (org.apache.stanbol.entityhub.indexing.core.EntityIterator): 4, IndexingConfig (org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig): 4, Test (org.junit.Test): 4, EntityDataProvider (org.apache.stanbol.entityhub.indexing.core.EntityDataProvider): 3, HashMap (java.util.HashMap): 1, ScoreNormaliser (org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser): 1, Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation): 1