Search in sources :

Example 1 with IndexerImpl

use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.

the class IndexerFactory method create.

/**
 * Creates an {@link Indexer} from the indexing configuration found for the
 * parsed directory.
 * <p>
 * Internally used for unit testing. Allows passing an offset for loading
 * the indexer configuration from the classpath. Currently a protected
 * feature, but might be moved to the public API at a later point of time
 * (would allow to include multiple default configurations via the
 * classpath).
 * <p>
 * The indexing mode is selected based on the configured source components,
 * checked in this order:
 * <ol>
 * <li>EntityDataIterable + EntityScoreProvider: iterate over the data and
 *     look up scores</li>
 * <li>EntityIterator + EntityDataProvider: iterate over the ids and look up
 *     the data</li>
 * <li>EntityDataIterable + EntityIterator: the EntityIterator is wrapped by
 *     an adapter so it can be used as EntityScoreProvider</li>
 * </ol>
 * @param dir the directory parsed to the {@link IndexingConfig}
 * @param classpathOffset the offset used to load the configuration from the
 * classpath, or <code>null</code> to create the {@link IndexingConfig} from
 * the directory only
 * @return the Indexer created for the configuration
 * @throws IllegalArgumentException if the configuration does not provide an
 * indexing destination, does not provide entity processors, or does not
 * provide one of the supported combinations of indexing sources
 */
protected Indexer create(String dir, String classpathOffset) {
    Indexer indexer;
    IndexingConfig config;
    if (classpathOffset != null) {
        // NOTE(review): the empty anonymous subclass is kept from the original
        // code - presumably the two argument constructor is not accessible
        // from this package; confirm before simplifying.
        config = new IndexingConfig(dir, classpathOffset) {
        };
    } else {
        config = new IndexingConfig(dir);
    }
    // get the mode based on the configured IndexingComponents
    String name = config.getName();
    EntityDataIterable dataIterable = config.getDataIterable();
    EntityIterator idIterator = config.getEntityIdIterator();
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
    IndexingDestination destination = config.getIndexingDestination();
    if (destination == null) {
        log.error("The indexing configuration does not provide an " + "indexing destination. This needs to be configured by the key " + "'{}' in the indexing.properties within the directory {}", IndexingConstants.KEY_INDEXING_DESTINATION, config.getConfigFolder());
        throw new IllegalArgumentException("No IndexingDestination present");
    }
    List<EntityProcessor> processors = config.getEntityProcessors();
    if (processors == null) {
        log.error("The indexing configuration does not provide an " + "entity processor. This needs to be configured by the key " + "'{}' in the indexing.properties within the directory {}", IndexingConstants.KEY_ENTITY_PROCESSOR, config.getConfigFolder());
        // fail with a meaningful exception instead of running into a
        // NullPointerException when the processors are logged below
        throw new IllegalArgumentException("No EntityProcessor present");
    }
    List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
    log.info("Present Source Configuration:");
    log.info(" - EntityDataIterable: {}", dataIterable);
    log.info(" - EntityIterator: {}", idIterator);
    log.info(" - EntityDataProvider: {}", dataProvider);
    log.info(" - EntityScoreProvider: {}", scoreProvider);
    log.info(" - EntityProcessors ({}):", processors.size());
    // list the processors directly below their header so that the entries
    // are not attributed to the post-processor header in the log
    int i = 0;
    for (EntityProcessor processor : processors) {
        i++;
        log.info("    {}) {}", i, processor);
    }
    // post processors are optional
    if (postProcessors != null) {
        log.info(" - EntityPostProcessors ({}):", postProcessors.size());
    }
    if (dataIterable != null && scoreProvider != null) {
        // iterate over data and lookup scores
        indexer = new IndexerImpl(name, dataIterable, scoreProvider, config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else if (idIterator != null && dataProvider != null) {
        // iterate over id and lookup data
        indexer = new IndexerImpl(name, idIterator, dataProvider, config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else if (dataIterable != null && idIterator != null) {
        // create an EntityIterator to EntityScoreProvider adapter
        log.info("Create Adapter from the configured EntityIterator '{}' to the " + "required EntityScoreProvider as needed together with the " + "configured EntityDataIterable '{}'", idIterator.getClass(), dataIterable.getClass());
        indexer = new IndexerImpl(name, dataIterable, new EntityIneratorToScoreProviderAdapter(idIterator), config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else {
        log.error("Invalid Indexing Source configuration: ");
        log.error(" - To iterate over the data and lookup scores one need to " + "configure an EntityDataIterable and an EntityScoreProvider ");
        log.error(" - To iterate over the Id and and lookup data one need to " + "configure an EntityIterator and an EntityDataProvider");
        throw new IllegalArgumentException("Invalid Indexing Source configuration");
    }
    return indexer;
}
Also used : IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIneratorToScoreProviderAdapter(org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter) IndexerImpl(org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl)

Example 2 with IndexerImpl

use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.

the class IndexerFactory method create.

/**
 * Creates an {@link Indexer} that iterates over the parsed
 * EntityDataIterable and looks up the scores by using the parsed
 * EntityScoreProvider.
 * <p>
 * A temporary file (deleted on JVM exit) is created and parsed to the
 * indexer for storing the ids of the indexed entities.
 * @param name the name parsed to the created indexer
 * @param dataIterable the source used to iterate over the entity data
 * @param scoreProvider the provider used to look up the entity scores
 * @param normaliser the score normaliser parsed to the created indexer
 * @param processors the entity processors parsed to the created indexer
 * @param postProcessors the entity post-processors parsed to the created indexer
 * @param destination the indexing destination parsed to the created indexer
 * @return the created Indexer
 * @throws IllegalStateException if the temporary file for the indexed
 * entity ids can not be created
 */
public Indexer create(String name, EntityDataIterable dataIterable, EntityScoreProvider scoreProvider, ScoreNormaliser normaliser, List<EntityProcessor> processors, List<EntityProcessor> postProcessors, IndexingDestination destination) {
    File tmp;
    try {
        tmp = File.createTempFile("ind-ent-ids", ".zip");
        // only needed for the lifetime of this JVM
        tmp.deleteOnExit();
    } catch (IOException e) {
        // keep the IOException as cause so that the reason is not lost
        throw new IllegalStateException("Unable to create temporary file for storing the " + "indexed Entity IDs", e);
    }
    return new IndexerImpl(name, dataIterable, scoreProvider, normaliser, destination, processors, tmp, postProcessors);
}
Also used : IndexerImpl(org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl) IOException(java.io.IOException) File(java.io.File)

Example 3 with IndexerImpl

use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.

the class IndexerFactory method create.

/**
 * Creates an {@link Indexer} that iterates over the entity ids provided by
 * the parsed EntityIterator and looks up the data by using the parsed
 * EntityDataProvider.
 * <p>
 * A temporary file (deleted on JVM exit) is created and parsed to the
 * indexer for storing the ids of the indexed entities.
 * @param name the name parsed to the created indexer
 * @param idIterator the source used to iterate over the entity ids
 * @param dataProvider the provider used to look up the entity data
 * @param normaliser the score normaliser parsed to the created indexer
 * @param processors the entity processors parsed to the created indexer
 * @param postProcessors the entity post-processors parsed to the created indexer
 * @param destination the indexing destination parsed to the created indexer
 * @return the created Indexer
 * @throws IllegalStateException if the temporary file for the indexed
 * entity ids can not be created
 */
public Indexer create(String name, EntityIterator idIterator, EntityDataProvider dataProvider, ScoreNormaliser normaliser, List<EntityProcessor> processors, List<EntityProcessor> postProcessors, IndexingDestination destination) {
    File tmp;
    try {
        tmp = File.createTempFile("ind-ent-ids", ".zip");
        // only needed for the lifetime of this JVM
        tmp.deleteOnExit();
    } catch (IOException e) {
        // keep the IOException as cause so that the reason is not lost
        throw new IllegalStateException("Unable to create temporary file for storing the " + "indexed Entity IDs", e);
    }
    return new IndexerImpl(name, idIterator, dataProvider, normaliser, destination, processors, tmp, postProcessors);
}
Also used : IndexerImpl(org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl) IOException(java.io.IOException) File(java.io.File)

Aggregations

IndexerImpl (org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl)3 File (java.io.File)2 IOException (java.io.IOException)2 IndexingConfig (org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig)1 EntityIneratorToScoreProviderAdapter (org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter)1