use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.
the class IndexerFactory method create.
/**
 * Creates an {@link Indexer} from the indexing configuration located in the
 * given directory.
 * <p>
 * Internally used for unit testing. Allows passing an offset for loading
 * the indexer configuration from the classpath. Currently a protected
 * feature, but might be moved to the public API at a later point of time
 * (would allow to include multiple default configurations via the
 * classpath).
 * <p>
 * The indexing source is selected from the configured components in this
 * order:
 * <ol>
 * <li>EntityDataIterable + EntityScoreProvider: iterate over the data and
 * look up scores</li>
 * <li>EntityIterator + EntityDataProvider: iterate over the IDs and look up
 * the data</li>
 * <li>EntityDataIterable + EntityIterator: the EntityIterator is wrapped by
 * an adapter so that it can be used as EntityScoreProvider</li>
 * </ol>
 *
 * @param dir the directory handed to the {@link IndexingConfig}
 * @param classpathOffset the classpath offset handed to the
 * {@link IndexingConfig}; if <code>null</code> the configuration is created
 * from <code>dir</code> only
 * @return the indexer built from the configuration
 * @throws IllegalArgumentException if the configuration provides no
 * indexing destination, no entity processors, or none of the supported
 * combinations of indexing source components
 */
protected Indexer create(String dir, String classpathOffset) {
    Indexer indexer;
    IndexingConfig config;
    if (classpathOffset != null) {
        config = new IndexingConfig(dir, classpathOffset) {
        };
    } else {
        config = new IndexingConfig(dir);
    }
    // get the mode based on the configured IndexingComponents
    String name = config.getName();
    EntityDataIterable dataIterable = config.getDataIterable();
    EntityIterator idIterator = config.getEntityIdIterator();
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
    IndexingDestination destination = config.getIndexingDestination();
    if (destination == null) {
        log.error("The indexing configuration does not provide an " + "indexing destination. This needs to be configured by the key " + "'{}' in the indexing.properties within the directory {}", IndexingConstants.KEY_INDEXING_DESTINATION, config.getConfigFolder());
        throw new IllegalArgumentException("No IndexingDestination present");
    }
    List<EntityProcessor> processors = config.getEntityProcessors();
    if (processors == null) {
        log.error("The indexing configuration does not provide an " + "entity processor. This needs to be configured by the key " + "'{}' in the indexing.properties within the directory {}", IndexingConstants.KEY_ENTITY_PROCESSOR, config.getConfigFolder());
        // fail fast with a meaningful exception instead of running into a
        // NullPointerException when the processors are accessed below
        throw new IllegalArgumentException("No EntityProcessor present");
    }
    // post processors are optional and therefore may be null
    List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
    log.info("Present Source Configuration:");
    log.info(" - EntityDataIterable: {}", dataIterable);
    log.info(" - EntityIterator: {}", idIterator);
    log.info(" - EntityDataProvider: {}", dataProvider);
    log.info(" - EntityScoreProvider: {}", scoreProvider);
    log.info(" - EntityProcessors ({}):", processors.size());
    // list the processors directly below their own header
    int i = 0;
    for (EntityProcessor processor : processors) {
        i++;
        log.info(" {}) {}", i, processor);
    }
    if (postProcessors != null) {
        log.info(" - EntityPostProcessors ({}):", postProcessors.size());
        int j = 0;
        for (EntityProcessor postProcessor : postProcessors) {
            j++;
            log.info(" {}) {}", j, postProcessor);
        }
    }
    if (dataIterable != null && scoreProvider != null) {
        // iterate over data and lookup scores
        indexer = new IndexerImpl(name, dataIterable, scoreProvider, config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else if (idIterator != null && dataProvider != null) {
        // iterate over id and lookup data
        indexer = new IndexerImpl(name, idIterator, dataProvider, config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else if (dataIterable != null && idIterator != null) {
        // create an EntityIterator to EntityScoreProvider adapter
        log.info("Create Adapter from the configured EntityIterator '{}' to the " + "required EntityScoreProvider as needed together with the " + "configured EntityDataIterable '{}'", idIterator.getClass(), dataIterable.getClass());
        indexer = new IndexerImpl(name, dataIterable, new EntityIneratorToScoreProviderAdapter(idIterator), config.getNormaliser(), destination, processors, config.getIndexedEntitiesIdsFile(), postProcessors);
    } else {
        log.error("Invalid Indexing Source configuration: ");
        log.error(" - To iterate over the data and lookup scores one need to " + "configure an EntityDataIterable and an EntityScoreProvider ");
        log.error(" - To iterate over the Id and and lookup data one need to " + "configure an EntityIterator and an EntityDataProvider");
        throw new IllegalArgumentException("Invalid Indexing Source configuration");
    }
    return indexer;
}
use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.
the class IndexerFactory method create.
/**
 * Creates an {@link Indexer} that iterates over the entity data and looks
 * up the scores by using the parsed components.
 * <p>
 * The file handed to the indexer for the IDs of the indexed entities is a
 * temporary file (prefix "ind-ent-ids", suffix ".zip") that is registered
 * for deletion on JVM exit.
 *
 * @param name the name handed to the indexer
 * @param dataIterable the iterable over the entity data
 * @param scoreProvider the provider used to look up entity scores
 * @param normaliser the score normaliser handed to the indexer
 * @param processors the entity processors handed to the indexer
 * @param postProcessors the entity post processors handed to the indexer
 * @param destination the indexing destination
 * @return the created indexer
 * @throws IllegalStateException if the temporary file can not be created
 */
public Indexer create(String name, EntityDataIterable dataIterable, EntityScoreProvider scoreProvider, ScoreNormaliser normaliser, List<EntityProcessor> processors, List<EntityProcessor> postProcessors, IndexingDestination destination) {
    File tmp;
    try {
        tmp = File.createTempFile("ind-ent-ids", ".zip");
        tmp.deleteOnExit();
    } catch (IOException e) {
        // NOTE: the original message was missing the space between "the"
        // and "indexed"
        throw new IllegalStateException("Unable to create temporary file for storing the " + "indexed Entity IDs", e);
    }
    return new IndexerImpl(name, dataIterable, scoreProvider, normaliser, destination, processors, tmp, postProcessors);
}
use of org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl in project stanbol by apache.
the class IndexerFactory method create.
/**
 * Creates an {@link Indexer} that iterates over the entity IDs and looks up
 * the data by using the parsed components.
 * <p>
 * The file handed to the indexer for the IDs of the indexed entities is a
 * temporary file (prefix "ind-ent-ids", suffix ".zip") that is registered
 * for deletion on JVM exit.
 *
 * @param name the name handed to the indexer
 * @param idIterator the iterator over the entity IDs
 * @param dataProvider the provider used to look up the entity data
 * @param normaliser the score normaliser handed to the indexer
 * @param processors the entity processors handed to the indexer
 * @param postProcessors the entity post processors handed to the indexer
 * @param destination the indexing destination
 * @return the created indexer
 * @throws IllegalStateException if the temporary file can not be created
 */
public Indexer create(String name, EntityIterator idIterator, EntityDataProvider dataProvider, ScoreNormaliser normaliser, List<EntityProcessor> processors, List<EntityProcessor> postProcessors, IndexingDestination destination) {
    File tmp;
    try {
        tmp = File.createTempFile("ind-ent-ids", ".zip");
        tmp.deleteOnExit();
    } catch (IOException e) {
        // NOTE: the original message was missing the space between "the"
        // and "indexed"
        throw new IllegalStateException("Unable to create temporary file for storing the " + "indexed Entity IDs", e);
    }
    return new IndexerImpl(name, idIterator, dataProvider, normaliser, destination, processors, tmp, postProcessors);
}
Aggregations