use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class IndexerImpl method postProcessEntities.
/**
 * Runs the configured entity post-processors over the entities that were
 * written to the indexing destination.
 * <p>
 * The call is only effective while the indexer is in {@link State#INDEXED}:
 * earlier states cause an {@link IllegalStateException}, later states make
 * the call a no-op. When no post-processors are configured the state is
 * moved straight to {@link State#POSTPROCESSED}.
 *
 * @throws IllegalStateException if the indexer has not yet reached
 *         {@link State#INDEXED}, or if the stream with the ids of the
 *         indexed entities can not be opened
 */
@Override
public void postProcessEntities() {
    // The state check and the transition to POSTPROCESSING happen under the
    // same lock so that two threads can not both start this phase.
    synchronized (stateSync) {
        State current = getState();
        if (current.compareTo(State.INDEXED) < 0) {
            throw new IllegalStateException("The Indexer MUST BE already " + State.INDEXED + " when calling this Method!");
        }
        if (current != State.INDEXED) {
            // already post-processing (or further): silently ignore
            return;
        }
        setState(State.POSTPROCESSING);
        log.info("{}: PostProcessing started ...", name);
    }

    boolean hasPostProcessors = entityPostProcessors != null && !entityPostProcessors.isEmpty();
    if (!hasPostProcessors) {
        // no work to do, the phase is complete right away
        setState(State.POSTPROCESSED);
        return;
    }

    // Entity data is read back from the yard of the indexing destination.
    EntityDataProvider yardData = new YardEntityDataProvider(indexingDestination.getYard());

    // The ids to process are read line by line from the entity id stream.
    EntityIterator idIterator;
    try {
        idIterator = new LineBasedEntityIterator(getEntityIdFileInputStream(), "UTF-8", null);
    } catch (IOException e) {
        throw new IllegalStateException("Unable to open file containing the " + "IDs of the indexed Entities!", e);
    }
    Map<String, Object> iteratorConfig = new HashMap<String, Object>();
    iteratorConfig.put(LineBasedEntityIterator.PARAM_ID_POS, 1);
    iteratorConfig.put(LineBasedEntityIterator.PARAM_SCORE_POS, Integer.MAX_VALUE);
    idIterator.setConfiguration(iteratorConfig);

    // Initialise those post-processors that ask for it.
    for (EntityProcessor postProcessor : entityPostProcessors) {
        if (postProcessor.needsInitialisation()) {
            postProcessor.initialise();
        }
    }
    // NOTE: the indexing destination is not initialised here; it is the same
    // instance that was already used for indexing.

    // Bounded queues connecting the stages of the pipeline.
    int capacity = Math.max(MIN_QUEUE_SIZE, chunkSize * 2);
    BlockingQueue<QueueItem<Representation>> loaded = new ArrayBlockingQueue<QueueItem<Representation>>(capacity);
    BlockingQueue<QueueItem<Representation>> processed = new ArrayBlockingQueue<QueueItem<Representation>>(capacity);
    BlockingQueue<QueueItem<Representation>> finished = new ArrayBlockingQueue<QueueItem<Representation>>(capacity);
    BlockingQueue<QueueItem<IndexingError>> errors = new ArrayBlockingQueue<QueueItem<IndexingError>>(capacity);

    // All daemons taking part in this post-processing run.
    // TODO: several instances per stage would be needed to process entities
    //       with more than one thread.
    final SortedSet<IndexingDaemon<?, ?>> daemons = new TreeSet<IndexingDaemon<?, ?>>();

    // (1) reads the entities for the indexed ids; no score normaliser is
    //     used and all indexed entities are post-processed
    IndexingDaemon<?, ?> reader = new EntityIdBasedIndexingDaemon(
            name + ": post-processing: Entity Reader Deamon",
            loaded, errors, idIterator, yardData, null, true);
    daemons.add(reader);

    // (2) applies the post-processors: loaded -> processed; the score field
    //     is passed along so that the score is not changed
    IndexingDaemon<?, ?> processor = new EntityProcessorRunnable(
            name + ": post-processing: Entity Processor Deamon",
            loaded, processed, errors, entityPostProcessors,
            Collections.singleton(SCORE_FIELD));
    daemons.add(processor);

    // (3) persists the entities: processed -> finished
    IndexingDaemon<?, ?> persister = new EntityPersisterRunnable(
            name + ": Entity Perstisting Deamon",
            processed, finished, errors, chunkSize, indexingDestination.getYard());
    daemons.add(persister);

    // (4) logs finished entities; no id output is passed because all
    //     entity ids are already known
    IndexingDaemon<?, ?> finishedLogger = new FinishedEntityDaemon(
            name + ": Finished Entity Logger Deamon", finished, -1, log, null);
    daemons.add(finishedLogger);

    // (5) logs errors reported by any of the other stages
    IndexingDaemon<?, ?> errorLogger = new EntityErrorLoggerDaemon(
            name + ": Entity Error Logging Daemon", errors, log);
    daemons.add(errorLogger);

    // Run the pipeline and block until it has finished.
    startAndWait(daemons);

    // Release the post-processors and publish the new state.
    for (EntityProcessor postProcessor : entityPostProcessors) {
        postProcessor.close();
    }
    setState(State.POSTPROCESSED);
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class IndexerImpl method indexEntities.
/**
 * Indexes all entities provided by the configured indexing source and
 * stores them in the indexing destination.
 * <p>
 * The call is only effective while the indexer is in
 * {@link State#INITIALISED}: earlier states cause an
 * {@link IllegalStateException}, later states make the call a no-op.
 * On normal completion the state is {@link State#INDEXED}.
 * <p>
 * The stream receiving the ids of the indexed entities is closed even if
 * the indexing run terminates with an exception.
 *
 * @throws IllegalStateException if the indexer has not yet reached
 *         {@link State#INITIALISED}
 */
@Override
public void indexEntities() {
    // The state check and the transition to INDEXING happen under the same
    // lock so that two threads can not both start indexing.
    synchronized (stateSync) {
        State state = getState();
        if (state.ordinal() < State.INITIALISED.ordinal()) {
            throw new IllegalStateException("The Indexer MUST BE already " + State.INITIALISED + " when calling this Method!");
        }
        if (state != State.INITIALISED) {
            // already indexing (or further): ignore this call
            return;
        }
        setState(State.INDEXING);
        log.info("{}: indexing started ...", name);
    }
    // init the bounded queues connecting the stages of the pipeline
    int queueSize = Math.max(MIN_QUEUE_SIZE, chunkSize * 2);
    BlockingQueue<QueueItem<Representation>> indexedEntityQueue = new ArrayBlockingQueue<QueueItem<Representation>>(queueSize);
    BlockingQueue<QueueItem<Representation>> processedEntityQueue = new ArrayBlockingQueue<QueueItem<Representation>>(queueSize);
    BlockingQueue<QueueItem<Representation>> finishedEntityQueue = new ArrayBlockingQueue<QueueItem<Representation>>(queueSize);
    BlockingQueue<QueueItem<IndexingError>> errorEntityQueue = new ArrayBlockingQueue<QueueItem<IndexingError>>(queueSize);
    // Set holding all active IndexingDaemons
    final SortedSet<IndexingDaemon<?, ?>> activeIndexingDeamons = new TreeSet<IndexingDaemon<?, ?>>();
    // create the IndexingDaemons
    // TODO: Here we would need to create multiple instances in case
    //       one would e.g. like to use several threads for processing entities
    // (1) the daemon reading from the IndexingSources: id based when an
    //     entity iterator is configured, data based otherwise
    String entitySourceReaderName = name + ": Entity Source Reader Deamon";
    if (entityIterator != null) {
        activeIndexingDeamons.add(new EntityIdBasedIndexingDaemon(entitySourceReaderName, indexedEntityQueue, errorEntityQueue, entityIterator, dataProvider, scoreNormaliser, indexAllEntitiesState));
    } else {
        activeIndexingDeamons.add(new EntityDataBasedIndexingDaemon(entitySourceReaderName, indexedEntityQueue, errorEntityQueue, dataIterable, scoreProvider, scoreNormaliser, indexAllEntitiesState));
    }
    // (2) the daemon for processing the entities: indexed -> processed
    activeIndexingDeamons.add(new EntityProcessorRunnable(name + ": Entity Processor Deamon",
            indexedEntityQueue, processedEntityQueue, errorEntityQueue, entityProcessors,
            Collections.singleton(SCORE_FIELD)));
    // (3) the daemon for persisting the entities: processed -> finished
    activeIndexingDeamons.add(new EntityPersisterRunnable(name + ": Entity Perstisting Deamon",
            processedEntityQueue, finishedEntityQueue, errorEntityQueue, chunkSize,
            indexingDestination.getYard()));
    // (4) the daemon for logging finished entities; it is also handed the
    //     stream for the ids of the indexed entities
    activeIndexingDeamons.add(new FinishedEntityDaemon(name + ": Finished Entity Logger Deamon", finishedEntityQueue, -1, log, indexedEntityIdOutputStream));
    // (5) the daemon for logging errors
    activeIndexingDeamons.add(new EntityErrorLoggerDaemon(name + ": Entity Error Logging Daemon", errorEntityQueue, log));
    try {
        // start indexing and wait until it has finished
        startAndWait(activeIndexingDeamons);
    } finally {
        // always release the id stream, also when the indexing run fails,
        // so that the underlying resource is not leaked
        IOUtils.closeQuietly(indexedEntityIdOutputStream);
    }
    // call close on all indexing components
    for (EntityProcessor ep : entityProcessors) {
        ep.close();
    }
    // set the new state to INDEXED
    setState(State.INDEXED);
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class YardTest method testIsRepresentation.
/**
 * Verifies that {@code Yard.isRepresentation(String)} follows the life
 * cycle of representations: it is checked after creation, after an
 * explicit store and after removal.
 *
 * @throws YardException declared because the yard operations used here
 *         may throw it
 */
@Test
public void testIsRepresentation() throws YardException {
    final String detachedId = "urn:yard.test.testIsRepresentation:representation.id";
    final Yard yard = getYard();

    // A representation created via the yard is expected to exist in the
    // yard right away (as an empty representation).
    Representation yardCreated = create();
    String yardCreatedId = yardCreated.getId();
    assertTrue(yard.isRepresentation(yardCreatedId));

    // A representation created via the ValueFactory must not have been
    // added to the yard.
    Representation detached = create(detachedId, false);
    String storedId = detached.getId();
    assertFalse(yard.isRepresentation(storedId));

    // Once stored explicitly it has to be known to the yard.
    yard.store(detached);
    assertTrue(yard.isRepresentation(storedId));

    // After removal neither of the two may be reported any more.
    yard.remove(yardCreatedId);
    assertFalse(yard.isRepresentation(yardCreatedId));
    yard.remove(storedId);
    assertFalse(yard.isRepresentation(storedId));
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class YardTest method validateQueryResults.
/**
 * Used to validate the results of the query against expected results.
 * Supports also the validation of selected fields.
 * <p>
 * Every returned result must be one of the expected results and every
 * expected result must be returned. If expected fields are given, the
 * values of each of those fields are compared against the values of the
 * same field in the test data obtained from
 * {@code getFieldQueryTestData()}: no unexpected value may be present and
 * no expected value may be missing.
 *
 * @param query the query (only used for failure messages)
 * @param results the results of the query; must not be <code>null</code>
 * @param parsedExpectedResults read-only collection of the ids of the
 * expected results; <code>null</code> is treated as "no results expected".
 * The parsed collection is copied and never modified.
 * @param parsedExpectedFields read-only collection of expected selected
 * fields or <code>null</code> to deactivate validation of fields. The
 * parsed collection is copied and never modified.
 */
protected final void validateQueryResults(FieldQuery query, QueryResultList<Representation> results, Collection<String> parsedExpectedResults, Collection<String> parsedExpectedFields) {
    // work on private copies: ids are removed from the set as they are found
    Set<String> expectedResults = parsedExpectedResults == null ? new HashSet<String>() : new HashSet<String>(parsedExpectedResults);
    Set<String> expectedFields = parsedExpectedFields == null ? null : new HashSet<String>(parsedExpectedFields);
    FieldQueryTestData data = getFieldQueryTestData();
    Assert.assertNotNull("NULL result for query " + query + "!", results);
    for (Representation result : results) {
        // remove(..) returns false for ids that are not expected (any more)
        Assert.assertTrue("Result '" + result.getId() + "' is missing for Query " + query + "!", expectedResults.remove(result.getId()));
        if (expectedFields != null) {
            // validate the values of the selected fields
            for (String field : expectedFields) {
                Set<Object> expectedFieldValues = ModelUtils.asSet(data.representations.get(result.getId()).get(field));
                Iterator<Object> fieldValues = result.get(field);
                while (fieldValues.hasNext()) {
                    Object fieldValue = fieldValues.next();
                    Assert.assertTrue("Unexpected value '" + fieldValue + "' of selected field '" + field + "' of result " + result.getId(), expectedFieldValues.remove(fieldValue));
                }
                // whatever is left over was expected but not returned
                Assert.assertTrue("Missing expected value(s) " + expectedFieldValues + " of selected field '" + field + "' of result " + result.getId(), expectedFieldValues.isEmpty());
            }
        }
    }
    // ids still in the set were expected but are not part of the results
    Assert.assertTrue("Missing expected result(s) " + expectedResults + " for query " + query + "!", expectedResults.isEmpty());
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class YardTest method testStoreRepresentation.
/**
 * Verifies that {@code Yard.store(Representation)} returns a
 * representation equal to the parsed one, for two representations created
 * with a different second argument to {@code create(String, boolean)}.
 * <p>
 * NOTE: this does not check that an updated view of the representation is
 * stored. It only checks that storing works both for representations that
 * are not yet present in the yard and for those that already are.
 *
 * @throws YardException declared because the yard operations used here
 *         may throw it
 */
@Test
public void testStoreRepresentation() throws YardException {
    final String firstId = "urn:yard.test.testStoreRepresentation:representation.id1";
    final String secondId = "urn:yard.test.testStoreRepresentation:representation.id2";
    final Yard yard = getYard();

    // created with 'false': storing it is what adds it to the yard
    Representation fresh = create(firstId, false);
    Representation storedFresh = yard.store(fresh);
    assertEquals(fresh, storedFresh);

    // created with 'true' (per the original note this also adds it to the
    // yard): storing it again exercises the update path
    Representation existing = create(secondId, true);
    Representation storedExisting = yard.store(existing);
    assertEquals(existing, storedExisting);
}
Aggregations