Example use of org.apache.stanbol.enhancer.engines.poschunker.PhraseBuilder in the Apache Stanbol project:
the computeEnhancements method of the class PosChunkerEngine.
/**
 * Runs phrase chunking over the supplied {@link ContentItem}.
 * <p/>
 * The method looks up the {@link AnalysedText} and the language of the
 * content item, checks that language against the engine's
 * {@code languageConfiguration}, and then creates one {@link PhraseBuilder}
 * per configured {@link PhraseTypeDefinition}. Every sentence of the text is
 * announced to each builder, followed by every token of that sentence. When
 * the text has no sentence annotations, the whole {@link AnalysedText} is
 * processed as one single section.
 * <p/>
 * Results are expected to be stored with the content item; persisting the
 * enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem} is the
 * responsibility of the caller (usually an
 * {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}).
 *
 * @param ci the content item to process
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 *             if the underlying process failed to work as expected
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    // NOTE(review): the trailing 'true' flags presumably make the helpers
    // throw instead of returning null/false -- confirm against the helper API.
    final AnalysedText analysedText = getAnalysedText(this, ci, true);
    final String lang = getLanguage(this, ci, true);
    isLangaugeConfigured(this, languageConfiguration, lang, true);

    // One builder per configured phrase type, all sharing the same chunk factory.
    final ChunkFactory factory = new ChunkFactoryImpl(analysedText, ci.getLock());
    final List<PhraseBuilder> builders =
            new ArrayList<PhraseBuilder>(phraseTypeDefinitions.size());
    for (PhraseTypeDefinition definition : phraseTypeDefinitions) {
        builders.add(new PhraseBuilder(definition, factory, minPosScore));
    }

    Iterator<? extends Section> sections = analysedText.getSentences();
    if (!sections.hasNext()) {
        // No sentence annotations present: treat the whole text as one section.
        sections = Collections.singleton(analysedText).iterator();
    }

    while (sections.hasNext()) {
        final Section current = sections.next();
        // Announce the new section to every builder before feeding its tokens.
        for (PhraseBuilder builder : builders) {
            builder.nextSection(current);
        }
        for (Iterator<Token> it = current.getTokens(); it.hasNext();) {
            final Token tok = it.next();
            for (PhraseBuilder builder : builders) {
                builder.nextToken(tok);
            }
        }
    }

    // A null section signals the end of the document to every builder.
    for (PhraseBuilder builder : builders) {
        builder.nextSection(null);
    }
    // Trace logging of the resulting chunks (logChunks(at)) is currently disabled.
}
Aggregations