Use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in the Apache Stanbol project.
Class EntityCoReferenceEngine, method extractNersAndNounPhrases.
/**
 * Collects the NER chunks and the noun phrases of the analysed text of the given
 * content item and stores them in the passed collections.
 * <p>
 * The text is processed sentence by sentence; if no sentences are available the
 * whole analysed text is treated as a single sentence. Sentences are numbered
 * starting with 1. For every noun phrase the tokens and NER chunks of the same
 * sentence that it contains are registered on the noun phrase.
 *
 * @param ci the content item providing the analysed text
 * @param ners output map: sentence number to the chunks of that sentence carrying
 *        a NER annotation. Sentences without such chunks get no entry.
 * @param nounPhrases output list receiving the noun phrases of all sentences
 */
private void extractNersAndNounPhrases(ContentItem ci, Map<Integer, List<Span>> ners, List<NounPhrase> nounPhrases) {
    AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
    Iterator<? extends Section> sections = at.getSentences();
    if (!sections.hasNext()) {
        // no sentences: process the whole text as a single sentence
        sections = Collections.singleton(at).iterator();
    }
    int sentenceCnt = 0;
    while (sections.hasNext()) {
        sentenceCnt++;
        Section section = sections.next();
        List<NounPhrase> sectionNounPhrases = new ArrayList<NounPhrase>();
        List<Span> sectionNers = new ArrayList<Span>();
        Iterator<Span> chunks = section.getEnclosed(EnumSet.of(SpanTypeEnum.Chunk));
        while (chunks.hasNext()) {
            Span chunk = chunks.next();
            // the two checks are independent: a chunk may be both a NER and a noun phrase
            Value<NerTag> ner = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION);
            if (ner != null) {
                sectionNers.add(chunk);
            }
            Value<PhraseTag> phrase = chunk.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
            if (phrase != null && phrase.value().getCategory() == LexicalCategory.Noun) {
                sectionNounPhrases.add(new NounPhrase(chunk, sentenceCnt));
            }
        }
        if (!sectionNounPhrases.isEmpty()) {
            // The tokens of the section are the same for every noun phrase, so they
            // are collected once instead of re-creating the iterator per noun phrase.
            List<Span> sectionTokens = new ArrayList<Span>();
            Iterator<Span> tokens = section.getEnclosed(EnumSet.of(SpanTypeEnum.Token));
            while (tokens.hasNext()) {
                sectionTokens.add(tokens.next());
            }
            for (NounPhrase nounPhrase : sectionNounPhrases) {
                for (Span token : sectionTokens) {
                    if (nounPhrase.containsSpan(token)) {
                        nounPhrase.addToken(token);
                    }
                }
                for (Span sectionNer : sectionNers) {
                    if (nounPhrase.containsSpan(sectionNer)) {
                        nounPhrase.addNerChunk(sectionNer);
                    }
                }
            }
        }
        nounPhrases.addAll(sectionNounPhrases);
        if (!sectionNers.isEmpty()) {
            ners.put(sentenceCnt, sectionNers);
        }
    }
}
Use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in the Apache Stanbol project.
Class OpenNlpChunkingEngine, method getPhraseTag.
/**
 * Resolves the {@link PhraseTag} for the given tag string.
 * <p>
 * The tag is first looked up in the tag set, then in the map of ad-hoc tags.
 * If neither knows the tag, a new {@link PhraseTag} is created from the string,
 * stored in the ad-hoc map so later lookups return the same instance, and the
 * unknown tag is logged.
 *
 * @param model the tag set to look the tag up in
 * @param adhocTags tags created for strings not present in the tag set; a new
 *        entry is added when the tag is unknown to both
 * @param tag the tag string to resolve
 * @param language the language, only used for the log message
 * @return the phrase tag for the given string, never {@code null}
 */
private PhraseTag getPhraseTag(TagSet<PhraseTag> model, Map<String, PhraseTag> adhocTags, String tag, String language) {
    PhraseTag phraseTag = model.getTag(tag);
    if (phraseTag != null) {
        return phraseTag;
    }
    phraseTag = adhocTags.get(tag);
    if (phraseTag != null) {
        return phraseTag;
    }
    phraseTag = new PhraseTag(tag);
    adhocTags.put(tag, phraseTag);
    // logged only when the ad-hoc tag is first created, not on every later lookup
    log.info("Encountered unknown phrase tag '{}' for language '{}'", tag, language);
    return phraseTag;
}
Use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in the Apache Stanbol project.
Class Nif20Helper, method writePhrase.
/**
 * Writes the {@link NlpAnnotations#PHRASE_ANNOTATION} of the annotated element
 * to the parsed RDF graph, using the {@code Nif20} vocabulary and the
 * segmentUri as subject.
 * <p>
 * Nothing is written if the element has no phrase annotation or if
 * {@code LEXICAL_TYPE_TO_PHRASE_TYPE} has no entry for the category of the
 * phrase tag.
 *
 * @param graph the graph the triples are added to
 * @param annotated the annotated element (e.g. a {@link Chunk})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) {
    final Value<PhraseTag> phraseAnnotation = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
    if (phraseAnnotation == null) {
        // element carries no phrase annotation
        return;
    }
    final IRI oliaPhraseType = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseAnnotation.value().getCategory());
    if (oliaPhraseType == null) {
        // no phrase type mapped for this lexical category
        return;
    }
    // add the oliaLink for the Phrase, then the confidence of the annotation
    graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), oliaPhraseType));
    setOliaConf(graph, segmentUri, phraseAnnotation);
}
Aggregations