Use of org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum in the Apache Stanbol project.
Class Nlp2RdfMetadataEngine, method computeEnhancements:
/**
 * Writes the NLP analysis results held by the {@link AnalysedText} of the
 * parsed {@link ContentItem} as RDF triples (SSO / String Ontology
 * vocabulary) to the ContentItem's metadata graph.
 * <p>
 * Sentences, chunks (phrases) and tokens (words) are serialized. While
 * iterating the spans in text order the most recently written sentence,
 * phrase and word are tracked so the relations between spans (next sentence,
 * next/previous word, containing sentence, parent phrase, ...) can be added.
 *
 * @param ci the content item to enhance; all graph writes happen under its
 *           write lock
 * @throws EngineException declared by the engine interface
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
//'true': an AnalysedText MUST be present for this ContentItem
AnalysedText at = getAnalysedText(this, ci, true);
String lang = EnhancementEngineHelper.getLanguage(ci);
//language of the text; null if unknown (plain literals are written then)
Language language = lang == null ? null : new Language(lang);
//now iterate over the AnalysedText data and create the RDF representation
//TODO: make configureable
boolean sentences = true;
boolean phrases = true;
boolean words = true;
EnumSet<SpanTypeEnum> activeTypes = EnumSet.noneOf(SpanTypeEnum.class);
if (sentences) {
activeTypes.add(SpanTypeEnum.Sentence);
}
if (phrases) {
activeTypes.add(SpanTypeEnum.Chunk);
}
if (words) {
activeTypes.add(SpanTypeEnum.Token);
}
Graph metadata = ci.getMetadata();
IRI base = ci.getUri();
//all triples are added under the ContentItem's write lock
ci.getLock().writeLock().lock();
try {
Iterator<Span> spans = at.getEnclosed(activeTypes);
//state: last sentence/phrase/word written so far (null until first seen)
IRI sentence = null;
IRI phrase = null;
IRI word = null;
boolean firstWordInSentence = true;
while (spans.hasNext()) {
Span span = spans.next();
//TODO: filter Spans based on additional requirements
//(1) write generic information about the span
IRI current = writeSpan(metadata, base, at, language, span);
//(2) add the relations between the different spans
switch(span.getType()) {
case Sentence:
//chain consecutive sentences via sso:nextSentence
if (sentence != null) {
metadata.add(new TripleImpl(sentence, SsoOntology.nextSentence.getUri(), current));
}
sentence = current;
firstWordInSentence = true;
break;
case Chunk:
if (sentence != null) {
metadata.add(new TripleImpl(current, StringOntology.superString.getUri(), sentence));
if (word != null) {
//NOTE(review): this marks the word preceding EVERY chunk as
//sso:lastWord of the sentence — presumably only the sentence's
//final word should be linked (compare the NIF 2.0 engine, which
//writes lastWord when a new sentence starts); confirm against the
//SSO lastWord semantics.
metadata.add(new TripleImpl(word, SsoOntology.lastWord.getUri(), sentence));
}
}
phrase = current;
break;
case Token:
if (sentence != null) {
//link the word to its containing sentence
metadata.add(new TripleImpl(current, SsoOntology.sentence.getUri(), sentence));
if (firstWordInSentence) {
metadata.add(new TripleImpl(current, SsoOntology.firstWord.getUri(), sentence));
firstWordInSentence = false;
}
}
if (phrase != null) {
//NOTE(review): 'phrase' is the last chunk seen, not necessarily the
//chunk containing this token — TODO confirm that the span iteration
//order guarantees containment here.
metadata.add(new TripleImpl(current, SsoOntology.parent.getUri(), phrase));
}
if (word != null) {
//double-link consecutive words (next/previous)
metadata.add(new TripleImpl(word, SsoOntology.nextWord.getUri(), current));
metadata.add(new TripleImpl(current, SsoOntology.previousWord.getUri(), word));
}
word = current;
break;
default:
break;
}
//(3) add specific information such as POS, chunk type ...
writePos(metadata, span, current);
writePhrase(metadata, span, current);
//OlIA does not include Sentiments
Value<Double> sentiment = span.getAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION);
if (sentiment != null && sentiment.value() != null) {
metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY, lf.createTypedLiteral(sentiment.value())));
}
}
} finally {
//always release the write lock, even if triple creation fails
ci.getLock().writeLock().unlock();
}
}
Use of org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum in the Apache Stanbol project.
Class AnalyzedTextParser, method parseSpan:
/**
 * Parses a single {@link Span} from its JSON representation and registers it
 * with the parsed {@link AnalysedText}.
 * <p>
 * Spans of type 'Text' and 'TextSection' are not expected at this point and
 * are logged and skipped rather than failing the whole parse. Any parsed
 * annotations are attached to the created span.
 *
 * @param at the AnalysedText the parsed span is added to
 * @param node the JSON node holding the span data (expected to be an object)
 * @throws IOException if the annotations of the span cannot be parsed
 */
private void parseSpan(AnalysedText at, JsonNode node) throws IOException {
    if (node.isObject()) {
        ObjectNode jSpan = (ObjectNode) node;
        //spanPos[0] .. start offset, spanPos[1] .. end offset (-1 .. missing)
        int[] spanPos = new int[] { -1, -1 };
        Collection<Entry<String, JsonNode>> jAnnotations = new ArrayList<Entry<String, JsonNode>>(4);
        SpanTypeEnum spanType = parseSpanData(jSpan, spanPos, jAnnotations);
        if (spanType == null || spanPos[0] < 0 || spanPos[1] < 0) {
            //fixed: message was truncated (missing closing parenthesis)
            log.warn("Illegal or missing span type, start and/or end position (ignored, json: " + jSpan + ")");
            return;
        }
        //now create the Span
        Span span;
        switch (spanType) {
            case Text:
                //fixed typo: "Encounterd" -> "Encountered"
                log.warn("Encountered 'Text' span that is not the first span in the " + "'spans' array (ignored, json: " + node + ")");
                return;
            case TextSection:
                log.warn("Encountered 'TextSection' span. This SpanTypeEnum entry " + "is currently unused. If this is no longer the case please " + "update this implementation (ignored, json: " + node + ")");
                return;
            case Sentence:
                span = at.addSentence(spanPos[0], spanPos[1]);
                break;
            case Chunk:
                span = at.addChunk(spanPos[0], spanPos[1]);
                break;
            case Token:
                span = at.addToken(spanPos[0], spanPos[1]);
                break;
            default:
                log.warn("Unsupported SpanTypeEnum '" + spanType + "'!. Please " + "update this implementation (ignored, json: " + node + ")");
                return;
        }
        //attach the parsed annotations (if any) to the created span
        if (!jAnnotations.isEmpty()) {
            parseAnnotations(span, jAnnotations);
        }
    } else {
        //fixed typo: "form" -> "from"
        log.warn("Unable to parse Span from JsonNode " + node + " (expected JSON object)!");
    }
}
Use of org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum in the Apache Stanbol project.
Class DependencyRelationSupport, method parse:
/**
 * Parses a {@link DependencyRelation} from its JSON representation.
 * <p>
 * The relation partner is resolved against the parsed {@link AnalysedText}
 * (registering the chunk/token if not yet present). For the artificial ROOT
 * node no partner span exists and {@code null} is used instead.
 *
 * @param jDependencyRelation the JSON object holding the relation data
 * @param at the AnalysedText used to resolve the relation partner span
 * @return the parsed dependency relation
 * @throws IllegalStateException if the 'tag' field is not textual or the
 *         'isDependent' field is not a boolean
 */
@Override
public DependencyRelation parse(ObjectNode jDependencyRelation, AnalysedText at) {
    JsonNode tag = jDependencyRelation.path(RELATION_TYPE_TAG);
    if (!tag.isTextual()) {
        throw new IllegalStateException("Unable to parse GrammaticalRelationTag. The value of the " + "'tag' field MUST have a textual value (json: " + jDependencyRelation + ")");
    }
    //NOTE(review): the grammatical relation is resolved by enum ordinal; this
    //silently yields wrong relations (or throws ArrayIndexOutOfBounds) if the
    //GrammaticalRelation enum is ever reordered — confirm the serializer
    //writes ordinals of the same enum version.
    GrammaticalRelation grammaticalRelation = GrammaticalRelation.class.getEnumConstants()[jDependencyRelation.path(RELATION_STANBOL_TYPE_TAG).asInt()];
    GrammaticalRelationTag gramRelTag = new GrammaticalRelationTag(tag.getTextValue(), grammaticalRelation);
    JsonNode isDependent = jDependencyRelation.path(RELATION_IS_DEPENDENT_TAG);
    if (!isDependent.isBoolean()) {
        throw new IllegalStateException("Field 'isDependent' must have a true/false format");
    }
    //null partner marks a relation to the artificial ROOT node
    Span partnerSpan = null;
    String typeString = jDependencyRelation.path(RELATION_PARTNER_TYPE_TAG).getTextValue();
    if (!typeString.equals(ROOT_TAG)) {
        //reuse the already extracted value instead of re-reading the JSON path
        SpanTypeEnum spanType = SpanTypeEnum.valueOf(typeString);
        int spanStart = jDependencyRelation.path(RELATION_PARTNER_START_TAG).asInt();
        int spanEnd = jDependencyRelation.path(RELATION_PARTNER_END_TAG).asInt();
        switch (spanType) {
            case Chunk:
                partnerSpan = at.addChunk(spanStart, spanEnd);
                break;
            case Token:
                partnerSpan = at.addToken(spanStart, spanEnd);
                break;
            default:
                //other span types are not valid relation partners; keep the
                //original lenient behaviour and leave the partner unset
                break;
        }
    }
    return new DependencyRelation(gramRelTag, isDependent.asBoolean(), partnerSpan);
}
Use of org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum in the Apache Stanbol project.
Class Nif20MetadataEngine, method computeEnhancements:
/**
 * Serializes the NLP analysis results of the parsed {@link ContentItem} as
 * NIF 2.0 RDF triples to the ContentItem's metadata graph.
 * <p>
 * A nif:Context resource is written for the whole analysed text; every
 * serialized sentence, chunk and token span references it. While iterating
 * the spans in text order the previously written sentence, phrase and word
 * are remembered so the structural relations (first/last word, next/previous
 * sentence and word, sub/super string) can be emitted.
 *
 * @param ci the content item to enhance; graph writes happen under its
 *           write lock
 * @throws EngineException declared by the engine interface
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    //an AnalysedText is required ('true')
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String langCode = EnhancementEngineHelper.getLanguage(ci);
    Language contentLang = langCode == null ? null : new Language(langCode);
    //now iterate over the AnalysedText data and create the RDF representation
    //TODO: make configureable
    boolean includeSentences = true;
    boolean includePhrases = true;
    boolean includeWords = true;
    EnumSet<SpanTypeEnum> spanTypes = EnumSet.noneOf(SpanTypeEnum.class);
    if (includeSentences) {
        spanTypes.add(SpanTypeEnum.Sentence);
    }
    if (includePhrases) {
        spanTypes.add(SpanTypeEnum.Chunk);
    }
    if (includeWords) {
        spanTypes.add(SpanTypeEnum.Token);
    }
    Graph graph = ci.getMetadata();
    IRI base = ci.getUri();
    ci.getLock().writeLock().lock();
    try {
        //write the nif:Context covering the whole analysed text
        IRI context = writeSpan(graph, base, analysedText, contentLang, analysedText);
        graph.add(new TripleImpl(context, Nif20.sourceUrl.getUri(), ci.getUri()));
        Iterator<Span> spanIt = analysedText.getEnclosed(spanTypes);
        //state: last written sentence/phrase/word (null until first seen)
        IRI prevSentence = null;
        IRI prevPhrase = null;
        IRI prevWord = null;
        boolean awaitFirstWord = true;
        while (spanIt.hasNext()) {
            Span span = spanIt.next();
            //TODO: filter Spans based on additional requirements
            //(1) write generic information about the span
            IRI spanUri = writeSpan(graph, base, analysedText, contentLang, span);
            //every span references its context
            graph.add(new TripleImpl(spanUri, Nif20.referenceContext.getUri(), context));
            //(2) add the relations between the different spans
            switch (span.getType()) {
                case Sentence:
                    if (prevSentence != null && writePrevNext) {
                        graph.add(new TripleImpl(prevSentence, Nif20.nextSentence.getUri(), spanUri));
                        graph.add(new TripleImpl(spanUri, Nif20.previousSentence.getUri(), prevSentence));
                    }
                    if (prevWord != null) {
                        //close the previous sentence with its last word
                        graph.add(new TripleImpl(prevSentence, Nif20.lastWord.getUri(), prevWord));
                    }
                    prevSentence = spanUri;
                    awaitFirstWord = true;
                    break;
                case Chunk:
                    if (prevSentence != null && writeHierary) {
                        graph.add(new TripleImpl(spanUri, Nif20.superString.getUri(), prevSentence));
                    }
                    prevPhrase = spanUri;
                    break;
                case Token:
                    if (prevSentence != null) {
                        if (writeHierary) {
                            graph.add(new TripleImpl(spanUri, Nif20.sentence.getUri(), prevSentence));
                        }
                        //metadata.add(new TripleImpl(sentence, Nif20.word.getUri(), current));
                        if (awaitFirstWord) {
                            graph.add(new TripleImpl(prevSentence, Nif20.firstWord.getUri(), spanUri));
                            awaitFirstWord = false;
                        }
                    }
                    if (writeHierary && prevPhrase != null && !prevPhrase.equals(spanUri)) {
                        graph.add(new TripleImpl(spanUri, Nif20.subString.getUri(), prevPhrase));
                    }
                    if (prevWord != null && writePrevNext) {
                        //double-link consecutive words
                        graph.add(new TripleImpl(prevWord, Nif20.nextWord.getUri(), spanUri));
                        graph.add(new TripleImpl(spanUri, Nif20.previousWord.getUri(), prevWord));
                    }
                    prevWord = spanUri;
                    break;
                default:
                    break;
            }
            //(3) add specific information such as POS, chunk type ...
            Nif20Helper.writePhrase(graph, span, spanUri);
            Nif20Helper.writePos(graph, span, spanUri);
            //TODO: sentiment support
            Value<Double> sentiment = span.getAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION);
            if (sentiment != null && sentiment.value() != null) {
                graph.add(new TripleImpl(spanUri, SENTIMENT_PROPERTY, lf.createTypedLiteral(sentiment.value())));
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Use of org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum in the Apache Stanbol project.
Class AnalyzedTextParser, method parseAnalyzedTextSpan:
/**
 * Parses and validates the span that represents the analysed text itself
 * (the first span in the 'spans' array) and attaches its annotations to the
 * parsed {@link AnalysedText}.
 * <p>
 * The span MUST be of type 'Text', start at offset 0 and its end offset MUST
 * match the length of the local text.
 *
 * @param node the JSON node holding the span data (expected to be an object)
 * @param at the AnalysedText the annotations are attached to
 * @throws IOException if the span is missing, of the wrong type, has
 *         inconsistent offsets, or its annotations cannot be parsed
 */
private void parseAnalyzedTextSpan(JsonNode node, AnalysedText at) throws IOException {
    if (node.isObject()) {
        ObjectNode jSpan = (ObjectNode) node;
        //spanPos[0] .. start offset, spanPos[1] .. end offset (-1 .. missing)
        int[] spanPos = new int[] { -1, -1 };
        Collection<Entry<String, JsonNode>> jAnnotations = new ArrayList<Entry<String, JsonNode>>(4);
        SpanTypeEnum spanType = parseSpanData(jSpan, spanPos, jAnnotations);
        if (spanType != SpanTypeEnum.Text || spanPos[0] != 0 || spanPos[1] < 0) {
            //fixed: message claimed the span was "ignored" although this method
            //throws, and was missing its closing parenthesis
            throw new IOException("The AnalyzedText span MUST have the SpanType 'text', a " + "start position of '0' and an end position (json: " + jSpan + ")");
        }
        if (at.getEnd() != spanPos[1]) {
            throw new IOException("The size of the local text '" + at.getEnd() + "' does not " + "match the span of the parsed AnalyzedText [" + spanPos[0] + "," + spanPos[1] + "]!");
        }
        parseAnnotations(at, jAnnotations);
    } else {
        //fixed typo: "form" -> "from"
        throw new IOException("Unable to parse AnalyzedText span from JsonNode " + node + " (expected JSON object)!");
    }
}
Aggregations