use of org.apache.stanbol.enhancer.nlp.model.Span in project stanbol by apache.
the class AnalyzedTextParser method parseSpan.
/**
 * Parses a single span from its JSON representation and adds it to the
 * parsed {@link AnalysedText}.
 * <p>
 * Only {@code Sentence}, {@code Chunk} and {@code Token} spans are created.
 * Every other input (a node that is not a JSON object, a missing or illegal
 * span type or start/end position, a {@code Text} or {@code TextSection}
 * span, or an unknown span type) is reported with a warning and ignored.
 *
 * @param at the AnalysedText the parsed span is added to
 * @param node the JSON node expected to hold the span as a JSON object
 * @throws IOException declared for the parsing helpers this method delegates
 *         to (presumably thrown by them; their bodies are not visible here)
 */
private void parseSpan(AnalysedText at, JsonNode node) throws IOException {
    if (!node.isObject()) {
        log.warn("Unable to parse Span from JsonNode " + node + " (expected JSON object)!");
        return;
    }
    ObjectNode jSpan = (ObjectNode) node;
    // -1 marks "not set": offsets that parseSpanData does not fill in are
    // caught by the '< 0' validation below
    int[] spanPos = new int[] { -1, -1 };
    Collection<Entry<String, JsonNode>> jAnnotations = new ArrayList<Entry<String, JsonNode>>(4);
    SpanTypeEnum spanType = parseSpanData(jSpan, spanPos, jAnnotations);
    if (spanType == null || spanPos[0] < 0 || spanPos[1] < 0) {
        log.warn("Illegal or missing span type, start and/or end position (ignored, json: " + jSpan + ")");
        return;
    }
    // now create the Span
    Span span;
    switch (spanType) {
        case Text:
            log.warn("Encountered 'Text' span that is not the first span in the "
                    + "'spans' array (ignored, json: " + node + ")");
            return;
        case TextSection:
            log.warn("Encountered 'TextSection' span. This SpanTypeEnum entry "
                    + "is currently unused. If this is no longer the case please "
                    + "update this implementation (ignored, json: " + node + ")");
            return;
        case Sentence:
            span = at.addSentence(spanPos[0], spanPos[1]);
            break;
        case Chunk:
            span = at.addChunk(spanPos[0], spanPos[1]);
            break;
        case Token:
            span = at.addToken(spanPos[0], spanPos[1]);
            break;
        default:
            log.warn("Unsupported SpanTypeEnum '" + spanType + "'! Please "
                    + "update this implementation (ignored, json: " + node + ")");
            return;
    }
    // annotations are only parsed for spans that were actually created
    if (!jAnnotations.isEmpty()) {
        parseAnnotations(span, jAnnotations);
    }
}
use of org.apache.stanbol.enhancer.nlp.model.Span in project stanbol by apache.
the class DependencyRelationSupport method parse.
/**
 * Reads a {@link DependencyRelation} from its JSON representation.
 * <p>
 * The partner span is added to the parsed {@link AnalysedText} when its type
 * is {@code Chunk} or {@code Token}. A partner type equal to
 * {@code ROOT_TAG}, or any other span type, results in a relation without a
 * partner ({@code null}).
 *
 * @param jDependencyRelation the JSON object holding the relation
 * @param at the AnalysedText used to create/look up the partner span
 * @return the parsed dependency relation
 * @throws IllegalStateException if the relation tag or the partner type is
 *         not textual, if 'isDependent' is not a boolean, or if the index of
 *         the grammatical relation is out of range
 * @throws IllegalArgumentException if the partner type is neither
 *         {@code ROOT_TAG} nor the name of a {@link SpanTypeEnum} constant
 */
@Override
public DependencyRelation parse(ObjectNode jDependencyRelation, AnalysedText at) {
    JsonNode tag = jDependencyRelation.path(RELATION_TYPE_TAG);
    if (!tag.isTextual()) {
        throw new IllegalStateException("Unable to parse GrammaticalRelationTag. The value of the "
                + "'tag' field MUST have a textual value (json: " + jDependencyRelation + ")");
    }
    // The grammatical relation is stored as the index of the enum constant.
    // Validate the index so corrupt data is reported like every other illegal
    // field instead of surfacing as an ArrayIndexOutOfBoundsException.
    GrammaticalRelation[] relations = GrammaticalRelation.class.getEnumConstants();
    int relationIdx = jDependencyRelation.path(RELATION_STANBOL_TYPE_TAG).asInt();
    if (relationIdx < 0 || relationIdx >= relations.length) {
        throw new IllegalStateException("Unable to parse GrammaticalRelation. The index '"
                + relationIdx + "' is out of range (json: " + jDependencyRelation + ")");
    }
    GrammaticalRelationTag gramRelTag = new GrammaticalRelationTag(tag.getTextValue(), relations[relationIdx]);
    JsonNode isDependent = jDependencyRelation.path(RELATION_IS_DEPENDENT_TAG);
    if (!isDependent.isBoolean()) {
        throw new IllegalStateException("Field 'isDependent' must have a true/false format");
    }
    // A missing or non-textual partner type would otherwise yield a null text
    // value and fail with a NullPointerException further down.
    JsonNode partnerType = jDependencyRelation.path(RELATION_PARTNER_TYPE_TAG);
    if (!partnerType.isTextual()) {
        throw new IllegalStateException("Unable to parse the partner of the DependencyRelation. The "
                + "partner type MUST have a textual value (json: " + jDependencyRelation + ")");
    }
    Span partnerSpan = null;
    String typeString = partnerType.getTextValue();
    if (!typeString.equals(ROOT_TAG)) {
        SpanTypeEnum spanType = SpanTypeEnum.valueOf(typeString);
        int spanStart = jDependencyRelation.path(RELATION_PARTNER_START_TAG).asInt();
        int spanEnd = jDependencyRelation.path(RELATION_PARTNER_END_TAG).asInt();
        switch (spanType) {
            case Chunk:
                partnerSpan = at.addChunk(spanStart, spanEnd);
                break;
            case Token:
                partnerSpan = at.addToken(spanStart, spanEnd);
                break;
            default:
                // other span types are not handled as partners: the relation
                // is created without a partner span
                break;
        }
    }
    return new DependencyRelation(gramRelTag, isDependent.asBoolean(), partnerSpan);
}
use of org.apache.stanbol.enhancer.nlp.model.Span in project stanbol by apache.
the class DependencyRelationSupport method serialize.
/**
 * Writes a {@link DependencyRelation} to a newly created JSON object.
 * <p>
 * The object holds the relation tag, the ordinal of the grammatical relation,
 * the 'isDependent' flag and the type, start and end of the partner span. A
 * relation without a partner is written with {@code ROOT_TAG} as partner type
 * and {@code 0} for both positions.
 *
 * @param mapper the mapper used to create the JSON object
 * @param relation the relation to serialize
 * @return the JSON object representing the relation
 */
@Override
public ObjectNode serialize(ObjectMapper mapper, DependencyRelation relation) {
    final ObjectNode json = mapper.createObjectNode();

    // the grammatical relation: textual tag plus enum ordinal
    final GrammaticalRelationTag relationTag = relation.getGrammaticalRelationTag();
    json.put(RELATION_TYPE_TAG, relationTag.getTag());
    json.put(RELATION_STANBOL_TYPE_TAG, relationTag.getGrammaticalRelation().ordinal());
    json.put(RELATION_IS_DEPENDENT_TAG, relation.isDependent());

    // the partner span, or the ROOT marker if there is none
    final Span partnerSpan = relation.getPartner();
    if (partnerSpan == null) {
        json.put(RELATION_PARTNER_TYPE_TAG, ROOT_TAG);
        json.put(RELATION_PARTNER_START_TAG, 0);
        json.put(RELATION_PARTNER_END_TAG, 0);
        return json;
    }
    json.put(RELATION_PARTNER_TYPE_TAG, partnerSpan.getType().toString());
    json.put(RELATION_PARTNER_START_TAG, partnerSpan.getStart());
    json.put(RELATION_PARTNER_END_TAG, partnerSpan.getEnd());
    return json;
}
use of org.apache.stanbol.enhancer.nlp.model.Span in project stanbol by apache.
the class AnalyzedTextSerializerAndParserTest method testSerialization.
/**
 * Serializes the test AnalysedText, checks the JSON for some expected
 * fragments, parses it back and verifies that the parsed AnalysedText has the
 * same spans, annotation keys and annotation values as the original.
 */
@Test
public void testSerialization() throws IOException {
    // serialize the test data into a byte array
    final ByteArrayOutputStream bout = new ByteArrayOutputStream();
    AnalyzedTextSerializer.getDefaultInstance().serialize(analysedTextWithData, bout, null);
    final byte[] data = bout.toByteArray();

    // the serialized String must contain some expected elements
    final String serialized = new String(data, Charset.forName("UTF-8"));
    log.info(serialized);
    final String[] expectedFragments = {
        "\"spans\" : [ {",
        "\"type\" : \"Text\"",
        "\"type\" : \"Sentence\"",
        "\"type\" : \"Token\"",
        "\"stanbol.enhancer.nlp.pos\" : {",
        "\"class\" : \"org.apache.stanbol.enhancer.nlp.pos.PosTag\"",
        "\"stanbol.enhancer.nlp.ner\" : {",
        "\"class\" : \"org.apache.stanbol.enhancer.nlp.ner.NerTag\"",
        "\"stanbol.enhancer.nlp.morpho\" : {",
        "\"class\" : \"org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures\""
    };
    for (String fragment : expectedFragments) {
        Assert.assertTrue(serialized.contains(fragment));
    }

    // parse the serialized data back into a fresh AnalysedText
    final AnalysedText parsedAt = AnalyzedTextParser.getDefaultInstance().parse(
            new ByteArrayInputStream(data), null, atFactory.createAnalysedText(textBlob.getValue()));
    Assert.assertEquals(analysedTextWithData, parsedAt);

    // compare the spans of both texts pairwise, including their annotations
    final Iterator<Span> expectedSpans = analysedTextWithData.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
    final Iterator<Span> actualSpans = parsedAt.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
    while (expectedSpans.hasNext() && actualSpans.hasNext()) {
        final Span expected = expectedSpans.next();
        final Span actual = actualSpans.next();
        Assert.assertEquals(expected, actual);
        final Set<String> expectedKeys = expected.getKeys();
        Assert.assertEquals(expectedKeys, actual.getKeys());
        for (String key : expectedKeys) {
            final List<Value<?>> expectedValues = expected.getValues(key);
            final List<Value<?>> actualValues = actual.getValues(key);
            Assert.assertEquals(expectedValues, actualValues);
        }
    }
    // neither side may have spans left over
    Assert.assertFalse("Original AnalyzedText MUST NOT have additional Spans", expectedSpans.hasNext());
    Assert.assertFalse("Parsed AnalyzedText MUST NOT have additional Spans", actualSpans.hasNext());
}
use of org.apache.stanbol.enhancer.nlp.model.Span in project stanbol by apache.
the class SectionImpl method getEnclosed.
/**
 * Iterates over the spans of the given types within a sub-range of this
 * section.
 * <p>
 * Both offsets are relative to {@code span[0]}. A negative
 * {@code startOffset} is treated as {@code 0} and an {@code endOffset}
 * beyond {@code span[1]} is clamped to {@code span[1]}. All comparisons are
 * done on the relative offsets before an absolute index is calculated, so
 * that large values such as {@code Integer.MAX_VALUE} for {@code endOffset}
 * cannot overflow and be mistaken for an empty range.
 *
 * @param types the span types accepted by the returned iterator
 * @param startOffset the start of the range relative to {@code span[0]}
 * @param endOffset the end of the range relative to {@code span[0]}
 * @return an iterator over the matching spans; an empty iterator if
 *         {@code startOffset} is not smaller than {@code span[1] - span[0]}
 *         or {@code endOffset} is not greater than the effective start offset
 */
@Override
@SuppressWarnings("unchecked")
public Iterator<Span> getEnclosed(final Set<SpanTypeEnum> types, int startOffset, int endOffset) {
    final int length = span[1] - span[0];
    if (startOffset >= length) {
        // start is outside the span
        return Collections.<Span>emptySet().iterator();
    }
    final int relativeStart = startOffset < 0 ? 0 : startOffset;
    if (endOffset <= relativeStart) {
        // the requested range is empty
        return Collections.<Span>emptySet().iterator();
    }
    final int startIdx = span[0] + relativeStart;
    // clamp before adding: 'span[0] + endOffset' may overflow for large offsets
    final int endIdx = endOffset > length ? span[1] : span[0] + endOffset;
    return IteratorUtils.filteredIterator(getIterator(new SubSetHelperSpan(startIdx, endIdx)), new Predicate() {
        @Override
        public boolean evaluate(Object candidate) {
            return types.contains(((Span) candidate).getType());
        }
    });
}
Aggregations