Use of org.apache.stanbol.enhancer.nlp.morpho.NumberFeature in the project stanbol by apache.
From the class CeliLemmatizerEnhancementEngineTest, method validateMorphoFeatureProperty.
/**
 * Validates the morphological information attached to a TextAnnotation for
 * the constant input {@code TERM}.
 * <p>
 * The following checks are performed on the parsed graph:
 * <ul>
 * <li>at least one {@code RDF_TYPE} value exists; every value must be an
 *     {@link IRI} and each one within the {@code OLIA_NAMESPACE} must map to
 *     {@link LexicalCategory#Noun}</li>
 * <li>the first {@code HAS_GENDER} value must be an {@link IRI}; if it is in
 *     the {@code OLIA_NAMESPACE} it must map to {@link Gender#Feminine}</li>
 * <li>the first {@code HAS_NUMBER} value must be an {@link IRI}; if it is in
 *     the {@code OLIA_NAMESPACE} it must map to {@link NumberFeature#Singular}</li>
 * <li>the first {@code hasLemmaForm} value must be a non-empty {@link Literal}
 *     whose lexical form equals {@code TERM}</li>
 * </ul>
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 */
private void validateMorphoFeatureProperty(Graph enhancements, BlankNodeOrIRI textAnnotation) {
    // This test checks for known morpho features of a given input (constant TERM)

    // --- Part of speech: all rdf:type values are inspected ---
    Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
    assertTrue("No POS Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    while (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be of type IRI", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        // types outside the OLIA namespace are not validated here
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            LexicalCategory cat = LexicalCategory.valueOf(key);
            assertTrue("Part of Speech of " + TERM + " should be " + LexicalCategory.Noun, (cat == LexicalCategory.Noun));
        }
    }

    // --- Gender: only the first value is inspected ---
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
    assertTrue("No Gender Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be of type IRI", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            Gender cat = Gender.valueOf(key);
            assertTrue("Gender of " + TERM + " should be " + Gender.Feminine, (cat == Gender.Feminine));
        }
    }

    // --- Number: only the first value is inspected ---
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
    assertTrue("No Number Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be of type IRI", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            NumberFeature cat = NumberFeature.valueOf(key);
            // the failure message names the value that is actually compared against
            assertTrue("Number of " + TERM + " should be " + NumberFeature.Singular, (cat == NumberFeature.Singular));
        }
    }

    // --- Lemma: only the first value is inspected ---
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
    assertTrue("No Lemma Form value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Lemma Forms value are expected of type Literal", morphoFeature instanceof Literal);
        assertFalse("Lemma forms MUST NOT be empty", ((Literal) morphoFeature).getLexicalForm().isEmpty());
        String feature = ((Literal) morphoFeature).getLexicalForm();
        assertTrue("Lemma of " + TERM + " should be " + TERM, (feature.equals(TERM)));
    }
}
Use of org.apache.stanbol.enhancer.nlp.morpho.NumberFeature in the project stanbol by apache.
From the class MorphoFeaturesSupport, method parse.
/**
 * Builds a {@link MorphoFeatures} instance from its JSON representation.
 * <p>
 * The field {@code "lemma"} is required and must be textual. The fields
 * {@code "case"}, {@code "gender"}, {@code "number"}, {@code "tense"} and
 * {@code "verb-mood"} are expected to be JSON arrays of objects that carry a
 * textual {@code "tag"} and an optional {@code "type"}; if several types are
 * parsed only the first one is used. The field {@code "pos"} is expected to
 * be a JSON array of objects that are handed to the parser returned by
 * {@code getPosTagParser()}. The fields {@code "definitness"} and
 * {@code "person"} are read via {@code JsonUtils.parseEnum(..)}.
 * <p>
 * Missing optional fields are silently skipped. Fields or array members with
 * an unexpected structure are logged at warn level and ignored.
 *
 * @param jMorpho the JSON object to parse
 * @param at the analysed text; passed on to the POS tag parser
 * @return the parsed morphological features
 * @throws IllegalStateException if the field {@code "lemma"} is missing or
 *         not a String
 */
@Override
public MorphoFeatures parse(ObjectNode jMorpho, AnalysedText at) {
    JsonNode jLemma = jMorpho.path("lemma");
    if (!jLemma.isTextual()) {
        throw new IllegalStateException("Field 'lemma' MUST provide a String value (parsed JSON: " + jMorpho + ")");
    }
    MorphoFeatures morpho = new MorphoFeatures(jLemma.asText());

    // --- case ---
    JsonNode node = jMorpho.path("case");
    if (node.isArray()) {
        ArrayNode jCases = (ArrayNode) node;
        for (int i = 0; i < jCases.size(); i++) {
            JsonNode member = jCases.get(i);
            if (member.isObject()) {
                ObjectNode jCase = (ObjectNode) member;
                JsonNode tag = jCase.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Case> type = JsonUtils.parseEnum(jCase, "type", Case.class);
                    if (type.isEmpty()) {
                        morpho.addCase(new CaseTag(tag.getTextValue()));
                    } else {
                        // only the first parsed type is used
                        morpho.addCase(new CaseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse CaseTag because 'tag' value is missing or is not a String (json: " + jCase.toString() + ")");
                }
            } else {
                log.warn("Unable to parse CaseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        // field is present but not an array
        log.warn("Unable to parse CaseTags (Json Array expected as value for field 'case' but was " + node + ")");
    }

    // --- definitness ---
    if (jMorpho.has("definitness")) {
        for (Definitness d : JsonUtils.parseEnum(jMorpho, "definitness", Definitness.class)) {
            morpho.addDefinitness(d);
        }
    }

    // --- gender ---
    node = jMorpho.path("gender");
    if (node.isArray()) {
        ArrayNode jGenders = (ArrayNode) node;
        for (int i = 0; i < jGenders.size(); i++) {
            JsonNode member = jGenders.get(i);
            if (member.isObject()) {
                ObjectNode jGender = (ObjectNode) member;
                JsonNode tag = jGender.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Gender> type = JsonUtils.parseEnum(jGender, "type", Gender.class);
                    if (type.isEmpty()) {
                        morpho.addGender(new GenderTag(tag.getTextValue()));
                    } else {
                        morpho.addGender(new GenderTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse GenderTag because 'tag' value is missing or is not a String (json: " + jGender.toString() + ")");
                }
            } else {
                log.warn("Unable to parse GenderTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse GenderTag (Json Array expected as value for field 'gender' but was " + node + ")");
    }

    // --- number ---
    node = jMorpho.path("number");
    if (node.isArray()) {
        ArrayNode jNumbers = (ArrayNode) node;
        for (int i = 0; i < jNumbers.size(); i++) {
            JsonNode member = jNumbers.get(i);
            if (member.isObject()) {
                ObjectNode jNumber = (ObjectNode) member;
                JsonNode tag = jNumber.path("tag");
                if (tag.isTextual()) {
                    EnumSet<NumberFeature> type = JsonUtils.parseEnum(jNumber, "type", NumberFeature.class);
                    if (type.isEmpty()) {
                        morpho.addNumber(new NumberTag(tag.getTextValue()));
                    } else {
                        morpho.addNumber(new NumberTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse NumberTag because 'tag' value is missing or is not a String (json: " + jNumber.toString() + ")");
                }
            } else {
                log.warn("Unable to parse NumberTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse NumberTag (Json Array expected as value for field 'number' but was " + node + ")");
    }

    // --- person ---
    if (jMorpho.has("person")) {
        for (Person p : JsonUtils.parseEnum(jMorpho, "person", Person.class)) {
            morpho.addPerson(p);
        }
    }

    // --- pos: parsing of the members is delegated to the POS tag parser ---
    node = jMorpho.path("pos");
    if (node.isArray()) {
        ArrayNode jPosTags = (ArrayNode) node;
        for (int i = 0; i < jPosTags.size(); i++) {
            JsonNode member = jPosTags.get(i);
            if (member.isObject()) {
                ObjectNode jPosTag = (ObjectNode) member;
                morpho.addPos(getPosTagParser().parse(jPosTag, at));
            } else {
                log.warn("Unable to parse PosTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse PosTag (Json Array expected as value for field 'pos' but was " + node + ")");
    }

    // --- tense ---
    node = jMorpho.path("tense");
    if (node.isArray()) {
        ArrayNode jTenses = (ArrayNode) node;
        for (int i = 0; i < jTenses.size(); i++) {
            JsonNode member = jTenses.get(i);
            if (member.isObject()) {
                ObjectNode jTense = (ObjectNode) member;
                JsonNode tag = jTense.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Tense> type = JsonUtils.parseEnum(jTense, "type", Tense.class);
                    if (type.isEmpty()) {
                        morpho.addTense(new TenseTag(tag.getTextValue()));
                    } else {
                        morpho.addTense(new TenseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse TenseTag because 'tag' value is missing or is not a String (json: " + jTense.toString() + ")");
                }
            } else {
                log.warn("Unable to parse TenseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse TenseTag (Json Array expected as value for field 'tense' but was " + node + ")");
    }

    // --- verb-mood ---
    node = jMorpho.path("verb-mood");
    if (node.isArray()) {
        ArrayNode jVerbMoods = (ArrayNode) node;
        for (int i = 0; i < jVerbMoods.size(); i++) {
            JsonNode member = jVerbMoods.get(i);
            if (member.isObject()) {
                ObjectNode jVerbMood = (ObjectNode) member;
                JsonNode tag = jVerbMood.path("tag");
                if (tag.isTextual()) {
                    EnumSet<VerbMood> type = JsonUtils.parseEnum(jVerbMood, "type", VerbMood.class);
                    if (type.isEmpty()) {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue()));
                    } else {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse VerbMoodTag because 'tag' value is missing or is not a String (json: " + jVerbMood.toString() + ")");
                }
            } else {
                log.warn("Unable to parse VerbMoodTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse VerbMoodTag (Json Array expected as value for field 'verb-mood' but was " + node + ")");
    }
    return morpho;
}
Aggregations