Use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by Apache.
Class AnalyzedTextSerializerAndParserTest, method setup.
/**
 * Builds the shared fixtures used by this test class.
 * <p>
 * Creates the content item and its {@code text/plain} blob, obtains an analysed
 * text from {@code createAnalysedText()} and annotates the first sentence with
 * three tokens ("The", "Stanbol", "enhancer") and one chunk ("Stanbol enhancer").
 * Every span created here is also registered, together with its expected text,
 * in the matching {@code expectedSentences} / {@code expectedTokens} /
 * {@code expectedChunks} map.
 *
 * @throws IOException declared by the signature; propagated from the
 *         fixture-creating calls
 */
@BeforeClass
public static final void setup() throws IOException {
    ci = ciFactory.createContentItem(new StringSource(text));
    textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    analysedTextWithData = createAnalysedText();

    // The first sentence ends directly after the first '.' of the text.
    final int firstSentenceEnd = text.indexOf('.') + 1;
    final Sentence firstSentence = analysedTextWithData.addSentence(0, firstSentenceEnd);
    expectedSentences.put(firstSentence,
            "The Stanbol enhancer can detect famous "
            + "cities such as Paris and people such as Bob Marley.");

    // Token "The": a single POS annotation.
    // NOTE(review): "The" is tagged as a preposition - presumably arbitrary test data.
    final Token theToken = firstSentence.addToken(0, 3);
    expectedTokens.put(theToken, "The");
    final PosTag prepositionTag = new PosTag("PREP", Pos.Preposition);
    theToken.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(prepositionTag, 0.85));

    // Token "Stanbol": a POS annotation plus a sentiment value.
    final Token stanbolToken = firstSentence.addToken(4, 11);
    expectedTokens.put(stanbolToken, "Stanbol");
    final PosTag stanbolPos = new PosTag("PN", Pos.ProperNoun);
    stanbolToken.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(stanbolPos, 0.95));
    stanbolToken.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(0.5));

    // Token "enhancer": offsets are looked up in the sentence text instead of hard-coded.
    final String enhancerWord = "enhancer";
    final int enhancerStart = firstSentence.getSpan().indexOf(enhancerWord);
    final int enhancerEnd = enhancerStart + enhancerWord.length();
    final Token enhancerToken = firstSentence.addToken(enhancerStart, enhancerEnd);
    expectedTokens.put(enhancerToken, "enhancer");
    // Two POS annotations are added to the same token, in this order.
    final PosTag enhancerProperNoun = new PosTag("PN", Pos.ProperNoun);
    enhancerToken.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(enhancerProperNoun, 0.95));
    final PosTag enhancerNoun = new PosTag("N", LexicalCategory.Noun);
    enhancerToken.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(enhancerNoun, 0.87));

    // Morphological features carrying one value of every kind (two for case).
    final MorphoFeatures features = new MorphoFeatures("enhance");
    features.addCase(new CaseTag("test-case-1", Case.Comitative));
    features.addCase(new CaseTag("test-case-2", Case.Abessive));
    features.addDefinitness(Definitness.Definite);
    features.addPerson(Person.First);
    features.addPos(new PosTag("PN", Pos.ProperNoun));
    features.addGender(new GenderTag("test-gender", Gender.Masculine));
    features.addNumber(new NumberTag("test-number", NumberFeature.Plural));
    features.addTense(new TenseTag("test-tense", Tense.Present));
    features.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
    enhancerToken.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(features));

    // Chunk spanning from the start of "Stanbol" to the end of "enhancer".
    final Chunk organisationChunk =
            analysedTextWithData.addChunk(stanbolToken.getStart(), enhancerToken.getEnd());
    expectedChunks.put(organisationChunk, "Stanbol enhancer");
    final NerTag organisationTag = new NerTag("organization", DBPEDIA_ORGANISATION);
    organisationChunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(organisationTag));
    final PhraseTag nounPhraseTag = new PhraseTag("NP", LexicalCategory.Noun);
    organisationChunk.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION, Value.value(nounPhraseTag, 0.98));
}
Use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by Apache.
Class MorphoFeaturesSupport, method serialize.
/**
 * Serializes the given {@link MorphoFeatures} into a JSON object.
 * <p>
 * The {@code lemma} field is always written. The remaining fields
 * ({@code case}, {@code definitness}, {@code gender}, {@code number},
 * {@code person}, {@code pos}, {@code tense}, {@code verb-mood}) are written
 * in that order and only when the corresponding list is not empty.
 * Tag based features become arrays of {@code {"tag": ..., "type": ...}}
 * objects, where {@code type} is omitted when the tag carries no value.
 * {@code definitness} and {@code person} are written as a plain string when
 * there is exactly one value and as an array of strings otherwise.
 *
 * @param mapper the mapper used to create the JSON nodes
 * @param morpho the features to serialize
 * @return the JSON representation of {@code morpho}
 */
@Override
public ObjectNode serialize(ObjectMapper mapper, MorphoFeatures morpho) {
    ObjectNode json = mapper.createObjectNode();
    json.put("lemma", morpho.getLemma());

    List<CaseTag> cases = morpho.getCaseList();
    if (!cases.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (CaseTag tag : cases) {
            array.add(tagNode(mapper, tag.getTag(), tag.getCase()));
        }
        json.put("case", array);
    }

    putEnumNames(mapper, json, "definitness", morpho.getDefinitnessList());

    List<GenderTag> genders = morpho.getGenderList();
    if (!genders.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (GenderTag tag : genders) {
            array.add(tagNode(mapper, tag.getTag(), tag.getGender()));
        }
        json.put("gender", array);
    }

    List<NumberTag> numbers = morpho.getNumberList();
    if (!numbers.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (NumberTag tag : numbers) {
            array.add(tagNode(mapper, tag.getTag(), tag.getNumber()));
        }
        json.put("number", array);
    }

    putEnumNames(mapper, json, "person", morpho.getPersonList());

    List<PosTag> posTags = morpho.getPosList();
    if (!posTags.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (PosTag tag : posTags) {
            // POS tags are delegated to the dedicated serializer
            array.add(getPosTagSerializer().serialize(mapper, tag));
        }
        json.put("pos", array);
    }

    List<TenseTag> tenses = morpho.getTenseList();
    if (!tenses.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (TenseTag tag : tenses) {
            array.add(tagNode(mapper, tag.getTag(), tag.getTense()));
        }
        json.put("tense", array);
    }

    List<VerbMoodTag> moods = morpho.getVerbMoodList();
    if (!moods.isEmpty()) {
        ArrayNode array = mapper.createArrayNode();
        for (VerbMoodTag tag : moods) {
            array.add(tagNode(mapper, tag.getTag(), tag.getVerbForm()));
        }
        json.put("verb-mood", array);
    }
    return json;
}

/**
 * Creates the {@code {"tag": ..., "type": ...}} object used for tag based
 * features; {@code type} is only written when a value is present.
 */
private ObjectNode tagNode(ObjectMapper mapper, String tag, Enum<?> type) {
    ObjectNode node = mapper.createObjectNode();
    node.put("tag", tag);
    if (type != null) {
        node.put("type", type.name());
    }
    return node;
}

/**
 * Writes the names of the given enum values to {@code field}: nothing for
 * an empty list, a plain string for a single value, an array of strings
 * for several values.
 */
private void putEnumNames(ObjectMapper mapper, ObjectNode target, String field,
        List<? extends Enum<?>> values) {
    if (values.isEmpty()) {
        return;
    }
    if (values.size() == 1) {
        target.put(field, values.get(0).name());
        return;
    }
    ArrayNode names = mapper.createArrayNode();
    for (Enum<?> value : values) {
        names.add(value.name());
    }
    target.put(field, names);
}
Use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by Apache.
Class MorphoFeaturesSupport, method parse.
/**
 * Parses a {@link MorphoFeatures} instance from its JSON representation.
 * <p>
 * The {@code lemma} field is mandatory and must be textual. The fields
 * {@code case}, {@code gender}, {@code number}, {@code pos}, {@code tense}
 * and {@code verb-mood} are optional; when present they are expected to be
 * JSON arrays of objects. The fields {@code definitness} and {@code person}
 * are optional and parsed via {@code JsonUtils.parseEnum}. Malformed optional
 * content is never fatal: it is reported with a warning and skipped.
 *
 * @param jMorpho the JSON object holding the serialized features
 * @param at the analysed text; only forwarded to the POS tag parser
 * @return the parsed features
 * @throws IllegalStateException if {@code lemma} is missing or not a String
 */
@Override
public MorphoFeatures parse(ObjectNode jMorpho, AnalysedText at) {
    JsonNode jLemma = jMorpho.path("lemma");
    if (!jLemma.isTextual()) {
        throw new IllegalStateException("Field 'lemma' MUST provide a String value (parsed JSON: " + jMorpho + ")");
    }
    MorphoFeatures morpho = new MorphoFeatures(jLemma.asText());

    // --- case ---
    JsonNode node = jMorpho.path("case");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jCase = (ObjectNode) member;
                JsonNode tag = jCase.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Case> type = JsonUtils.parseEnum(jCase, "type", Case.class);
                    if (type.isEmpty()) {
                        // tag without a mapped value
                        morpho.addCase(new CaseTag(tag.getTextValue()));
                    } else {
                        // only the first parsed value is used
                        morpho.addCase(new CaseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse CaseTag because 'tag' value is " + "missing or is not a String (json: " + jCase.toString() + ")");
                }
            } else {
                log.warn("Unable to parse CaseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse CaseTags (Json Array expected as value for field 'case' but was " + node + ")");
    }

    // --- definitness ---
    if (jMorpho.has("definitness")) {
        for (Definitness d : JsonUtils.parseEnum(jMorpho, "definitness", Definitness.class)) {
            morpho.addDefinitness(d);
        }
    }

    // --- gender ---
    node = jMorpho.path("gender");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jGender = (ObjectNode) member;
                JsonNode tag = jGender.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Gender> type = JsonUtils.parseEnum(jGender, "type", Gender.class);
                    if (type.isEmpty()) {
                        morpho.addGender(new GenderTag(tag.getTextValue()));
                    } else {
                        morpho.addGender(new GenderTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse GenderTag because 'tag' value is " + "missing or is not a String (json: " + jGender.toString() + ")");
                }
            } else {
                log.warn("Unable to parse GenderTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse GenderTag (Json Array expected as value for field 'gender' but was " + node + ")");
    }

    // --- number ---
    node = jMorpho.path("number");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jNumber = (ObjectNode) member;
                JsonNode tag = jNumber.path("tag");
                if (tag.isTextual()) {
                    EnumSet<NumberFeature> type = JsonUtils.parseEnum(jNumber, "type", NumberFeature.class);
                    if (type.isEmpty()) {
                        morpho.addNumber(new NumberTag(tag.getTextValue()));
                    } else {
                        morpho.addNumber(new NumberTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse NumberTag because 'tag' value is " + "missing or is not a String (json: " + jNumber.toString() + ")");
                }
            } else {
                log.warn("Unable to parse NumberTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse NumberTag (Json Array expected as value for field 'number' but was " + node + ")");
    }

    // --- person ---
    if (jMorpho.has("person")) {
        for (Person p : JsonUtils.parseEnum(jMorpho, "person", Person.class)) {
            morpho.addPerson(p);
        }
    }

    // --- pos (delegated to the dedicated POS tag parser) ---
    node = jMorpho.path("pos");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jPosTag = (ObjectNode) member;
                morpho.addPos(getPosTagParser().parse(jPosTag, at));
            } else {
                log.warn("Unable to parse PosTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse PosTag (Json Array expected as value for field 'pos' but was " + node + ")");
    }

    // --- tense ---
    node = jMorpho.path("tense");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jTense = (ObjectNode) member;
                JsonNode tag = jTense.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Tense> type = JsonUtils.parseEnum(jTense, "type", Tense.class);
                    if (type.isEmpty()) {
                        morpho.addTense(new TenseTag(tag.getTextValue()));
                    } else {
                        morpho.addTense(new TenseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse TenseTag because 'tag' value is " + "missing or is not a String (json: " + jTense.toString() + ")");
                }
            } else {
                log.warn("Unable to parse TenseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse TenseTag (Json Array expected as value for field 'tense' but was " + node + ")");
    }

    // --- verb-mood ---
    node = jMorpho.path("verb-mood");
    if (node.isArray()) {
        for (JsonNode member : node) {
            if (member.isObject()) {
                ObjectNode jVerbMood = (ObjectNode) member;
                JsonNode tag = jVerbMood.path("tag");
                if (tag.isTextual()) {
                    EnumSet<VerbMood> type = JsonUtils.parseEnum(jVerbMood, "type", VerbMood.class);
                    if (type.isEmpty()) {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue()));
                    } else {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse VerbMoodTag because 'tag' value is " + "missing or is not a String (json: " + jVerbMood.toString() + ")");
                }
            } else {
                log.warn("Unable to parse VerbMoodTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse VerbMoodTag (Json Array expected as value for field 'verb-mood' but was " + node + ")");
    }
    return morpho;
}
Use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by Apache.
Class CeliMorphoFeatures, method featuresAsTriples.
/**
 * Converts the morphological features held by this instance into triples
 * that all use the given text annotation as subject.
 * <p>
 * A lemma triple is always emitted first. It is followed, in this order, by
 * the triples for POS categories (mapped tags only), number, person, gender,
 * definiteness, case, verb mood and tense. Tags whose value getter returns
 * {@code null} contribute no triple.
 *
 * @param textAnnotation the subject of every created triple
 * @param lang the language used for the lemma literal
 * @return the created triples
 */
public Collection<? extends Triple> featuresAsTriples(IRI textAnnotation, Language lang) {
    Collection<TripleImpl> triples = new Vector<TripleImpl>();
    triples.add(new TripleImpl(textAnnotation,
            CeliLemmatizerEnhancementEngine.hasLemmaForm,
            new PlainLiteralImpl(getLemma(), lang)));

    for (PosTag posTag : getPosList()) {
        if (!posTag.isMapped()) {
            continue; // unmapped POS tags do not provide categories
        }
        for (LexicalCategory category : posTag.getCategories()) {
            triples.add(new TripleImpl(textAnnotation, RDF_TYPE, category.getUri()));
        }
    }

    for (NumberTag numberTag : getNumberList()) {
        if (numberTag.getNumber() == null) {
            continue;
        }
        triples.add(new TripleImpl(textAnnotation, HAS_NUMBER, numberTag.getNumber().getUri()));
    }

    for (Person person : getPersonList()) {
        triples.add(new TripleImpl(textAnnotation, HAS_PERSON, person.getUri()));
    }

    for (GenderTag genderTag : getGenderList()) {
        if (genderTag.getGender() == null) {
            continue;
        }
        triples.add(new TripleImpl(textAnnotation, HAS_GENDER, genderTag.getGender().getUri()));
    }

    for (Definitness definitness : getDefinitnessList()) {
        triples.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, definitness.getUri()));
    }

    for (CaseTag caseTag : getCaseList()) {
        if (caseTag.getCase() == null) {
            continue;
        }
        triples.add(new TripleImpl(textAnnotation, HAS_CASE, caseTag.getCase().getUri()));
    }

    for (VerbMoodTag moodTag : getVerbMoodList()) {
        if (moodTag.getVerbForm() == null) {
            continue;
        }
        triples.add(new TripleImpl(textAnnotation, HAS_MOOD, moodTag.getVerbForm().getUri()));
    }

    for (TenseTag tenseTag : getTenseList()) {
        if (tenseTag.getTense() == null) {
            continue;
        }
        triples.add(new TripleImpl(textAnnotation, HAS_TENSE, tenseTag.getTense().getUri()));
    }
    return triples;
}
Aggregations