use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class RepresentationTest method testGetNaturalTextWithLanguagesWithoutValues.
/**
 * Verifies that asking for texts in languages the test data is not expected
 * to contain yields an iterator that is non-null but has no elements.
 */
@Test
public void testGetNaturalTextWithLanguagesWithoutValues() {
    final String testField = "urn:the.field:used.for.this.Test";
    final Representation representation = initNaturalLanguageTest(testField);

    // a single language that is not present
    Iterator<Text> spanishTexts = representation.get(testField, "es");
    assertNotNull(spanishTexts);
    assertFalse(spanishTexts.hasNext());

    // several languages, none of them present
    Iterator<Text> frenchAndItalianTexts = representation.get(testField, "fr", "it");
    assertNotNull(frenchAndItalianTexts);
    assertFalse(frenchAndItalianTexts.hasNext());
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class RepresentationTest method testGetNaturalTextWithMultipleLanguages.
/**
 * Verifies that a query for the languages "de" and "de-AT" returns exactly the
 * two expected texts ({@code NL_TEST_de} and {@code NL_TEST_de_AT}), each
 * tagged with one of the requested languages.
 */
@Test
public void testGetNaturalTextWithMultipleLanguages() {
    final String testField = "urn:the.field:used.for.this.Test";
    final Representation representation = initNaturalLanguageTest(testField);

    // query for both languages at once
    Iterator<Text> germanTexts = representation.get(testField, "de", "de-AT");
    assertNotNull(germanTexts);

    // the values that still have to show up in the result
    Set<String> pending = new HashSet<String>(Arrays.asList(NL_TEST_de, NL_TEST_de_AT));
    for (Iterator<Text> it = germanTexts; it.hasNext();) {
        Text current = it.next();
        String lang = current.getLanguage();
        boolean requestedLanguage = lang.equalsIgnoreCase("de") || lang.equalsIgnoreCase("de-AT");
        assertTrue(requestedLanguage);
        // every returned value must be an expected one and is ticked off
        boolean wasPending = pending.remove(current.getText());
        assertTrue(wasPending);
    }

    // nothing expected may be left over
    assertTrue(pending.isEmpty());
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class BaseGoogleRefineReconcileResource method reconcile.
/**
 * Executes one reconcile query and converts the results into the JSON
 * structure handed back to the client.
 * <p>
 * For every result the label of {@code NAME_FIELD} that is most similar to the
 * query text is selected as "name" (a case-insensitive exact match wins
 * immediately with similarity 1.0). The reported "score" is the query score of
 * the result multiplied by that similarity and divided by the reference score,
 * i.e. the score of the first result that provides one.
 * <p>
 * Results without a score value are treated as having score 0, and a
 * non-positive reference score yields a normalised score of 0 instead of a
 * NaN/infinite value, which cannot be represented in JSON.
 *
 * @param siteId the id of the site to query; passed through to
 *        {@code createFieldQuery} and {@code performQuery}
 * @param rQuery the reconcile query providing the query text and the constraints
 * @return a JSON object with a single "result" array holding the result
 *         objects, sorted with {@code resultScoreComparator}
 * @throws JSONException if building the JSON structure fails
 * @throws EntityhubException declared for the query execution (presumably
 *         raised by {@code performQuery} - confirm)
 */
private JSONObject reconcile(String siteId, ReconcileQuery rQuery) throws JSONException, EntityhubException {
    FieldQuery query = createFieldQuery(siteId);
    query.addSelectedFields(SELECTED_FIELDS);
    addNameConstraint(rQuery, query);
    addTypeConstraint(rQuery, query);
    addPropertyConstraints(rQuery, query);
    // NOTE(review): this re-applies the limit the query already has and looks
    // like a no-op; presumably a limit taken from rQuery was intended - confirm
    // before changing it.
    query.setLimit(query.getLimit());
    QueryResultList<Representation> results = performQuery(siteId, query);
    List<JSONObject> jResultList = new ArrayList<JSONObject>(results.size());
    //we need to know the highest score to normalise between [0..1]; the score
    //of the first result providing one is used as reference (this assumes the
    //results are ordered by descending score - TODO confirm)
    double maxQueryScore = -1;
    for (Representation r : results) {
        //read the score once; a missing value must not cause a NullPointerException
        Number rawScore = r.getFirst(resultScore.getUri(), Number.class);
        double queryScore = rawScore == null ? 0.0 : rawScore.doubleValue();
        if (maxQueryScore < 0 && rawScore != null) {
            maxQueryScore = queryScore;
        }
        JSONObject jResult = new JSONObject();
        jResult.put("id", r.getId());
        double similarity = 0.0;
        //the name returned for the entity
        String name = null;
        for (Iterator<Text> labels = r.getText(NAME_FIELD); labels.hasNext(); ) {
            String labelText = labels.next().getText();
            if (labelText.equalsIgnoreCase(rQuery.getQuery())) {
                //exact (case-insensitive) match: no better label possible
                name = labelText;
                similarity = 1.0;
                break;
            }
            double curSimilarity = Utils.levenshtein(rQuery.getQuery(), labelText);
            if (similarity < curSimilarity) {
                name = labelText;
                similarity = curSimilarity;
            }
        }
        //set the selected name
        jResult.put("name", name);
        Iterator<Reference> types = r.getReferences(TYPE_FIELD);
        if (types != null && types.hasNext()) {
            jResult.put("type", new JSONArray(ModelUtils.asCollection(types)));
        }
        //only normalise against a positive reference score; dividing by zero
        //would produce NaN/Infinity which is not a valid JSON number
        double normalisedScore = maxQueryScore > 0 ? queryScore * similarity / maxQueryScore : 0.0;
        jResult.put("score", normalisedScore);
        // NOTE(review): similarity starts at 0.0 and is only ever raised, so this
        // condition is always true; the intended threshold is not visible here -
        // confirm.
        jResult.put("match", similarity >= 0);
        jResultList.add(jResult);
    }
    //sort results based on score
    Collections.sort(jResultList, resultScoreComparator);
    JSONObject jResultContainer = new JSONObject();
    jResultContainer.put("result", new JSONArray(jResultList));
    return jResultContainer;
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class RdfIndexingSourceTest method testText.
/**
 * Checks every text value of every field of the given representation: the
 * value must not be {@code null} and its text must not end with
 * {@code "@" + language}.
 *
 * @param rep the representation whose text values are checked
 */
private void testText(Representation rep) {
    Iterator<String> fieldNames = rep.getFieldNames();
    while (fieldNames.hasNext()) {
        Iterator<Text> texts = rep.getText(fieldNames.next());
        // note: a field without any text value is accepted here
        while (texts.hasNext()) {
            Text current = texts.next();
            assertNotNull(current);
            // this tests that the text does not contain the @{lang} suffix as
            // added by the toString method of the RDF Literal java class
            String languageSuffix = "@" + current.getLanguage();
            assertFalse("Labels MUST NOT end with the Language! value=" + current.getText(), current.getText().endsWith(languageSuffix));
        }
    }
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class CoreferenceFinder method buildEntityTypeLabels.
/**
 * Collects the labels of the types of the given entity.
 * <p>
 * Every value of the {@code RDF_TYPE} field of the entity's representation
 * that is not excluded by the configuration is processed as follows: the
 * labels are first looked up in the in-memory entity type index. On a miss
 * the type is fetched from the referenced site (or from the entityhub when no
 * site is available), its {@code RDFS_LABEL} texts for the requested language
 * are collected and the result is added to the index. Types that cannot be
 * resolved contribute no labels.
 *
 * @param entity the entity whose type labels are collected
 * @param language the language used for the index lookup and for selecting
 *        the labels of a type
 * @return the union of the labels of all processed types; empty if no label
 *         was found
 * @throws EngineException declared for the lookups performed here (presumably
 *         raised while resolving the referenced site - confirm)
 */
private Set<String> buildEntityTypeLabels(Entity entity, String language) throws EngineException {
    Iterator<Object> types = entity.getRepresentation().get(RDF_TYPE.getUnicodeString());
    Set<String> collected = new HashSet<String>();
    while (types.hasNext()) {
        String typeUri = types.next().toString();
        if (!this.config.shouldExcludeClass(typeUri)) {
            // the in-memory index is consulted first
            Set<String> typeLabels = this.entityTypeIndex.lookupEntityType(new IRI(typeUri), language);
            if (typeLabels == null) {
                // index miss: resolve the type and remember its labels
                Site site = getReferencedSite();
                Entity typeEntity;
                if (site != null) {
                    typeEntity = site.getEntity(typeUri);
                } else {
                    typeEntity = this.entityHub.getEntity(typeUri);
                }
                if (typeEntity != null) {
                    typeLabels = new HashSet<String>();
                    for (Iterator<Text> it = typeEntity.getRepresentation().get(RDFS_LABEL.getUnicodeString(), language); it.hasNext();) {
                        typeLabels.add(it.next().getText());
                    }
                    this.entityTypeIndex.addEntityType(new IRI(typeUri), language, typeLabels);
                }
            }
            if (typeLabels != null) {
                collected.addAll(typeLabels);
            }
        }
    }
    return collected;
}
Aggregations