Search in sources :

Example 21 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class RepresentationTest method testGetNaturalTextWithLanguagesWithoutValues.

/**
 * Checks that asking a representation for natural language texts in
 * languages that have no values returns a non-null, empty iterator, both
 * for a single language and for several languages at once.
 * <p>
 * The fixture is created by {@code initNaturalLanguageTest(String)};
 * presumably it does not add any "es", "fr" or "it" values (defined
 * outside this method, not verified here).
 */
@Test
public void testGetNaturalTextWithLanguagesWithoutValues() {
    final String fieldName = "urn:the.field:used.for.this.Test";
    final Representation representation = initNaturalLanguageTest(fieldName);

    // a single language that is not present
    Iterator<Text> spanishTexts = representation.get(fieldName, "es");
    assertNotNull(spanishTexts);
    assertFalse(spanishTexts.hasNext());

    // several languages, none of which is present
    Iterator<Text> frenchOrItalianTexts = representation.get(fieldName, "fr", "it");
    assertNotNull(frenchOrItalianTexts);
    assertFalse(frenchOrItalianTexts.hasNext());
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) Test(org.junit.Test)

Example 22 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class RepresentationTest method testGetNaturalTextWithMultipleLanguages.

/**
 * Checks that querying a field for the languages "de" and "de-AT" returns
 * exactly the two expected texts ({@code NL_TEST_de} and
 * {@code NL_TEST_de_AT}): every returned text must carry one of the two
 * languages, must be one of the expected values, and no expected value may
 * be missing or returned twice.
 */
@Test
public void testGetNaturalTextWithMultipleLanguages() {
    final String fieldName = "urn:the.field:used.for.this.Test";
    final Representation representation = initNaturalLanguageTest(fieldName);

    // query for two languages at once
    Iterator<Text> germanTexts = representation.get(fieldName, "de", "de-AT");
    assertNotNull(germanTexts);

    // values that still have to show up in the result
    Set<String> expected = new HashSet<String>();
    expected.add(NL_TEST_de);
    expected.add(NL_TEST_de_AT);

    for (Iterator<Text> it = germanTexts; it.hasNext(); ) {
        Text current = it.next();
        String lang = current.getLanguage();
        boolean isGerman = lang.equalsIgnoreCase("de") || lang.equalsIgnoreCase("de-AT");
        assertTrue(isGerman);
        // each expected value may be consumed only once
        boolean wasExpected = expected.remove(current.getText());
        assertTrue(wasExpected);
    }

    // nothing expected may be left over
    assertTrue(expected.isEmpty());
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 23 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class BaseGoogleRefineReconcileResource method reconcile.

/**
 * Executes a single reconcile query against the given site and converts
 * the query results into the JSON response object.
 * <p>
 * For every result the label of {@code NAME_FIELD} that best matches the
 * query string is selected (an exact case-insensitive match wins
 * immediately, otherwise the label with the highest
 * {@code Utils.levenshtein} value). The reported score is the query score
 * of the result, scaled by that label similarity and divided by the score
 * of the first result.
 *
 * @param siteId the id of the site to query; passed through to
 *               {@code createFieldQuery} and {@code performQuery}
 * @param rQuery the reconcile query providing the name, type and property
 *               constraints
 * @return a JSON object with a single {@code "result"} array holding one
 *         entry per query result, sorted with {@code resultScoreComparator}
 * @throws JSONException on any error while building the JSON response
 * @throws EntityhubException declared for failures of the underlying query
 */
private JSONObject reconcile(String siteId, ReconcileQuery rQuery) throws JSONException, EntityhubException {
    FieldQuery query = createFieldQuery(siteId);
    query.addSelectedFields(SELECTED_FIELDS);
    addNameConstraint(rQuery, query);
    addTypeConstraint(rQuery, query);
    addPropertyConstraints(rQuery, query);
    // NOTE(review): this writes the query's own limit back to itself and is
    // therefore a no-op. Presumably the limit of rQuery was meant to be
    // applied here - confirm against ReconcileQuery before changing.
    query.setLimit(query.getLimit());
    QueryResultList<Representation> results = performQuery(siteId, query);
    List<JSONObject> jResultList = new ArrayList<JSONObject>(results.size());
    // The score of the first result is used to normalise all scores to
    // [0..1]; this assumes results are ordered by descending score.
    double maxQueryScore = -1;
    for (Representation r : results) {
        // a result without a score is treated as score 0 instead of failing
        // with a NullPointerException
        Number rawScore = r.getFirst(resultScore.getUri(), Number.class);
        double queryScore = rawScore == null ? 0.0 : rawScore.doubleValue();
        if (maxQueryScore < 0) {
            maxQueryScore = queryScore;
        }
        JSONObject jResult = new JSONObject();
        jResult.put("id", r.getId());
        double similarity = 0.0;
        // the name returned for the entity
        String name = null;
        for (Iterator<Text> labels = r.getText(NAME_FIELD); labels.hasNext(); ) {
            Text label = labels.next();
            if (label.getText().equalsIgnoreCase(rQuery.getQuery())) {
                // exact (case-insensitive) match: no better label possible
                name = label.getText();
                similarity = 1.0;
                break;
            }
            double curSimilarity = Utils.levenshtein(rQuery.getQuery(), label.getText());
            if (similarity < curSimilarity) {
                name = label.getText();
                similarity = curSimilarity;
            }
        }
        // set the selected name
        jResult.put("name", name);
        Iterator<Reference> types = r.getReferences(TYPE_FIELD);
        if (types != null && types.hasNext()) {
            jResult.put("type", new JSONArray(ModelUtils.asCollection(types)));
        }
        // Only normalise against a positive maximum: dividing by zero would
        // produce NaN/Infinity, which is not a valid JSON number.
        double normalisedScore = maxQueryScore > 0 ? queryScore * similarity / maxQueryScore : 0.0;
        jResult.put("score", normalisedScore);
        // NOTE(review): similarity starts at 0.0 and is only ever raised, so
        // this condition is always true. A real threshold was probably
        // intended - confirm the expected "match" semantics.
        jResult.put("match", similarity >= 0);
        jResultList.add(jResult);
    }
    // sort results based on score (nothing to do for an empty list)
    Collections.sort(jResultList, resultScoreComparator);
    JSONObject jResultContainer = new JSONObject();
    jResultContainer.put("result", new JSONArray(jResultList));
    return jResultContainer;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) ArrayList(java.util.ArrayList) JSONArray(org.codehaus.jettison.json.JSONArray) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) JSONObject(org.codehaus.jettison.json.JSONObject)

Example 24 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class RdfIndexingSourceTest method testText.

/**
 * Asserts, for every field of the given representation, that none of its
 * natural language values is {@code null} and that no value's text ends
 * with {@code "@" + language}.
 * <p>
 * The suffix check guards against texts that were created from the
 * {@code toString()} output of an RDF literal, which appends the language
 * in that form.
 *
 * @param rep the representation whose text values are checked
 */
private void testText(Representation rep) {
    Iterator<String> fieldNames = rep.getFieldNames();
    while (fieldNames.hasNext()) {
        String fieldName = fieldNames.next();
        // fields without any text value are accepted: the loop simply
        // does not run for them
        Iterator<Text> texts = rep.getText(fieldName);
        while (texts.hasNext()) {
            Text current = texts.next();
            assertNotNull(current);
            String languageSuffix = "@" + current.getLanguage();
            String value = current.getText();
            // this tests that the text does not contain the @{lang} as
            // added by the toString method of the RDF Literal java class
            boolean endsWithLanguage = value.endsWith(languageSuffix);
            assertFalse("Labels MUST NOT end with the Language! value=" + value, endsWithLanguage);
        }
    }
}
Also used : Text(org.apache.stanbol.entityhub.servicesapi.model.Text)

Example 25 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class CoreferenceFinder method buildEntityTypeLabels.

/**
 * Collects the labels of all types of the given entity for one language.
 * <p>
 * The type URIs are read from the {@code RDF_TYPE} field of the entity's
 * representation. Types rejected by {@code config.shouldExcludeClass} are
 * skipped. For each remaining type the labels are first looked up in
 * {@code entityTypeIndex}; on a miss the type entity is loaded from the
 * referenced site (or from {@code entityHub} when there is no site), its
 * {@code RDFS_LABEL} values for the language are collected and stored in
 * the index. Types that cannot be loaded contribute no labels.
 *
 * @param entity the entity whose type labels are collected
 * @param language the language of the labels to collect
 * @return the union of the labels of all considered types; never
 *         {@code null}, possibly empty
 * @throws EngineException declared by this method; presumably raised by
 *         {@code getReferencedSite()} or the entity lookup (not visible here)
 */
private Set<String> buildEntityTypeLabels(Entity entity, String language) throws EngineException {
    Iterator<Object> typeUris = entity.getRepresentation().get(RDF_TYPE.getUnicodeString());
    Set<String> collected = new HashSet<String>();
    for (Iterator<Object> it = typeUris; it.hasNext(); ) {
        String typeUri = it.next().toString();
        if (this.config.shouldExcludeClass(typeUri)) {
            continue;
        }
        IRI typeIri = new IRI(typeUri);
        // the in-memory index is consulted first
        Set<String> typeLabels = this.entityTypeIndex.lookupEntityType(typeIri, language);
        if (typeLabels == null) {
            // index miss: resolve the type entity and cache its labels
            Site site = getReferencedSite();
            Entity typeEntity;
            if (site != null) {
                typeEntity = site.getEntity(typeUri);
            } else {
                typeEntity = this.entityHub.getEntity(typeUri);
            }
            if (typeEntity == null) {
                // unknown type: nothing to add and nothing to cache
                continue;
            }
            typeLabels = new HashSet<String>();
            Iterator<Text> labelTexts = typeEntity.getRepresentation().get(RDFS_LABEL.getUnicodeString(), language);
            while (labelTexts.hasNext()) {
                Text labelText = labelTexts.next();
                typeLabels.add(labelText.getText());
            }
            this.entityTypeIndex.addEntityType(typeIri, language, typeLabels);
        }
        collected.addAll(typeLabels);
    }
    return collected;
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) HashSet(java.util.HashSet)

Aggregations

Text (org.apache.stanbol.entityhub.servicesapi.model.Text)50 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)32 Test (org.junit.Test)24 HashSet (java.util.HashSet)14 Reference (org.apache.stanbol.entityhub.servicesapi.model.Reference)12 ArrayList (java.util.ArrayList)11 IRI (org.apache.clerezza.commons.rdf.IRI)6 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)4 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)4 ValueFactory (org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)4 RepresentationTest (org.apache.stanbol.entityhub.test.model.RepresentationTest)4 Graph (org.apache.clerezza.commons.rdf.Graph)3 Language (org.apache.clerezza.commons.rdf.Language)3 Literal (org.apache.clerezza.commons.rdf.Literal)3 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)3 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)3 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)3 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)3 URI (java.net.URI)2 URL (java.net.URL)2