Search in sources :

Example 6 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SolrYardTest method testFieldQuery.

/**
 * Stores two representations that carry a text value for the same field and
 * verifies that {@link TextConstraint}s on that field select the expected
 * representations, both via {@code find(..)} and
 * {@code findRepresentation(..)}.
 */
@Test
public void testFieldQuery() throws YardException {
    final String firstId = "urn:yard.test.testFieldQuery:representation.id1";
    final String secondId = "urn:yard.test.testFieldQuery:representation.id2";
    final String textField = "urn:the.field:used.for.testFieldQuery";
    final String secondText = "This is the text content of a field with value2.";

    Yard yard = getYard();
    Representation first = create(firstId, true);
    Representation second = create(secondId, true);
    // add the text values that are queried for below and push both
    // representations to the yard with a single update call
    first.add(textField, "This is the text content of a field with value1.");
    second.add(textField, secondText);
    Iterable<Representation> updated = yard.update(Arrays.asList(first, second));
    assertNotNull(updated);

    // "text content" is part of both values -> both representations match
    FieldQuery bothQuery = yard.getQueryFactory().createFieldQuery();
    bothQuery.setConstraint(textField, new TextConstraint(Arrays.asList("text content")));
    QueryResultList<Representation> found = yard.find(bothQuery);
    assertEquals(2, found.size());

    // "value2" is only part of the second value -> a single match
    FieldQuery secondQuery = yard.getQueryFactory().createFieldQuery();
    secondQuery.setConstraint(textField, new TextConstraint(Arrays.asList("value2")));

    // find(..): the result is expected NOT to carry a value for the field
    found = yard.find(secondQuery);
    assertEquals(1, found.size());
    Representation hit = found.iterator().next();
    assertEquals(secondId, hit.getId());
    assertEquals(null, hit.getFirst(textField));

    // findRepresentation(..): the result is expected to carry the stored value
    found = yard.findRepresentation(secondQuery);
    assertEquals(1, found.size());
    hit = found.iterator().next();
    assertEquals(secondId, hit.getId());
    assertEquals(secondText, hit.getFirst(textField));
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) SolrYard(org.apache.stanbol.entityhub.yard.solr.impl.SolrYard) Yard(org.apache.stanbol.entityhub.servicesapi.yard.Yard) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) YardTest(org.apache.stanbol.entityhub.test.yard.YardTest) Test(org.junit.Test)

Example 7 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class NamedEntityTaggingEngine method computeEntityRecommentations.

/**
 * Computes the entity suggestions for a named entity.
 * <p>
 * A field query for the (lower cased) name of the entity is sent to the
 * parsed {@link Site} - or to the {@link Entityhub} if no site is parsed.
 * For persons, organisations and places an additional <code>rdf:type</code>
 * constraint is added if a type is configured; if processing of the
 * respective type is disabled an empty list is returned without querying.
 * Results are scored based on the result score of the query and the
 * similarity (as computed by <code>levenshtein(..)</code>) of the best
 * matching label with the name of the entity.
 *
 * @param site
 *            The {@link Site} to query or <code>null</code> to use the {@link Entityhub}
 * @param namedEntity
 *            the {@link NamedEntity} (name and type) to look up
 * @param subsumedAnnotations
 *            other text annotations for the same entity. Currently not used by this method.
 * @param language
 *            the language of the analysed text or <code>null</code> if not available.
 * @return the suggestions for the parsed {@link NamedEntity}: sorted by the natural ordering of
 *         {@link Suggestion} and limited to <code>numSuggestions</code> elements. An empty list
 *         if the type of the entity is deactivated or the query returned no results.
 * @throws EntityhubException
 *             On any Error while looking up Entities via the Entityhub
 */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    Constraint labelConstraint;
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    // NOTE(review): toLowerCase() uses the default locale - confirm that this is intended
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
        // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
        // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
        // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
        }
    }
    // request more results than needed as entities without a matching
    // label are filtered below
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    if (null == site) {
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    } else {
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    }
    // if site is NULL use the Entityhub, else the referenced site
    QueryResultList<Entity> results = site == null ? entityhub.findEntities(query) : site.findEntities(query);
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score
    for (Entity entity : results) {
        Suggestion match = new Suggestion(entity);
        Representation rep = match.getEntity().getRepresentation();
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            // the first available score is used to normalise partial matches
            maxScore = score;
        }
        // search for the best matching label; stop as soon as an exact
        // match (similarity of 1.0) was found
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            // accept all labels if the content language is unknown, labels
            // with no language and labels in the same language as the content
            if (language == null || label.getLanguage() == null || label.getLanguage().startsWith(language)) {
                double actMatch = levenshtein(casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() == null) {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
            continue;
        }
        if (score == null) {
            // without a result score the suggestion can not be normalised;
            // skip it instead of failing with a NullPointerException
            log.debug("No result score for Entity {} -> ignore suggestion", match.getEntity().getId());
            continue;
        }
        if (match.getLevenshtein() == 1.0) {
            if (maxExactScore == null) {
                maxExactScore = score;
            }
            // normalise exact matches against the best exact score
            match.setScore(score.doubleValue() / maxExactScore.doubleValue());
        } else {
            // normalise partial matches against the best match and the
            // Levenshtein similarity with the label
            match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
        }
        matches.add(match);
    }
    // now sort the results
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 8 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class CoreferenceFinder method lookupEntity.

/**
 * Looks up a single Entity for the text and the NER type of the parsed span.
 * <p>
 * The query is sent to the {@link Site} returned by
 * <code>getReferencedSite()</code>, or to the Entityhub if that is
 * <code>null</code>. It constrains <code>rdfs:label</code> to the text of
 * the span (case insensitive, in the parsed language) and
 * <code>rdf:type</code> to the type of the NER annotation of the span.
 *
 * @param ner
 *            the span providing the text and the NER annotation
 * @param language
 *            the language parsed to the label constraint
 * @return the first Entity of the result or <code>null</code> if nothing was found
 * @throws EngineException
 */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    final Site referencedSite = getReferencedSite();
    final boolean useEntityhub = referencedSite == null;
    FieldQueryFactory factory = useEntityhub ? entityHub.getQueryFactory() : referencedSite.getQueryFactory();
    FieldQuery query = factory.createFieldQuery();

    // label of the entity needs to match the text of the span
    Constraint byLabel = new TextConstraint(ner.getSpan(), false, language, null);
    query.setConstraint(RDFS_LABEL.getUnicodeString(), byLabel);

    // type of the entity needs to be the type of the NER annotation
    String nerType = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType().getUnicodeString();
    query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(nerType));

    // only a single result is requested
    query.setLimit(1);
    QueryResultList<Entity> found;
    if (useEntityhub) {
        found = entityHub.findEntities(query);
    } else {
        found = referencedSite.findEntities(query);
    }
    // as the limit is 1 a non-empty result contains exactly the entry to return
    return found.isEmpty() ? null : found.iterator().next();
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)

Example 9 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class EntitySearcherUtils method createFieldQuery.

/**
 * Validates the parameters as parsed to
 * {@link EntitySearcher#lookup(String, Set, List, String...)}
 * and creates a {@link FieldQuery} for them.
 * <p>
 * The created query selects the search field, all included fields and the
 * entity rank, is limited to 20 results and uses a {@link TextConstraint}
 * with proximity ranking enabled on the search field.
 *
 * @param factory the factory used to create the query
 * @param field the field to search. MUST NOT be <code>null</code> nor empty
 * @param includeFields additional fields to select or <code>null</code>
 * to only select the search field
 * @param search the search strings. MUST NOT be <code>null</code> nor empty
 * @param languages the languages parsed to the {@link TextConstraint}
 * @return the query
 * @throws IllegalArgumentException if the search field or the list of
 * search strings is <code>null</code> or empty
 */
public static final FieldQuery createFieldQuery(FieldQueryFactory factory, IRI field, Set<IRI> includeFields, List<String> search, String... languages) {
    if (field == null || field.getUnicodeString().isEmpty()) {
        throw new IllegalArgumentException("The parsed search field MUST NOT be NULL nor empty");
    }
    if (search == null || search.isEmpty()) {
        throw new IllegalArgumentException("The parsed list of search strings MUST NOT be NULL nor empty");
    }
    //build the query and than return the result
    FieldQuery query = factory.createFieldQuery();
    //the search field is always selected. NOTE: includeFields holds IRIs,
    //so the membership test needs to use the IRI and not its String form
    if (includeFields == null || !includeFields.contains(field)) {
        query.addSelectedField(field.getUnicodeString());
    }
    if (includeFields != null) {
        for (IRI select : includeFields) {
            query.addSelectedField(select.getUnicodeString());
        }
    }
    //also add the entity rankings
    query.addSelectedField(RdfResourceEnum.entityRank.getUri());
    //TODO make configurable
    query.setLimit(20);
    TextConstraint tc = new TextConstraint(search, languages);
    //STANBOL-1104
    tc.setProximityRanking(true);
    query.setConstraint(field.getUnicodeString(), tc);
    return query;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) IRI(org.apache.clerezza.commons.rdf.IRI) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 10 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class AbstractBackend method listSubjects.

/**
 * Lists the subjects that have the parsed object as value of the parsed
 * property.
 * <p>
 * Depending on the parsed object a reference constraint (URIs), a text
 * constraint ({@link Text} values) or a value constraint (everything else)
 * is used. Results are requested in pages of <code>DEFAULT_MAX_SELECT</code>
 * elements. Further pages are requested as long as the last page was full
 * and the number of collected subjects does not exceed
 * <code>DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT</code>.
 *
 * @param property the property (its <code>toString()</code> is used as field)
 * @param object the value to search for
 * @return references to the found subjects. An empty collection if none was found
 * @throws IllegalStateException if querying fails with an {@link EntityhubException}
 */
@Override
public Collection<Object> listSubjects(Object property, Object object) {
    FieldQuery query = createQuery();
    if (this.isURI(object)) {
        query.setConstraint(property.toString(), new ReferenceConstraint(object.toString()));
    } else if (object instanceof Text) {
        Text text = (Text) object;
        String lang = text.getLanguage();
        // only parse a language to the constraint if the text defines one
        TextConstraint textConstraint = lang == null
                ? new TextConstraint(text.getText(), PatternType.none, true)
                : new TextConstraint(text.getText(), PatternType.none, true, lang);
        query.setConstraint(property.toString(), textConstraint);
    } else {
        Set<DataTypeEnum> primaryTypes = DataTypeEnum.getPrimaryDataTypes(object.getClass());
        ValueConstraint valueConstraint;
        if (primaryTypes != null && !primaryTypes.isEmpty()) {
            // restrict the constraint to the URIs of the primary data types
            Collection<String> typeUris = new ArrayList<String>(primaryTypes.size());
            for (DataTypeEnum primaryType : primaryTypes) {
                typeUris.add(primaryType.getUri());
            }
            valueConstraint = new ValueConstraint(object, typeUris);
        } else {
            valueConstraint = new ValueConstraint(object);
        }
        query.setConstraint(property.toString(), valueConstraint);
    }
    query.setLimit(Integer.valueOf(DEFAULT_MAX_SELECT));
    QueryResultList<String> page;
    try {
        page = query(query);
    } catch (EntityhubException e) {
        throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
    }
    if (page.isEmpty()) {
        return Collections.emptySet();
    }
    if (page.size() == 1) {
        //assuming that a single result is a likely case
        return Collections.singleton((Object) getValueFactory().createReference(page.iterator().next()));
    }
    Collection<Object> subjects = new HashSet<Object>(page.size());
    int offset = 0;
    while (true) {
        for (String id : page) {
            subjects.add(getValueFactory().createReference(id));
        }
        // request an other page only if the current one was full and the
        // limit for the number of results is not yet reached
        boolean morePages = page.size() >= DEFAULT_MAX_SELECT && subjects.size() <= DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT;
        if (!morePages) {
            break;
        }
        offset += page.size();
        query.setOffset(offset);
        try {
            page = query(query);
        } catch (EntityhubException e) {
            throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
        }
    }
    return subjects;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) DataTypeEnum(org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Aggregations

TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)31 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)17 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)10 Test (org.junit.Test)10 ArrayList (java.util.ArrayList)8 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)8 Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)7 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)7 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)5 HashSet (java.util.HashSet)3 DataTypeEnum (org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum)3 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)3 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)3 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)3 FieldQueryFactory (org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory)3 ReconcileValue (org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue)2 SparqlFieldQuery (org.apache.stanbol.entityhub.query.sparql.SparqlFieldQuery)2 FieldMapping (org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping)2 Yard (org.apache.stanbol.entityhub.servicesapi.yard.Yard)2 YardTest (org.apache.stanbol.entityhub.test.yard.YardTest)2