
Example 6 with ReferenceConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in project stanbol by apache.

the class BaseGoogleRefineReconcileResource method addTypeConstraint.

/**
     * Adds the type constraint for the parsed reconcile query.
     * 
     * @param rQuery the reconcile query providing the requested types
     * @param query the field query the type constraint is added to
     */
private void addTypeConstraint(ReconcileQuery rQuery, FieldQuery query) {
    //maybe another column was also mapped to the TYPE_FIELD property
    Collection<ReconcileValue> additionalTypes = rQuery.removeProperty(TYPE_FIELD);
    Set<String> queryTypes = rQuery.getTypes();
    Set<String> types = null;
    if (additionalTypes == null) {
        if (queryTypes != null) {
            types = queryTypes;
        }
    } else {
        types = new HashSet<String>();
        if (queryTypes != null) {
            types.addAll(queryTypes);
        }
        for (ReconcileValue value : additionalTypes) {
            if (value != null) {
                if (value.getId() != null) {
                    types.add(value.getId());
                } else if (value.getValue() instanceof String) {
                    //TODO: check if the assumption that String values are
                    //good for types is valid
                    types.add((String) value.getValue());
                }
            }
        //else null -> ignore
        }
    }
    if (types != null && !types.isEmpty()) {
        query.setConstraint(TYPE_FIELD, new ReferenceConstraint(types));
    }
}
Also used : ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) ReconcileValue(org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue)
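
For context, the ReferenceConstraint built above restricts results to entities whose type field references at least one of the collected URIs. Below is a minimal standalone sketch of the same pattern; the helper class, the hard-coded rdf:type field URI and the example DBpedia type are illustrative assumptions, not part of the Stanbol sources shown here.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;

public class TypeConstraintSketch {

    /** Builds a query constrained to the given type URIs (illustrative sketch). */
    public static FieldQuery typeQuery(FieldQueryFactory queryFactory, String... typeUris) {
        FieldQuery query = queryFactory.createFieldQuery();
        Set<String> types = new HashSet<String>(Arrays.asList(typeUris));
        if (!types.isEmpty()) {
            // a ReferenceConstraint over several URIs matches entities whose
            // field references any of the parsed values
            query.setConstraint("http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
                    new ReferenceConstraint(types));
        }
        return query;
    }
}

Usage would mirror the reconcile resource: obtain the FieldQueryFactory from the Entityhub or a referenced Site and pass the requested types, e.g. typeQuery(queryFactory, "http://dbpedia.org/ontology/Person").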

Example 7 with ReferenceConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in project stanbol by apache.

the class BaseGoogleRefineReconcileResource method addPropertyConstraints.

/**
     * Adds property constraints for the parsed reconcile query.
     * 
     * @param rQuery the reconcile query providing the parsed properties and values
     * @param query the field query the property constraints are added to
     */
private void addPropertyConstraints(ReconcileQuery rQuery, FieldQuery query) {
    Collection<String> ids = new HashSet<String>();
    // keep order for texts
    List<String> texts = new ArrayList<String>();
    Collection<Object> values = new HashSet<Object>();
    //hold all references for @references special property
    HashSet<String> references = new HashSet<String>();
    //holds all texts for @fullText special property
    List<String> fullText = null;
    //holds the context for the @similarity special property
    Collection<String> similarityContext = null;
    //the field used for the @similarity special property
    HashSet<String> similarityFields = new LinkedHashSet<String>();
    for (Entry<ReconcileProperty, Collection<ReconcileValue>> propertyEntry : rQuery.getProperties()) {
        ReconcileProperty property = propertyEntry.getKey();
        // collect the properties
        for (ReconcileValue value : propertyEntry.getValue()) {
            if (value.getId() != null) {
                ids.add(value.getId());
            }
            if (value.getValue() instanceof String) {
                texts.add((String) value.getValue());
            } else {
                values.add(value.getValue());
            }
        }
        //handle supported special properties
        if (property.isSpecial()) {
            if (property.getName().equalsIgnoreCase("references")) {
                //collect all URI values as references; parameters are not supported
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @references -> ignore '{}'", property.getParameter());
                }
                if (ids.isEmpty()) {
                    log.warn("No URI values present for parsed @references property! (values: " + propertyEntry.getValue());
                }
                references.addAll(ids);
            } else if (property.getName().equalsIgnoreCase("fulltext")) {
                //use the collected text values as full text context; parameters are not supported
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @fullText -> ignore '{}'", property.getParameter());
                }
                fullText = texts;
            } else if (property.getName().equalsIgnoreCase("similarity")) {
                String propUri = property.getParameter() != null ? nsPrefixService.getFullName(property.getParameter()) : SpecialFieldEnum.fullText.getUri();
                if (propUri != null) {
                    similarityFields.add(propUri);
                } else {
                    //TODO: maybe throw an Exception instead
                    log.warn("Unknown prefix '{}' used by Google Refine query parameter of property '{}'! " + "Will use the full text field as fallback", NamespaceMappingUtils.getPrefix(property.getParameter()), property);
                    similarityFields.add(SpecialFieldEnum.fullText.getUri());
                }
                similarityContext = texts;
            } else {
                //TODO: implement LDPATH support
                log.warn("ignore unsupported special property {}", property);
            }
        } else {
            //  * non Reference | Text | Datatype values are ignored
            if (!ids.isEmpty()) {
                // only references -> create reference constraint
                query.setConstraint(property.getName(), new ReferenceConstraint(ids));
                if (ids.size() != propertyEntry.getValue().size()) {
                    log.info("Only some of the parsed values of the field {} contain" + "references -> will ignore values with missing references");
                }
            } else if (!texts.isEmpty()) {
                // NOTE: This will use OR over all texts. To enforce AND one
                // would need to parse a single string with all values e.g. by
                // using StringUtils.join(texts," ")
                query.setConstraint(property.getName(), new TextConstraint(texts));
                if (texts.size() != propertyEntry.getValue().size()) {
                    log.info("Only some of the parsed values of the field {} are "
                        + "of type String -> will ignore non-string values", property.getName());
                }
            } else if (!values.isEmpty()) {
                query.setConstraint(property.getName(), new ValueConstraint(values));
            }
        //else no values ... ignore property
        }
        //clean up
        ids.clear();
        values.clear();
    }
    //now add constraints for the collected special properties
    if (!references.isEmpty()) {
        //add references constraint
        ReferenceConstraint refConstraint = new ReferenceConstraint(references, MODE.all);
        query.setConstraint(SpecialFieldEnum.references.getUri(), refConstraint);
    }
    if (fullText != null && !fullText.isEmpty()) {
        TextConstraint textConstraint = new TextConstraint(fullText);
        query.setConstraint(SpecialFieldEnum.fullText.getUri(), textConstraint);
    //add full text constraint
    }
    if (similarityContext != null && !similarityContext.isEmpty()) {
        //add similarity constraint
        Iterator<String> fieldIt = similarityFields.iterator();
        String field = fieldIt.next();
        SimilarityConstraint simConstraint;
        if (fieldIt.hasNext()) {
            List<String> addFields = new ArrayList<String>(similarityFields.size() - 1);
            while (fieldIt.hasNext()) {
                addFields.add(fieldIt.next());
            }
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text, addFields);
        } else {
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text);
        }
        query.setConstraint(field, simConstraint);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) ArrayList(java.util.ArrayList) ReconcileProperty(org.apache.stanbol.entityhub.jersey.grefine.ReconcileProperty) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) Collection(java.util.Collection) JSONObject(org.codehaus.jettison.json.JSONObject) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReconcileValue(org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue) HashSet(java.util.HashSet)
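
The comment in the text branch above notes that a TextConstraint over several strings uses OR semantics, and that AND-like behaviour would require joining the values into a single string. A hedged sketch of both variants follows; the helper class and the field parameter are hypothetical, and StringUtils is the commons-lang utility mentioned in the comment.

import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;

public class TextConstraintSketch {

    /** OR semantics: matches entities containing any of the parsed text values. */
    public static void setAnyValueConstraint(FieldQuery query, String field, List<String> texts) {
        query.setConstraint(field, new TextConstraint(texts));
    }

    /** Approximated AND semantics: join all values into a single (case-insensitive) query string. */
    public static void setAllValuesConstraint(FieldQuery query, String field, List<String> texts) {
        query.setConstraint(field, new TextConstraint(StringUtils.join(texts, " "), false));
    }
}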

Example 8 with ReferenceConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in project stanbol by apache.

the class NamedEntityTaggingEngine method computeEntityRecommentations.

/**
     * Computes the Enhancements
     * 
     * @param site
     *            the {@link Site} to query or <code>null</code> to use the {@link Entityhub}
     * @param namedEntity
     *            the {@link NamedEntity} to compute suggestions for
     * @param subsumedAnnotations
     *            other text annotations for the same entity
     * @param language
     *            the language of the analysed text or <code>null</code> if not available.
     * @return the suggestions for the parsed {@link NamedEntity}
     * @throws EntityhubException
     *             On any Error while looking up Entities via the Entityhub
     */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    // First get the required properties for the parsed textAnnotation
    // ... and check the values
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    // replace spaces with plus to create an AND search for all words in the
    // name!
    Constraint labelConstraint;
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
        // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
        // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
        // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
        }
    }
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    if (null == site)
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    else
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    // if site is NULL use the Entityhub, else the referenced site
    QueryResultList<Entity> results = site == null
            ? entityhub.findEntities(query)
            : site.findEntities(query);
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values to [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score
    for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext(); ) {
        Suggestion match = new Suggestion(guesses.next());
        Representation rep = match.getEntity().getRepresentation();
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            maxScore = score;
        }
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            // if the content language is unknown -> accept all labels
            // also accept labels without a language
            // and labels in the same language as the content
            if (language == null || label.getLanguage() == null
                    || label.getLanguage().startsWith(language)) {
                double actMatch = levenshtein(casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() != null) {
            if (match.getLevenshtein() == 1.0) {
                if (maxExactScore == null) {
                    maxExactScore = score;
                }
                // normalise exact matches against the best exact score
                match.setScore(score.doubleValue() / maxExactScore.doubleValue());
            } else {
                // normalise partial matches against the best match and the
                // Levenshtein similarity with the label
                match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
            }
            matches.add(match);
        } else {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
        }
    }
    // now sort the results
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory)
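
The score handling above can be read in isolation: exact label matches are normalised against the best exact score, while partial matches are weighted by the Levenshtein similarity of the matched label and normalised against the best overall score. The following self-contained sketch restates that calculation with invented sample scores; it is not part of the engine.

public class ScoreNormalisationSketch {

    /** Mirrors the score normalisation used in computeEntityRecommentations. */
    static double normalise(float solrScore, double levenshtein, float maxScore, float maxExactScore) {
        if (levenshtein == 1.0) {
            // exact matches are normalised against the best exact score
            return solrScore / (double) maxExactScore;
        }
        // partial matches are weighted by the label similarity and
        // normalised against the best overall score
        return solrScore * levenshtein / (double) maxScore;
    }

    public static void main(String[] args) {
        System.out.println(normalise(4.2f, 1.0, 4.2f, 4.2f)); // best exact match -> 1.0
        System.out.println(normalise(3.1f, 0.8, 4.2f, 4.2f)); // partial match -> ~0.59
    }
}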

Example 9 with ReferenceConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in project stanbol by apache.

the class CoreferenceFinder method lookupEntity.

/**
     * Gets an Entity from the configured {@link Site} based on the NER text and type.
     * 
     * @param ner the NER {@link Span} to look up
     * @param language the language of the analysed text
     * @return the first matching {@link Entity} or <code>null</code> if none was found
     * @throws EngineException on any error while querying the configured {@link Site} or the Entityhub
     */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    Site site = getReferencedSite();
    FieldQueryFactory queryFactory = site == null ? entityHub.getQueryFactory() : site.getQueryFactory();
    FieldQuery query = queryFactory.createFieldQuery();
    Constraint labelConstraint;
    String namedEntityLabel = ner.getSpan();
    labelConstraint = new TextConstraint(namedEntityLabel, false, language, null);
    query.setConstraint(RDFS_LABEL.getUnicodeString(), labelConstraint);
    query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType().getUnicodeString()));
    query.setLimit(1);
    // if site is NULL use the Entityhub, else the referenced site
    QueryResultList<Entity> results = site == null
            ? entityHub.findEntities(query)
            : site.findEntities(query);
    if (results.isEmpty())
        return null;
    // We set the limit to 1 so if it found anything it should contain just 1 entry
    return results.iterator().next();
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory)

Example 10 with ReferenceConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in project stanbol by apache.

the class SolrQueryFactory method initValueConstraint.

/**
     * @param indexConstraint the index constraint holding the {@link ValueConstraint} to initialise
     */
private void initValueConstraint(IndexConstraint indexConstraint) {
    ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
    if (valueConstraint.getValues() == null) {
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for " + "the parsed datatypes %s is present - can not be supported by a Solr query!", valueConstraint.getDataTypes()));
    } else {
        // first process the parsed dataTypes to get the supported types
        List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
        List<String> acceptedDataTypes = new ArrayList<String>();
        if (valueConstraint.getDataTypes() != null) {
            for (String dataType : valueConstraint.getDataTypes()) {
                IndexDataTypeEnum indexDataTypeEnumEntry = IndexDataTypeEnum.forUri(dataType);
                if (indexDataTypeEnumEntry != null) {
                    indexDataTypes.add(indexDataTypeEnumEntry.getIndexType());
                    acceptedDataTypes.add(dataType);
                } else {
                    // TODO: Add possibility to add warnings to indexConstraints
                    log.warn("A Datatype parsed for a ValueConstraint is not " + "supported and will be ignored (dataTypeUri={})", dataType);
                }
            }
        }
        //we support only a single dataType ...
        //  ... therefore remove additional data types from the ValueConstraint
        IndexDataType indexDataType = null;
        if (!indexDataTypes.isEmpty()) {
            indexDataType = indexDataTypes.get(0);
            if (indexDataTypes.size() > 1) {
                log.warn("Only a single DataType is supported for ValueConstraints!");
                while (acceptedDataTypes.size() > 1) {
                    String ignored = acceptedDataTypes.remove(acceptedDataTypes.size() - 1);
                    log.warn("  > ignore parsed dataType {}", ignored);
                }
            }
        }
        //if no dataType was parsed, the dataType is initialised based on the first parsed value
        ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
        //init the boost
        addBoost(constraintValue, valueConstraint);
        for (Object value : valueConstraint.getValues()) {
            IndexValue indexValue;
            if (indexDataType == null) {
                // get the dataType based on the type of the value
                try {
                    indexValue = indexValueFactory.createIndexValue(value);
                } catch (NoConverterException e) {
                    // if not found use the toString() and string as type
                    log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                    indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
                }
                //initialise the IndexDataType for this query based on the first parsed value
                indexDataType = indexValue.getType();
            } else {
                indexValue = new IndexValue(value.toString(), indexDataType);
            }
            //add the constraint
            constraintValue.getValues().add(indexValue);
        }
        //indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.DATATYPE, indexDataType);
        IndexField indexField;
        if (IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType)) {
            //NOTE: in case of TEXT we need also to add the language to create a valid
            //query!
            // * We take the language of the first parsed element
            indexField = new IndexField(indexConstraint.getPath(), indexDataType, constraintValue.getValues().iterator().next().getLanguage());
        } else {
            indexField = new IndexField(indexConstraint.getPath(), indexDataType);
        }
        //set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
        indexConstraint.setIndexFieldConstraints(indexField);
        //set the VALUE
        //TODO: We need to somehow pass the MODE so that the encoder knows how
        //      to encode the values
        indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
        //update this constraint!
        if (valueConstraint instanceof ReferenceConstraint) {
            indexConstraint.setFieldQueryConstraint(valueConstraint);
        } else {
            indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(), Arrays.asList(indexDataType.getId())));
        }
    }
}
Also used : IndexDataType(org.apache.stanbol.entityhub.yard.solr.model.IndexDataType) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)
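
As the instanceof check at the end shows, ReferenceConstraint is treated as a specialisation of ValueConstraint, and only a single dataType survives the translation to a Solr query. The short sketch below contrasts the two constraint types; the helper class, field parameters and the xsd:integer datatype URI are assumptions for illustration.

import java.util.Arrays;
import java.util.Collection;

import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint;

public class ValueConstraintSketch {

    /** Plain values restricted to a single datatype, as the Solr encoder expects. */
    public static void addIntegerConstraint(FieldQuery query, String field, Collection<Object> values) {
        query.setConstraint(field, new ValueConstraint(values,
                Arrays.asList("http://www.w3.org/2001/XMLSchema#integer")));
    }

    /** URI references: ReferenceConstraint is the ValueConstraint variant for references. */
    public static void addReferenceConstraint(FieldQuery query, String field, Collection<String> uris) {
        query.setConstraint(field, new ReferenceConstraint(uris));
    }
}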

Aggregations

ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) 14
FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) 9
TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) 7
ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) 6
ArrayList (java.util.ArrayList) 4
Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity) 4
HashSet (java.util.HashSet) 3
Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation) 3
Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint) 3
RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) 3
SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) 3
JSONObject (org.codehaus.jettison.json.JSONObject) 3
ReconcileValue (org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue) 2
DataTypeEnum (org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum) 2
Text (org.apache.stanbol.entityhub.servicesapi.model.Text) 2
FieldQueryFactory (org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) 2
JSONArray (org.codehaus.jettison.json.JSONArray) 2
Test (org.junit.Test) 2
Collection (java.util.Collection) 1
LinkedHashSet (java.util.LinkedHashSet) 1