Use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in the Apache Stanbol project.
Class BaseGoogleRefineReconcileResource, method addTypeConstraint.
/**
 * Adds a {@link ReferenceConstraint} on the {@link #TYPE_FIELD} to the
 * field query. The constrained types are the union of the types of the
 * reconcile query and the values of any additional column that was also
 * mapped to the type field (that property is consumed from the query).
 * If no types are present at all, no constraint is added.
 *
 * @param rQuery the reconcile query providing the types; its TYPE_FIELD
 *        property is removed as a side effect
 * @param query the field query the type constraint is added to
 */
private void addTypeConstraint(ReconcileQuery rQuery, FieldQuery query) {
    //maybe an other column was also mapped to the TYPE_FIELD property
    Collection<ReconcileValue> additionalTypes = rQuery.removeProperty(TYPE_FIELD);
    Set<String> queryTypes = rQuery.getTypes();
    Set<String> types = null;
    if (additionalTypes == null) {
        if (queryTypes != null) {
            types = queryTypes;
        }
    } else {
        types = new HashSet<String>();
        if (queryTypes != null) {
            //BUGFIX: was types.add(rQuery.getQuery()), which added the query
            //string itself as a type; the intent is to merge the query types
            types.addAll(queryTypes);
        }
        for (ReconcileValue value : additionalTypes) {
            if (value != null) {
                if (value.getId() != null) {
                    types.add(value.getId());
                } else if (value.getValue() instanceof String) {
                    //TODO: check if the assumption that String values are
                    //good for types is valid
                    types.add((String) value.getValue());
                }
            }
            //else null -> ignore
        }
    }
    //BUGFIX: guard against NPE - types stays null when neither query types
    //nor additional type values are present
    if (types != null && !types.isEmpty()) {
        query.setConstraint(TYPE_FIELD, new ReferenceConstraint(types));
    }
}
Use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in the Apache Stanbol project.
Class BaseGoogleRefineReconcileResource, method addPropertyConstraints.
/**
 * Converts the (non-type) properties of the parsed reconcile query to
 * constraints on the field query. Normal properties become Reference-,
 * Text- or ValueConstraints (references preferred over texts over other
 * values). The special properties {@code @references}, {@code @fulltext}
 * and {@code @similarity} are collected while iterating and added as
 * constraints after all properties were processed.
 *
 * @param rQuery the reconcile query holding the parsed properties
 * @param query the field query the constraints are added to
 */
private void addPropertyConstraints(ReconcileQuery rQuery, FieldQuery query) {
    Collection<String> ids = new HashSet<String>();
    // keep order for texts
    List<String> texts = new ArrayList<String>();
    Collection<Object> values = new HashSet<Object>();
    //hold all references for @references special property
    HashSet<String> references = new HashSet<String>();
    //holds all texts for @fullText special property
    List<String> fullText = null;
    //holds the context for the @similarity special property
    Collection<String> similarityContext = null;
    //the field used for the @similarity special property
    HashSet<String> similarityFields = new LinkedHashSet<String>();
    for (Entry<ReconcileProperty, Collection<ReconcileValue>> propertyEntry : rQuery.getProperties()) {
        ReconcileProperty property = propertyEntry.getKey();
        // collect the values of the current property into the reused buffers
        for (ReconcileValue value : propertyEntry.getValue()) {
            if (value.getId() != null) {
                ids.add(value.getId());
            }
            if (value.getValue() instanceof String) {
                texts.add((String) value.getValue());
            } else {
                values.add(value.getValue());
            }
        }
        //handle supported special properties
        if (property.isSpecial()) {
            if (property.getName().equalsIgnoreCase("references")) {
                //Users do parse parameters - so we need to collect all values
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @references -> ignore '{}'", property.getParameter());
                }
                if (ids.isEmpty()) {
                    //BUGFIX: parameterized message with balanced parenthesis
                    log.warn("No URI values present for parsed @references property! (values: {})", propertyEntry.getValue());
                }
                references.addAll(ids);
            } else if (property.getName().equalsIgnoreCase("fulltext")) {
                //Users do parse parameters - so we need to collect all values
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @fullText -> ignore '{}'", property.getParameter());
                }
                //BUGFIX: copy instead of alias - 'texts' is reused (and now
                //cleared) for the following properties
                fullText = new ArrayList<String>(texts);
            } else if (property.getName().equalsIgnoreCase("similarity")) {
                String propUri = property.getParameter() != null ? nsPrefixService.getFullName(property.getParameter()) : SpecialFieldEnum.fullText.getUri();
                if (propUri != null) {
                    similarityFields.add(propUri);
                } else {
                    //TODO: maybe throw an Exception instead
                    log.warn("Unknown prefix '{}' used by Google Refine query parameter of property '{}'! " + "Will use the full text field as fallback", NamespaceMappingUtils.getPrefix(property.getParameter()), property);
                    similarityFields.add(SpecialFieldEnum.fullText.getUri());
                }
                //BUGFIX: copy instead of alias (see @fulltext above)
                similarityContext = new ArrayList<String>(texts);
            } else {
                //TODO: implement LDPATH support
                log.warn("ignore unsupported special property {}", property);
            }
        } else {
            // * non Reference | Text | Datatype values are ignored
            if (!ids.isEmpty()) {
                // only references -> create reference constraint
                query.setConstraint(property.getName(), new ReferenceConstraint(ids));
                if (ids.size() != propertyEntry.getValue().size()) {
                    //BUGFIX: missing space and missing argument for the {} placeholder
                    log.info("Only some of the parsed values of the field {} contain " + "references -> will ignore values with missing references", property.getName());
                }
            } else if (!texts.isEmpty()) {
                // NOTE: This will use OR over all texts. To enforce AND one
                // would need to parse a single string with all values e.g. by
                // using StringUtils.join(texts," ")
                query.setConstraint(property.getName(), new TextConstraint(texts));
                if (ids.size() != propertyEntry.getValue().size()) {
                    //BUGFIX: missing space and missing argument for the {} placeholder
                    log.info("Only some of the parsed values of the field {} are " + "of type String -> will ignore non-string values", property.getName());
                }
            } else if (!values.isEmpty()) {
                query.setConstraint(property.getName(), new ValueConstraint(values));
            }
            //else no values ... ignore property
        }
        //clean up the reused buffers for the next property
        ids.clear();
        //BUGFIX: texts was never cleared, so text values of earlier
        //properties leaked into the constraints of later properties
        texts.clear();
        values.clear();
    }
    //now add constraints for the collected special properties
    if (!references.isEmpty()) {
        //add references constraint
        ReferenceConstraint refConstraint = new ReferenceConstraint(references, MODE.all);
        query.setConstraint(SpecialFieldEnum.references.getUri(), refConstraint);
    }
    if (fullText != null && !fullText.isEmpty()) {
        //add full text constraint
        TextConstraint textConstraint = new TextConstraint(fullText);
        query.setConstraint(SpecialFieldEnum.fullText.getUri(), textConstraint);
    }
    if (similarityContext != null && !similarityContext.isEmpty()) {
        //add similarity constraint: first field is the constrained field,
        //any further fields are passed as additional similarity fields
        Iterator<String> fieldIt = similarityFields.iterator();
        String field = fieldIt.next();
        SimilarityConstraint simConstraint;
        if (fieldIt.hasNext()) {
            List<String> addFields = new ArrayList<String>(similarityFields.size() - 1);
            while (fieldIt.hasNext()) {
                addFields.add(fieldIt.next());
            }
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text, addFields);
        } else {
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text);
        }
        query.setConstraint(field, simConstraint);
    }
}
Use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in the Apache Stanbol project.
Class NamedEntityTaggingEngine, method computeEntityRecommentations.
/**
 * Computes the Enhancements
 *
 * @param site
 *            The {@link Site} to query or <code>null</code> to use the {@link Entityhub}
 * @param namedEntity
 *            the named entity to look up suggestions for
 * @param subsumedAnnotations
 *            other text annotations for the same entity
 * @param language
 *            the language of the analysed text or <code>null</code> if not available.
 * @return the suggestions for the parsed {@link NamedEntity}
 * @throws EntityhubException
 *             On any Error while looking up Entities via the Entityhub
 */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    // First get the required properties for the parsed textAnnotation
    // ... and check the values
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    // replace spaces with plus to create an AND search for all words in the
    // name!
    Constraint labelConstraint;
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
            // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
            // else no type constraint
        } else {
            // ignore organisations (comment fixed: was copy-pasted "ignore people")
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
            // else no type constraint
        } else {
            // ignore places (comment fixed: was copy-pasted "ignore people")
            return Collections.emptyList();
        }
    }
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    if (null == site)
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    else
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    QueryResultList<Entity> results = // if site is NULL
    site == null ? entityhub.findEntities(query) : // use the Entityhub
    site.findEntities(// else the referenced site
    query);
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score (first result carries the max)
    for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext(); ) {
        Suggestion match = new Suggestion(guesses.next());
        Representation rep = match.getEntity().getRepresentation();
        // NOTE(review): score may be null if the result lacks a resultScore;
        // score.doubleValue() below would then NPE - confirm upstream guarantee
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            maxScore = score;
        }
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            // if the content language is unknown -> accept all labels;
            // otherwise accept labels without language and labels in the
            // same language as the content
            // (removed redundant 'language != null &&' - that branch is only
            // evaluated when language is non-null)
            if (language == null || label.getLanguage() == null || label.getLanguage().startsWith(language)) {
                double actMatch = levenshtein(casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() != null) {
            if (match.getLevenshtein() == 1.0) {
                if (maxExactScore == null) {
                    maxExactScore = score;
                }
                // normalise exact matches against the best exact score
                match.setScore(score.doubleValue() / maxExactScore.doubleValue());
            } else {
                // normalise partial matches against the best match and the
                // Levenshtein similarity with the label
                match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
            }
            matches.add(match);
        } else {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
        }
    }
    // now sort the results
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
Use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in the Apache Stanbol project.
Class CoreferenceFinder, method lookupEntity.
/**
 * Looks up an Entity on the configured {@link Site} (or the Entityhub when
 * no site is configured) matching the text and NER type of the parsed span.
 * The query is limited to a single result.
 *
 * @param ner the span holding the NER text and annotation
 * @param language the language used for the label constraint
 * @return the first matching Entity or <code>null</code> if none was found
 * @throws EngineException
 */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    Site site = getReferencedSite();
    // prefer the referenced site; fall back to the Entityhub
    FieldQueryFactory factory = site != null ? site.getQueryFactory() : entityHub.getQueryFactory();
    FieldQuery query = factory.createFieldQuery();
    // case-insensitive label match in the parsed language
    query.setConstraint(RDFS_LABEL.getUnicodeString(),
            new TextConstraint(ner.getSpan(), false, language, null));
    // constrain the rdf:type to the type of the NER annotation
    String nerTypeUri = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION)
            .value().getType().getUnicodeString();
    query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(nerTypeUri));
    query.setLimit(1);
    QueryResultList<Entity> results = site != null
            ? site.findEntities(query)
            : entityHub.findEntities(query);
    // limit is 1, so any non-empty result contains exactly one entry
    return results.isEmpty() ? null : results.iterator().next();
}
Use of org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint in the Apache Stanbol project.
Class SolrQueryFactory, method initValueConstraint.
/**
 * Initialises the parsed {@link IndexConstraint} based on the
 * {@link ValueConstraint} it wraps. Determines the (single) supported
 * index dataType, converts the constraint values to {@link IndexValue}s
 * and sets the FIELD/DATATYPE/LANGUAGE and EQ constraints accordingly.
 * A ValueConstraint without values is marked invalid, because Solr can
 * not query for "any value of a datatype".
 *
 * @param indexConstraint the index constraint to initialise; expected to
 *        wrap a {@link ValueConstraint} (or subclass such as
 *        {@link ReferenceConstraint})
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
if (valueConstraint.getValues() == null) {
//no values -> this kind of constraint can not be expressed in Solr
indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for " + "the parsed datatypes %s is present - can not be supported by a Solr query!", valueConstraint.getDataTypes()));
} else {
// first process the parsed dataTypes to get the supported types
List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
List<String> acceptedDataTypes = new ArrayList<String>();
if (valueConstraint.getDataTypes() != null) {
for (String dataType : valueConstraint.getDataTypes()) {
IndexDataTypeEnum indexDataTypeEnumEntry = IndexDataTypeEnum.forUri(dataType);
if (indexDataTypeEnumEntry != null) {
indexDataTypes.add(indexDataTypeEnumEntry.getIndexType());
acceptedDataTypes.add(dataType);
} else {
// TODO: Add possibility to add warnings to indexConstraints
log.warn("A Datatype parsed for a ValueConstraint is not " + "supported and will be ignored (dataTypeUri={})", dataType);
}
}
}
//we support only a single dataType ...
// ... therefore remove additional data types from the ValueConstraint
IndexDataType indexDataType = null;
if (!indexDataTypes.isEmpty()) {
indexDataType = indexDataTypes.get(0);
if (indexDataTypes.size() > 1) {
log.warn("Only a single DataType is supported for ValueConstraints!");
while (acceptedDataTypes.size() > 1) {
String ignored = acceptedDataTypes.remove(acceptedDataTypes.size() - 1);
log.warn(" > ignore parsed dataType {}", ignored);
}
}
}
//else empty we will initialise based on the first parsed value!
ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
//init the boost
addBoost(constraintValue, valueConstraint);
for (Object value : valueConstraint.getValues()) {
IndexValue indexValue;
if (indexDataType == null) {
// get the dataType based on the type of the value
try {
indexValue = indexValueFactory.createIndexValue(value);
} catch (NoConverterException e) {
// if not found use the toString() and string as type
log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
}
//initialise the IndexDataType for this query based on the first parsed value
indexDataType = indexValue.getType();
} else {
//NOTE(review): values are converted via toString() once a dataType is
//known - presumably the encoder handles the lexical form; confirm for
//non-string values such as numbers or dates
indexValue = new IndexValue(value.toString(), indexDataType);
}
//add the constraint
constraintValue.getValues().add(indexValue);
}
//indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.DATATYPE, indexDataType);
IndexField indexField;
if (IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType)) {
//NOTE: in case of TEXT we need also to add the language to create a valid
//query!
// * We take the language of the first parsed element
indexField = new IndexField(indexConstraint.getPath(), indexDataType, constraintValue.getValues().iterator().next().getLanguage());
} else {
indexField = new IndexField(indexConstraint.getPath(), indexDataType);
}
//set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
indexConstraint.setIndexFieldConstraints(indexField);
//set the VALUE
//TODO: We need to somehow pass the MODE so that the encoder knows how
// to encode the values
indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
//update this constraint!
//ReferenceConstraints are passed through unchanged; plain ValueConstraints
//are rebuilt with only the single accepted dataType
if (valueConstraint instanceof ReferenceConstraint) {
indexConstraint.setFieldQueryConstraint(valueConstraint);
} else {
indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(), Arrays.asList(indexDataType.getId())));
}
}
}
Aggregations