Use of org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint in the Apache Stanbol project.
Class SparqlQueryUtils, method main.
/**
 * Manual smoke test: builds a small field query (one text constraint, one
 * similarity constraint, limit and offset of 5) and prints the select query
 * created for each endpoint type followed by the construct query created
 * for the Virtuoso endpoint type.
 *
 * @param args not used
 */
public static void main(String[] args) {
    SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    // --- disabled sample constraints (kept for manual experiments) ---
    // query.setConstraint("urn:field1", new
    // ReferenceConstraint("urn:testReference"));
    // query.setConstraint("urn:field1", new ReferenceConstraint(
    // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3"),MODE.any));
    // query.setConstraint(SpecialFieldEnum.references.getUri(), new
    // ReferenceConstraint(
    // Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3")));
    // query.setConstraint("urn:field1a", new ValueConstraint(null,
    // Arrays.asList(
    // DataTypeEnum.Float.getUri())));
    // query.addSelectedField("urn:field1a");
    // query.setConstraint("urn:field1b", new ValueConstraint(9, Arrays.asList(
    // DataTypeEnum.Float.getUri())));
    // query.setConstraint("urn:field1b", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    // DataTypeEnum.Float.getUri()),MODE.any));
    // query.setConstraint("urn:field1c", new ValueConstraint(null, Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.addSelectedField("urn:field1c");
    // query.setConstraint("urn:field1d", new ValueConstraint(9, Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field1d", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    // DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field2", new TextConstraint("test value"));
    // query.setConstraint("urn:field3", new TextConstraint(Arrays.asList(
    // "text value","anothertest","some more values"),true));
    // query.setConstraint(SpecialFieldEnum.fullText.getUri(), new TextConstraint(Arrays.asList(
    // "text value","anothertest","some more values"),true));
    // query.setConstraint("urn:field2a", new TextConstraint(":-]"));
    // //tests escaping of REGEX
    query.setConstraint("urn:field3", new TextConstraint("\"quote", PatternType.none, true, "en", null));
    //query.setConstraint("urn:field4", new TextConstraint("multi language text", "en", "de", null));
    // query.setConstraint("urn:field5", new
    // TextConstraint("wildcar*",PatternType.wildcard,false,"en","de"));
    // query.addSelectedField("urn:field5");
    // query.setConstraint("urn:field6", new TextConstraint("^regex",PatternType.REGEX,true));
    // query.setConstraint("urn:field7", new
    // TextConstraint("par*",PatternType.WildCard,false,"en","de",null));
    // query.setConstraint("urn:field8", new TextConstraint(null,"en","de",null));
    // query.setConstraint("urn:field9", new RangeConstraint((int)5, (int)10, true));
    // query.setConstraint("urn:field10", new RangeConstraint((int)5, (int)10, false));
    // query.setConstraint("urn:field11", new RangeConstraint(null, (int)10, true));
    // query.setConstraint("urn:field12", new RangeConstraint((int)5, null, true));
    //query.setConstraint("urn:field12", new RangeConstraint(new Date(), null, true));
    query.setConstraint("urn:similarity", new SimilarityConstraint(Collections.singleton("This is a test"), DataTypeEnum.Text));
    // query.addSelectedField("urn:field2a");
    // query.addSelectedField("urn:field3");
    query.setLimit(5);
    query.setOffset(5);

    // one select query per endpoint type, each followed by an empty line
    SparqlEndpointTypeEnum[] selectEndpoints = {
        SparqlEndpointTypeEnum.LARQ,
        SparqlEndpointTypeEnum.Virtuoso,
        SparqlEndpointTypeEnum.Standard
    };
    for (SparqlEndpointTypeEnum endpointType : selectEndpoints) {
        System.out.println(createSparqlSelectQuery(query, true, 0, endpointType));
        System.out.println();
    }
    // finally the construct variant
    System.out.println(createSparqlConstructQuery(query, 0, SparqlEndpointTypeEnum.Virtuoso));
}
Use of org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint in the Apache Stanbol project.
Class FieldQueryReader, method parseSimilarityConstraint.
/**
 * Parses a {@link SimilarityConstraint} from its JSON representation.
 * <p>
 * The mandatory <code>"context"</code> property provides the context of the
 * constraint. The optional <code>"addFields"</code> array lists additional
 * fields; each entry is converted by using the parsed
 * {@link NamespacePrefixService} and entries for which this conversion
 * results in <code>null</code> or an empty string are skipped.
 *
 * @param jConstraint the JSON object holding the constraint
 * @param nsPrefixService the service used to convert the values of
 * <code>"addFields"</code> to full names
 * @return the parsed constraint
 * @throws JSONException on any error while reading the JSON object
 * @throws IllegalArgumentException if no (or an empty) <code>"context"</code>
 * is defined
 */
private static Constraint parseSimilarityConstraint(JSONObject jConstraint, NamespacePrefixService nsPrefixService) throws JSONException {
    String context = jConstraint.optString("context");
    // optString(..) reports a missing key as an empty string (not as null),
    // so the empty check is the one that actually detects a missing context
    if (context == null || context.isEmpty()) {
        throw new IllegalArgumentException("SimilarityConstraints MUST define a \"context\": \n " + jConstraint.toString(4));
    }
    JSONArray addFields = jConstraint.optJSONArray("addFields");
    final List<String> fields;
    if (addFields != null && addFields.length() > 0) {
        fields = new ArrayList<String>(addFields.length());
        for (int i = 0; i < addFields.length(); i++) {
            String field = addFields.optString(i);
            field = field != null ? nsPrefixService.getFullName(field) : null;
            // skip entries that can not be converted to a field name
            if (field != null && !field.isEmpty()) {
                fields.add(field);
            }
        }
    } else {
        // no additional fields defined
        fields = null;
    }
    return new SimilarityConstraint(context, fields);
}
Use of org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint in the Apache Stanbol project.
Class BaseGoogleRefineReconcileResource, method addPropertyConstraints.
/**
 * Converts the properties of the parsed reconcile query to constraints and
 * sets them on the parsed field query.
 * <p>
 * For a normal property the values of that property are used to create
 * (in that order of preference) a {@link ReferenceConstraint} if any value
 * defines an id, a {@link TextConstraint} if any value is a String or a
 * {@link ValueConstraint} for all other values. The special properties
 * <code>references</code>, <code>fulltext</code> and <code>similarity</code>
 * are collected over all properties and added as a single constraint each
 * after all properties have been processed. Other special properties are
 * ignored with a warning.
 * <p>
 * Values are collected separately for every property, so the values of one
 * property never end up in the constraint created for another one.
 *
 * @param rQuery the reconcile query providing the properties
 * @param query the field query the constraints are set on
 */
private void addPropertyConstraints(ReconcileQuery rQuery, FieldQuery query) {
    //hold all references for @references special property
    HashSet<String> references = new HashSet<String>();
    //holds all texts for @fullText special property
    List<String> fullText = new ArrayList<String>();
    //holds the context for the @similarity special property
    Collection<String> similarityContext = new ArrayList<String>();
    //the fields used for the @similarity special property (insertion order
    //matters: the first one becomes the field of the constraint)
    Collection<String> similarityFields = new LinkedHashSet<String>();
    for (Entry<ReconcileProperty, Collection<ReconcileValue>> propertyEntry : rQuery.getProperties()) {
        ReconcileProperty property = propertyEntry.getKey();
        // fresh holders for every property, so that nothing collected for a
        // previous property can leak into the constraints of this one
        Collection<String> ids = new HashSet<String>();
        // keep order for texts
        List<String> texts = new ArrayList<String>();
        Collection<Object> values = new HashSet<Object>();
        // collect the values of this property
        for (ReconcileValue value : propertyEntry.getValue()) {
            if (value.getId() != null) {
                ids.add(value.getId());
            }
            if (value.getValue() instanceof String) {
                texts.add((String) value.getValue());
            } else {
                values.add(value.getValue());
            }
        }
        //handle supported special properties
        if (property.isSpecial()) {
            if (property.getName().equalsIgnoreCase("references")) {
                //if Users do parse parameters - so we need to collect all values
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @references -> ignore '{}'", property.getParameter());
                }
                if (ids.isEmpty()) {
                    log.warn("No URI values present for parsed @references property! (values: " + propertyEntry.getValue());
                }
                references.addAll(ids);
            } else if (property.getName().equalsIgnoreCase("fulltext")) {
                //if Users do parse parameters - so we need to collect all values
                if (property.getParameter() != null) {
                    log.warn("parameters are not supported for @fullText -> ignore '{}'", property.getParameter());
                }
                fullText.addAll(texts);
            } else if (property.getName().equalsIgnoreCase("similarity")) {
                String propUri = property.getParameter() != null ? nsPrefixService.getFullName(property.getParameter()) : SpecialFieldEnum.fullText.getUri();
                if (propUri != null) {
                    similarityFields.add(propUri);
                } else {
                    //TODO: maybe throw an Exception instead
                    log.warn("Unknown prefix '{}' used by Google Refine query parameter of property '{}'! " + "Will use the full text field as fallback", NamespaceMappingUtils.getPrefix(property.getParameter()), property);
                    similarityFields.add(SpecialFieldEnum.fullText.getUri());
                }
                similarityContext.addAll(texts);
            } else {
                //TODO: implement LDPATH support
                log.warn("ignore unsupported special property {}", property);
            }
        } else {
            // * non Reference | Text | Datatype values are ignored
            if (!ids.isEmpty()) {
                // only references -> create reference constraint
                query.setConstraint(property.getName(), new ReferenceConstraint(ids));
                if (ids.size() != propertyEntry.getValue().size()) {
                    log.info("Only some of the parsed values of the field {} contain " + "references -> will ignore values with missing references", property.getName());
                }
            } else if (!texts.isEmpty()) {
                // NOTE: This will use OR over all texts. To enforce AND one
                // would need to parse a single string with all values e.g. by
                // using StringUtils.join(texts," ")
                query.setConstraint(property.getName(), new TextConstraint(texts));
                // ids is empty in this branch, so the texts need to be
                // compared with the number of parsed values
                if (texts.size() != propertyEntry.getValue().size()) {
                    log.info("Only some of the parsed values of the field {} are " + "of type String -> will ignore non-string values", property.getName());
                }
            } else if (!values.isEmpty()) {
                query.setConstraint(property.getName(), new ValueConstraint(values));
            }
            //else no values ... ignore property
        }
    }
    //now add constraints for the collected special properties
    if (!references.isEmpty()) {
        //add references constraint
        ReferenceConstraint refConstraint = new ReferenceConstraint(references, MODE.all);
        query.setConstraint(SpecialFieldEnum.references.getUri(), refConstraint);
    }
    if (!fullText.isEmpty()) {
        //add full text constraint
        TextConstraint textConstraint = new TextConstraint(fullText);
        query.setConstraint(SpecialFieldEnum.fullText.getUri(), textConstraint);
    }
    if (!similarityContext.isEmpty()) {
        //add similarity constraint: a non-empty context implies that at
        //least one @similarity property (and therefore field) was processed
        Iterator<String> fieldIt = similarityFields.iterator();
        String field = fieldIt.next();
        SimilarityConstraint simConstraint;
        if (fieldIt.hasNext()) {
            //all but the first field are parsed as additional fields
            List<String> addFields = new ArrayList<String>(similarityFields.size() - 1);
            while (fieldIt.hasNext()) {
                addFields.add(fieldIt.next());
            }
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text, addFields);
        } else {
            simConstraint = new SimilarityConstraint(similarityContext, DataTypeEnum.Text);
        }
        query.setConstraint(field, simConstraint);
    }
}
Use of org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint in the Apache Stanbol project.
Class SolrQueryFactory, method parseFieldQuery.
/**
 * Converts the field query to a SolrQuery. In addition changes the parsed
 * FieldQuery (e.g. removing unsupported features, setting defaults for
 * missing parameters).
 * <p>
 * Only the first similarity constraint of the query is used. It switches the
 * request handler to {@link #MLT_QUERY_TYPE}; all other constraints are then
 * sent as filter query instead of the main query. Further similarity
 * constraints as well as constraints for which no valid index constraint can
 * be created are logged and removed from the parsed field query.
 *
 * @param fieldQuery the field query (will be modified to reflect the query
 * as executed)
 * @param select the SELECT mode
 * @return the SolrQuery
 */
public SolrQuery parseFieldQuery(FieldQuery fieldQuery, SELECT select) {
    SolrQuery query = initSolrQuery(fieldQuery);
    setSelected(query, fieldQuery, select);
    StringBuilder queryString = new StringBuilder();
    Map<String, Constraint> processedFieldConstraints = new HashMap<String, Constraint>();
    boolean firstConstraint = true;
    boolean similarityConstraintPresent = false;
    for (Entry<String, Constraint> fieldConstraint : fieldQuery) {
        if (fieldConstraint.getValue().getType() == ConstraintType.similarity) {
            // TODO: log make the FieldQuery ensure that there is no more than one instead of similarity
            // constraint per query
            List<String> fields = new ArrayList<String>();
            fields.add(fieldConstraint.getKey());
            SimilarityConstraint simConstraint = (SimilarityConstraint) fieldConstraint.getValue();
            final IndexValue contextValue = indexValueFactory.createIndexValue(simConstraint.getContext());
            fields.addAll(simConstraint.getAdditionalFields());
            if (!similarityConstraintPresent) {
                //similarity constraint present
                similarityConstraintPresent = true;
                //add the constraint to the query
                query.setRequestHandler(MLT_QUERY_TYPE);
                query.set(MATCH_INCLUDE, false);
                query.set(MIN_DOC_FREQ, 1);
                query.set(MIN_TERM_FREQ, 1);
                query.set(INTERESTING_TERMS, "details");
                //testing
                query.set("mlt.boost", true);
                //the index type depends only on the constraint, not on the field
                IndexDataTypeEnum mapedIndexTypeEnum = IndexDataTypeEnum.forDataTyoe(simConstraint.getContextType());
                List<String> indexFields = new ArrayList<String>();
                for (String field : fields) {
                    //we need to get the actual fields in the index for the
                    //logical fields parsed with the constraint
                    IndexField indexField = new IndexField(Collections.singletonList(field), mapedIndexTypeEnum == null ? null : mapedIndexTypeEnum.getIndexType(), simConstraint.getLanguages());
                    indexFields.addAll(fieldMapper.getQueryFieldNames(indexField));
                }
                //size the array by the list that is actually converted: an
                //array sized by the logical fields would contain trailing
                //null values if fewer index fields are present
                query.set(SIMILARITY_FIELDS, indexFields.toArray(new String[indexFields.size()]));
                query.set(STREAM_BODY, contextValue.getValue());
                processedFieldConstraints.put(fieldConstraint.getKey(), fieldConstraint.getValue());
            } else {
                //similarity constraint already present -> ignore further
                //NOTE: users are informed about that by NOT including further
                // similarity constraints in the query included in the
                // response
                log.warn("The parsed FieldQuery contains multiple Similarity constraints." + "However only a single one can be supported per query. Because of " + "this all further Similarity constraints will be ignored!");
                log.warn("Ignore SimilarityConstraint:");
                log.warn(" > Field : {}", fieldConstraint.getKey());
                log.warn(" > Context : {}", simConstraint.getContext());
                log.warn(" > Add Fields : {}", simConstraint.getAdditionalFields());
            }
        } else {
            IndexConstraint indexConstraint = createIndexConstraint(fieldConstraint);
            if (indexConstraint.isInvalid()) {
                //invalid constraints are neither encoded nor added to the
                //processed constraints
                log.warn("Unable to create IndexConstraint for Constraint {} (type: {}) and Field {} (Reosens: {})", new Object[] { fieldConstraint.getValue(), fieldConstraint.getValue().getType(), fieldConstraint.getKey(), indexConstraint.getInvalidMessages() });
            } else {
                //every encoded constraint is wrapped in parentheses and
                //combined with the others by AND
                if (firstConstraint) {
                    queryString.append('(');
                    firstConstraint = false;
                } else {
                    queryString.append(") AND (");
                }
                indexConstraint.encode(queryString);
                //set the constraint (may be changed because of some unsupported features)
                processedFieldConstraints.put(fieldConstraint.getKey(), //if null
                indexConstraint.getFieldQueryConstraint() == null ? //assume no change and add the parsed one
                fieldConstraint.getValue() : //add the changed version
                indexConstraint.getFieldQueryConstraint());
            }
        }
    }
    if (!firstConstraint) {
        //close the parentheses of the last encoded constraint
        queryString.append(')');
    }
    //set the constraints as processed to the parsed query
    fieldQuery.removeAllConstraints();
    for (Entry<String, Constraint> constraint : processedFieldConstraints.entrySet()) {
        fieldQuery.setConstraint(constraint.getKey(), constraint.getValue());
    }
    if (queryString.length() > 0) {
        String qs = queryString.toString();
        log.debug("QueryString: {}", qs);
        if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
            //with a similarity constraint the other constraints act as filter
            query.set(CommonParams.FQ, qs);
        } else {
            query.setQuery(qs);
        }
    }
    log.debug("Solr Query: {}", query);
    return query;
}
Use of org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint in the Apache Stanbol project.
Class FieldQueryToJsonUtils, method convertConstraintToJSON.
/**
 * Creates the JSON representation of a single {@link Constraint}.
 * <p>
 * The created object always holds the <code>"type"</code> of the constraint;
 * all further properties depend on the constraint type. Constraints of an
 * unknown type are logged and represented by their type only.
 *
 * @param constraint the {@link Constraint} to convert
 * @param nsPrefixService Optionally the service used to write data type
 * URIs as '{prefix}:{localname}'. If <code>null</code> the data type URIs
 * are written as is.
 * @return the JSON representation
 * @throws JSONException on any error while writing the JSON object
 */
private static JSONObject convertConstraintToJSON(Constraint constraint, NamespacePrefixService nsPrefixService) throws JSONException {
    JSONObject json = new JSONObject();
    json.put("type", constraint.getType().name());
    switch (constraint.getType()) {
        case value: {
            //used for both ValueConstraint and ReferenceConstraint
            ValueConstraint vConstraint = (ValueConstraint) constraint;
            if (vConstraint.getValues() != null) {
                //a single value is written directly, multiple ones as array
                if (vConstraint.getValues().size() != 1) {
                    json.put("value", new JSONArray(vConstraint.getValues()));
                } else {
                    json.put("value", vConstraint.getValues().iterator().next());
                }
            }
            if (!(constraint instanceof ReferenceConstraint)) {
                //plain value constraint
                json.put("type", constraint.getType().name());
                //value constraints additionally need their data type(s)
                Collection<String> typeUris = vConstraint.getDataTypes();
                if (typeUris != null && !typeUris.isEmpty()) {
                    if (typeUris.size() != 1) {
                        ArrayList<String> typeNames = new ArrayList<String>(typeUris.size());
                        for (String typeUri : typeUris) {
                            if (nsPrefixService != null) {
                                typeNames.add(nsPrefixService.getShortName(typeUri));
                            } else {
                                typeNames.add(typeUri);
                            }
                        }
                        json.put("datatype", typeNames);
                    } else {
                        String typeUri = typeUris.iterator().next();
                        json.put("datatype", nsPrefixService != null ? nsPrefixService.getShortName(typeUri) : typeUri);
                    }
                }
            } else {
                //"reference" is no member of the ConstraintType enum as a
                //ReferenceConstraint is internally only a ValueConstraint
                //with a fixed data type. For the JSON "type" property it is
                //however a valid value.
                json.put("type", "reference");
            }
            //the MODE is written last
            if (vConstraint.getMode() != null) {
                json.put("mode", vConstraint.getMode());
            }
            break;
        }
        case text: {
            TextConstraint tConstraint = (TextConstraint) constraint;
            Collection<String> langs = tConstraint.getLanguages();
            if (langs != null && !langs.isEmpty()) {
                //a single language is written as string, multiple as array
                if (langs.size() != 1) {
                    json.put("language", new JSONArray(langs));
                } else {
                    json.put("language", langs.iterator().next());
                }
            }
            json.put("patternType", tConstraint.getPatternType().name());
            if (tConstraint.getTexts() != null && !tConstraint.getTexts().isEmpty()) {
                if (tConstraint.getTexts().size() != 1) {
                    //multiple texts -> array
                    json.put("text", tConstraint.getTexts());
                } else {
                    //single text -> string
                    json.put("text", tConstraint.getTexts().get(0));
                }
            }
            //case sensitivity is only written if enabled
            if (tConstraint.isCaseSensitive()) {
                json.put("caseSensitive", true);
            }
            //the proximity ranking state is only written if defined
            if (tConstraint.isProximityRanking() != null) {
                json.put("proximityRanking", tConstraint.isProximityRanking());
            }
            break;
        }
        case range: {
            RangeConstraint rConstraint = (RangeConstraint) constraint;
            //the primary data types of the classes of the defined bounds
            Set<DataTypeEnum> boundTypes = EnumSet.noneOf(DataTypeEnum.class);
            if (rConstraint.getLowerBound() != null) {
                json.put("lowerBound", rConstraint.getLowerBound());
                boundTypes.addAll(DataTypeEnum.getPrimaryDataTypes(rConstraint.getLowerBound().getClass()));
            }
            if (rConstraint.getUpperBound() != null) {
                json.put("upperBound", rConstraint.getUpperBound());
                boundTypes.addAll(DataTypeEnum.getPrimaryDataTypes(rConstraint.getUpperBound().getClass()));
            }
            json.put("inclusive", rConstraint.isInclusive());
            //only the first of the collected data types is written
            if (!boundTypes.isEmpty()) {
                json.put("datatype", boundTypes.iterator().next().getShortName());
            }
            break;
        }
        case similarity: {
            SimilarityConstraint sConstraint = (SimilarityConstraint) constraint;
            json.put("context", sConstraint.getContext());
            //additional fields are only written if present
            if (!sConstraint.getAdditionalFields().isEmpty()) {
                json.put("addFields", new JSONArray(sConstraint.getAdditionalFields()));
            }
            break;
        }
        default: {
            //unknown constraint type: only the "type" set above is written
            log.warn("Unsupported Constriant Type " + constraint.getType() + " (implementing class=" + constraint.getClass() + "| toString=" + constraint + ") -> skiped");
            break;
        }
    }
    return json;
}
Aggregations