use of org.dspace.content.MetadataSchema in project DSpace by DSpace.
the class ItemIndexFactoryImpl method addDiscoveryFields.
/**
 * Adds the Discovery (Solr) fields derived from an item's metadata to the given document.
 * <p>
 * The work happens in two independently guarded phases; a failure in either phase is logged
 * and does not propagate to the caller:
 * <ol>
 * <li>Metadata indexing: collects the search filters, sort fields, recent-submission sort fields,
 * hit-highlighting fields and more-like-this fields from the supplied configurations, then walks
 * every metadata value of the item and adds the matching Solr fields (plain, "_keyword", "_ac",
 * "_acid", "_filter", "_authority", "_sort"/"_dt", "_hl", "_mlt", "_stored" and language-suffixed
 * variants).</li>
 * <li>Grouping: adds the "publication_grp" field, based on dc.relation.ispartof when present and
 * on the item's own handle otherwise.</li>
 * </ol>
 *
 * @param doc                     the Solr document the fields are added to
 * @param context                 DSpace context object
 * @param item                    the item being indexed
 * @param discoveryConfigurations the discovery configurations to apply; a configuration id that
 *                                occurs more than once is only processed the first time
 * @throws SQLException if database error
 * @throws IOException  if IO error
 */
@Override
public void addDiscoveryFields(SolrInputDocument doc, Context context, Item item,
                               List<DiscoveryConfiguration> discoveryConfigurations)
    throws SQLException, IOException {
    // use the item service to retrieve the owning collection also for inprogress submission
    Collection collection = (Collection) itemService.getParentObject(context, item);
    // Keep a list of our sort values which we added, sort values can only be added once
    List<String> sortFieldsAdded = new ArrayList<>();
    // Search filter configurations keyed by the metadata field they apply to
    Map<String, List<DiscoverySearchFilter>> searchFilters = new HashMap<>();
    Set<String> hitHighlightingFields = new HashSet<>();
    try {
        // Sort field configurations keyed by metadata field
        Map<String, DiscoverySortFieldConfiguration> sortFields = new HashMap<>();
        // Recent submission configurations keyed by their metadata sort field
        Map<String, DiscoveryRecentSubmissionsConfiguration> recentSubmissionsConfigurationMap = new HashMap<>();
        Set<String> moreLikeThisFields = new HashSet<>();
        // some configuration are returned multiple times, skip them to save CPU cycles
        Set<String> appliedConf = new HashSet<>();
        // it is common to have search filter shared between multiple configurations
        Set<String> appliedDiscoverySearchFilter = new HashSet<>();
        for (DiscoveryConfiguration discoveryConfiguration : discoveryConfigurations) {
            // Set.add returns false when the id was already registered
            if (!appliedConf.add(discoveryConfiguration.getId())) {
                continue;
            }
            for (DiscoverySearchFilter discoverySearchFilter : discoveryConfiguration.getSearchFilters()) {
                if (!appliedDiscoverySearchFilter.add(discoverySearchFilter.getIndexFieldName())) {
                    continue;
                }
                List<MetadataValue> metadataValueList = new LinkedList<>();
                // Only facet filters can request that their min and max values are exposed
                boolean shouldExposeMinMax =
                    StringUtils.equalsIgnoreCase(discoverySearchFilter.getFilterType(), "facet")
                        && ((DiscoverySearchFilterFacet) discoverySearchFilter).exposeMinAndMaxValue();
                for (String metadataField : discoverySearchFilter.getMetadataFields()) {
                    if (shouldExposeMinMax) {
                        // metadataField has the form schema[.element[.qualifier]]
                        String[] splittedMetadataField = metadataField.split("\\.");
                        String schema = splittedMetadataField[0];
                        String element = splittedMetadataField.length > 1 ? splittedMetadataField[1] : null;
                        String qualifier = splittedMetadataField.length > 2 ? splittedMetadataField[2] : null;
                        metadataValueList.addAll(
                            itemService.getMetadata(item, schema, element, qualifier, Item.ANY));
                    }
                    // Register this filter under its metadata field, creating the list on first use
                    searchFilters.computeIfAbsent(metadataField, key -> new ArrayList<>())
                                 .add(discoverySearchFilter);
                }
                // Values without text cannot be compared; leaving them in would make the sort below
                // throw and abort the indexing of all metadata for this item
                metadataValueList.removeIf(mdv -> mdv.getValue() == null);
                if (shouldExposeMinMax && !metadataValueList.isEmpty()) {
                    metadataValueList.sort((mdv1, mdv2) -> mdv1.getValue().compareTo(mdv2.getValue()));
                    MetadataValue firstMetadataValue = metadataValueList.get(0);
                    MetadataValue lastMetadataValue = metadataValueList.get(metadataValueList.size() - 1);
                    doc.addField(discoverySearchFilter.getIndexFieldName() + "_min",
                                 firstMetadataValue.getValue());
                    doc.addField(discoverySearchFilter.getIndexFieldName() + "_min_sort",
                                 firstMetadataValue.getValue());
                    doc.addField(discoverySearchFilter.getIndexFieldName() + "_max",
                                 lastMetadataValue.getValue());
                    doc.addField(discoverySearchFilter.getIndexFieldName() + "_max_sort",
                                 lastMetadataValue.getValue());
                }
            }
            DiscoverySortConfiguration sortConfiguration = discoveryConfiguration.getSearchSortConfiguration();
            if (sortConfiguration != null) {
                for (DiscoverySortFieldConfiguration discoverySortConfiguration : sortConfiguration.getSortFields()) {
                    sortFields.put(discoverySortConfiguration.getMetadataField(), discoverySortConfiguration);
                }
            }
            DiscoveryRecentSubmissionsConfiguration recentSubmissionConfiguration =
                discoveryConfiguration.getRecentSubmissionConfiguration();
            if (recentSubmissionConfiguration != null) {
                recentSubmissionsConfigurationMap.put(recentSubmissionConfiguration.getMetadataSortField(),
                                                      recentSubmissionConfiguration);
            }
            DiscoveryHitHighlightingConfiguration hitHighlightingConfiguration =
                discoveryConfiguration.getHitHighlightingConfiguration();
            if (hitHighlightingConfiguration != null) {
                List<DiscoveryHitHighlightFieldConfiguration> fieldConfigurations =
                    hitHighlightingConfiguration.getMetadataFields();
                for (DiscoveryHitHighlightFieldConfiguration fieldConfiguration : fieldConfigurations) {
                    hitHighlightingFields.add(fieldConfiguration.getField());
                }
            }
            DiscoveryMoreLikeThisConfiguration moreLikeThisConfiguration =
                discoveryConfiguration.getMoreLikeThisConfiguration();
            if (moreLikeThisConfiguration != null) {
                for (String metadataField : moreLikeThisConfiguration.getSimilarityMetadataFields()) {
                    moreLikeThisFields.add(metadataField);
                }
            }
        }
        // Metadata fields that additionally get a "_stored" projection field
        List<String> toProjectionFields = new ArrayList<>();
        String[] projectionFields = DSpaceServicesFactory.getInstance().getConfigurationService()
                                                         .getArrayProperty("discovery.index.projection");
        if (projectionFields != null) {
            for (String field : projectionFields) {
                toProjectionFields.add(field.trim());
            }
        }
        List<String> toIgnoreMetadataFields = SearchUtils.getIgnoredMetadataFields(item.getType());
        List<MetadataValue> mydc = itemService.getMetadata(item, Item.ANY, Item.ANY, Item.ANY, Item.ANY);
        for (MetadataValue meta : mydc) {
            MetadataField metadataField = meta.getMetadataField();
            MetadataSchema metadataSchema = metadataField.getMetadataSchema();
            String field = metadataSchema.getName() + "." + metadataField.getElement();
            String unqualifiedField = field;
            String value = meta.getValue();
            if (value == null) {
                continue;
            }
            if (metadataField.getQualifier() != null && !metadataField.getQualifier().trim().equals("")) {
                field += "." + metadataField.getQualifier();
            }
            // Skip the fields configured to be ignored (e.g. provenance), either by their exact name
            // or through a "schema.element.*" wildcard
            if (toIgnoreMetadataFields != null
                && (toIgnoreMetadataFields.contains(field)
                    || toIgnoreMetadataFields.contains(unqualifiedField + "." + Item.ANY))) {
                continue;
            }
            String authority = null;
            String preferedLabel = null;
            List<String> variants = null;
            boolean isAuthorityControlled = metadataAuthorityService.isAuthorityControlled(metadataField);
            int minConfidence = isAuthorityControlled
                ? metadataAuthorityService.getMinConfidence(metadataField) : Choices.CF_ACCEPTED;
            if (isAuthorityControlled && meta.getAuthority() != null && meta.getConfidence() >= minConfidence) {
                // Each "ignore" switch can be set per field and falls back to the global setting
                boolean ignoreAuthority = DSpaceServicesFactory.getInstance().getConfigurationService()
                    .getPropertyAsType("discovery.index.authority.ignore." + field,
                        DSpaceServicesFactory.getInstance().getConfigurationService()
                            .getPropertyAsType("discovery.index.authority.ignore", Boolean.FALSE),
                        true);
                if (!ignoreAuthority) {
                    authority = meta.getAuthority();
                    boolean ignorePrefered = DSpaceServicesFactory.getInstance().getConfigurationService()
                        .getPropertyAsType("discovery.index.authority.ignore-prefered." + field,
                            DSpaceServicesFactory.getInstance().getConfigurationService()
                                .getPropertyAsType("discovery.index.authority.ignore-prefered", Boolean.FALSE),
                            true);
                    if (!ignorePrefered) {
                        try {
                            preferedLabel = choiceAuthorityService.getLabel(meta, collection, meta.getLanguage());
                        } catch (Exception e) {
                            log.warn("Failed to get preferred label for " + field, e);
                        }
                    }
                    boolean ignoreVariants = DSpaceServicesFactory.getInstance().getConfigurationService()
                        .getPropertyAsType("discovery.index.authority.ignore-variants." + field,
                            DSpaceServicesFactory.getInstance().getConfigurationService()
                                .getPropertyAsType("discovery.index.authority.ignore-variants", Boolean.FALSE),
                            true);
                    if (!ignoreVariants) {
                        try {
                            variants = choiceAuthorityService.getVariants(meta, collection);
                        } catch (Exception e) {
                            log.warn("Failed to get variants for " + field, e);
                        }
                    }
                }
            }
            if ((searchFilters.get(field) != null || searchFilters.get(unqualifiedField + "." + Item.ANY) != null)) {
                // Prefer the filters registered for the exact field over the wildcard ones
                List<DiscoverySearchFilter> searchFilterConfigs = searchFilters.get(field);
                if (searchFilterConfigs == null) {
                    searchFilterConfigs = searchFilters.get(unqualifiedField + "." + Item.ANY);
                }
                for (DiscoverySearchFilter searchFilter : searchFilterConfigs) {
                    Date date = null;
                    String separator = DSpaceServicesFactory.getInstance().getConfigurationService()
                                                            .getProperty("discovery.solr.facets.split.char");
                    if (separator == null) {
                        separator = SearchUtils.FILTER_SEPARATOR;
                    }
                    if (searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)) {
                        // For our search filters that are dates we format them properly
                        date = MultiFormatDateParser.parse(value);
                        if (date != null) {
                            // TODO: make this date format configurable !
                            // NOTE: value is overwritten here, so everything below (including the
                            // remaining filters and the plain field) sees the normalized date
                            value = DateFormatUtils.formatUTC(date, "yyyy-MM-dd");
                        }
                    }
                    doc.addField(searchFilter.getIndexFieldName(), value);
                    doc.addField(searchFilter.getIndexFieldName() + "_keyword", value);
                    if (authority != null && preferedLabel == null) {
                        doc.addField(searchFilter.getIndexFieldName() + "_keyword",
                                     value + SearchUtils.AUTHORITY_SEPARATOR + authority);
                        doc.addField(searchFilter.getIndexFieldName() + "_authority", authority);
                        doc.addField(searchFilter.getIndexFieldName() + "_acid",
                                     value.toLowerCase() + separator + value
                                         + SearchUtils.AUTHORITY_SEPARATOR + authority);
                    }
                    if (preferedLabel != null) {
                        doc.addField(searchFilter.getIndexFieldName(), preferedLabel);
                        doc.addField(searchFilter.getIndexFieldName() + "_keyword", preferedLabel);
                        doc.addField(searchFilter.getIndexFieldName() + "_keyword",
                                     preferedLabel + SearchUtils.AUTHORITY_SEPARATOR + authority);
                        doc.addField(searchFilter.getIndexFieldName() + "_authority", authority);
                        doc.addField(searchFilter.getIndexFieldName() + "_acid",
                                     preferedLabel.toLowerCase() + separator + preferedLabel
                                         + SearchUtils.AUTHORITY_SEPARATOR + authority);
                    }
                    if (variants != null) {
                        for (String var : variants) {
                            doc.addField(searchFilter.getIndexFieldName() + "_keyword", var);
                            doc.addField(searchFilter.getIndexFieldName() + "_acid",
                                         var.toLowerCase() + separator + var
                                             + SearchUtils.AUTHORITY_SEPARATOR + authority);
                        }
                    }
                    // Add a dynamic fields for auto complete in search
                    doc.addField(searchFilter.getIndexFieldName() + "_ac",
                                 value.toLowerCase() + separator + value);
                    if (preferedLabel != null) {
                        doc.addField(searchFilter.getIndexFieldName() + "_ac",
                                     preferedLabel.toLowerCase() + separator + preferedLabel);
                    }
                    if (variants != null) {
                        for (String var : variants) {
                            doc.addField(searchFilter.getIndexFieldName() + "_ac",
                                         var.toLowerCase() + separator + var);
                        }
                    }
                    if (searchFilter.getFilterType().equals(DiscoverySearchFilterFacet.FILTER_TYPE_FACET)) {
                        if (searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_TEXT)) {
                            // Solr has issues with facet prefix and cases
                            if (authority != null) {
                                String facetValue = preferedLabel != null ? preferedLabel : value;
                                doc.addField(searchFilter.getIndexFieldName() + "_filter",
                                             facetValue.toLowerCase() + separator + facetValue
                                                 + SearchUtils.AUTHORITY_SEPARATOR + authority);
                            } else {
                                doc.addField(searchFilter.getIndexFieldName() + "_filter",
                                             value.toLowerCase() + separator + value);
                            }
                        } else if (searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)) {
                            if (date != null) {
                                String indexField = searchFilter.getIndexFieldName() + ".year";
                                String yearUTC = DateFormatUtils.formatUTC(date, "yyyy");
                                doc.addField(searchFilter.getIndexFieldName() + "_keyword", yearUTC);
                                // add the year to the autocomplete index
                                doc.addField(searchFilter.getIndexFieldName() + "_ac", yearUTC);
                                doc.addField(indexField, yearUTC);
                                if (yearUTC.startsWith("0")) {
                                    doc.addField(searchFilter.getIndexFieldName() + "_keyword",
                                                 yearUTC.replaceFirst("0*", ""));
                                    // add date without starting zeros for autocomplete and filtering
                                    doc.addField(searchFilter.getIndexFieldName() + "_ac",
                                                 yearUTC.replaceFirst("0*", ""));
                                    doc.addField(searchFilter.getIndexFieldName() + "_ac",
                                                 value.replaceFirst("0*", ""));
                                    doc.addField(searchFilter.getIndexFieldName() + "_keyword",
                                                 value.replaceFirst("0*", ""));
                                }
                                // & lower bound year of our facet
                                if (doc.getField(indexField + "_sort") == null) {
                                    // We can only add one year so take the first one
                                    doc.addField(indexField + "_sort", yearUTC);
                                }
                            }
                        } else if (searchFilter.getType()
                                               .equals(DiscoveryConfigurationParameters.TYPE_HIERARCHICAL)) {
                            HierarchicalSidebarFacetConfiguration hierarchicalSidebarFacetConfiguration =
                                (HierarchicalSidebarFacetConfiguration) searchFilter;
                            String[] subValues = value.split(hierarchicalSidebarFacetConfiguration.getSplitter());
                            if (hierarchicalSidebarFacetConfiguration.isSkipFirstNodeLevel()
                                && 1 < subValues.length) {
                                // Remove the first element of our array
                                subValues = (String[]) ArrayUtils.subarray(subValues, 1, subValues.length);
                            }
                            for (int i = 0; i < subValues.length; i++) {
                                // Rebuild the path from the root down to level i
                                StringBuilder valueBuilder = new StringBuilder();
                                for (int j = 0; j <= i; j++) {
                                    valueBuilder.append(subValues[j]);
                                    if (j < i) {
                                        valueBuilder.append(hierarchicalSidebarFacetConfiguration.getSplitter());
                                    }
                                }
                                String indexValue = valueBuilder.toString().trim();
                                doc.addField(searchFilter.getIndexFieldName() + "_tax_" + i + "_filter",
                                             indexValue.toLowerCase() + separator + indexValue);
                                // We add the field x times that it has occurred
                                for (int j = i; j < subValues.length; j++) {
                                    doc.addField(searchFilter.getIndexFieldName() + "_filter",
                                                 indexValue.toLowerCase() + separator + indexValue);
                                    doc.addField(searchFilter.getIndexFieldName() + "_keyword", indexValue);
                                }
                            }
                        }
                    }
                }
            }
            if ((sortFields.get(field) != null || recentSubmissionsConfigurationMap.get(field) != null)
                && !sortFieldsAdded.contains(field)) {
                // Only add sort value once
                String type;
                if (sortFields.get(field) != null) {
                    type = sortFields.get(field).getType();
                } else {
                    type = recentSubmissionsConfigurationMap.get(field).getType();
                }
                if (type.equals(DiscoveryConfigurationParameters.TYPE_DATE)) {
                    Date date = MultiFormatDateParser.parse(value);
                    if (date != null) {
                        String stringDate = SolrUtils.getDateFormatter().format(date);
                        doc.addField(field + "_dt", stringDate);
                    } else {
                        // Report the raw value that could not be parsed (the parsed date is null here)
                        log.warn("Error while indexing sort date field, item: " + item.getHandle()
                                     + " metadata field: " + field + " date value: " + value);
                    }
                } else {
                    doc.addField(field + "_sort", value);
                }
                sortFieldsAdded.add(field);
            }
            if (hitHighlightingFields.contains(field) || hitHighlightingFields.contains("*")
                || hitHighlightingFields.contains(unqualifiedField + "." + Item.ANY)) {
                if (authority != null) {
                    doc.addField(field + "_hl", value + SearchUtils.AUTHORITY_SEPARATOR + authority);
                } else {
                    doc.addField(field + "_hl", value);
                }
            }
            if (moreLikeThisFields.contains(field)
                || moreLikeThisFields.contains(unqualifiedField + "." + Item.ANY)) {
                doc.addField(field + "_mlt", value);
            }
            doc.addField(field, value);
            if (authority != null) {
                doc.addField(field + "_authority", authority);
            }
            if (toProjectionFields.contains(field)
                || toProjectionFields.contains(unqualifiedField + "." + Item.ANY)) {
                // Local, single-threaded buffer: no need for the synchronized StringBuffer
                StringBuilder variantsToStore = new StringBuilder();
                if (variants != null) {
                    for (String var : variants) {
                        variantsToStore.append(VARIANTS_STORE_SEPARATOR);
                        variantsToStore.append(var);
                    }
                }
                // Stored layout: value, preferred label, variants (without the leading separator,
                // or "null" when there are none), authority, language
                doc.addField(field + "_stored",
                             value + STORE_SEPARATOR + preferedLabel + STORE_SEPARATOR
                                 + (variantsToStore.length() > VARIANTS_STORE_SEPARATOR.length()
                                 ? variantsToStore.substring(VARIANTS_STORE_SEPARATOR.length()) : "null")
                                 + STORE_SEPARATOR + authority + STORE_SEPARATOR + meta.getLanguage());
            }
            if (meta.getLanguage() != null && !meta.getLanguage().trim().equals("")) {
                String langField = field + "." + meta.getLanguage();
                doc.addField(langField, value);
            }
        }
    } catch (Exception e) {
        // Best effort: a metadata indexing failure is logged and the grouping below still runs
        log.error(LogHelper.getHeader(context, "item_metadata_discovery_error",
                                      "Item identifier: " + item.getID()), e);
    }
    log.debug(" Added Metadata");
    try {
        List<MetadataValue> values = itemService.getMetadataByMetadataString(item, "dc.relation.ispartof");
        if (values != null && values.size() > 0 && values.get(0) != null && values.get(0).getValue() != null) {
            // group on parent
            String handlePrefix = handleService.getCanonicalPrefix();
            // replaceFirst takes a regular expression: quote the prefix so that characters such as
            // '.' in the prefix only match themselves
            doc.addField("publication_grp",
                         values.get(0).getValue().replaceFirst(java.util.regex.Pattern.quote(handlePrefix), ""));
        } else {
            // group on self
            doc.addField("publication_grp", item.getHandle());
        }
    } catch (Exception e) {
        log.error(LogHelper.getHeader(context, "item_publication_group_discovery_error",
                                      "Item identifier: " + item.getID()), e);
    }
    log.debug(" Added Grouping");
}
use of org.dspace.content.MetadataSchema in project DSpace by DSpace.
the class MetadataFieldServiceTest method testMetadataFieldCaching.
/**
 * Moves dc.subject and dc.identifier.issn to the 'dspace' schema and checks that lookups by
 * element reflect the move, first in the current context and then again in a fresh context.
 */
@Test
public void testMetadataFieldCaching() throws Exception {
    MetadataField movedSubject = metadataFieldService.findByElement(context, "dc", "subject", null);
    MetadataField movedIssn = metadataFieldService.findByElement(context, "dc", "identifier", "issn");
    MetadataSchema targetSchema = metadataSchemaService.find(context, "dspace");

    movedSubject.setMetadataSchema(targetSchema);
    movedIssn.setMetadataSchema(targetSchema);

    // Lookups within the context that performed the move
    assertFieldsFoundOnlyUnderDspaceSchema(movedSubject, movedIssn);

    // Same tests, new context
    context.complete();
    context = new Context();
    assertFieldsFoundOnlyUnderDspaceSchema(movedSubject, movedIssn);
}

/**
 * Asserts, against the current {@code context}, that both fields are found under the 'dspace'
 * schema, that the found fields reference that schema, and that the former dc.* lookups return null.
 *
 * @param expectedSubject the field expected for dspace.subject
 * @param expectedIssn    the field expected for dspace.identifier.issn
 * @throws Exception passed through from the metadata field service
 */
private void assertFieldsFoundOnlyUnderDspaceSchema(MetadataField expectedSubject, MetadataField expectedIssn)
    throws Exception {
    // Searching for dspace.subject and dspace.identifier.issn should return the already stored metadatafields
    assertEquals(expectedSubject, metadataFieldService.findByElement(context, "dspace", "subject", null));
    assertEquals(expectedIssn, metadataFieldService.findByElement(context, "dspace", "identifier", "issn"));
    // The dspace.subject and dspace.identifier.issn metadatafields should now reference the 'dspace' metadataschema
    assertEquals("dspace",
                 metadataFieldService.findByElement(context, "dspace", "subject", null)
                                     .getMetadataSchema().getName());
    assertEquals("dspace",
                 metadataFieldService.findByElement(context, "dspace", "identifier", "issn")
                                     .getMetadataSchema().getName());
    // Metadatafields dc.subject and dc.identifier.issn should no longer be found
    assertNull(metadataFieldService.findByElement(context, "dc", "subject", null));
    assertNull(metadataFieldService.findByElement(context, "dc", "identifier", "issn"));
}
use of org.dspace.content.MetadataSchema in project DSpace by DSpace.
the class GoogleMetadata method buildFieldName.
/**
 * Builds the dotted name of the metadata field a value belongs to, in the form
 * {@code schema.element} or, when the field has a qualifier, {@code schema.element.qualifier}.
 *
 * @param v The metadata value whose field name is built.
 * @return The complete metadata field name.
 */
protected String buildFieldName(MetadataValue v) {
    MetadataField field = v.getMetadataField();
    MetadataSchema schema = v.getMetadataField().getMetadataSchema();
    String unqualifiedName = schema.getName() + "." + field.getElement();
    // The qualifier segment is only appended when a qualifier is set
    return field.getQualifier() == null
        ? unqualifiedName
        : unqualifiedName + "." + field.getQualifier();
}
use of org.dspace.content.MetadataSchema in project DSpace by DSpace.
the class MetadataImporter method loadType.
/**
 * Registers the metadata field described by one node of the metadata registry XML file.
 * The schema defaults to DC when the node does not name one. Nothing is created when the
 * field is already registered.
 *
 * @param context DSpace context object
 * @param node    the node in the DOM tree describing the field
 * @throws SQLException               if database error
 * @throws IOException                if IO error
 * @throws AuthorizeException         if authorization error
 * @throws NonUniqueMetadataException if duplicate metadata
 * @throws RegistryImportException    if the referenced schema is not registered
 * @throws XPathExpressionException   if the node data cannot be read
 */
private static void loadType(Context context, Node node) throws SQLException, IOException, AuthorizeException, NonUniqueMetadataException, RegistryImportException, XPathExpressionException {
    // Read the field description from the node
    String schemaName = RegistryImporter.getElementData(node, "schema");
    String elementName = RegistryImporter.getElementData(node, "element");
    String qualifierName = RegistryImporter.getElementData(node, "qualifier");
    String note = RegistryImporter.getElementData(node, "scope_note");

    // Fall back to DC when no schema is given
    if (schemaName == null) {
        schemaName = MetadataSchemaEnum.DC.getName();
    }

    // The schema itself must already be registered
    MetadataSchema targetSchema = metadataSchemaService.find(context, schemaName);
    if (targetSchema == null) {
        throw new RegistryImportException("Schema '" + schemaName + "' is not registered and does not exist.");
    }

    // Existing fields are left untouched
    if (metadataFieldService.findByElement(context, targetSchema, elementName, qualifierName) != null) {
        return;
    }

    // The field does not exist yet: log its dotted name and create it
    String dottedName = qualifierName == null
        ? schemaName + "." + elementName
        : schemaName + "." + elementName + "." + qualifierName;
    log.info("Registering metadata field " + dottedName);
    MetadataField created = metadataFieldService.create(context, targetSchema, elementName, qualifierName, note);
    metadataFieldService.update(context, created);
}
use of org.dspace.content.MetadataSchema in project DSpace by DSpace.
the class MetadataExporter method getSchemaName.
/**
 * Returns the name of the schema the given metadata field belongs to.
 * Names are looked up once through the schema service and then served from {@code schemaMap}.
 *
 * @param context DSpace Context
 * @param mdField DSpace metadata field
 * @return name of schema
 * @throws SQLException            if database error
 * @throws RegistryExportException if the schema of the field cannot be found
 */
private static String getSchemaName(Context context, MetadataField mdField) throws SQLException, RegistryExportException {
    // Serve from the cache when this schema was resolved before
    String cachedName = schemaMap.get(mdField.getMetadataSchema().getID());
    if (cachedName != null) {
        return cachedName;
    }

    // First request for this schema: look it up
    MetadataSchema resolvedSchema = metadataSchemaService.find(context, mdField.getMetadataSchema().getID());
    if (resolvedSchema == null) {
        // Can't find the schema
        throw new RegistryExportException("Can't get schema name for field");
    }

    // Remember the name for later calls
    String resolvedName = resolvedSchema.getName();
    schemaMap.put(resolvedSchema.getID(), resolvedName);
    return resolvedName;
}
Aggregations