Example use of org.mycore.datamodel.metadata.MCRObjectID in the mycore project by MyCoRe-Org.
From class MCRSolrIndexer, method rebuildMetadataIndex.
/**
 * Rebuilds solr's metadata index for the given objects.
 * <p>
 * The content of each object is retrieved and collected. Every time {@code BULK_SIZE} identifiers have been
 * processed, and once more after the last identifier, the collected content is handed to an index handler
 * and submitted. An object whose content cannot be retrieved is logged and skipped; it does not prevent the
 * other objects collected so far from being submitted.
 * <p>
 * The elapsed time of the whole run is added to the statistic of the last index handler that was created.
 *
 * @param list
 *            list of identifiers of the objects to index
 * @param solrClient
 *            solr server to index
 */
public static void rebuildMetadataIndex(List<String> list, SolrClient solrClient) {
    LOGGER.info("Re-building Metadata Index");
    if (list.isEmpty()) {
        LOGGER.info("Sorry, no documents to index");
        return;
    }
    StopWatch swatch = new StopWatch();
    swatch.start();
    int totalCount = list.size();
    LOGGER.info("Sending {} objects to solr for reindexing", totalCount);
    MCRXMLMetadataManager metadataMgr = MCRXMLMetadataManager.instance();
    MCRSolrIndexStatistic statistic = null;
    HashMap<MCRObjectID, MCRContent> contentMap = new HashMap<>((int) (BULK_SIZE * 1.4));
    int i = 0;
    for (String id : list) {
        i++;
        // Step 1: collect the content. A failure here only affects this one object.
        try {
            LOGGER.debug("Preparing \"{}\" for indexing", id);
            MCRObjectID objId = MCRObjectID.getInstance(id);
            MCRContent content = metadataMgr.retrieveContent(objId);
            contentMap.put(objId, content);
        } catch (Exception ex) {
            LOGGER.error("Error creating index thread for object {}", id, ex);
        }
        // Step 2: flush at every batch boundary and after the last identifier. This is deliberately
        // outside the retrieval try-block so that a failing object at a boundary (in particular the
        // very last one) cannot cause the pending batch to be dropped.
        boolean batchDue = i % BULK_SIZE == 0 || totalCount == i;
        if (batchDue && !contentMap.isEmpty()) {
            try {
                MCRSolrIndexHandler indexHandler = MCRSolrIndexHandlerFactory.getInstance()
                    .getIndexHandler(contentMap);
                indexHandler.setCommitWithin(BATCH_AUTO_COMMIT_WITHIN_MS);
                indexHandler.setSolrServer(solrClient);
                statistic = indexHandler.getStatistic();
                submitIndexHandler(indexHandler);
                // Start a fresh map only after a successful submit; on failure the collected
                // content stays pending and is retried with the next batch.
                contentMap = new HashMap<>((int) (BULK_SIZE * 1.4));
            } catch (Exception ex) {
                LOGGER.error("Error creating index thread for object {}", id, ex);
            }
        }
    }
    long durationInMilliSeconds = swatch.getTime();
    if (statistic != null) {
        statistic.addTime(durationInMilliSeconds);
    }
}
Example use of org.mycore.datamodel.metadata.MCRObjectID in the mycore project by MyCoRe-Org.
From class MCRSolrFileIndexBaseAccumulator, method getDerivateModified.
/**
 * Returns the last-modified time of a derivate as an ISO 8601 formatted string.
 * <p>
 * The result is looked up in the {@code derivateModified} cache first; only when the cached value is
 * missing or no longer up to date is it recomputed from the modified handle and stored again.
 *
 * @param derivateID
 *            identifier of the derivate, in a form accepted by {@link MCRObjectID#getInstance(String)}
 * @return the ISO 8601 string of the derivate's last modification
 * @throws IOException
 *             thrown by {@link MCRCache.ModifiedHandle#getLastModified()}
 */
private static String getDerivateModified(final String derivateID) throws IOException {
    final MCRCache.ModifiedHandle handle = XML_MANAGER
        .getLastModifiedHandle(MCRObjectID.getInstance(derivateID), 30, TimeUnit.SECONDS);
    final String cached = derivateModified.getIfUpToDate(derivateID, handle);
    if (cached != null) {
        return cached;
    }
    // Cache miss or stale entry: rebuild the ISO string and remember it.
    final long lastModifiedMillis = handle.getLastModified();
    final MCRISO8601Date isoDate = new MCRISO8601Date();
    isoDate.setDate(new Date(lastModifiedMillis));
    final String refreshed = isoDate.getISOString();
    derivateModified.put(derivateID, refreshed);
    return refreshed;
}
Example use of org.mycore.datamodel.metadata.MCRObjectID in the mycore project by MyCoRe-Org.
From class MCRSolrFileIndexBaseAccumulator, method accumulate.
/**
 * Adds the basic file fields for {@code input} to the given solr document.
 * <p>
 * If {@code input} can be converted to an {@link MCRPath}, owner-related fields ({@code returnId},
 * {@code objectProject}, {@code derivateID}, {@code derivateModified}) and one {@code fileCategory}
 * value per linked category and per parent of a linked category are added. Otherwise a warning is
 * logged and only the generic file fields are written.
 *
 * @param doc
 *            document that receives the fields
 * @param input
 *            path of the file to describe
 * @param attr
 *            attributes of the file; size and last-modified time are read
 * @throws IOException
 *             if the derivate's modification time or the content type cannot be determined
 */
@Override
public void accumulate(SolrInputDocument doc, Path input, BasicFileAttributes attr) throws IOException {
    doc.setField("id", input.toUri().toString());
    final String absolutePath = '/' + input.subpath(0, input.getNameCount()).toString();
    try {
        // check if this is an MCRPath -> more metadata
        final MCRPath mcrPath = MCRPath.toMCRPath(input);
        final String ownerID = mcrPath.getOwner();
        final MCRObjectID mcrObjID = MCRMetadataManager.getObjectId(MCRObjectID.getInstance(ownerID), 10,
            TimeUnit.SECONDS);
        if (mcrObjID != null) {
            doc.setField("returnId", mcrObjID.toString());
            doc.setField("objectProject", mcrObjID.getProjectId());
        } else {
            // No object found for the owner: fall back to the owner id itself.
            LOGGER.warn("Could not determine MCRObject for file {}", absolutePath);
            doc.setField("returnId", ownerID);
        }
        doc.setField("derivateID", ownerID);
        doc.setField("derivateModified", getDerivateModified(ownerID));

        // Directly linked categories plus all of their parents.
        final Collection<MCRCategoryID> directLinks = MCRCategLinkServiceFactory.getInstance()
            .getLinksFromReference(new MCRCategLinkReference(mcrPath));
        final HashSet<MCRCategoryID> allCategories = new HashSet<>(directLinks);
        for (MCRCategoryID linked : directLinks) {
            for (MCRCategory ancestor : CATEGORY_DAO.getParents(linked)) {
                allCategories.add(ancestor.getId());
            }
        }
        allCategories.forEach(categoryID -> doc.addField("fileCategory", categoryID.toString()));
    } catch (ProviderMismatchException e) {
        LOGGER.warn("Cannot build all fields as input is not an instance of MCRPath: {}", input);
    }
    doc.setField("objectType", "data_file");
    final String fileName = input.getFileName().toString();
    doc.setField("fileName", fileName);
    doc.setField("filePath", absolutePath);
    doc.setField("stream_size", attr.size());
    doc.setField("stream_name", absolutePath);
    doc.setField("stream_source_info", input.toString());
    doc.setField("stream_content_type", MCRContentTypes.probeContentType(input));
    doc.setField("extension", Files.getFileExtension(fileName));
    final MCRISO8601Date fileModified = new MCRISO8601Date();
    fileModified.setDate(new Date(attr.lastModifiedTime().toMillis()));
    doc.setField("modified", fileModified.getISOString());
}
Example use of org.mycore.datamodel.metadata.MCRObjectID in the mycore project by MyCoRe-Org.
From class MCRSolrIndexHandlerFactory, method getIndexHandler (MCRObjectID... overload).
/**
 * Creates an index handler for the objects with the given identifiers.
 * <p>
 * The stored content of every identifier is retrieved. A single identifier is passed on to the
 * content-and-id variant of {@code getIndexHandler}; any other number of identifiers is collected
 * into a map and passed on to the map variant.
 *
 * @param ids
 *            identifiers of the objects to index
 * @return the index handler for the retrieved content
 * @throws IOException
 *             if content cannot be retrieved
 */
public MCRSolrIndexHandler getIndexHandler(MCRObjectID... ids) throws IOException {
    if (ids.length != 1) {
        HashMap<MCRObjectID, MCRContent> contentById = new HashMap<>();
        for (MCRObjectID objectId : ids) {
            contentById.put(objectId, MCRXMLMetadataManager.instance().retrieveContent(objectId));
        }
        return getIndexHandler(contentById);
    }
    // Exactly one identifier: no map needed.
    MCRObjectID onlyId = ids[0];
    MCRContent onlyContent = MCRXMLMetadataManager.instance().retrieveContent(onlyId);
    return getIndexHandler(onlyContent, onlyId);
}
Example use of org.mycore.datamodel.metadata.MCRObjectID in the mycore project by MyCoRe-Org.
From class MCRSolrIndexHandlerFactory, method getIndexHandler (MCRBase... overload).
/**
 * Creates an index handler for the given objects or derivates.
 * <p>
 * Each instance is wrapped in an {@link MCRBaseContent}. A single instance is passed on to the
 * content-and-id variant of {@code getIndexHandler}; any other number of instances is collected
 * into a map keyed by their ids and passed on to the map variant.
 *
 * @param derOrObjs
 *            objects or derivates to index
 * @return the index handler for the wrapped content
 */
public MCRSolrIndexHandler getIndexHandler(MCRBase... derOrObjs) {
    if (derOrObjs.length != 1) {
        HashMap<MCRObjectID, MCRContent> contentById = new HashMap<>();
        for (MCRBase base : derOrObjs) {
            MCRBaseContent wrapped = new MCRBaseContent(base);
            contentById.put(base.getId(), wrapped);
        }
        return getIndexHandler(contentById);
    }
    // Exactly one instance: no map needed.
    MCRBase single = derOrObjs[0];
    MCRBaseContent singleContent = new MCRBaseContent(single);
    return getIndexHandler(singleContent, single.getId());
}
Aggregations