Search in sources :

Example 16 with MCRObjectID

use of org.mycore.datamodel.metadata.MCRObjectID in project mycore by MyCoRe-Org.

the class MCRSolrIndexer method rebuildMetadataIndex.

/**
 * Rebuilds solr's metadata index.
 * <p>
 * The content of every listed object is retrieved and collected into a batch. A batch is
 * handed to {@code submitIndexHandler} after every {@code BULK_SIZE} identifiers and after
 * the last identifier. An object that cannot be prepared is logged and skipped; the batch
 * it would have belonged to is still submitted, so one broken object (in particular the
 * last one of the list) no longer prevents the already collected objects from being indexed.
 *
 * @param list
 *            list of identifiers of the objects to index
 * @param solrClient
 *            solr server to index
 */
public static void rebuildMetadataIndex(List<String> list, SolrClient solrClient) {
    LOGGER.info("Re-building Metadata Index");
    if (list.isEmpty()) {
        LOGGER.info("Sorry, no documents to index");
        return;
    }
    StopWatch swatch = new StopWatch();
    swatch.start();
    int totalCount = list.size();
    LOGGER.info("Sending {} objects to solr for reindexing", totalCount);
    MCRXMLMetadataManager metadataMgr = MCRXMLMetadataManager.instance();
    MCRSolrIndexStatistic statistic = null;
    HashMap<MCRObjectID, MCRContent> contentMap = new HashMap<>((int) (BULK_SIZE * 1.4));
    int i = 0;
    for (String id : list) {
        i++;
        // Step 1: prepare this object. A failure here only affects this single object.
        try {
            LOGGER.debug("Preparing \"{}\" for indexing", id);
            MCRObjectID objId = MCRObjectID.getInstance(id);
            MCRContent content = metadataMgr.retrieveContent(objId);
            contentMap.put(objId, content);
        } catch (Exception ex) {
            LOGGER.error("Error preparing object {} for indexing", id, ex);
        }
        // Step 2: flush the batch. This is deliberately outside the try block above so that
        // a preparation failure at a batch boundary or on the last identifier does not skip
        // the flush.
        boolean flushDue = i % BULK_SIZE == 0 || totalCount == i;
        if (!flushDue || contentMap.isEmpty()) {
            continue;
        }
        try {
            MCRSolrIndexHandler indexHandler = MCRSolrIndexHandlerFactory.getInstance().getIndexHandler(contentMap);
            indexHandler.setCommitWithin(BATCH_AUTO_COMMIT_WITHIN_MS);
            indexHandler.setSolrServer(solrClient);
            statistic = indexHandler.getStatistic();
            submitIndexHandler(indexHandler);
            // Start a fresh map only after a successful hand-over; on failure the collected
            // entries stay in the map and are offered again with the next flush.
            contentMap = new HashMap<>((int) (BULK_SIZE * 1.4));
        } catch (Exception ex) {
            LOGGER.error("Error creating index thread for batch ending with object {}", id, ex);
        }
    }
    long durationInMilliSeconds = swatch.getTime();
    // Only the statistic of the most recently created handler is updated with the elapsed time.
    if (statistic != null) {
        statistic.addTime(durationInMilliSeconds);
    }
}
Also used : MCRSolrIndexStatistic(org.mycore.solr.index.statistic.MCRSolrIndexStatistic) HashMap(java.util.HashMap) MCRXMLMetadataManager(org.mycore.datamodel.common.MCRXMLMetadataManager) MCRContent(org.mycore.common.content.MCRContent) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) StopWatch(org.apache.commons.lang.time.StopWatch) MCRObjectID(org.mycore.datamodel.metadata.MCRObjectID)

Example 17 with MCRObjectID

use of org.mycore.datamodel.metadata.MCRObjectID in project mycore by MyCoRe-Org.

the class MCRSolrFileIndexBaseAccumulator method getDerivateModified.

/**
 * Returns the last-modified time of a derivate as an ISO 8601 formatted string.
 * <p>
 * The value is looked up in the {@code derivateModified} cache first; when the cache has no
 * up-to-date entry, the string is rebuilt from the modified handle and stored in the cache.
 *
 * @param derivateID
 *            identifier of the derivate, in a form accepted by {@link MCRObjectID#getInstance}
 * @return the ISO 8601 string of the derivate's last modification
 * @throws IOException
 *             thrown by {@link MCRCache.ModifiedHandle#getLastModified()}
 */
private static String getDerivateModified(final String derivateID) throws IOException {
    MCRCache.ModifiedHandle handle = XML_MANAGER.getLastModifiedHandle(MCRObjectID.getInstance(derivateID), 30,
        TimeUnit.SECONDS);
    String cached = derivateModified.getIfUpToDate(derivateID, handle);
    if (cached != null) {
        return cached;
    }
    // Cache miss: build the string from the handle and remember it for later calls.
    long lastModified = handle.getLastModified();
    MCRISO8601Date isoDate = new MCRISO8601Date();
    isoDate.setDate(new Date(lastModified));
    String formatted = isoDate.getISOString();
    derivateModified.put(derivateID, formatted);
    return formatted;
}
Also used : MCRCache(org.mycore.common.MCRCache) MCRObjectID(org.mycore.datamodel.metadata.MCRObjectID) MCRISO8601Date(org.mycore.datamodel.common.MCRISO8601Date) Date(java.util.Date)

Example 18 with MCRObjectID

use of org.mycore.datamodel.metadata.MCRObjectID in project mycore by MyCoRe-Org.

the class MCRSolrFileIndexBaseAccumulator method accumulate.

/**
 * Fills the given solr document with the fields describing one file.
 * <p>
 * Generic file fields (id, name, path, size, content type, extension, modification date) are
 * always set. Owner, derivate and category fields are added inside a try block; when that
 * block raises a {@link ProviderMismatchException}, the input is treated as not being an
 * {@code MCRPath} and only a warning is logged.
 *
 * @param doc
 *            document receiving the fields
 * @param input
 *            path of the file to describe
 * @param attr
 *            attributes of the file, used for size and modification time
 * @throws IOException
 *             declared by the overridden method; may be raised by the derivate lookup or
 *             the content type probing
 */
@Override
public void accumulate(SolrInputDocument doc, Path input, BasicFileAttributes attr) throws IOException {
    doc.setField("id", input.toUri().toString());
    final String absolutePath = "/" + input.subpath(0, input.getNameCount());
    try {
        // MCRPath specific metadata; a ProviderMismatchException raised in here is handled below
        MCRPath mcrPath = MCRPath.toMCRPath(input);
        MCRObjectID derivateId = MCRObjectID.getInstance(mcrPath.getOwner());
        MCRObjectID objectId = MCRMetadataManager.getObjectId(derivateId, 10, TimeUnit.SECONDS);
        if (objectId != null) {
            doc.setField("returnId", objectId.toString());
            doc.setField("objectProject", objectId.getProjectId());
        } else {
            // no owning object found: fall back to the owner of the path
            LOGGER.warn("Could not determine MCRObject for file {}", absolutePath);
            doc.setField("returnId", mcrPath.getOwner());
        }
        String owner = mcrPath.getOwner();
        doc.setField("derivateID", owner);
        doc.setField("derivateModified", getDerivateModified(owner));

        // directly linked categories plus all of their parents
        MCRCategLinkReference reference = new MCRCategLinkReference(mcrPath);
        Collection<MCRCategoryID> directLinks = MCRCategLinkServiceFactory.getInstance()
            .getLinksFromReference(reference);
        HashSet<MCRCategoryID> allCategories = new HashSet<>(directLinks);
        for (MCRCategoryID direct : directLinks) {
            for (MCRCategory ancestor : CATEGORY_DAO.getParents(direct)) {
                allCategories.add(ancestor.getId());
            }
        }
        allCategories.forEach(categoryId -> doc.addField("fileCategory", categoryId.toString()));
    } catch (ProviderMismatchException e) {
        LOGGER.warn("Cannot build all fields as input is not an instance of MCRPath: {}", input);
    }

    // fields available for every kind of path
    doc.setField("objectType", "data_file");
    String fileName = input.getFileName().toString();
    doc.setField("fileName", fileName);
    doc.setField("filePath", absolutePath);
    doc.setField("stream_size", attr.size());
    doc.setField("stream_name", absolutePath);
    doc.setField("stream_source_info", input.toString());
    doc.setField("stream_content_type", MCRContentTypes.probeContentType(input));
    doc.setField("extension", Files.getFileExtension(fileName));

    MCRISO8601Date modifiedDate = new MCRISO8601Date();
    long modifiedMillis = attr.lastModifiedTime().toMillis();
    modifiedDate.setDate(new Date(modifiedMillis));
    doc.setField("modified", modifiedDate.getISOString());
}
Also used : MCRCategory(org.mycore.datamodel.classifications2.MCRCategory) MCRCategoryID(org.mycore.datamodel.classifications2.MCRCategoryID) MCRObjectID(org.mycore.datamodel.metadata.MCRObjectID) ProviderMismatchException(java.nio.file.ProviderMismatchException) MCRPath(org.mycore.datamodel.niofs.MCRPath) MCRISO8601Date(org.mycore.datamodel.common.MCRISO8601Date) MCRCategLinkReference(org.mycore.datamodel.classifications2.MCRCategLinkReference) Date(java.util.Date) HashSet(java.util.HashSet)

Example 19 with MCRObjectID

use of org.mycore.datamodel.metadata.MCRObjectID in project mycore by MyCoRe-Org.

the class MCRSolrIndexHandlerFactory method getIndexHandler.

/**
 * Creates an index handler for the objects with the given identifiers.
 * <p>
 * The content of each object is retrieved from the {@link MCRXMLMetadataManager}. Exactly one
 * identifier is delegated to the single-content overload; any other number of identifiers
 * (including none) is delegated to the map based overload.
 *
 * @param ids
 *            identifiers of the objects to index
 * @return the index handler returned by the overload that was delegated to
 * @throws IOException
 *             declared for the content retrieval
 */
public MCRSolrIndexHandler getIndexHandler(MCRObjectID... ids) throws IOException {
    if (ids.length != 1) {
        HashMap<MCRObjectID, MCRContent> contentById = new HashMap<>();
        for (int idx = 0; idx < ids.length; idx++) {
            MCRObjectID current = ids[idx];
            MCRContent currentContent = MCRXMLMetadataManager.instance().retrieveContent(current);
            contentById.put(current, currentContent);
        }
        return getIndexHandler(contentById);
    }
    // exactly one identifier: no map needed
    MCRContent singleContent = MCRXMLMetadataManager.instance().retrieveContent(ids[0]);
    return getIndexHandler(singleContent, ids[0]);
}
Also used : HashMap(java.util.HashMap) MCRObjectID(org.mycore.datamodel.metadata.MCRObjectID) MCRContent(org.mycore.common.content.MCRContent)

Example 20 with MCRObjectID

use of org.mycore.datamodel.metadata.MCRObjectID in project mycore by MyCoRe-Org.

the class MCRSolrIndexHandlerFactory method getIndexHandler.

/**
 * Creates an index handler for the given objects or derivates.
 * <p>
 * Each element is wrapped in an {@link MCRBaseContent}. Exactly one element is delegated to
 * the single-content overload together with its id; any other number of elements (including
 * none) is collected into a map keyed by id and delegated to the map based overload.
 *
 * @param derOrObjs
 *            objects or derivates to index
 * @return the index handler returned by the overload that was delegated to
 */
public MCRSolrIndexHandler getIndexHandler(MCRBase... derOrObjs) {
    if (derOrObjs.length != 1) {
        HashMap<MCRObjectID, MCRContent> contentById = new HashMap<>();
        for (int idx = 0; idx < derOrObjs.length; idx++) {
            MCRBase current = derOrObjs[idx];
            MCRBaseContent wrapped = new MCRBaseContent(current);
            contentById.put(current.getId(), wrapped);
        }
        return getIndexHandler(contentById);
    }
    // exactly one element: no map needed
    MCRBaseContent single = new MCRBaseContent(derOrObjs[0]);
    return getIndexHandler(single, derOrObjs[0].getId());
}
Also used : HashMap(java.util.HashMap) MCRBaseContent(org.mycore.common.content.MCRBaseContent) MCRBase(org.mycore.datamodel.metadata.MCRBase) MCRObjectID(org.mycore.datamodel.metadata.MCRObjectID) MCRContent(org.mycore.common.content.MCRContent)

Aggregations

MCRObjectID (org.mycore.datamodel.metadata.MCRObjectID)144 IOException (java.io.IOException)37 MCRObject (org.mycore.datamodel.metadata.MCRObject)32 MCRException (org.mycore.common.MCRException)30 MCRCommand (org.mycore.frontend.cli.annotation.MCRCommand)30 MCRDerivate (org.mycore.datamodel.metadata.MCRDerivate)29 MCRPath (org.mycore.datamodel.niofs.MCRPath)25 MCRAccessException (org.mycore.access.MCRAccessException)22 Document (org.jdom2.Document)20 MCRPersistenceException (org.mycore.common.MCRPersistenceException)18 MCRMetaLinkID (org.mycore.datamodel.metadata.MCRMetaLinkID)16 JDOMException (org.jdom2.JDOMException)15 MCRBase (org.mycore.datamodel.metadata.MCRBase)15 SAXException (org.xml.sax.SAXException)15 Date (java.util.Date)14 MCRActiveLinkException (org.mycore.datamodel.common.MCRActiveLinkException)13 MCRSession (org.mycore.common.MCRSession)11 MCRContent (org.mycore.common.content.MCRContent)11 MCRPersistentIdentifierException (org.mycore.pi.exceptions.MCRPersistentIdentifierException)11 URISyntaxException (java.net.URISyntaxException)10