Search in sources :

Example 1 with Restrictions

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions in project manifoldcf by apache.

The method processPageAsAttachment of the class ConfluenceRepositoryConnector.

/**
 * <p>
 * Fetch an attachment from Confluence and process it through the regular page pipeline.
 * </p>
 *
 * @param activeSecurity     Security enabled/disabled
 * @param documentIdentifier The original documentIdentifier, forwarded as the Manifold document identifier
 * @param parentRestrictions The list of parent restrictions
 * @param pageId             The composite identifier that {@code ConfluenceUtil.getAttachmentAndPageId} splits; element 0 is used to fetch the attachment, element 1 is stored in the "attachedBy" field
 * @param version            The version of the page
 * @param activities         The process activity forwarded to the page processing
 * @param doLog              Forwarded unchanged to the page processing
 * @return the result produced by the page processing
 * @throws ManifoldCFException
 * @throws IOException
 * @throws ServiceInterruption
 */
private ProcessResult processPageAsAttachment(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, final IProcessActivity activities, final boolean doLog) throws ManifoldCFException, ServiceInterruption, IOException {
    final String[] attachmentAndPageIds = ConfluenceUtil.getAttachmentAndPageId(pageId);

    // Empty placeholder: this is what gets processed if the failure handler returns instead of throwing.
    // NOTE(review): handlePageException presumably always throws - confirm.
    Attachment fetched = new Attachment();
    try {
        // The index access stays inside the try so that a malformed identifier is routed to the failure handler too.
        final String attachmentId = attachmentAndPageIds[0];
        fetched = confluenceClient.getAttachment(attachmentId);
    } catch (final Exception fetchFailure) {
        handlePageException(fetchFailure, "attachment processing");
    }

    // Record the second identifier part on the document (presumably the id of the owning page - confirm).
    final Map<String, String> attachmentProperties = Maps.newHashMap();
    attachmentProperties.put("attachedBy", attachmentAndPageIds[1]);

    final ProcessResult outcome = processPageInternal(activeSecurity, parentRestrictions, fetched, documentIdentifier, version, activities, doLog, attachmentProperties);
    return outcome;
}
Also used : Attachment(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment) InterruptedIOException(java.io.InterruptedIOException) ParseException(org.json.simple.parser.ParseException) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) IOException(java.io.IOException)

Example 2 with Restrictions

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions in project manifoldcf by apache.

The method processPageInternal of the class ConfluenceRepositoryConnector.

/**
 * <p>
 * Process the specific page: build its version string, reference its direct child pages, then ingest it unless it is unchanged or rejected by one of the indexability checks.
 * </p>
 *
 * @param activeSecurity             Security enabled/disabled; when enabled the page read restrictions are fetched and ACLs are set on the ingested document
 * @param parentRestrictions         The list of parent restrictions
 * @param page                       The page to process
 * @param manifoldDocumentIdentifier The document identifier handed to the activities for version checking, exclusion and ingestion
 * @param version                    The version of the page (not read by this method; the version string is rebuilt here)
 * @param activities                 The process activity used to add child references, run the indexability checks and ingest the document
 * @param doLog                      Not read by this method
 * @param extraProperties            Extra fields copied as-is into the repository document
 * @return a result carrying the page length plus a code and description: "RETAINED" when the page does not need reindexing, an {@code IProcessActivity.EXCLUDED_*} code when a check rejects it, or {@code null} code and description after ingestion
 *
 * @throws ManifoldCFException
 * @throws IOException
 * @throws ServiceInterruption
 */
private ProcessResult processPageInternal(final boolean activeSecurity, final List<String> parentRestrictions, final Page page, final String manifoldDocumentIdentifier, final String version, final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException {
    if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
        Logging.connectors.debug("Confluence: This content exists: " + page.getId());
    }
    final RepositoryDocument rd = new RepositoryDocument();
    final Date createdDate = page.getCreatedDate();
    final Date lastModified = page.getLastModifiedDate();
    /*
     * The version string is "<last modified>+<page restrictions>+<parent restrictions>". The last modified date may be missing (it is null-checked again before being set on the
     * repository document), in which case the date part is left empty instead of failing with a NullPointerException.
     */
    final StringBuilder versionBuilder = new StringBuilder();
    if (lastModified != null) {
        final DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.ROOT);
        versionBuilder.append(df.format(lastModified));
    }
    final List<String> pageRestrictions = new ArrayList<String>();
    if (activeSecurity) {
        final List<Restrictions> restrictions = getPageReadRestrictions(page.getId());
        for (final Restrictions res : restrictions) {
            final ReadRestrictions rr = res.getReadRestrictions();
            rr.getUsers().forEach(user -> pageRestrictions.add("user-" + user.getUserKey()));
            rr.getGroups().forEach(group -> pageRestrictions.add("group-" + group.getName()));
        }
    }
    // Order the page restrictions alphabetically so the version will be always the same in case the same restrictions between two crawls are
    // not retrieved in the same order
    pageRestrictions.sort(String::compareToIgnoreCase);
    versionBuilder.append("+");
    packList(versionBuilder, pageRestrictions, '+');
    versionBuilder.append("+");
    packList(versionBuilder, parentRestrictions, '+');
    final String lastVersion = versionBuilder.toString();
    // Get and reference page direct childs if any
    if (page.getType() == PageType.PAGE) {
        final List<Page> pageChilds = getPageChilds(page.getId());
        for (final Page childPage : pageChilds) {
            final JSONObject child = new JSONObject();
            child.put("id", childPage.getId());
            final List<String> childParentRestrictions = new ArrayList<>();
            // When security is active, the restrictions of this page become the parent restrictions of its child pages; if this page has no
            // restrictions of its own, the restrictions received from its parent are passed down instead
            if (activeSecurity) {
                if (pageRestrictions.isEmpty()) {
                    childParentRestrictions.addAll(parentRestrictions);
                } else {
                    childParentRestrictions.addAll(pageRestrictions);
                }
            }
            childParentRestrictions.sort(String::compareToIgnoreCase);
            // The key spelling is part of the reference format and must stay as is
            child.put("parentRestricions", childParentRestrictions);
            activities.addDocumentReference(CHILD_PREFIX + child.toJSONString());
        }
    }
    /*
     * Retain page in Manifold because it has not changed from last time. This is needed to keep the identifier in Manifold data, because by default if a document is not retained nor
     * ingested, it will be deleted by the framework
     */
    if (!activities.checkDocumentNeedsReindexing(manifoldDocumentIdentifier, lastVersion)) {
        return new ProcessResult(page.getLength(), "RETAINED", "");
    }
    if (!activities.checkLengthIndexable(page.getLength())) {
        activities.noDocument(manifoldDocumentIdentifier, lastVersion);
        final String errorCode = IProcessActivity.EXCLUDED_LENGTH;
        final String errorDesc = "Excluding document because of length (" + page.getLength() + ")";
        return new ProcessResult(page.getLength(), errorCode, errorDesc);
    }
    if (!activities.checkMimeTypeIndexable(page.getMediaType())) {
        activities.noDocument(manifoldDocumentIdentifier, lastVersion);
        final String errorCode = IProcessActivity.EXCLUDED_MIMETYPE;
        final String errorDesc = "Excluding document because of mime type (" + page.getMediaType() + ")";
        return new ProcessResult(page.getLength(), errorCode, errorDesc);
    }
    // A page without a last modified date cannot be excluded by date, so the date check is skipped for it
    if (lastModified != null && !activities.checkDateIndexable(lastModified)) {
        activities.noDocument(manifoldDocumentIdentifier, lastVersion);
        final String errorCode = IProcessActivity.EXCLUDED_DATE;
        final String errorDesc = "Excluding document because of date (" + lastModified + ")";
        return new ProcessResult(page.getLength(), errorCode, errorDesc);
    }
    if (!activities.checkURLIndexable(page.getWebUrl())) {
        activities.noDocument(manifoldDocumentIdentifier, lastVersion);
        final String errorCode = IProcessActivity.EXCLUDED_URL;
        final String errorDesc = "Excluding document because of URL ('" + page.getWebUrl() + "')";
        return new ProcessResult(page.getLength(), errorCode, errorDesc);
    }
    /* Add repository document information */
    rd.setMimeType(page.getMediaType());
    if (createdDate != null) {
        rd.setCreatedDate(createdDate);
    }
    if (lastModified != null) {
        rd.setModifiedDate(lastModified);
    }
    rd.setIndexingDate(new Date());
    /* Adding Page Metadata */
    final Map<String, Object> pageMetadata = page.getMetadataAsMap();
    for (final Entry<String, Object> entry : pageMetadata.entrySet()) {
        if (entry.getValue() instanceof List) {
            // Convert element by element: copying straight into a String[] throws ArrayStoreException as soon as one element is not a String
            final List<?> list = (List<?>) entry.getValue();
            final String[] values = new String[list.size()];
            int index = 0;
            for (final Object item : list) {
                values[index++] = item == null ? null : item.toString();
            }
            rd.addField(entry.getKey(), values);
        } else if (entry.getValue() != null) {
            final String key = entry.getKey();
            final String value = entry.getValue().toString();
            rd.addField(key, value);
            // The title is additionally exposed under the "stream_name" field
            if (key.toLowerCase(Locale.ROOT).contentEquals("title")) {
                rd.addField("stream_name", value);
            }
        }
    }
    rd.addField("source", "confluence");
    /* Adding extra properties */
    for (final Entry<String, String> entry : extraProperties.entrySet()) {
        rd.addField(entry.getKey(), entry.getValue());
    }
    final String documentURI = page.getWebUrl();
    /* Set repository document ACLs */
    if (activeSecurity) {
        // The space token is always set; parent and document tokens are only set when there are restrictions at that level
        rd.setSecurity(RepositoryDocument.SECURITY_TYPE_SHARE, new String[] { "space-" + page.getSpace() }, new String[] { defaultAuthorityDenyToken });
        if (parentRestrictions.size() > 0) {
            rd.setSecurity(RepositoryDocument.SECURITY_TYPE_PARENT, parentRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken });
        }
        if (pageRestrictions.size() > 0) {
            rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, pageRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken });
        }
    }
    rd.setBinary(page.getContentStream(), page.getLength());
    rd.addField("size", String.valueOf(page.getLength()));
    rd.addField("url", documentURI);
    /* Ingest document */
    activities.ingestDocumentWithException(manifoldDocumentIdentifier, lastVersion, documentURI, rd);
    return new ProcessResult(page.getLength(), null, null);
}
Also used : ReadRestrictions(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions.ReadRestrictions) ArrayList(java.util.ArrayList) Page(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page) Date(java.util.Date) JSONObject(org.json.simple.JSONObject) DateFormat(java.text.DateFormat) List(java.util.List) RepositoryDocument(org.apache.manifoldcf.agents.interfaces.RepositoryDocument) Restrictions(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions)

Aggregations

IOException (java.io.IOException)1 InterruptedIOException (java.io.InterruptedIOException)1 DateFormat (java.text.DateFormat)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 List (java.util.List)1 RepositoryDocument (org.apache.manifoldcf.agents.interfaces.RepositoryDocument)1 ManifoldCFException (org.apache.manifoldcf.core.interfaces.ManifoldCFException)1 Attachment (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment)1 Page (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page)1 Restrictions (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions)1 ReadRestrictions (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions.ReadRestrictions)1 JSONObject (org.json.simple.JSONObject)1 ParseException (org.json.simple.parser.ParseException)1