Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions in the Apache ManifoldCF project.
Class ConfluenceRepositoryConnector, method processPageAsAttachment.
/**
 * <p>
 * Resolves an attachment and hands it to {@code processPageInternal} so that it is processed like a page.
 * </p>
 *
 * @param activeSecurity Security enabled/disabled
 * @param documentIdentifier The original documentIdentifier, forwarded as the Manifold document identifier
 * @param parentRestrictions The list of parent restrictions
 * @param pageId The identifier handed to {@code ConfluenceUtil.getAttachmentAndPageId}; element 0 of the result is used to fetch the attachment and
 *        element 1 is recorded under the "attachedBy" property
 * @param version The version of the page, forwarded unchanged
 * @param activities The process activity, forwarded unchanged
 * @param doLog Forwarded unchanged
 * @return the result returned by {@code processPageInternal}
 * @throws ManifoldCFException
 * @throws ServiceInterruption
 * @throws IOException
 */
private ProcessResult processPageAsAttachment(final boolean activeSecurity, final String documentIdentifier, final List<String> parentRestrictions, final String pageId, final String version, final IProcessActivity activities, final boolean doLog) throws ManifoldCFException, ServiceInterruption, IOException {
  final String[] attachmentAndPage = ConfluenceUtil.getAttachmentAndPageId(pageId);

  // Start from an empty attachment: it is what gets processed if the lookup fails and handlePageException returns normally
  Attachment resolved = new Attachment();
  try {
    resolved = confluenceClient.getAttachment(attachmentAndPage[0]);
  } catch (final Exception lookupFailure) {
    handlePageException(lookupFailure, "attachment processing");
  }

  // Expose the second identifier as an extra field of the ingested document
  final Map<String, String> attachmentProperties = Maps.newHashMap();
  attachmentProperties.put("attachedBy", attachmentAndPage[1]);

  return processPageInternal(activeSecurity, parentRestrictions, resolved, documentIdentifier, version, activities, doLog, attachmentProperties);
}
Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Restrictions in the Apache ManifoldCF project.
Class ConfluenceRepositoryConnector, method processPageInternal.
/**
 * <p>
 * Process the specific page: build its version string, reference its direct child pages, run the indexability checks and, if all of them pass,
 * ingest the page.
 * </p>
 *
 * @param activeSecurity Security enabled/disabled
 * @param parentRestrictions The list of parent restrictions
 * @param page The page to process
 * @param manifoldDocumentIdentifier The identifier under which the page is checked, excluded or ingested
 * @param version The version of the page (not used by this method; the version string is rebuilt here)
 * @param activities The process activity used for the checks, the child references and the ingestion
 * @param doLog Not used by this method
 * @param extraProperties Additional fields added as-is to the repository document
 * @return a {@code ProcessResult} built from the page length and: the code "RETAINED" when no reindexing is needed, one of the
 *         {@code IProcessActivity.EXCLUDED_*} codes with a description when the page is excluded, or a null code and description after ingestion
 *
 * @throws ManifoldCFException
 * @throws IOException
 * @throws ServiceInterruption
 */
private ProcessResult processPageInternal(final boolean activeSecurity, final List<String> parentRestrictions, final Page page, final String manifoldDocumentIdentifier, final String version, final IProcessActivity activities, final boolean doLog, final Map<String, String> extraProperties) throws ManifoldCFException, ServiceInterruption, IOException {
  if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
    Logging.connectors.debug("Confluence: This content exists: " + page.getId());
  }

  final RepositoryDocument rd = new RepositoryDocument();
  final Date createdDate = page.getCreatedDate();
  final Date lastModified = page.getLastModifiedDate();
  final DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.ROOT);

  /* Version string = last modified date + page restrictions + parent restrictions */
  final StringBuilder versionBuilder = new StringBuilder();
  // The date is treated as optional further down, so do not let a missing one fail the formatting
  if (lastModified != null) {
    versionBuilder.append(df.format(lastModified));
  }

  final List<String> pageRestrictions = new ArrayList<String>();
  if (activeSecurity) {
    final List<Restrictions> restrictions = getPageReadRestrictions(page.getId());
    for (final Restrictions res : restrictions) {
      final ReadRestrictions rr = res.getReadRestrictions();
      rr.getUsers().forEach(user -> pageRestrictions.add("user-" + user.getUserKey()));
      rr.getGroups().forEach(group -> pageRestrictions.add("group-" + group.getName()));
    }
  }
  // Order the page restrictions alphabetically so the version will be always the same in case the same restrictions between two crawls are
  // not retrieved in the same order
  pageRestrictions.sort(String::compareToIgnoreCase);
  versionBuilder.append("+");
  packList(versionBuilder, pageRestrictions, '+');
  versionBuilder.append("+");
  packList(versionBuilder, parentRestrictions, '+');
  final String lastVersion = versionBuilder.toString();

  // Get and reference page direct childs if any
  if (page.getType() == PageType.PAGE) {
    // The restrictions handed to the children are the same for every child, so they are computed once: the page's own restrictions when it has
    // some, otherwise the ones received from its parent. Without security the list stays empty.
    final List<String> childParentRestrictions = new ArrayList<>();
    if (activeSecurity) {
      if (pageRestrictions.isEmpty()) {
        childParentRestrictions.addAll(parentRestrictions);
      } else {
        childParentRestrictions.addAll(pageRestrictions);
      }
    }
    childParentRestrictions.sort(String::compareToIgnoreCase);

    final List<Page> pageChilds = getPageChilds(page.getId());
    for (final Page childPage : pageChilds) {
      final JSONObject child = new JSONObject();
      child.put("id", childPage.getId());
      // NOTE(review): the key spelling is kept untouched; whatever reads the child reference back presumably looks it up by this exact name
      child.put("parentRestricions", childParentRestrictions);
      activities.addDocumentReference(CHILD_PREFIX + child.toJSONString());
    }
  }

  /*
   * Retain page in Manifold because it has not changed from last time This is needed to keep the identifier in Manifold data, because by default
   * if a document is not retained nor ingested, it will be deleted by the framework
   */
  if (!activities.checkDocumentNeedsReindexing(manifoldDocumentIdentifier, lastVersion)) {
    return new ProcessResult(page.getLength(), "RETAINED", "");
  }

  /* Indexability checks: each failure records "no document" for this version and reports why */
  if (!activities.checkLengthIndexable(page.getLength())) {
    activities.noDocument(manifoldDocumentIdentifier, lastVersion);
    final String errorCode = IProcessActivity.EXCLUDED_LENGTH;
    final String errorDesc = "Excluding document because of length (" + page.getLength() + ")";
    return new ProcessResult(page.getLength(), errorCode, errorDesc);
  }
  if (!activities.checkMimeTypeIndexable(page.getMediaType())) {
    activities.noDocument(manifoldDocumentIdentifier, lastVersion);
    final String errorCode = IProcessActivity.EXCLUDED_MIMETYPE;
    final String errorDesc = "Excluding document because of mime type (" + page.getMediaType() + ")";
    return new ProcessResult(page.getLength(), errorCode, errorDesc);
  }
  if (!activities.checkDateIndexable(lastModified)) {
    activities.noDocument(manifoldDocumentIdentifier, lastVersion);
    final String errorCode = IProcessActivity.EXCLUDED_DATE;
    final String errorDesc = "Excluding document because of date (" + lastModified + ")";
    return new ProcessResult(page.getLength(), errorCode, errorDesc);
  }
  if (!activities.checkURLIndexable(page.getWebUrl())) {
    activities.noDocument(manifoldDocumentIdentifier, lastVersion);
    final String errorCode = IProcessActivity.EXCLUDED_URL;
    final String errorDesc = "Excluding document because of URL ('" + page.getWebUrl() + "')";
    return new ProcessResult(page.getLength(), errorCode, errorDesc);
  }

  /* Add repository document information */
  rd.setMimeType(page.getMediaType());
  if (createdDate != null) {
    rd.setCreatedDate(createdDate);
  }
  if (lastModified != null) {
    rd.setModifiedDate(lastModified);
  }
  rd.setIndexingDate(new Date());

  /* Adding Page Metadata */
  final Map<String, Object> pageMetadata = page.getMetadataAsMap();
  for (final Entry<String, Object> entry : pageMetadata.entrySet()) {
    if (entry.getValue() instanceof List) {
      // Convert element by element: copying a List<?> straight into a String[] fails as soon as one element is not a String
      final List<?> list = (List<?>) entry.getValue();
      final String[] values = new String[list.size()];
      int index = 0;
      for (final Object item : list) {
        values[index++] = item == null ? null : item.toString();
      }
      rd.addField(entry.getKey(), values);
    } else if (entry.getValue() != null) {
      final String key = entry.getKey();
      final String value = entry.getValue().toString();
      rd.addField(key, value);
      // The title is additionally exposed under the "stream_name" field
      if (key.toLowerCase(Locale.ROOT).contentEquals("title")) {
        rd.addField("stream_name", value);
      }
    }
  }
  rd.addField("source", "confluence");

  /* Adding extra properties */
  for (final Entry<String, String> entry : extraProperties.entrySet()) {
    rd.addField(entry.getKey(), entry.getValue());
  }

  final String documentURI = page.getWebUrl();

  /* Set repository document ACLs */
  if (activeSecurity) {
    rd.setSecurity(RepositoryDocument.SECURITY_TYPE_SHARE, new String[] { "space-" + page.getSpace() }, new String[] { defaultAuthorityDenyToken });
    if (parentRestrictions.size() > 0) {
      rd.setSecurity(RepositoryDocument.SECURITY_TYPE_PARENT, parentRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken });
    }
    if (pageRestrictions.size() > 0) {
      rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT, pageRestrictions.toArray(new String[0]), new String[] { defaultAuthorityDenyToken });
    }
  }

  // The content stream is closed once the ingestion is over, whether it succeeded or not
  try (final java.io.InputStream contentStream = page.getContentStream()) {
    rd.setBinary(contentStream, page.getLength());
    rd.addField("size", String.valueOf(page.getLength()));
    rd.addField("url", documentURI);

    /* Ingest document */
    activities.ingestDocumentWithException(manifoldDocumentIdentifier, lastVersion, documentURI, rd);
  }

  return new ProcessResult(page.getLength(), null, null);
}
Aggregations