Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in the Apache ManifoldCF project.
Class ConfluenceClient, method getPage.
/**
 * <p>
 * Fetches a single Confluence page by id and attaches its labels.
 * </p>
 * <p>
 * The content URL is built from the configured protocol, host, port and path,
 * passed through {@code sanitizeUrl}, and requested with a GET. The labels are
 * looked up with {@code getLabels(pageId)} while the page response is still
 * open and are then set on the returned page.
 * </p>
 *
 * @param pageId
 *          the page id
 * @return the Confluence page, with its labels set
 * @throws Exception
 *           any failure raised while requesting or parsing the page; it is
 *           logged and rethrown unchanged
 */
public Page getPage(final String pageId) throws Exception {
  final String pageUrl = sanitizeUrl(
      String.format(Locale.ROOT, "%s://%s:%s%s%s/%s?%s", protocol, host, port, path, CONTENT_PATH, pageId, EXPANDABLE_PARAMETERS));
  logger.debug("[Processing] Hitting url for getting document content : {}", pageUrl);

  final HttpGet pageRequest = createGetRequest(pageUrl);
  try (CloseableHttpResponse pageResponse = executeRequest(pageRequest)) {
    final HttpEntity body = pageResponse.getEntity();
    final MutablePage result = pageFromHttpEntity(body);
    // Make sure the body is fully read before issuing the labels request.
    EntityUtils.consume(body);
    result.setLabels(getLabels(pageId));
    return result;
  } catch (final Exception failure) {
    logger.error("[Processing] Failed to get page {}. Error: {}", pageUrl, failure.getMessage());
    throw failure;
  }
}
Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in the Apache ManifoldCF project.
Class ConfluenceClient, method pageFromHttpEntity.
/**
 * <p>
 * Builds a {@code MutablePage} from the JSON body of a server response.
 * </p>
 * <p>
 * The entity is read as a UTF-8 string, parsed as a JSON object, and handed
 * to the builder obtained from {@code MutablePage.builder()} together with a
 * fresh {@code MutablePage} instance.
 * </p>
 *
 * @param entity
 *          the {@code HttpEntity} to create the {@code MutablePage} from
 * @return the Confluence page instance produced by the builder
 * @throws Exception
 *           if the entity cannot be read or its content cannot be parsed
 */
private MutablePage pageFromHttpEntity(final HttpEntity entity) throws Exception {
  final String json = EntityUtils.toString(entity, "UTF-8");
  final JSONObject parsed = (JSONObject) new JSONParser().parse(new StringReader(json));

  // The builder is declared with a wider type; narrow it once, here.
  @SuppressWarnings("unchecked")
  final ConfluenceResourceBuilder<MutablePage> pageBuilder = (ConfluenceResourceBuilder<MutablePage>) MutablePage.builder();
  return pageBuilder.fromJson(parsed, new MutablePage());
}
Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in the Apache ManifoldCF project.
Class ConfluenceClient, method getConfluenceRestrictionsResources.
/**
 * <p>
 * Requests the given REST url and converts the response body into a
 * {@code ConfluenceRestrictionsResponse}.
 * </p>
 *
 * @param url
 *          The url identifying the REST resource to query
 * @param builder
 *          The builder used to build the resources contained in the response
 * @return a {@code ConfluenceRestrictionsResponse} built from the response
 *         entity
 * @throws Exception
 *           an {@code IOException} is logged and wrapped in an
 *           {@code Exception} with the message "Confluence appears to be
 *           down"; any other exception propagates unchanged
 */
private ConfluenceRestrictionsResponse<? extends ConfluenceResource> getConfluenceRestrictionsResources(final String url, final ConfluenceResourceBuilder<? extends ConfluenceResource> builder) throws Exception {
  logger.debug("[Processing] Hitting url for get confluence resources: {}", sanitizeUrl(url));

  final HttpGet restrictionsRequest = createGetRequest(url);
  try (CloseableHttpResponse httpResponse = executeRequest(restrictionsRequest)) {
    final ConfluenceRestrictionsResponse<? extends ConfluenceResource> restrictions =
        restrictionsResponseFromHttpEntity(httpResponse.getEntity(), builder);
    // Drain whatever is left of the body before the response is closed.
    EntityUtils.consume(httpResponse.getEntity());
    return restrictions;
  } catch (final IOException ioFailure) {
    logger.error("[Processing] Failed to get page(s)", ioFailure);
    throw new Exception("Confluence appears to be down", ioFailure);
  }
}
Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in the Apache ManifoldCF project.
Class ConfluenceClient, method executeRequest.
/**
 * <p>
 * Execute the given {@code HttpUriRequest} using the configured client
 * </p>
 * <p>
 * Only status code 200 is treated as success. On any other status, or when
 * execution itself fails, the response (if one was obtained) is closed here
 * and the exception is logged and rethrown, so callers only need to close
 * the response that is actually returned to them.
 * </p>
 *
 * @param request
 *          the {@code HttpUriRequest} to be executed
 * @return the still-open {@code CloseableHttpResponse} returned from the
 *         server
 * @throws Exception
 *           if the request cannot be executed, or if the status code is not
 *           200 (message starts with "Confluence error. ")
 */
private CloseableHttpResponse executeRequest(final HttpUriRequest request) throws Exception {
  final String url = request.getURI().toString();
  logger.debug("[Processing] Hitting url for getting document content : {}", url);
  CloseableHttpResponse response = null;
  try {
    response = httpClient.execute(request, httpContext);
    final int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode != 200) {
      // The response is released exactly once, in the catch block below.
      throw new Exception("Confluence error. " + statusCode + " " + response.getStatusLine().getReasonPhrase());
    }
    return response;
  } catch (final Exception e) {
    if (response != null) {
      try {
        response.close();
      } catch (final Exception closeFailure) {
        // Never let a failing close() hide the real cause of the error.
        e.addSuppressed(closeFailure);
      }
    }
    logger.error("[Processing] Failed to get page {}. Error: {}", url, e.getMessage());
    throw e;
  }
}
Use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in the Apache ManifoldCF project.
Class ConfluenceRepositoryConnector, method processSeedAttachments.
/**
 * <p>
 * Process seed attachments for the given page
 * </p>
 * <p>
 * Attachments are requested from the Confluence client in batches of 50,
 * starting at offset 0. Each returned attachment is added as a seed document
 * whose identifier is generated from the attachment id and the page id. The
 * offset is advanced by the number of results received, and the loop ends
 * when the response reports that it is the last one.
 * </p>
 *
 * @param page
 *          the page whose attachments are to be seeded
 * @param activities
 *          the seeding activity that receives the generated document
 *          identifiers
 * @throws ManifoldCFException
 *           declared by this method; exceptions raised while fetching or
 *           seeding are handed to {@code handleConfluenceDownException}
 * @throws ServiceInterruption
 *           declared by this method; see above
 */
private void processSeedAttachments(final Page page, final ISeedingActivity activities) throws ManifoldCFException, ServiceInterruption {
  long lastStart = 0;
  final long defaultSize = 50;
  if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
    // MessageFormat needs indexed placeholders; a bare "{}" makes it throw IllegalArgumentException.
    Logging.connectors.debug(new MessageFormat("Processing page {0} attachments starting from {1} and size {2}", Locale.ROOT).format(new Object[] { page.getId(), lastStart, defaultSize }));
  }
  try {
    boolean isLast;
    do {
      final ConfluenceResponse<Attachment> response = confluenceClient.getPageAttachments(page.getId(), (int) lastStart, (int) defaultSize);
      int count = 0;
      for (final Page resultPage : response.getResults()) {
        activities.addSeedDocument(ConfluenceUtil.generateRepositoryDocumentIdentifier(resultPage.getId(), page.getId()));
        count++;
      }
      if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
        Logging.connectors.debug(new MessageFormat("Fetched and added {0} seed document attachments for page {1}", Locale.ROOT).format(new Object[] { count, page.getId() }));
      }
      // Advance by what was actually received, not by the requested batch size.
      lastStart += count;
      isLast = response.isLast();
      if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
        Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize }));
      }
    } while (!isLast);
  } catch (final Exception e) {
    handleConfluenceDownException(e, "seeding");
  }
}
Aggregations