Search in sources:

Example 1 with Page

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in project manifoldcf by apache.

In class ConfluenceClient, method getPage.

/**
 * <p>
 * Fetches a single Confluence page by id and attaches its labels.
 * </p>
 *
 * <p>
 * The content URL is assembled from the configured protocol, host, port and
 * path, passed through {@code sanitizeUrl}, and requested with a GET. The
 * response entity is converted into a {@code MutablePage}, the labels
 * returned by {@code getLabels(pageId)} are set on it, and the page is
 * returned.
 * </p>
 *
 * @param pageId
 *          the page id
 * @return the Confluence page with its labels set
 * @throws Exception
 *           if building the URL fails, or if executing the request, reading
 *           the response or fetching the labels fails; failures of the
 *           latter group are logged here and rethrown unchanged
 */
public Page getPage(final String pageId) throws Exception {
    final String rawUrl = String.format(Locale.ROOT, "%s://%s:%s%s%s/%s?%s", protocol, host, port, path, CONTENT_PATH, pageId, EXPANDABLE_PARAMETERS);
    final String pageUrl = sanitizeUrl(rawUrl);
    logger.debug("[Processing] Hitting url for getting document content : {}", pageUrl);

    final HttpGet request = createGetRequest(pageUrl);
    try (CloseableHttpResponse httpResponse = executeRequest(request)) {
        final HttpEntity body = httpResponse.getEntity();
        final MutablePage result = pageFromHttpEntity(body);
        // Make sure the entity is fully drained before issuing the labels request.
        EntityUtils.consume(body);
        // Labels come from a separate call and are merged into the page.
        result.setLabels(getLabels(pageId));
        return result;
    } catch (final Exception failure) {
        logger.error("[Processing] Failed to get page {}. Error: {}", pageUrl, failure.getMessage());
        throw failure;
    }
}
Also used : HttpEntity(org.apache.http.HttpEntity) MutablePage(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.MutablePage) HttpGet(org.apache.http.client.methods.HttpGet) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Label(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Label) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) ConfluenceException(org.apache.manifoldcf.crawler.connectors.confluence.v6.exception.ConfluenceException) IOException(java.io.IOException)

Example 2 with Page

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in project manifoldcf by apache.

In class ConfluenceClient, method pageFromHttpEntity.

/**
 * <p>
 * Builds a {@code MutablePage} from the entity returned by the server.
 * </p>
 *
 * <p>
 * The entity is read into a string via {@code EntityUtils.toString} (with
 * {@code "UTF-8"} as the charset argument), parsed as a JSON object, and
 * handed to the builder obtained from {@code MutablePage.builder()} together
 * with a fresh {@code MutablePage} to populate.
 * </p>
 *
 * @param entity
 *          the {@code HttpEntity} to create the {@code MutablePage} from
 * @return the Confluence page instance produced by the builder
 * @throws Exception
 *           if the entity cannot be read or its content cannot be parsed
 */
private MutablePage pageFromHttpEntity(final HttpEntity entity) throws Exception {
    final String json = EntityUtils.toString(entity, "UTF-8");
    final JSONObject parsed = (JSONObject) new JSONParser().parse(new StringReader(json));

    // The cast is kept on its own line so the suppression covers nothing else.
    @SuppressWarnings("unchecked") final ConfluenceResourceBuilder<MutablePage> pageBuilder = (ConfluenceResourceBuilder<MutablePage>) MutablePage.builder();
    return pageBuilder.fromJson(parsed, new MutablePage());
}
Also used : JSONObject(org.json.simple.JSONObject) MutablePage(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.MutablePage) StringReader(java.io.StringReader) ConfluenceResourceBuilder(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.builder.ConfluenceResourceBuilder) JSONParser(org.json.simple.parser.JSONParser)

Example 3 with Page

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in project manifoldcf by apache.

In class ConfluenceClient, method getConfluenceRestrictionsResources.

/**
 * <p>
 * Requests the given REST url and converts the response into a
 * {@code ConfluenceRestrictionsResponse}.
 * </p>
 *
 * @param url
 *          The url identifying the REST resource to get the documents
 * @param builder
 *          The builder used to build the resources contained in the response
 * @return a {@code ConfluenceRestrictionsResponse} built from the response
 *         entity
 * @throws Exception
 *           an {@code IOException} is logged and rethrown wrapped in an
 *           {@code Exception} with the message "Confluence appears to be
 *           down"; any other exception propagates unchanged
 */
private ConfluenceRestrictionsResponse<? extends ConfluenceResource> getConfluenceRestrictionsResources(final String url, final ConfluenceResourceBuilder<? extends ConfluenceResource> builder) throws Exception {
    // NOTE(review): only the logged value is sanitized; the request below uses
    // the url exactly as passed in. Confirm this is intended.
    final String loggedUrl = sanitizeUrl(url);
    logger.debug("[Processing] Hitting url for get confluence resources: {}", loggedUrl);

    final HttpGet request = createGetRequest(url);
    try (CloseableHttpResponse httpResponse = executeRequest(request)) {
        final HttpEntity body = httpResponse.getEntity();
        final ConfluenceRestrictionsResponse<? extends ConfluenceResource> parsed = restrictionsResponseFromHttpEntity(body, builder);
        EntityUtils.consume(body);
        return parsed;
    } catch (final IOException ioFailure) {
        logger.error("[Processing] Failed to get page(s)", ioFailure);
        throw new Exception("Confluence appears to be down", ioFailure);
    }
}
Also used : HttpGet(org.apache.http.client.methods.HttpGet) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) IOException(java.io.IOException) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) ConfluenceException(org.apache.manifoldcf.crawler.connectors.confluence.v6.exception.ConfluenceException) IOException(java.io.IOException)

Example 4 with Page

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in project manifoldcf by apache.

In class ConfluenceClient, method executeRequest.

/**
 * <p>
 * Execute the given {@code HttpUriRequest} using the configured client
 * </p>
 *
 * <p>
 * Only a response with status code 200 is returned to the caller, who then
 * owns it and must close it. For any other status, or if execution itself
 * fails, the response (if one was obtained) is closed here exactly once, the
 * failure is logged and the exception is rethrown.
 * </p>
 *
 * @param request
 *          the {@code HttpUriRequest} to be executed
 * @return the open {@code CloseableHttpResponse} returned from the server
 * @throws Exception
 *           if the client fails to execute the request, or if the server
 *           answers with a status other than 200 (message
 *           {@code "Confluence error. <code> <reason>"}); a failure to close
 *           the response during cleanup is attached as a suppressed exception
 *           rather than replacing the original error
 */
private CloseableHttpResponse executeRequest(final HttpUriRequest request) throws Exception {
    final String url = request.getURI().toString();
    logger.debug("[Processing] Hitting url for getting document content : {}", url);
    CloseableHttpResponse response = null;
    try {
        response = httpClient.execute(request, httpContext);
        final int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200) {
            // Cleanup is left to the catch block below so the response is closed only once.
            throw new Exception("Confluence error. " + statusCode + " " + response.getStatusLine().getReasonPhrase());
        }
        return response;
    } catch (final Exception e) {
        if (response != null) {
            try {
                response.close();
            } catch (final IOException closeFailure) {
                // Keep the original failure as the primary error; a close problem
                // must not hide why the request failed in the first place.
                e.addSuppressed(closeFailure);
            }
        }
        logger.error("[Processing] Failed to get page {}. Error: {}", url, e.getMessage());
        throw e;
    }
}
Also used : CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) ConfluenceException(org.apache.manifoldcf.crawler.connectors.confluence.v6.exception.ConfluenceException) IOException(java.io.IOException)

Example 5 with Page

use of org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page in project manifoldcf by apache.

In class ConfluenceRepositoryConnector, method processSeedAttachments.

/**
 * <p>
 * Process seed attachments for the given page
 * </p>
 *
 * <p>
 * Attachments are fetched in batches through
 * {@code confluenceClient.getPageAttachments}, starting at offset 0 with a
 * batch size of 50. Every attachment in a batch is registered as a seed
 * document whose identifier is generated from the attachment id and the page
 * id. The offset advances by the number of attachments received, and the loop
 * ends when the response reports that it is the last one.
 * </p>
 *
 * @param page
 *          the page whose attachments are seeded
 * @param activities
 *          the seeding activity that receives the generated document
 *          identifiers
 * @throws ManifoldCFException
 *           declared by this method; presumably raised by
 *           {@code handleConfluenceDownException} or the seeding activity -
 *           verify against those implementations
 * @throws ServiceInterruption
 *           declared by this method; presumably raised by
 *           {@code handleConfluenceDownException} - verify against that
 *           implementation
 */
private void processSeedAttachments(final Page page, final ISeedingActivity activities) throws ManifoldCFException, ServiceInterruption {
    long lastStart = 0;
    final long defaultSize = 50;
    if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
        // MessageFormat placeholders need an explicit argument index; a bare "{}" is rejected when the pattern is parsed.
        Logging.connectors.debug(new MessageFormat("Processing page {0} attachments starting from {1} and size {2}", Locale.ROOT).format(new Object[] { page.getId(), lastStart, defaultSize }));
    }
    try {
        boolean isLast;
        do {
            final ConfluenceResponse<Attachment> response = confluenceClient.getPageAttachments(page.getId(), (int) lastStart, (int) defaultSize);
            int count = 0;
            for (final Page resultPage : response.getResults()) {
                activities.addSeedDocument(ConfluenceUtil.generateRepositoryDocumentIdentifier(resultPage.getId(), page.getId()));
                count++;
            }
            if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
                Logging.connectors.debug(new MessageFormat("Fetched and added {0} seed document attachments for page {1}", Locale.ROOT).format(new Object[] { count, page.getId() }));
            }
            // The next batch starts right after the attachments received so far.
            // NOTE(review): a response with no results that is not flagged as last
            // would repeat the same request indefinitely - confirm the server
            // never answers that way.
            lastStart += count;
            isLast = response.isLast();
            if (Logging.connectors != null && Logging.connectors.isDebugEnabled()) {
                Logging.connectors.debug(new MessageFormat("New start {0} and size {1}", Locale.ROOT).format(new Object[] { lastStart, defaultSize }));
            }
        } while (!isLast);
    } catch (final Exception e) {
        // Every failure raised while seeding is delegated to the shared handler.
        handleConfluenceDownException(e, "seeding");
    }
}
Also used : MessageFormat(java.text.MessageFormat) JSONObject(org.json.simple.JSONObject) Attachment(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment) Page(org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page) InterruptedIOException(java.io.InterruptedIOException) ParseException(org.json.simple.parser.ParseException) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)14 Page (model.Page)9 ManifoldCFException (org.apache.manifoldcf.core.interfaces.ManifoldCFException)9 List (java.util.List)5 ServletException (javax.servlet.ServletException)5 CloseableHttpResponse (org.apache.http.client.methods.CloseableHttpResponse)5 ConfluenceException (org.apache.manifoldcf.crawler.connectors.confluence.v6.exception.ConfluenceException)5 JSONObject (org.json.simple.JSONObject)5 InterruptedIOException (java.io.InterruptedIOException)4 SQLException (java.sql.SQLException)4 HttpGet (org.apache.http.client.methods.HttpGet)4 Page (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Page)4 ParseException (org.json.simple.parser.ParseException)4 MessageFormat (java.text.MessageFormat)3 ArrayList (java.util.ArrayList)2 HttpEntity (org.apache.http.HttpEntity)2 Attachment (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.Attachment)2 MutablePage (org.apache.manifoldcf.crawler.connectors.confluence.v6.model.MutablePage)2 StringReader (java.io.StringReader)1 DateFormat (java.text.DateFormat)1