Search in sources :

Example 6 with HttpHead

use of org.apache.http.client.methods.HttpHead in project crawler4j by yasserg.

the class PageFetcherHtmlOnly method fetchPage.

/**
 * Issues a HEAD request for the given URL before deciding whether to fetch the page.
 *
 * <p>When the HEAD response carries no {@code Content-Type} header, or its value contains both
 * {@code "text"} and {@code "html"}, the call is delegated to {@code super.fetchPage(webUrl)}.
 * Otherwise the result populated from the HEAD response (entity, headers, URL and status code)
 * is returned as is.
 *
 * <p>Before the request is sent, the calling thread sleeps while holding {@code mutex} if less
 * than {@code getConfig().getPolitenessDelay()} has passed since {@code lastFetchTime}.
 * The HEAD request is always aborted on the way out.
 *
 * @param webUrl the URL to probe and possibly fetch
 * @return the parent fetcher's result, or the result built from the HEAD response
 * @throws InterruptedException if the politeness sleep is interrupted
 * @throws IOException if executing the HEAD request fails
 * @throws PageBiggerThanMaxSizeException declared for the delegated {@code super.fetchPage} call
 */
@Override
public PageFetchResult fetchPage(WebURL webUrl) throws InterruptedException, IOException, PageBiggerThanMaxSizeException {
    final String targetUrl = webUrl.getURL();
    final PageFetchResult headResult = new PageFetchResult();
    HttpHead probe = null;
    try {
        probe = new HttpHead(targetUrl);

        // Politeness throttle: wait out whatever remains of the configured delay.
        synchronized (mutex) {
            final long elapsed = new Date().getTime() - this.lastFetchTime;
            if (elapsed < getConfig().getPolitenessDelay()) {
                Thread.sleep(getConfig().getPolitenessDelay() - elapsed);
            }
            this.lastFetchTime = new Date().getTime();
        }

        final HttpResponse headResponse = httpClient.execute(probe);
        headResult.setEntity(headResponse.getEntity());
        headResult.setResponseHeaders(headResponse.getAllHeaders());
        headResult.setFetchedUrl(targetUrl);
        headResult.setStatusCode(headResponse.getStatusLine().getStatusCode());

        // Lower-cased Content-Type value; stays empty when the header is absent or has no value.
        String loweredType = "";
        if (headResponse.containsHeader("Content-Type")) {
            final String rawType = headResponse.getFirstHeader("Content-Type").getValue();
            if (rawType != null) {
                loweredType = rawType.toLowerCase();
            }
        }

        final boolean mentionsHtml = loweredType.contains("text") && loweredType.contains("html");
        if (!loweredType.equals("") && !mentionsHtml) {
            // A declared, non-HTML type: hand back what the HEAD request produced.
            return headResult;
        }
        return super.fetchPage(webUrl);
    } finally {
        if (probe != null) {
            probe.abort();
        }
    }
}
Also used : PageFetchResult(edu.uci.ics.crawler4j.fetcher.PageFetchResult) HttpResponse(org.apache.http.HttpResponse) HttpHead(org.apache.http.client.methods.HttpHead) Date(java.util.Date)

Example 7 with HttpHead

use of org.apache.http.client.methods.HttpHead in project nifi by apache.

the class PostHTTP method getDestinationAcceptance.

/**
 * Sends an HTTP HEAD request to the destination and derives from the response which content
 * types and encodings the destination advertises.
 *
 * <p>Outcome by status code:
 * <ul>
 *   <li>{@code METHOD_NOT_ALLOWED}: FlowFile v1 is reported as accepted only when
 *       {@code sendAsFlowFile} is set; everything else is reported as unsupported.</li>
 *   <li>{@code OK}: the {@code ACCEPT} headers are inspected for FlowFile v3/v2 (only when
 *       {@code sendAsFlowFile} is set), the protocol-version header is parsed, and the
 *       {@code ACCEPT_ENCODING} headers are inspected for gzip.</li>
 *   <li>anything else: a warning is logged and nothing is reported as supported.</li>
 * </ul>
 *
 * @param sendAsFlowFile whether the caller intends to send data as a FlowFile
 * @param client HTTP client used to execute the HEAD request
 * @param uri destination URI
 * @param logger logger for the debug and warning messages
 * @param transactionId value for the transaction-id header; only sent when {@code sendAsFlowFile} is set
 * @return the capabilities derived from the HEAD response
 * @throws IOException if executing the HEAD request fails
 */
private DestinationAccepts getDestinationAcceptance(final boolean sendAsFlowFile, final HttpClient client, final String uri, final ComponentLog logger, final String transactionId) throws IOException {
    final HttpHead head = new HttpHead(uri);
    if (sendAsFlowFile) {
        head.addHeader(TRANSACTION_ID_HEADER, transactionId);
    }
    final HttpResponse response = client.execute(head);
    // we assume that the destination can support FlowFile v1 always when the processor is also configured to send as a FlowFile
    // otherwise, we do not bother to make any determinations concerning this compatibility
    final boolean acceptsFlowFileV1 = sendAsFlowFile;
    boolean acceptsFlowFileV2 = false;
    boolean acceptsFlowFileV3 = false;
    boolean acceptsGzip = false;
    Integer protocolVersion = null;
    final int statusCode = response.getStatusLine().getStatusCode();
    if (statusCode == Status.METHOD_NOT_ALLOWED.getStatusCode()) {
        return new DestinationAccepts(acceptsFlowFileV3, acceptsFlowFileV2, acceptsFlowFileV1, false, null);
    } else if (statusCode == Status.OK.getStatusCode()) {
        Header[] headers = response.getHeaders(ACCEPT);
        // If configured to send as a flowfile, determine the capabilities of the endpoint
        if (sendAsFlowFile) {
            if (headers != null) {
                for (final Header header : headers) {
                    for (final String accepted : header.getValue().split(",")) {
                        final String trimmed = accepted.trim();
                        if (trimmed.equals(APPLICATION_FLOW_FILE_V3)) {
                            acceptsFlowFileV3 = true;
                        } else if (trimmed.equals(APPLICATION_FLOW_FILE_V2)) {
                            acceptsFlowFileV2 = true;
                        }
                    }
                }
            }
            final Header destinationVersion = response.getFirstHeader(PROTOCOL_VERSION_HEADER);
            if (destinationVersion != null) {
                try {
                    protocolVersion = Integer.valueOf(destinationVersion.getValue());
                } catch (final NumberFormatException ignored) {
                    // nothing to do here really.... it's an invalid value, so treat the same as if not specified
                }
            }
            if (acceptsFlowFileV3) {
                logger.debug("Connection to URI " + uri + " will be using Content Type " + APPLICATION_FLOW_FILE_V3 + " if sending data as FlowFile");
            } else if (acceptsFlowFileV2) {
                logger.debug("Connection to URI " + uri + " will be using Content Type " + APPLICATION_FLOW_FILE_V2 + " if sending data as FlowFile");
            } else if (acceptsFlowFileV1) {
                logger.debug("Connection to URI " + uri + " will be using Content Type " + APPLICATION_FLOW_FILE_V1 + " if sending data as FlowFile");
            }
        }
        headers = response.getHeaders(ACCEPT_ENCODING);
        if (headers != null) {
            for (final Header header : headers) {
                for (final String accepted : header.getValue().split(",")) {
                    // Trim each token: a value such as "deflate, gzip" splits into " gzip",
                    // which would otherwise never match.
                    if (accepted.trim().equalsIgnoreCase("gzip")) {
                        acceptsGzip = true;
                    }
                }
            }
        }
        if (acceptsGzip) {
            logger.debug("Connection to URI " + uri + " indicates that inline GZIP compression is supported");
        } else {
            logger.debug("Connection to URI " + uri + " indicates that it does NOT support inline GZIP compression");
        }
        return new DestinationAccepts(acceptsFlowFileV3, acceptsFlowFileV2, acceptsFlowFileV1, acceptsGzip, protocolVersion);
    } else {
        logger.warn("Unable to communicate with destination; when attempting to perform an HTTP HEAD, got unexpected response code of " + statusCode + ": " + response.getStatusLine().getReasonPhrase());
        return new DestinationAccepts(false, false, false, false, null);
    }
}
Also used : Header(org.apache.http.Header) HttpResponse(org.apache.http.HttpResponse) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) HttpHead(org.apache.http.client.methods.HttpHead)

Example 8 with HttpHead

use of org.apache.http.client.methods.HttpHead in project groovity by disney.

the class HttpGroovitySourceLocator method getHttpGroovityScriptSource.

/**
 * Builds an {@link HttpGroovitySource} for the given URI from the result of an HTTP HEAD request.
 *
 * <p>The source's last-modified time is taken from the response's {@code Last-Modified} header.
 * When the header is missing, or its value cannot be parsed into a date, the current system
 * time is used instead. The source path is the URI path with the leading portion of
 * {@code base_uri}'s path (all but its last character) removed.
 *
 * @param uri location of the script source
 * @return a source descriptor for {@code uri}
 * @throws IOException if executing the HEAD request or closing the response fails
 */
protected HttpGroovitySource getHttpGroovityScriptSource(URI uri) throws IOException {
    HttpHead headReq = new HttpHead(uri);
    // try-with-resources closes the response and keeps a failure in the body as the primary exception.
    try (CloseableHttpResponse response = this.client.execute(headReq)) {
        if (log.isDebugEnabled()) {
            log.debug("Issued head request for " + uri + ", got " + response.getStatusLine());
        }
        long lastModified = System.currentTimeMillis();
        Header header = response.getFirstHeader("Last-Modified");
        if (header != null) {
            // NOTE(review): presumably Apache HttpClient's DateUtils, which returns null for an
            // unparseable value; guard so a malformed header falls back to "now" instead of an NPE.
            java.util.Date parsed = DateUtils.parseDate(header.getValue());
            if (parsed != null) {
                lastModified = parsed.getTime();
            }
        }
        String path = uri.getPath().substring(this.base_uri.getPath().length() - 1);
        if (log.isDebugEnabled()) {
            log.debug("getHttpGroovityScriptSource: path=" + path);
        }
        return new HttpGroovitySource(uri, path, lastModified, this);
    }
}
Also used : Header(org.apache.http.Header) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) HttpHead(org.apache.http.client.methods.HttpHead)

Example 9 with HttpHead

use of org.apache.http.client.methods.HttpHead in project knox by apache.

the class DefaultDispatch method doHead.

/**
 * Dispatches a HEAD request to the given URL: creates the outbound {@link HttpHead}, copies the
 * inbound request's header fields onto it via {@code copyRequestHeaderFields}, and hands it to
 * {@code executeRequest} together with the servlet request and response.
 *
 * @param url target of the outbound HEAD request
 * @param request the inbound servlet request
 * @param response the servlet response passed on to {@code executeRequest}
 * @throws IOException declared by the dispatch contract
 * @throws URISyntaxException declared by the dispatch contract
 */
@Override
public void doHead(URI url, HttpServletRequest request, HttpServletResponse response) throws IOException, URISyntaxException {
    final HttpHead outboundHead = new HttpHead(url);

    copyRequestHeaderFields(outboundHead, request);

    executeRequest(outboundHead, request, response);
}
Also used : HttpHead(org.apache.http.client.methods.HttpHead)

Example 10 with HttpHead

use of org.apache.http.client.methods.HttpHead in project tutorials by eugenp.

the class HttpClientUnshortenLiveTest method expandSingleLevelSafe.

/**
 * Resolves a single redirect level for {@code url} using an HTTP HEAD request.
 *
 * <p>If the response status is neither 301 nor 302, the status is returned together with the
 * original URL. Otherwise the status is returned together with the value of the single
 * {@code Location} header. An {@link IllegalArgumentException} raised while building or
 * executing the request is reported as status 500 with the original URL.
 *
 * @param url the URL to expand
 * @return a pair of (status code, resulting URL)
 * @throws IOException if executing the request or consuming the response entity fails
 * @throws IllegalStateException if a 301/302 response does not carry exactly one {@code Location} header
 */
private Pair<Integer, String> expandSingleLevelSafe(final String url) throws IOException {
    HttpHead request = null;
    HttpEntity httpEntity = null;
    try {
        request = new HttpHead(url);
        final HttpResponse httpResponse = client.execute(request);
        // May be null: a response to HEAD typically carries no body. The content stream is
        // never read here, so it is not opened; the entity is only kept for cleanup.
        httpEntity = httpResponse.getEntity();
        final int statusCode = httpResponse.getStatusLine().getStatusCode();
        if (statusCode != 301 && statusCode != 302) {
            return new ImmutablePair<>(statusCode, url);
        }
        final Header[] headers = httpResponse.getHeaders(HttpHeaders.LOCATION);
        Preconditions.checkState(headers.length == 1);
        final String newUrl = headers[0].getValue();
        return new ImmutablePair<>(statusCode, newUrl);
    } catch (final IllegalArgumentException uriEx) {
        return new ImmutablePair<>(500, url);
    } finally {
        // Consume the entity first, then release the connection; the nested finally makes
        // sure the connection is released even if consuming fails.
        try {
            if (httpEntity != null) {
                EntityUtils.consume(httpEntity);
            }
        } finally {
            if (request != null) {
                request.releaseConnection();
            }
        }
    }
}
Also used : HttpEntity(org.apache.http.HttpEntity) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Header(org.apache.http.Header) InputStream(java.io.InputStream) HttpResponse(org.apache.http.HttpResponse) HttpHead(org.apache.http.client.methods.HttpHead)

Aggregations

HttpHead (org.apache.http.client.methods.HttpHead): 100; HttpResponse (org.apache.http.HttpResponse): 40; HttpGet (org.apache.http.client.methods.HttpGet): 28; CloseableHttpResponse (org.apache.http.client.methods.CloseableHttpResponse): 25; Test (org.junit.Test): 24; IOException (java.io.IOException): 23; URI (java.net.URI): 22; HttpPut (org.apache.http.client.methods.HttpPut): 22; Header (org.apache.http.Header): 21; HttpPost (org.apache.http.client.methods.HttpPost): 21; CloseableHttpClient (org.apache.http.impl.client.CloseableHttpClient): 19; HttpRequestBase (org.apache.http.client.methods.HttpRequestBase): 15; HttpDelete (org.apache.http.client.methods.HttpDelete): 13; InputStream (java.io.InputStream): 12; HttpEntity (org.apache.http.HttpEntity): 10; File (java.io.File): 9; StringEntity (org.apache.http.entity.StringEntity): 9; HttpOptions (org.apache.http.client.methods.HttpOptions): 8; URISyntaxException (java.net.URISyntaxException): 6; URL (java.net.URL): 6