Search in sources :

Example 1 with DefaultHttpResponseParser

use of org.apache.http.impl.io.DefaultHttpResponseParser in project commoncrawl-examples by commoncrawl.

In the class ArcRecord, method getHttpResponse.

/**
 * <p>Parses the ARC record payload into an HTTP response object.</p>
 * <p>Parsing happens lazily on the first call; the resulting object is cached
 * in this record and handed back unchanged by every later call.</p>
 * <p>An error is logged and {@code null} is returned when the payload has not
 * been set, when the record URL is set but does not start with
 * {@code http://} or {@code https://}, when the end of the HTTP headers cannot
 * be located, or when the parser produces no response.</p>
 *
 * @return The ARC record payload as an HTTP response object (see the Apache
 * HttpComponents project), or {@code null} if it could not be parsed.
 * @throws IOException declared by this method; presumably raised by the
 * underlying parser while reading the header bytes.
 * @throws HttpException declared by this method; presumably raised by the
 * underlying parser when the status line or headers are malformed.
 */
public HttpResponse getHttpResponse() throws IOException, HttpException {
    // Serve the cached result when the payload has already been parsed.
    if (this._httpResponse != null) {
        return this._httpResponse;
    }

    final byte[] data = this._payload;
    if (data == null) {
        LOG.error("Unable to parse HTTP response: Payload has not been set");
        return null;
    }

    // A missing URL is tolerated; a URL with any other scheme is rejected.
    final boolean httpScheme = this._url == null
            || this._url.startsWith("http://")
            || this._url.startsWith("https://");
    if (!httpScheme) {
        LOG.error("Unable to parse HTTP response: URL protocol is not HTTP");
        return null;
    }

    // Locate the offset at which the HTTP headers stop.
    final int headerEnd = this._searchForCRLFCRLF(data);
    if (headerEnd == -1) {
        LOG.error("Unable to parse HTTP response: End of HTTP headers not found");
        return null;
    }

    // Parse the status line and headers from the leading part of the payload.
    ByteArraySessionInputBuffer headerBuffer = new ByteArraySessionInputBuffer(data, 0, headerEnd);
    DefaultHttpResponseParser headerParser = new DefaultHttpResponseParser(
            headerBuffer,
            new BasicLineParser(),
            new DefaultHttpResponseFactory(),
            new BasicHttpParams());
    HttpResponse parsed = headerParser.parse();
    if (parsed == null) {
        LOG.error("Unable to parse HTTP response");
        return null;
    }
    this._httpResponse = parsed;

    // Expose the rest of the payload as the HTTP entity.  The stream is a view
    // over the payload array, which avoids a memory copy.
    final int bodyLength = data.length - headerEnd;
    ByteArrayInputStream bodyStream = new ByteArrayInputStream(data, headerEnd, bodyLength);
    InputStreamEntity body = new InputStreamEntity(bodyStream, bodyLength);
    body.setContentType(parsed.getFirstHeader("Content-Type"));
    body.setContentEncoding(parsed.getFirstHeader("Content-Encoding"));
    parsed.setEntity(body);

    return parsed;
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DefaultHttpResponseParser(org.apache.http.impl.io.DefaultHttpResponseParser) DefaultHttpResponseFactory(org.apache.http.impl.DefaultHttpResponseFactory) BasicLineParser(org.apache.http.message.BasicLineParser) BasicHttpParams(org.apache.http.params.BasicHttpParams) InputStreamEntity(org.apache.http.entity.InputStreamEntity)

Example 2 with DefaultHttpResponseParser

use of org.apache.http.impl.io.DefaultHttpResponseParser in project hadoop-pcap by RIPE-NCC.

In the class HttpPcapReader, method processPacketPayload.

/**
 * Decodes the HTTP message carried by a reassembled TCP payload and copies
 * its headers onto the packet via {@code propagateHeaders}.
 *
 * <p>Nothing is done unless the packet's protocol is TCP, its source or
 * destination port equals {@code HTTP_PORT}, and it carries the
 * {@code REASSEMBLED_TCP_FRAGMENTS} key. A payload whose source port is
 * {@code HTTP_PORT} is parsed as an HTTP response; otherwise a payload whose
 * destination port is {@code HTTP_PORT} is parsed as an HTTP request.
 * Parsing failures are logged and otherwise ignored.</p>
 *
 * @param packet the packet being processed; cast to {@code HttpPacket} once
 *        the protocol has been confirmed as TCP
 * @param payload the raw payload bytes to decode
 */
@Override
protected void processPacketPayload(Packet packet, final byte[] payload) {
    String protocol = (String) packet.get(Packet.PROTOCOL);
    if (!PcapReader.PROTOCOL_TCP.equals(protocol)) {
        return;
    }

    HttpPacket httpPacket = (HttpPacket) packet;
    Integer srcPort = (Integer) packet.get(Packet.SRC_PORT);
    Integer dstPort = (Integer) packet.get(Packet.DST_PORT);

    // Compare port numbers by value and tolerate an absent port entry: using
    // '==' directly on the boxed Integer either unboxes (NullPointerException
    // when the key is missing) or compares object references.
    final boolean fromHttpPort = srcPort != null && srcPort.intValue() == HTTP_PORT;
    final boolean toHttpPort = dstPort != null && dstPort.intValue() == HTTP_PORT;
    if (!(fromHttpPort || toHttpPort) || !packet.containsKey(Packet.REASSEMBLED_TCP_FRAGMENTS)) {
        return;
    }

    // Session buffer reading straight from the in-memory payload.
    final SessionInputBuffer inBuf = new AbstractSessionInputBuffer() {

        {
            init(new ByteArrayInputStream(payload), 1024, params);
        }

        @Override
        public boolean isDataAvailable(int timeout) throws IOException {
            return true;
        }
    };

    if (fromHttpPort) {
        // The connection only needs an output buffer to satisfy init();
        // nothing in this method writes to it.
        final SessionOutputBuffer outBuf = new AbstractSessionOutputBuffer() {
        };
        HttpMessageParser<HttpResponse> parser = new DefaultHttpResponseParser(inBuf, null, respFactory, params);
        // Connection bound to the in-memory buffers; the open/not-open
        // assertions are overridden with empty bodies.
        HttpClientConnection conn = new DefaultClientConnection() {

            {
                init(inBuf, outBuf, params);
            }

            @Override
            protected void assertNotOpen() {
            }

            @Override
            protected void assertOpen() {
            }
        };
        try {
            HttpResponse response = parser.parse();
            conn.receiveResponseEntity(response);
            propagateHeaders(httpPacket, response.getAllHeaders());
        } catch (IOException e) {
            LOG.error("IOException when decoding HTTP response", e);
        } catch (HttpException e) {
            LOG.error("HttpException when decoding HTTP response", e);
        }
    } else {
        // Only the destination port matched, so treat the payload as a request.
        HttpMessageParser<HttpRequest> parser = new DefaultHttpRequestParser(inBuf, null, reqFactory, params);
        try {
            HttpRequest request = parser.parse();
            propagateHeaders(httpPacket, request.getAllHeaders());
        } catch (IOException e) {
            LOG.error("IOException when decoding HTTP request", e);
        } catch (HttpException e) {
            LOG.error("HttpException when decoding HTTP request", e);
        }
    }
}
Also used : HttpRequest(org.apache.http.HttpRequest) SessionInputBuffer(org.apache.http.io.SessionInputBuffer) AbstractSessionInputBuffer(org.apache.http.impl.io.AbstractSessionInputBuffer) DefaultClientConnection(org.apache.http.impl.conn.DefaultClientConnection) HttpPacket(net.ripe.hadoop.pcap.packet.HttpPacket) HttpClientConnection(org.apache.http.HttpClientConnection) HttpMessageParser(org.apache.http.io.HttpMessageParser) HttpResponse(org.apache.http.HttpResponse) IOException(java.io.IOException) SessionOutputBuffer(org.apache.http.io.SessionOutputBuffer) AbstractSessionOutputBuffer(org.apache.http.impl.io.AbstractSessionOutputBuffer) ByteArrayInputStream(java.io.ByteArrayInputStream) DefaultHttpResponseParser(org.apache.http.impl.io.DefaultHttpResponseParser) HttpException(org.apache.http.HttpException) DefaultHttpRequestParser(org.apache.http.impl.io.DefaultHttpRequestParser)

Aggregations

ByteArrayInputStream (java.io.ByteArrayInputStream)2 DefaultHttpResponseParser (org.apache.http.impl.io.DefaultHttpResponseParser)2 IOException (java.io.IOException)1 HttpPacket (net.ripe.hadoop.pcap.packet.HttpPacket)1 HttpClientConnection (org.apache.http.HttpClientConnection)1 HttpException (org.apache.http.HttpException)1 HttpRequest (org.apache.http.HttpRequest)1 HttpResponse (org.apache.http.HttpResponse)1 InputStreamEntity (org.apache.http.entity.InputStreamEntity)1 DefaultHttpResponseFactory (org.apache.http.impl.DefaultHttpResponseFactory)1 DefaultClientConnection (org.apache.http.impl.conn.DefaultClientConnection)1 AbstractSessionInputBuffer (org.apache.http.impl.io.AbstractSessionInputBuffer)1 AbstractSessionOutputBuffer (org.apache.http.impl.io.AbstractSessionOutputBuffer)1 DefaultHttpRequestParser (org.apache.http.impl.io.DefaultHttpRequestParser)1 HttpMessageParser (org.apache.http.io.HttpMessageParser)1 SessionInputBuffer (org.apache.http.io.SessionInputBuffer)1 SessionOutputBuffer (org.apache.http.io.SessionOutputBuffer)1 BasicLineParser (org.apache.http.message.BasicLineParser)1 BasicHttpParams (org.apache.http.params.BasicHttpParams)1