Example use of org.apache.http.impl.DefaultHttpResponseFactory in the commoncrawl-examples project by commoncrawl.
The example is the getHttpResponse method of the class ArcRecord.
/**
 * Returns the ARC record payload parsed as an HTTP response.
 *
 * <p>The payload is parsed on demand: the first successful call parses it and
 * caches the resulting response object, and every later call returns that
 * same cached object. The attached entity is backed directly by the payload
 * array (no copy is made) and is repeatable, so the body of the cached
 * response can be read again by later callers.</p>
 *
 * @return the ARC record payload as an HTTP response object (see the Apache
 *         HttpComponents project), or {@code null} if the payload has not
 *         been set, the record URL does not start with {@code http://} or
 *         {@code https://}, the end of the HTTP headers cannot be found, or
 *         the parser produces no response; the reason is logged as an error
 * @throws IOException   propagated from parsing the status line and headers
 * @throws HttpException propagated from parsing the status line and headers
 */
public HttpResponse getHttpResponse() throws IOException, HttpException {
    // Serve the cached result when the payload has already been parsed.
    if (this._httpResponse != null) {
        return this._httpResponse;
    }
    if (this._payload == null) {
        LOG.error("Unable to parse HTTP response: Payload has not been set");
        return null;
    }
    // A record without a URL is still attempted; only a known non-HTTP URL is rejected.
    if (this._url != null && !this._url.startsWith("http://") && !this._url.startsWith("https://")) {
        LOG.error("Unable to parse HTTP response: URL protocol is not HTTP");
        return null;
    }
    // Find where the HTTP headers stop
    int end = this._searchForCRLFCRLF(this._payload);
    if (end == -1) {
        LOG.error("Unable to parse HTTP response: End of HTTP headers not found");
        return null;
    }
    // Parse the HTTP status line and headers
    DefaultHttpResponseParser parser = new DefaultHttpResponseParser(
            new ByteArraySessionInputBuffer(this._payload, 0, end),
            new BasicLineParser(),
            new DefaultHttpResponseFactory(),
            new BasicHttpParams());
    HttpResponse response = parser.parse();
    if (response == null) {
        LOG.error("Unable to parse HTTP response");
        return null;
    }
    // Set the rest of the payload as the HTTP entity. A ByteArrayEntity over
    // the (offset, length) slice keeps a reference to the payload array, so
    // there is still no memory copy, and unlike a stream-backed entity it is
    // repeatable: the cached response stays readable after its body has been
    // consumed once. The class name is fully qualified so that no additional
    // import is required.
    int bodyLength = this._payload.length - end;
    org.apache.http.entity.ByteArrayEntity entity =
            new org.apache.http.entity.ByteArrayEntity(this._payload, end, bodyLength);
    entity.setContentType(response.getFirstHeader("Content-Type"));
    entity.setContentEncoding(response.getFirstHeader("Content-Encoding"));
    response.setEntity(entity);
    // Populate the cache only once the response is complete, so a failure
    // part-way through can never leave a response without an entity cached.
    this._httpResponse = response;
    return response;
}
Aggregations