Use of com.twinsoft.convertigo.beans.extractionrules.siteclipper.IRequestRule in the project convertigo, by convertigo.
From the class SiteClipperScreenClass, method getRequestRules.
/**
 * Collects the extraction rules of this screen class that are request rules.
 *
 * @return a new list holding, in the order given by {@code getExtractionRules()},
 *         every rule that implements {@link IRequestRule}; empty when there is none
 */
public List<IRequestRule> getRequestRules() {
    final List<ExtractionRule> allRules = getExtractionRules();
    // Sized for the worst case where every extraction rule is a request rule.
    final List<IRequestRule> selected = new ArrayList<IRequestRule>(allRules.size());
    for (ExtractionRule candidate : allRules) {
        if (!(candidate instanceof IRequestRule)) {
            continue;
        }
        selected.add((IRequestRule) candidate);
    }
    return selected;
}
Use of com.twinsoft.convertigo.beans.extractionrules.siteclipper.IRequestRule in the project convertigo, by convertigo.
From the class SiteClipperConnector, method doProcessRequest.
/**
 * Relays the client request carried by {@code shuttle} to the target host and writes the
 * (possibly rewritten) response back to {@code shuttle.response}.
 * <p>
 * The method works in this order:
 * <ol>
 * <li>rejects the request with a 403 when {@code shouldRewrite(domain)} is false;</li>
 * <li>obtains the HTTP method, either from the XUL recorder (when one is present and holds a
 * record for the URL) or by building a new one, copying the request headers, applying the
 * enabled request rules of the current screen class, copying the query string and executing
 * it;</li>
 * <li>copies the response status and headers, then applies the enabled response rules;</li>
 * <li>injects the post-instruction scripts into the response when
 * {@code shuttle.postInstructions} is set;</li>
 * <li>writes the response body, re-encoding it when the textual content was modified;</li>
 * <li>in all cases stops the statistics task and, when {@code context.requestedObject} is set,
 * tries to insert a billing ticket.</li>
 * </ol>
 *
 * @param shuttle holder of the servlet request/response pair and of the state shared with the
 *                request and response rules
 * @throws IOException      if reading the request, executing the HTTP method or writing the
 *                          response fails
 * @throws ServletException if the HTTP method cannot be built (including an unsupported HTTP
 *                          verb); the original exception is attached as the cause
 * @throws EngineException  declared by the signature; presumably raised by helpers called here
 *                          (not visible in this method) - TODO confirm
 */
private void doProcessRequest(Shuttle shuttle) throws IOException, ServletException, EngineException {
    shuttle.statisticsTaskID = context.statistics.start(EngineStatistics.GET_DOCUMENT);
    try {
        shuttle.sharedScope = context.getSharedScope();
        String domain = shuttle.getRequest(QueryPart.host) + shuttle.getRequest(QueryPart.port);
        Engine.logSiteClipper.trace("(SiteClipperConnector) Prepare the request for the domain " + domain);
        if (!shouldRewrite(domain)) {
            Engine.logSiteClipper.info("(SiteClipperConnector) The domain " + domain + " is not allowed with this connector");
            shuttle.response.sendError(HttpServletResponse.SC_FORBIDDEN, "The domain " + domain + " is not allowed with this connector");
            return;
        }
        String uri = shuttle.getRequest(QueryPart.uri);
        Engine.logSiteClipper.info("Preparing " + shuttle.request.getMethod() + " " + shuttle.getRequestUrl());

        // A recorded method, when available, replaces the live request entirely.
        HttpMethod httpMethod = null;
        XulRecorder xulRecorder = context.getXulRecorder();
        if (xulRecorder != null) {
            httpMethod = shuttle.httpMethod = xulRecorder.getRecord(shuttle.getRequestUrlAndQuery());
        }
        if (httpMethod == null) {
            try {
                switch (shuttle.getRequestHttpMethodType()) {
                case GET:
                    httpMethod = new GetMethod(uri);
                    break;
                case POST:
                    httpMethod = new PostMethod(uri);
                    ((PostMethod) httpMethod).setRequestEntity(new InputStreamRequestEntity(shuttle.request.getInputStream()));
                    break;
                case PUT:
                    httpMethod = new PutMethod(uri);
                    ((PutMethod) httpMethod).setRequestEntity(new InputStreamRequestEntity(shuttle.request.getInputStream()));
                    break;
                case DELETE:
                    httpMethod = new DeleteMethod(uri);
                    break;
                case HEAD:
                    httpMethod = new HeadMethod(uri);
                    break;
                case OPTIONS:
                    httpMethod = new OptionsMethod(uri);
                    break;
                case TRACE:
                    httpMethod = new TraceMethod(uri);
                    break;
                default:
                    throw new ServletException("(SiteClipperConnector) unknown http method " + shuttle.request.getMethod());
                }
                httpMethod.setFollowRedirects(false);
            } catch (Exception e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new ServletException("(SiteClipperConnector) unexpected exception will building the http method : " + e.getMessage(), e);
            }
            shuttle.httpMethod = httpMethod;

            SiteClipperScreenClass screenClass = getCurrentScreenClass();
            Engine.logSiteClipper.info("Request screen class: " + screenClass.getName());

            // Copy the incoming headers into the shuttle, except the ones listed as ignored.
            for (String name : Collections.list(GenericUtils.<Enumeration<String>>cast(shuttle.request.getHeaderNames()))) {
                if (requestHeadersToIgnore.contains(HeaderName.parse(name))) {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) Ignoring request header " + name);
                } else {
                    String value = shuttle.request.getHeader(name);
                    Engine.logSiteClipper.trace("(SiteClipperConnector) Copying request header " + name + "=" + value);
                    shuttle.setRequestCustomHeader(name, value);
                }
            }

            Engine.logSiteClipper.debug("(SiteClipperConnector) applying request rules for the screenclass " + screenClass.getName());
            for (IRequestRule rule : screenClass.getRequestRules()) {
                if (rule.isEnabled()) {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) applying request rule " + rule.getName());
                    rule.fireEvents();
                    boolean done = rule.applyOnRequest(shuttle);
                    Engine.logSiteClipper.debug("(SiteClipperConnector) the request rule " + rule.getName() + " is " + (done ? "well" : "not") + " applied");
                } else {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) skip the disabled request rule " + rule.getName());
                }
            }

            // Headers are pushed after the rules ran, so rules can alter shuttle.requestCustomHeaders first.
            for (Entry<String, String> header : shuttle.requestCustomHeaders.entrySet()) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) Push request header " + header.getKey() + "=" + header.getValue());
                httpMethod.addRequestHeader(header.getKey(), header.getValue());
            }

            String queryString = shuttle.request.getQueryString();
            if (queryString != null) {
                try {
                    // Fake test in order to check query string validity
                    new URI("http://localhost/index?" + queryString, true, httpMethod.getParams().getUriCharset());
                } catch (URIException e) {
                    // Bugfix #2103: the query is not a valid escaped URI part, so decode and
                    // re-encode every name and value as UTF-8.
                    StringBuilder newQuery = new StringBuilder();
                    for (String part : RegexpUtils.pattern_and.split(queryString)) {
                        String[] pair = RegexpUtils.pattern_equals.split(part, 2);
                        try {
                            newQuery.append('&').append(URLEncoder.encode(URLDecoder.decode(pair[0], "UTF-8"), "UTF-8"));
                            if (pair.length > 1) {
                                newQuery.append('=').append(URLEncoder.encode(URLDecoder.decode(pair[1], "UTF-8"), "UTF-8"));
                            }
                        } catch (UnsupportedEncodingException ee) {
                            Engine.logSiteClipper.trace("(SiteClipperConnector) failed to encode query part : " + part);
                        }
                    }
                    // Drop the leading '&' added before the first pair.
                    queryString = newQuery.length() > 0 ? newQuery.substring(1) : newQuery.toString();
                    Engine.logSiteClipper.trace("(SiteClipperConnector) re-encode query : " + queryString);
                }
            }
            Engine.logSiteClipper.debug("(SiteClipperConnector) Copying the query string : " + queryString);
            httpMethod.setQueryString(queryString);

            // The return value is unused; presumably this call makes sure context.httpState is
            // ready before executeMethod below - TODO confirm against getHttpState.
            getHttpState(shuttle);
            HostConfiguration hostConfiguration = getHostConfiguration(shuttle);
            HttpMethodParams httpMethodParams = httpMethod.getParams();
            httpMethodParams.setBooleanParameter("http.connection.stalecheck", true);
            httpMethodParams.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, true));
            Engine.logSiteClipper.info("Requesting " + httpMethod.getName() + " " + hostConfiguration.getHostURL() + httpMethod.getURI().toString());
            HttpClient httpClient = context.getHttpClient3(shuttle.getHttpPool());
            HttpUtils.logCurrentHttpConnection(httpClient, hostConfiguration, shuttle.getHttpPool());
            httpClient.executeMethod(hostConfiguration, httpMethod, context.httpState);
        } else {
            Engine.logSiteClipper.info("Retrieve recorded response from Context");
        }

        int status = httpMethod.getStatusCode();
        shuttle.processState = ProcessState.response;
        Engine.logSiteClipper.info("Request terminated with status " + status);
        shuttle.response.setStatus(status);
        if (Engine.isStudioMode() && status == HttpServletResponse.SC_OK && shuttle.getResponseMimeType().startsWith("text/")) {
            fireDataChanged(new ConnectorEvent(this, shuttle.getResponseAsString()));
        }

        // The screen class is looked up again now that the shuttle is in the response state.
        SiteClipperScreenClass screenClass = getCurrentScreenClass();
        Engine.logSiteClipper.info("Response screen class: " + screenClass.getName());
        if (Engine.isStudioMode()) {
            Engine.theApp.fireObjectDetected(new EngineEvent(screenClass));
        }

        for (Header header : httpMethod.getResponseHeaders()) {
            String name = header.getName();
            if (responseHeadersToIgnore.contains(HeaderName.parse(name))) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) Ignoring response header " + name);
            } else {
                String value = header.getValue();
                Engine.logSiteClipper.trace("(SiteClipperConnector) Copying response header " + name + "=" + value);
                shuttle.responseCustomHeaders.put(name, value);
            }
        }

        // Content-Length announced by the target, read before the response rules run.
        String contentLength = HeaderName.ContentLength.getResponseHeader(httpMethod);

        Engine.logSiteClipper.debug("(SiteClipperConnector) applying response rules for the screenclass " + screenClass.getName());
        for (IResponseRule rule : screenClass.getResponseRules()) {
            if (rule.isEnabled()) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) applying response rule " + rule.getName());
                rule.fireEvents();
                boolean done = rule.applyOnResponse(shuttle);
                Engine.logSiteClipper.debug("(SiteClipperConnector) the response rule " + rule.getName() + " is " + (done ? "well" : "not") + " applied");
            } else {
                Engine.logSiteClipper.trace("(SiteClipperConnector) skip the disabled response rule " + rule.getName());
            }
        }

        for (Entry<String, String> header : shuttle.responseCustomHeaders.entrySet()) {
            Engine.logSiteClipper.trace("(SiteClipperConnector) Push response header " + header.getKey() + "=" + header.getValue());
            shuttle.response.addHeader(header.getKey(), header.getValue());
        }

        if (shuttle.postInstructions != null) {
            JSONArray instructions = new JSONArray();
            for (IClientInstruction instruction : shuttle.postInstructions) {
                try {
                    instructions.put(instruction.getInstruction());
                } catch (JSONException e) {
                    Engine.logSiteClipper.error("(SiteClipperConnector) Failed to add a post instruction due to a JSONException", e);
                }
            }
            String codeToInject = "<script>C8O_postInstructions = " + instructions.toString() + "</script>\n" + "<script src=\"" + shuttle.getRequest(QueryPart.full_convertigo_path) + "/scripts/jquery.min.js\"></script>\n" + "<script src=\"" + shuttle.getRequest(QueryPart.full_convertigo_path) + "/scripts/siteclipper.js\"></script>\n";
            String content = shuttle.getResponseAsString();
            // Try the head_top location first and fall back to body_top.
            Matcher matcher = HtmlLocation.head_top.matcher(content);
            String newContent = RegexpUtils.inject(matcher, codeToInject);
            if (newContent == null) {
                matcher = HtmlLocation.body_top.matcher(content);
                newContent = RegexpUtils.inject(matcher, codeToInject);
            }
            if (newContent != null) {
                shuttle.setResponseAsString(newContent);
            } else {
                Engine.logSiteClipper.info("(SiteClipperConnector) Failed to find a head or body tag in the response content");
                Engine.logSiteClipper.trace("(SiteClipperConnector) Response content : \"" + content + "\"");
            }
        }

        long nbBytes = 0L;
        String responseContentLength = HeaderName.ContentLength.getHeader(shuttle.response);
        // Compare the content itself: two different strings may share the same hash code, in
        // which case a hash comparison would send the unmodified body.
        if (shuttle.responseAsString != null && !shuttle.responseAsString.equals(shuttle.responseAsStringOriginal)) {
            OutputStream os = shuttle.response.getOutputStream();
            shuttle.responseAsByte = shuttle.responseAsString.getBytes(shuttle.getResponseCharset());
            // nbBytes is the size before any gzip/deflate compression.
            nbBytes = shuttle.responseAsByte.length;
            Deflater deflater = null;
            switch (shuttle.getResponseContentEncoding()) {
            case gzip:
                os = new GZIPOutputStream(os);
                break;
            case deflate:
                deflater = new Deflater(Deflater.DEFAULT_COMPRESSION | Deflater.DEFAULT_STRATEGY, true);
                os = new DeflaterOutputStream(os, deflater);
                break;
            default:
                // Content-Length is only set here for the non-compressed case.
                if (responseContentLength == null) {
                    HeaderName.ContentLength.setHeader(shuttle.response, "" + nbBytes);
                }
                break;
            }
            try {
                IOUtils.write(shuttle.responseAsByte, os);
                os.close();
            } finally {
                // DeflaterOutputStream.close() does not end a caller-supplied Deflater.
                if (deflater != null) {
                    deflater.end();
                }
            }
        } else {
            InputStream is;
            if (shuttle.responseAsByte == null) {
                if (responseContentLength == null && contentLength != null) {
                    HeaderName.ContentLength.setHeader(shuttle.response, contentLength);
                }
                is = httpMethod.getResponseBodyAsStream();
            } else {
                if (responseContentLength == null) {
                    HeaderName.ContentLength.setHeader(shuttle.response, "" + shuttle.responseAsByte.length);
                }
                is = new ByteArrayInputStream(shuttle.responseAsByte);
            }
            if (is != null) {
                nbBytes = StreamUtils.copyAutoFlush(is, shuttle.response.getOutputStream());
                Engine.logSiteClipper.trace("(SiteClipperConnector) Response body copyied (" + nbBytes + " bytes)");
            }
        }
        shuttle.response.getOutputStream().close();
        shuttle.score = getScore(nbBytes);
        Engine.logSiteClipper.debug("(SiteClipperConnector) Request terminated with a score of " + shuttle.score);
    } finally {
        long duration = context.statistics.stop(shuttle.statisticsTaskID);
        if (context.requestedObject != null) {
            try {
                Engine.theApp.billingManager.insertBilling(context, Long.valueOf(duration), Long.valueOf(shuttle.score));
            } catch (Exception e) {
                // A billing failure is only logged; it does not fail the request.
                Engine.logContext.warn("Unable to insert billing ticket (the billing is thus ignored): [" + e.getClass().getName() + "] " + e.getMessage());
            }
        }
    }
}
Aggregations