Search in sources :

Example 1 with XulRecorder

use of com.twinsoft.convertigo.engine.parsers.XulRecorder in project convertigo by convertigo.

In the class SiteClipperConnector, method doProcessRequest.

private void doProcessRequest(Shuttle shuttle) throws IOException, ServletException, EngineException {
    shuttle.statisticsTaskID = context.statistics.start(EngineStatistics.GET_DOCUMENT);
    try {
        shuttle.sharedScope = context.getSharedScope();
        String domain = shuttle.getRequest(QueryPart.host) + shuttle.getRequest(QueryPart.port);
        Engine.logSiteClipper.trace("(SiteClipperConnector) Prepare the request for the domain " + domain);
        if (!shouldRewrite(domain)) {
            Engine.logSiteClipper.info("(SiteClipperConnector) The domain " + domain + " is not allowed with this connector");
            shuttle.response.sendError(HttpServletResponse.SC_FORBIDDEN, "The domain " + domain + " is not allowed with this connector");
            return;
        }
        String uri = shuttle.getRequest(QueryPart.uri);
        Engine.logSiteClipper.info("Preparing " + shuttle.request.getMethod() + " " + shuttle.getRequestUrl());
        HttpMethod httpMethod = null;
        XulRecorder xulRecorder = context.getXulRecorder();
        if (xulRecorder != null) {
            httpMethod = shuttle.httpMethod = xulRecorder.getRecord(shuttle.getRequestUrlAndQuery());
        }
        if (httpMethod == null) {
            try {
                switch(shuttle.getRequestHttpMethodType()) {
                    case GET:
                        httpMethod = new GetMethod(uri);
                        break;
                    case POST:
                        httpMethod = new PostMethod(uri);
                        ((PostMethod) httpMethod).setRequestEntity(new InputStreamRequestEntity(shuttle.request.getInputStream()));
                        break;
                    case PUT:
                        httpMethod = new PutMethod(uri);
                        ((PutMethod) httpMethod).setRequestEntity(new InputStreamRequestEntity(shuttle.request.getInputStream()));
                        break;
                    case DELETE:
                        httpMethod = new DeleteMethod(uri);
                        break;
                    case HEAD:
                        httpMethod = new HeadMethod(uri);
                        break;
                    case OPTIONS:
                        httpMethod = new OptionsMethod(uri);
                        break;
                    case TRACE:
                        httpMethod = new TraceMethod(uri);
                        break;
                    default:
                        throw new ServletException("(SiteClipperConnector) unknown http method " + shuttle.request.getMethod());
                }
                httpMethod.setFollowRedirects(false);
            } catch (Exception e) {
                throw new ServletException("(SiteClipperConnector) unexpected exception will building the http method : " + e.getMessage());
            }
            shuttle.httpMethod = httpMethod;
            SiteClipperScreenClass screenClass = getCurrentScreenClass();
            Engine.logSiteClipper.info("Request screen class: " + screenClass.getName());
            for (String name : Collections.list(GenericUtils.<Enumeration<String>>cast(shuttle.request.getHeaderNames()))) {
                if (requestHeadersToIgnore.contains(HeaderName.parse(name))) {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) Ignoring request header " + name);
                } else {
                    String value = shuttle.request.getHeader(name);
                    Engine.logSiteClipper.trace("(SiteClipperConnector) Copying request header " + name + "=" + value);
                    shuttle.setRequestCustomHeader(name, value);
                }
            }
            Engine.logSiteClipper.debug("(SiteClipperConnector) applying request rules for the screenclass " + screenClass.getName());
            for (IRequestRule rule : screenClass.getRequestRules()) {
                if (rule.isEnabled()) {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) applying request rule " + rule.getName());
                    rule.fireEvents();
                    boolean done = rule.applyOnRequest(shuttle);
                    Engine.logSiteClipper.debug("(SiteClipperConnector) the request rule " + rule.getName() + " is " + (done ? "well" : "not") + " applied");
                } else {
                    Engine.logSiteClipper.trace("(SiteClipperConnector) skip the disabled request rule " + rule.getName());
                }
            }
            for (Entry<String, String> header : shuttle.requestCustomHeaders.entrySet()) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) Push request header " + header.getKey() + "=" + header.getValue());
                httpMethod.addRequestHeader(header.getKey(), header.getValue());
            }
            String queryString = shuttle.request.getQueryString();
            if (queryString != null) {
                try {
                    // Fake test in order to check query string validity
                    new URI("http://localhost/index?" + queryString, true, httpMethod.getParams().getUriCharset());
                } catch (URIException e) {
                    // Bugfix #2103
                    StringBuffer newQuery = new StringBuffer();
                    for (String part : RegexpUtils.pattern_and.split(queryString)) {
                        String[] pair = RegexpUtils.pattern_equals.split(part, 2);
                        try {
                            newQuery.append('&').append(URLEncoder.encode(URLDecoder.decode(pair[0], "UTF-8"), "UTF-8"));
                            if (pair.length > 1) {
                                newQuery.append('=').append(URLEncoder.encode(URLDecoder.decode(pair[1], "UTF-8"), "UTF-8"));
                            }
                        } catch (UnsupportedEncodingException ee) {
                            Engine.logSiteClipper.trace("(SiteClipperConnector) failed to encode query part : " + part);
                        }
                    }
                    queryString = newQuery.length() > 0 ? newQuery.substring(1) : newQuery.toString();
                    Engine.logSiteClipper.trace("(SiteClipperConnector) re-encode query : " + queryString);
                }
            }
            Engine.logSiteClipper.debug("(SiteClipperConnector) Copying the query string : " + queryString);
            httpMethod.setQueryString(queryString);
            // if (context.httpState == null) {
            // Engine.logSiteClipper.debug("(SiteClipperConnector) Creating new HttpState for context id " + context.contextID);
            // context.httpState = new HttpState();
            // } else {
            // Engine.logSiteClipper.debug("(SiteClipperConnector) Using HttpState of context id " + context.contextID);
            // }
            getHttpState(shuttle);
            HostConfiguration hostConfiguration = getHostConfiguration(shuttle);
            HttpMethodParams httpMethodParams = httpMethod.getParams();
            httpMethodParams.setBooleanParameter("http.connection.stalecheck", true);
            httpMethodParams.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, true));
            Engine.logSiteClipper.info("Requesting " + httpMethod.getName() + " " + hostConfiguration.getHostURL() + httpMethod.getURI().toString());
            HttpClient httpClient = context.getHttpClient3(shuttle.getHttpPool());
            HttpUtils.logCurrentHttpConnection(httpClient, hostConfiguration, shuttle.getHttpPool());
            httpClient.executeMethod(hostConfiguration, httpMethod, context.httpState);
        } else {
            Engine.logSiteClipper.info("Retrieve recorded response from Context");
        }
        int status = httpMethod.getStatusCode();
        shuttle.processState = ProcessState.response;
        Engine.logSiteClipper.info("Request terminated with status " + status);
        shuttle.response.setStatus(status);
        if (Engine.isStudioMode() && status == HttpServletResponse.SC_OK && shuttle.getResponseMimeType().startsWith("text/")) {
            fireDataChanged(new ConnectorEvent(this, shuttle.getResponseAsString()));
        }
        SiteClipperScreenClass screenClass = getCurrentScreenClass();
        Engine.logSiteClipper.info("Response screen class: " + screenClass.getName());
        if (Engine.isStudioMode()) {
            Engine.theApp.fireObjectDetected(new EngineEvent(screenClass));
        }
        for (Header header : httpMethod.getResponseHeaders()) {
            String name = header.getName();
            if (responseHeadersToIgnore.contains(HeaderName.parse(name))) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) Ignoring response header " + name);
            } else {
                String value = header.getValue();
                Engine.logSiteClipper.trace("(SiteClipperConnector) Copying response header " + name + "=" + value);
                shuttle.responseCustomHeaders.put(name, value);
            }
        }
        String contentLength = HeaderName.ContentLength.getResponseHeader(httpMethod);
        Engine.logSiteClipper.debug("(SiteClipperConnector) applying response rules for the screenclass " + screenClass.getName());
        for (IResponseRule rule : screenClass.getResponseRules()) {
            if (rule.isEnabled()) {
                Engine.logSiteClipper.trace("(SiteClipperConnector) applying response rule " + rule.getName());
                rule.fireEvents();
                boolean done = rule.applyOnResponse(shuttle);
                Engine.logSiteClipper.debug("(SiteClipperConnector) the response rule " + rule.getName() + " is " + (done ? "well" : "not") + " applied");
            } else {
                Engine.logSiteClipper.trace("(SiteClipperConnector) skip the disabled response rule " + rule.getName());
            }
        }
        for (Entry<String, String> header : shuttle.responseCustomHeaders.entrySet()) {
            Engine.logSiteClipper.trace("(SiteClipperConnector) Push request header " + header.getKey() + "=" + header.getValue());
            shuttle.response.addHeader(header.getKey(), header.getValue());
        }
        if (shuttle.postInstructions != null) {
            JSONArray instructions = new JSONArray();
            for (IClientInstruction instruction : shuttle.postInstructions) {
                try {
                    instructions.put(instruction.getInstruction());
                } catch (JSONException e) {
                    Engine.logSiteClipper.error("(SiteClipperConnector) Failed to add a post instruction due to a JSONException", e);
                }
            }
            String codeToInject = "<script>C8O_postInstructions = " + instructions.toString() + "</script>\n" + "<script src=\"" + shuttle.getRequest(QueryPart.full_convertigo_path) + "/scripts/jquery.min.js\"></script>\n" + "<script src=\"" + shuttle.getRequest(QueryPart.full_convertigo_path) + "/scripts/siteclipper.js\"></script>\n";
            String content = shuttle.getResponseAsString();
            Matcher matcher = HtmlLocation.head_top.matcher(content);
            String newContent = RegexpUtils.inject(matcher, codeToInject);
            if (newContent == null) {
                matcher = HtmlLocation.body_top.matcher(content);
                newContent = RegexpUtils.inject(matcher, codeToInject);
            }
            if (newContent != null) {
                shuttle.setResponseAsString(newContent);
            } else {
                Engine.logSiteClipper.info("(SiteClipperConnector) Failed to find a head or body tag in the response content");
                Engine.logSiteClipper.trace("(SiteClipperConnector) Response content : \"" + content + "\"");
            }
        }
        long nbBytes = 0L;
        String responseContentLength = HeaderName.ContentLength.getHeader(shuttle.response);
        if (shuttle.responseAsString != null && shuttle.responseAsString.hashCode() != shuttle.responseAsStringOriginal.hashCode()) {
            OutputStream os = shuttle.response.getOutputStream();
            shuttle.responseAsByte = shuttle.responseAsString.getBytes(shuttle.getResponseCharset());
            nbBytes = shuttle.responseAsByte.length;
            switch(shuttle.getResponseContentEncoding()) {
                case gzip:
                    os = new GZIPOutputStream(os);
                    break;
                case deflate:
                    os = new DeflaterOutputStream(os, new Deflater(Deflater.DEFAULT_COMPRESSION | Deflater.DEFAULT_STRATEGY, true));
                    break;
                default:
                    if (responseContentLength == null) {
                        HeaderName.ContentLength.setHeader(shuttle.response, "" + nbBytes);
                    }
                    break;
            }
            IOUtils.write(shuttle.responseAsByte, os);
            os.close();
        } else {
            InputStream is;
            if (shuttle.responseAsByte == null) {
                if (responseContentLength == null && contentLength != null) {
                    HeaderName.ContentLength.setHeader(shuttle.response, contentLength);
                }
                is = httpMethod.getResponseBodyAsStream();
            } else {
                if (responseContentLength == null) {
                    HeaderName.ContentLength.setHeader(shuttle.response, "" + shuttle.responseAsByte.length);
                }
                is = new ByteArrayInputStream(shuttle.responseAsByte);
            }
            if (is != null) {
                nbBytes = StreamUtils.copyAutoFlush(is, shuttle.response.getOutputStream());
                Engine.logSiteClipper.trace("(SiteClipperConnector) Response body copyied (" + nbBytes + " bytes)");
            }
        }
        shuttle.response.getOutputStream().close();
        shuttle.score = getScore(nbBytes);
        Engine.logSiteClipper.debug("(SiteClipperConnector) Request terminated with a score of " + shuttle.score);
    } finally {
        long duration = context.statistics.stop(shuttle.statisticsTaskID);
        if (context.requestedObject != null) {
            try {
                Engine.theApp.billingManager.insertBilling(context, Long.valueOf(duration), Long.valueOf(shuttle.score));
            } catch (Exception e) {
                Engine.logContext.warn("Unable to insert billing ticket (the billing is thus ignored): [" + e.getClass().getName() + "] " + e.getMessage());
            }
        }
    }
}
Also used : InputStreamRequestEntity(org.apache.commons.httpclient.methods.InputStreamRequestEntity) PostMethod(org.apache.commons.httpclient.methods.PostMethod) Matcher(java.util.regex.Matcher) GZIPOutputStream(java.util.zip.GZIPOutputStream) DeflaterOutputStream(java.util.zip.DeflaterOutputStream) OutputStream(java.io.OutputStream) DefaultHttpMethodRetryHandler(org.apache.commons.httpclient.DefaultHttpMethodRetryHandler) XulRecorder(com.twinsoft.convertigo.engine.parsers.XulRecorder) SiteClipperScreenClass(com.twinsoft.convertigo.beans.screenclasses.SiteClipperScreenClass) URI(org.apache.commons.httpclient.URI) ServletException(javax.servlet.ServletException) HeadMethod(org.apache.commons.httpclient.methods.HeadMethod) OptionsMethod(org.apache.commons.httpclient.methods.OptionsMethod) URIException(org.apache.commons.httpclient.URIException) Deflater(java.util.zip.Deflater) GZIPOutputStream(java.util.zip.GZIPOutputStream) DeflaterOutputStream(java.util.zip.DeflaterOutputStream) DeleteMethod(org.apache.commons.httpclient.methods.DeleteMethod) ConnectorEvent(com.twinsoft.convertigo.beans.core.ConnectorEvent) HostConfiguration(org.apache.commons.httpclient.HostConfiguration) GZIPInputStream(java.util.zip.GZIPInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InflaterInputStream(java.util.zip.InflaterInputStream) InputStream(java.io.InputStream) TraceMethod(org.apache.commons.httpclient.methods.TraceMethod) JSONArray(org.codehaus.jettison.json.JSONArray) UnsupportedEncodingException(java.io.UnsupportedEncodingException) JSONException(org.codehaus.jettison.json.JSONException) HttpMethodParams(org.apache.commons.httpclient.params.HttpMethodParams) ServletException(javax.servlet.ServletException) URIException(org.apache.commons.httpclient.URIException) EngineException(com.twinsoft.convertigo.engine.EngineException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) MalformedURLException(java.net.MalformedURLException) 
IOException(java.io.IOException) JSONException(org.codehaus.jettison.json.JSONException) Header(org.apache.commons.httpclient.Header) ByteArrayInputStream(java.io.ByteArrayInputStream) IResponseRule(com.twinsoft.convertigo.beans.extractionrules.siteclipper.IResponseRule) HttpClient(org.apache.commons.httpclient.HttpClient) GetMethod(org.apache.commons.httpclient.methods.GetMethod) PutMethod(org.apache.commons.httpclient.methods.PutMethod) EngineEvent(com.twinsoft.convertigo.engine.EngineEvent) HttpMethod(org.apache.commons.httpclient.HttpMethod) IRequestRule(com.twinsoft.convertigo.beans.extractionrules.siteclipper.IRequestRule) IClientInstruction(com.twinsoft.convertigo.engine.siteclipper.clientinstruction.IClientInstruction)

Aggregations

ConnectorEvent (com.twinsoft.convertigo.beans.core.ConnectorEvent)1 IRequestRule (com.twinsoft.convertigo.beans.extractionrules.siteclipper.IRequestRule)1 IResponseRule (com.twinsoft.convertigo.beans.extractionrules.siteclipper.IResponseRule)1 SiteClipperScreenClass (com.twinsoft.convertigo.beans.screenclasses.SiteClipperScreenClass)1 EngineEvent (com.twinsoft.convertigo.engine.EngineEvent)1 EngineException (com.twinsoft.convertigo.engine.EngineException)1 XulRecorder (com.twinsoft.convertigo.engine.parsers.XulRecorder)1 IClientInstruction (com.twinsoft.convertigo.engine.siteclipper.clientinstruction.IClientInstruction)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 MalformedURLException (java.net.MalformedURLException)1 Matcher (java.util.regex.Matcher)1 Deflater (java.util.zip.Deflater)1 DeflaterOutputStream (java.util.zip.DeflaterOutputStream)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 InflaterInputStream (java.util.zip.InflaterInputStream)1