Search in sources :

Example 16 with Header

use of org.apache.http.Header in project Xponents by OpenSextant.

the class DefaultWebCrawl method collectItemsOnPage.

/**
     * Internal method for parsing and harvesting from a single page and then crawling deeper, if instructed to do so.
     *
     * <p>Each link returned by {@code parseContentPage} is run through a series of filters
     * (built-in filter, current-site / current-directory restrictions, already-found check,
     * user filter, already-saved check). Links that survive and are files or web pages are
     * fetched, written to an archive file, passed to {@code convertContent}, and, for web
     * pages while {@code depth <= MAX_DEPTH}, crawled further via {@code collectItems}.
     *
     * <p>Failures while fetching or saving a single link are logged and do not stop the
     * loop. The {@code depth} field is decremented once before returning.
     *
     * @param pageContent raw HTML
     * @param url  url for HTML
     * @param site  top level url for site
     */
protected void collectItemsOnPage(String pageContent, URL url, URL site) {
    // 1. Extract the candidate links from the page content.
    Collection<HyperLink> items = parseContentPage(pageContent, url, site);
    /* 2. Collect items on this page.
         *    Every rejected link is skipped with "continue"; nothing is recorded for it
         *    unless stated otherwise below.
         */
    for (HyperLink l : items) {
        // Built-in link filter.
        if (filterOut(l)) {
            continue;
        }
        if (this.isAllowCurrentSiteOnly() && !(l.isCurrentSite() || l.isCurrentHost())) {
            // Page represented by link, l, is on another website.
            log.debug("Not on current site: {}", l);
            continue;
        }
        if (this.isAllowCurrentDirOnly() && !l.isCurrentPage()) {
            // Page represented by link, l, is in another directory on the same site.
            log.debug("Not on current directory: {}", l);
            continue;
        }
        /* TODO: fix "key", as it represents not just path, but unique URLs
             * different URLs with same path would collide.
             * TODO: in general fix the ability to crawl off requested site.
             *  If that is really needed, this is not the crawling capability you want.
             *
             */
        // De-duplication key: the normalized path, falling back to the absolute URL
        // when no normalized path is available.
        String key = l.getNormalPath();
        if (key == null) {
            key = l.getAbsoluteURL();
        }
        if (found.containsKey(key)) {
            // We already did this.
            continue;
        }
        if (userFilteredOut(key)) {
            // We don't want to do this.
            log.debug("Filtered Out by User: {}", key);
            continue;
        }
        // Record the link as found before attempting any download, so it is not
        // reconsidered even if the download below is skipped or fails.
        found.put(key, l);
        if (saved.contains(l.getId())) {
            // Already saved under this link ID; ignore.
            continue;
        }
        // Download artifacts
        if (l.isFile() || l.isWebPage()) {
            // pause() is called before every fetch; presumably a politeness delay -- confirm in pause().
            pause();
            log.info("Pulling page {}", l);
            try {
                // Ask the optional collection listener whether this item was already collected.
                // A listener failure is logged and the link is skipped rather than downloaded.
                try {
                    if (listener != null && listener.exists(l.getId())) {
                        // You already collected this. So it will be ignored.
                        continue;
                    }
                } catch (Exception err1) {
                    log.error("Collection Listener error", err1);
                    continue;
                }
                // create URL for link and download artifact.
                HttpResponse itemPage = getPage(l.getURL());
                // Any HTTP status >= 400 is treated as a failure: the absolute URL is
                // remembered in errorPages and the link is skipped.
                // NOTE(review): the response entity is not consumed on this path -- confirm
                // that getPage()/the HTTP client releases the connection.
                if (itemPage.getStatusLine().getStatusCode() >= 400) {
                    this.errorPages.add(l.getAbsoluteURL());
                    log.error("Failing on this request, HTTP status>=400, LINK={}", l.getURL());
                    continue;
                }
                /*
                     * Identify the correct type of file this item is, from HTTP headers & MIME, not just the link
                     */
                // NOTE(review): getEntity() is dereferenced without a null check; a response
                // without an entity would end up in the catch-all below -- confirm intended.
                Header contentType = itemPage.getEntity().getContentType();
                if (contentType != null) {
                    l.setMIMEType(contentType.getValue());
                }
                /*
                     * Create a non-trivial path for the item.
                     * Dynamic links get an ".html" suffix unless the path already ends with one.
                     */
                // NOTE(review): getNormalPath() is treated as nullable when computing "key" above,
                // but is used unguarded here; a null path on a dynamic link would throw and be
                // reported by the catch-all below -- confirm.
                String fpath = l.getNormalPath();
                if (l.isDynamic()) {
                    if (!fpath.endsWith(".html")) {
                        fpath = fpath + ".html";
                    }
                }
                // Resolve the archive location and make sure its parent directory exists.
                File itemSaved = createArchiveFile(fpath, false);
                File dir = new File(itemSaved.getParentFile().getAbsolutePath());
                FileUtility.makeDirectory(dir);
                l.setFilepath(itemSaved);
                // CACHE the identity of this URL. This happens before the download, so the ID
                // stays in "saved" even if the download or conversion below fails.
                saved.add(l.getId());
                WebClient.downloadFile(itemPage.getEntity(), itemSaved.getAbsolutePath());
                convertContent(itemSaved, l);
                // Crawl deeper from web pages while the depth limit allows it.
                // NOTE(review): depth is only decremented in this method; presumably
                // collectItems() increments it -- confirm.
                if (l.isWebPage() && depth <= MAX_DEPTH) {
                    collectItems(l.getAbsoluteURL(), site);
                }
            } catch (Exception fileErr) {
                // Catch-all for this link only: log and move on to the next link.
                log.error("Item for URL {} was not saved due to a net or IO issue.", l.getAbsoluteURL(), fileErr);
            }
        }
    }
    // Done with this page: step back up one crawl level.
    --depth;
}
Also used : Header(org.apache.http.Header) HttpResponse(org.apache.http.HttpResponse) File(java.io.File) MalformedURLException(java.net.MalformedURLException) ConfigException(org.opensextant.ConfigException) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException)

Example 17 with Header

use of org.apache.http.Header in project android_frameworks_base by ResurrectionRemix.

the class MultipartTest method testParts.

/**
 * Verifies that a {@code MultipartEntity} built from two string parts and one file part
 * reports the expected {@code multipart/form-data} content type and serializes to the
 * expected multipart body.
 *
 * <p>The file part is backed by a temporary file, which is removed again when the test
 * finishes, whether it passes or fails.
 *
 * @throws Exception if the temporary file cannot be created or written, or the entity
 *         cannot be serialized
 */
public void testParts() throws Exception {
    final String CRLF = "\r\n";
    String filepartStr = "this is file part";
    File upload = File.createTempFile("Multipart", "test");
    try {
        // Populate the file that backs the file part.
        FileWriter outFile = new FileWriter(upload);
        BufferedWriter out = new BufferedWriter(outFile);
        try {
            out.write(filepartStr);
            out.flush();
        } finally {
            out.close();
        }

        Part[] parts = new Part[] {
            new StringPart("stringpart", "PART1!!"),
            new FilePart(upload.getName(), upload),
            new StringPart("stringpart", "PART2!!")
        };
        MultipartEntity me = new MultipartEntity(parts);
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        me.writeTo(os);

        // The content type header must carry the boundary the entity generated.
        Header h = me.getContentType();
        String boundry = EncodingUtils.getAsciiString(me.getMultipartBoundary());
        String expectedContentType = "multipart/form-data" + "; boundary=" + boundry;
        assertEquals("Multipart content type error", expectedContentType, h.getValue());

        // Expected wire format: three parts, each introduced by "--<boundary>",
        // followed by the closing "--<boundary>--" delimiter.
        StringBuilder output = new StringBuilder();
        // First string part.
        output.append("--").append(boundry).append(CRLF);
        output.append("Content-Disposition: form-data; name=\"stringpart\"").append(CRLF);
        output.append("Content-Type: text/plain; charset=US-ASCII").append(CRLF);
        output.append("Content-Transfer-Encoding: 8bit").append(CRLF);
        output.append(CRLF);
        output.append("PART1!!").append(CRLF);
        // File part: the temp file name is used both as the field name and the file name.
        output.append("--").append(boundry).append(CRLF);
        output.append("Content-Disposition: form-data; name=\"").append(upload.getName());
        output.append("\"; filename=\"").append(upload.getName()).append("\"").append(CRLF);
        output.append("Content-Type: application/octet-stream; charset=ISO-8859-1").append(CRLF);
        output.append("Content-Transfer-Encoding: binary").append(CRLF);
        output.append(CRLF);
        output.append(filepartStr).append(CRLF);
        // Second string part.
        output.append("--").append(boundry).append(CRLF);
        output.append("Content-Disposition: form-data; name=\"stringpart\"").append(CRLF);
        output.append("Content-Type: text/plain; charset=US-ASCII").append(CRLF);
        output.append("Content-Transfer-Encoding: 8bit").append(CRLF);
        output.append(CRLF);
        output.append("PART2!!").append(CRLF);
        // Closing delimiter.
        output.append("--").append(boundry).append("--").append(CRLF);

        assertEquals("Multipart content error", output.toString(), os.toString());
    } finally {
        // Do not leave the temp file behind; fall back to removal at JVM exit
        // if it cannot be deleted right now.
        if (!upload.delete()) {
            upload.deleteOnExit();
        }
    }
}
Also used : Header(org.apache.http.Header) FileWriter(java.io.FileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Example 18 with Header

use of org.apache.http.Header in project hadoop-pcap by RIPE-NCC.

the class HttpPcapReader method propagateHeaders.

/**
 * Copies the given HTTP headers onto the packet.
 *
 * <p>Each header value is stored under {@code HEADER_PREFIX} followed by the lower-cased
 * header name. The comma-joined list of all keys written this way is stored under
 * {@code HttpPacket.HTTP_HEADERS}, in the order the headers were supplied.
 *
 * @param packet  packet that receives the header entries
 * @param headers headers to copy onto the packet
 */
private void propagateHeaders(HttpPacket packet, Header[] headers) {
    LinkedList<String> headerKeys = new LinkedList<String>();
    for (Header header : headers) {
        // Lower-case with Locale.ROOT so the generated keys do not depend on the JVM's
        // default locale (e.g. under a Turkish locale "If-Match" would become "ıf-match").
        String headerKey = HEADER_PREFIX + header.getName().toLowerCase(java.util.Locale.ROOT);
        headerKeys.add(headerKey);
        packet.put(headerKey, header.getValue());
    }
    packet.put(HttpPacket.HTTP_HEADERS, Joiner.on(',').join(headerKeys));
}
Also used : Header(org.apache.http.Header) LinkedList(java.util.LinkedList)

Example 19 with Header

use of org.apache.http.Header in project ats-framework by Axway.

the class HttpClient method addRequestHeaders.

/**
 * Applies the configured headers to an outgoing request.
 *
 * <p>When preemptive Basic authentication is enabled, an authentication header built from
 * the configured user name and password is added first. Afterwards every entry of
 * {@code requestHeaders} is set on the request.
 *
 * @param httpMethod the request to decorate
 * @throws FileTransferException if preemptive authentication is enabled but no user name
 *         is set, or if the Basic authentication header cannot be created
 */
private void addRequestHeaders(HttpRequestBase httpMethod) throws FileTransferException {
    // Credentials, when sent preemptively, go in before any other header.
    if (preemptiveBasicAuthentication) {
        if (null == this.username) {
            throw new FileTransferException("We cannot set user credentials as the user name is not set");
        }
        try {
            BasicScheme basicScheme = new BasicScheme(Charset.forName("US-ASCII"));
            UsernamePasswordCredentials credentials =
                    new UsernamePasswordCredentials(this.username, this.userpass);
            // An 'empty' GET request is used only so the scheme can produce the header.
            HttpGet placeholderRequest = new HttpGet();
            httpMethod.addHeader(basicScheme.authenticate(credentials, placeholderRequest, httpContext));
        } catch (AuthenticationException authFailure) {
            throw new FileTransferException("Unable to add Basic Authentication header", authFailure);
        }
    }

    // Then the remaining, user-supplied request headers.
    for (Header requestHeader : requestHeaders) {
        httpMethod.setHeader(requestHeader);
    }
}
Also used : FileTransferException(com.axway.ats.common.filetransfer.FileTransferException) BasicScheme(org.apache.http.impl.auth.BasicScheme) Header(org.apache.http.Header) BasicHeader(org.apache.http.message.BasicHeader) AuthenticationException(org.apache.http.auth.AuthenticationException) HttpGet(org.apache.http.client.methods.HttpGet) UsernamePasswordCredentials(org.apache.http.auth.UsernamePasswordCredentials)

Example 20 with Header

use of org.apache.http.Header in project nhin-d by DirectProject.

the class UsecuredServiceRequestBase_checkContentTypeTest method testCheckContentType_incompatibleType_assertServiceException.

/**
 * Checks that {@code checkContentType} throws a {@code ServiceException} when the entity's
 * Content-Type ("text/xml") differs from the expected type ("text/plain").
 */
@Test
public void testCheckContentType_incompatibleType_assertServiceException() throws Exception {
    // An entity whose Content-Type header reports text/xml.
    Header contentTypeHeader = mock(Header.class);
    when(contentTypeHeader.getName()).thenReturn("Content-Type");
    when(contentTypeHeader.getValue()).thenReturn("text/xml");
    HttpEntity xmlEntity = mock(HttpEntity.class);
    when(xmlEntity.getContentType()).thenReturn(contentTypeHeader);

    MockServiceRequest request = new MockServiceRequest(null, "http://service/svc", "Test");

    // Expecting text/plain must be rejected with a ServiceException.
    boolean rejected;
    try {
        request.checkContentType("text/plain", xmlEntity);
        rejected = false;
    } catch (ServiceException expected) {
        rejected = true;
    }
    assertTrue(rejected);
}
Also used : Header(org.apache.http.Header) HttpEntity(org.apache.http.HttpEntity) ServiceException(org.nhindirect.common.rest.exceptions.ServiceException) Test(org.junit.Test)

Aggregations

Header (org.apache.http.Header)906 HttpResponse (org.apache.http.HttpResponse)368 HttpGet (org.apache.http.client.methods.HttpGet)253 Test (org.junit.Test)206 IOException (java.io.IOException)200 BasicHeader (org.apache.http.message.BasicHeader)160 HttpEntity (org.apache.http.HttpEntity)134 TestHttpClient (io.undertow.testutils.TestHttpClient)94 URI (java.net.URI)93 CloseableHttpResponse (org.apache.http.client.methods.CloseableHttpResponse)93 ArrayList (java.util.ArrayList)90 CloseableHttpClient (org.apache.http.impl.client.CloseableHttpClient)78 HashMap (java.util.HashMap)76 InputStream (java.io.InputStream)67 URISyntaxException (java.net.URISyntaxException)64 HttpPost (org.apache.http.client.methods.HttpPost)62 StatusLine (org.apache.http.StatusLine)60 List (java.util.List)52 StringEntity (org.apache.http.entity.StringEntity)51 HttpHost (org.apache.http.HttpHost)46