use of org.apache.http.Header in project Xponents by OpenSextant.
the class DefaultWebCrawl method collectItemsOnPage.
/**
 * Internal method for parsing and harvesting from a single page, then crawling deeper if instructed to do so.
 *
 * @param pageContent raw HTML
 * @param url         URL of the page
 * @param site        top-level URL for the site
 */
protected void collectItemsOnPage(String pageContent, URL url, URL site) {
    Collection<HyperLink> items = parseContentPage(pageContent, url, site);
    /* 2. Collect items on this page. */
    for (HyperLink l : items) {
        if (filterOut(l)) {
            continue;
        }
        if (this.isAllowCurrentSiteOnly() && !(l.isCurrentSite() || l.isCurrentHost())) {
            // Page represented by link, l, is on another website.
            log.debug("Not on current site: {}", l);
            continue;
        }
        if (this.isAllowCurrentDirOnly() && !l.isCurrentPage()) {
            // Page represented by link, l, is in another directory on the same site.
            log.debug("Not on current directory: {}", l);
            continue;
        }
        /* TODO: fix "key" -- it captures only the path, so two different URLs
         * with the same path would collide.
         * TODO: in general, fix the ability to crawl off the requested site.
         * If that is really needed, this is not the crawling capability you want.
         */
        String key = l.getNormalPath();
        if (key == null) {
            key = l.getAbsoluteURL();
        }
        if (found.containsKey(key)) {
            // We already did this.
            continue;
        }
        if (userFilteredOut(key)) {
            // We don't want to do this.
            log.debug("Filtered out by user: {}", key);
            continue;
        }
        found.put(key, l);
        if (saved.contains(l.getId())) {
            // Already saved; ignore.
            continue;
        }
        // Download artifacts.
        if (l.isFile() || l.isWebPage()) {
            pause();
            log.info("Pulling page {}", l);
            try {
                try {
                    if (listener != null && listener.exists(l.getId())) {
                        // The listener already collected this item, so it is ignored here.
                        continue;
                    }
                } catch (Exception err1) {
                    log.error("Collection Listener error", err1);
                    continue;
                }
                // Create URL for the link and download the artifact.
                HttpResponse itemPage = getPage(l.getURL());
                if (itemPage.getStatusLine().getStatusCode() >= 400) {
                    this.errorPages.add(l.getAbsoluteURL());
                    log.error("Failing on this request, HTTP status>=400, LINK={}", l.getURL());
                    continue;
                }
                /*
                 * Identify the correct type of file this item is, from HTTP headers & MIME, not just the link.
                 */
                Header contentType = itemPage.getEntity().getContentType();
                if (contentType != null) {
                    l.setMIMEType(contentType.getValue());
                }
                /*
                 * Create a non-trivial path for the item.
                 */
                String fpath = l.getNormalPath();
                if (l.isDynamic() && !fpath.endsWith(".html")) {
                    fpath = fpath + ".html";
                }
                File itemSaved = createArchiveFile(fpath, false);
                File dir = new File(itemSaved.getParentFile().getAbsolutePath());
                FileUtility.makeDirectory(dir);
                l.setFilepath(itemSaved);
                // Cache the identity of this URL.
                saved.add(l.getId());
                WebClient.downloadFile(itemPage.getEntity(), itemSaved.getAbsolutePath());
                convertContent(itemSaved, l);
                // Crawl deeper into saved web pages, up to MAX_DEPTH.
                if (l.isWebPage() && depth <= MAX_DEPTH) {
                    collectItems(l.getAbsoluteURL(), site);
                }
            } catch (Exception fileErr) {
                log.error("Item for URL {} was not saved due to a net or IO issue.", l.getAbsoluteURL(), fileErr);
            }
        }
    }
    --depth;
}
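The Header usage that matters here is reading the server-reported Content-Type off the response entity, rather than trusting the link's file extension. A minimal standalone sketch of that pattern, assuming HttpClient 4.x and a placeholder URL:

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class ContentTypeProbe {
    public static void main(String[] args) throws Exception {
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            // Placeholder URL, for illustration only.
            HttpResponse response = client.execute(new HttpGet("http://example.com/"));
            // Same pattern as the crawler above: prefer the MIME type the
            // server reports over whatever the link's extension suggests.
            Header contentType = response.getEntity().getContentType();
            if (contentType != null) {
                System.out.println("MIME type: " + contentType.getValue());
            }
        }
    }
}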
use of org.apache.http.Header in project android_frameworks_base by ResurrectionRemix.
the class MultipartTest method testParts.
public void testParts() throws Exception {
    StringBuffer filebuffer = new StringBuffer();
    String filepartStr = "this is file part";
    filebuffer.append(filepartStr);
    File upload = File.createTempFile("Multipart", "test");
    FileWriter outFile = new FileWriter(upload);
    BufferedWriter out = new BufferedWriter(outFile);
    try {
        out.write(filebuffer.toString());
        out.flush();
    } finally {
        out.close();
    }
    Part[] parts = new Part[3];
    parts[0] = new StringPart("stringpart", "PART1!!");
    parts[1] = new FilePart(upload.getName(), upload);
    parts[2] = new StringPart("stringpart", "PART2!!");
    MultipartEntity me = new MultipartEntity(parts);
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    me.writeTo(os);
    Header h = me.getContentType();
    String boundary = EncodingUtils.getAsciiString(me.getMultipartBoundary());
    StringBuffer contentType = new StringBuffer("multipart/form-data");
    contentType.append("; boundary=");
    contentType.append(boundary);
    assertEquals("Multipart content type error", contentType.toString(), h.getValue());
    // Build the expected wire format by hand: each part is delimited by
    // "--<boundary>" and carries its own Content-* headers.
    final String CRLF = "\r\n";
    StringBuffer output = new StringBuffer();
    output.append("--");
    output.append(boundary);
    output.append(CRLF);
    output.append("Content-Disposition: form-data; name=\"stringpart\"");
    output.append(CRLF);
    output.append("Content-Type: text/plain; charset=US-ASCII");
    output.append(CRLF);
    output.append("Content-Transfer-Encoding: 8bit");
    output.append(CRLF);
    output.append(CRLF);
    output.append("PART1!!");
    output.append(CRLF);
    output.append("--");
    output.append(boundary);
    output.append(CRLF);
    output.append("Content-Disposition: form-data; name=\"");
    output.append(upload.getName());
    output.append("\"; filename=\"");
    output.append(upload.getName());
    output.append("\"");
    output.append(CRLF);
    output.append("Content-Type: application/octet-stream; charset=ISO-8859-1");
    output.append(CRLF);
    output.append("Content-Transfer-Encoding: binary");
    output.append(CRLF);
    output.append(CRLF);
    output.append(filepartStr);
    output.append(CRLF);
    // The closing delimiter carries a trailing "--".
    output.append("--");
    output.append(boundary);
    output.append(CRLF);
    output.append("Content-Disposition: form-data; name=\"stringpart\"");
    output.append(CRLF);
    output.append("Content-Type: text/plain; charset=US-ASCII");
    output.append(CRLF);
    output.append("Content-Transfer-Encoding: 8bit");
    output.append(CRLF);
    output.append(CRLF);
    output.append("PART2!!");
    output.append(CRLF);
    output.append("--");
    output.append(boundary);
    output.append("--");
    output.append(CRLF);
    assertEquals("Multipart content error", output.toString(), os.toString());
}
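The assertion above rebuilds the multipart Content-Type value by hand from the boundary. Going the other direction, the boundary parameter can be recovered from any Content-Type Header via the stock header-element parser; a small sketch, assuming httpcore 4.x and an illustrative boundary value:

import org.apache.http.Header;
import org.apache.http.NameValuePair;
import org.apache.http.message.BasicHeader;

public class BoundaryProbe {
    public static void main(String[] args) {
        // Illustrative header value, shaped like the one the test asserts against.
        Header h = new BasicHeader("Content-Type",
                "multipart/form-data; boundary=31415926535897932384626433832795");
        // Header.getElements() parses the "value; param=value" structure.
        NameValuePair boundary = h.getElements()[0].getParameterByName("boundary");
        System.out.println("boundary = " + boundary.getValue());
    }
}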
use of org.apache.http.Header in project hadoop-pcap by RIPE-NCC.
the class HttpPcapReader method propagateHeaders.
private void propagateHeaders(HttpPacket packet, Header[] headers) {
    LinkedList<String> headerKeys = new LinkedList<String>();
    for (Header header : headers) {
        String headerKey = HEADER_PREFIX + header.getName().toLowerCase();
        headerKeys.add(headerKey);
        packet.put(headerKey, header.getValue());
    }
    packet.put(HttpPacket.HTTP_HEADERS, Joiner.on(',').join(headerKeys));
}
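propagateHeaders expects the complete Header[] of a message, which with HttpClient-style APIs typically comes from getAllHeaders(). A self-contained sketch of the same prefix-and-lowercase key scheme follows; the HEADER_PREFIX value is an assumption here, as the real constant is defined elsewhere in HttpPcapReader:

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;

public class HeaderFlattener {
    // Assumed prefix, for illustration; the real constant lives in HttpPcapReader.
    private static final String HEADER_PREFIX = "header_";

    static Map<String, String> flatten(Header[] headers) {
        Map<String, String> fields = new LinkedHashMap<>();
        for (Header header : headers) {
            // Same normalization as propagateHeaders: prefix + lower-cased name.
            fields.put(HEADER_PREFIX + header.getName().toLowerCase(), header.getValue());
        }
        return fields;
    }

    public static void main(String[] args) {
        Header[] headers = {
            new BasicHeader("Host", "example.com"),
            new BasicHeader("User-Agent", "pcap-test")
        };
        // Prints {header_host=example.com, header_user-agent=pcap-test}
        System.out.println(flatten(headers));
    }
}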
use of org.apache.http.Header in project ats-framework by Axway.
the class HttpClient method addRequestHeaders.
private void addRequestHeaders(HttpRequestBase httpMethod) throws FileTransferException {
    // Pass user credentials with the very first request headers.
    if (preemptiveBasicAuthentication) {
        if (this.username == null) {
            throw new FileTransferException("We cannot set user credentials as the user name is not set");
        }
        try {
            BasicScheme schema = new BasicScheme(Charset.forName("US-ASCII"));
            // Authenticate against an 'empty' HTTP request, just to turn the
            // credentials into an Authorization header.
            Header authenticationHeader = schema.authenticate(
                    new UsernamePasswordCredentials(this.username, this.userpass), new HttpGet(), httpContext);
            httpMethod.addHeader(authenticationHeader);
        } catch (AuthenticationException ae) {
            throw new FileTransferException("Unable to add Basic Authentication header", ae);
        }
    }
    // Add the rest of the request headers.
    for (Header header : requestHeaders) {
        httpMethod.setHeader(header);
    }
}
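The BasicScheme.authenticate() trick works just as well outside this class: it turns credentials into a ready-made Authorization header without any round trip to the server. A self-contained sketch with hypothetical credentials, assuming HttpClient 4.3+ (for the charset-aware BasicScheme constructor):

import java.nio.charset.StandardCharsets;
import org.apache.http.Header;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.protocol.BasicHttpContext;

public class PreemptiveAuthSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical credentials, for illustration only.
        UsernamePasswordCredentials creds = new UsernamePasswordCredentials("user", "secret");
        BasicScheme scheme = new BasicScheme(StandardCharsets.US_ASCII);
        // A throwaway request is enough; Basic auth needs no server challenge.
        Header authHeader = scheme.authenticate(creds, new HttpGet(), new BasicHttpContext());
        // Prints: Authorization: Basic dXNlcjpzZWNyZXQ=
        System.out.println(authHeader.getName() + ": " + authHeader.getValue());
    }
}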
use of org.apache.http.Header in project nhin-d by DirectProject.
the class UsecuredServiceRequestBase_checkContentTypeTest method testCheckContentType_incompatibleType_assertServiceException.
@Test
public void testCheckContentType_incompatibleType_assertServiceException() throws Exception {
    MockServiceRequest req = new MockServiceRequest(null, "http://service/svc", "Test");
    Header hdr = mock(Header.class);
    when(hdr.getName()).thenReturn("Content-Type");
    when(hdr.getValue()).thenReturn("text/xml");
    HttpEntity entity = mock(HttpEntity.class);
    when(entity.getContentType()).thenReturn(hdr);
    boolean exceptionOccurred = false;
    try {
        req.checkContentType("text/plain", entity);
    } catch (ServiceException e) {
        exceptionOccurred = true;
    }
    assertTrue(exceptionOccurred);
}
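Mockito keeps the test independent of any concrete Header implementation. For comparison, the same Content-Type plumbing can be exercised with real types, since StringEntity populates its own Content-Type header from the ContentType it is given; a sketch, assuming httpcore 4.2+:

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;

public class ContentTypeCheck {
    public static void main(String[] args) throws Exception {
        // A real entity instead of a mock; its Content-Type header is built in.
        HttpEntity entity = new StringEntity("<doc/>", ContentType.TEXT_XML);
        Header hdr = entity.getContentType();
        // Prints: Content-Type: text/xml; charset=ISO-8859-1
        System.out.println(hdr.getName() + ": " + hdr.getValue());
    }
}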