use of net.htmlparser.jericho.Source in project zaproxy by zaproxy.
the class SpiderHtmlParserUnitTest method shouldFindUrlsInMetaElements.
/**
 * Parses the {@code MetaElementsSpiderHtmlParser.html} fixture and checks the URLs reported to
 * the listener: their count and the exact sequence, plus the value returned by
 * {@code parseResource}.
 */
@Test
void shouldFindUrlsInMetaElements() {
    // Given
    SpiderHtmlParser parser = new SpiderHtmlParser(new SpiderParam());
    TestSpiderParserListener urlCollector = createTestSpiderParserListener();
    parser.addSpiderParserListener(urlCollector);
    HttpMessage response = createMessageWith("MetaElementsSpiderHtmlParser.html");
    // When
    boolean parsedCompletely = parser.parseResource(response, createSource(response), BASE_DEPTH);
    // Then
    assertThat(parsedCompletely, is(false));
    assertThat(urlCollector.getNumberOfUrlsFound(), is(12));
    assertThat(
            urlCollector.getUrlsFound(),
            contains(
                    "http://meta.example.com:8443/refresh/base/scheme",
                    "https://meta.example.com/refresh",
                    "http://example.com/sample/meta/refresh/relative",
                    "http://example.com/meta/refresh/absolute",
                    "http://meta.example.com/refresh/url/quoted/single",
                    "http://meta.example.com/refresh/url/quoted/double",
                    "ftp://meta.example.com/refresh",
                    "http://meta.example.com:8080/location/base/scheme",
                    "https://meta.example.com/location",
                    "http://example.com/sample/meta/location/relative",
                    "http://example.com/meta/location/absolute",
                    "ftp://meta.example.com/location"));
}
use of net.htmlparser.jericho.Source in project zaproxy by zaproxy.
the class SpiderHtmlParserUnitTest method shouldFindUrlsInAreaPingElements.
/**
 * Parses the {@code AreaElementsWithPingSpiderHtmlParser.html} fixture and checks the URLs
 * reported to the listener: their count and the exact sequence, plus the value returned by
 * {@code parseResource}.
 */
@Test
void shouldFindUrlsInAreaPingElements() {
    // Given
    SpiderHtmlParser parser = new SpiderHtmlParser(new SpiderParam());
    TestSpiderParserListener urlCollector = createTestSpiderParserListener();
    parser.addSpiderParserListener(urlCollector);
    HttpMessage response = createMessageWith("AreaElementsWithPingSpiderHtmlParser.html");
    // When
    boolean parsedCompletely = parser.parseResource(response, createSource(response), BASE_DEPTH);
    // Then
    assertThat(parsedCompletely, is(false));
    assertThat(urlCollector.getNumberOfUrlsFound(), is(23));
    assertThat(
            urlCollector.getUrlsFound(),
            contains(
                    // area URLs followed by ping URLs
                    "http://a.example.com/base/scheme",
                    "http://ping.example.com/base/scheme",
                    "http://a.example.com:8000/b",
                    "http://ping.example.com:8000/b",
                    "https://a.example.com/c?a=b",
                    "https://ping.example.com/c?a=b",
                    "http://example.com/sample/a/relative",
                    "http://example.com/sample/a/relative/ping",
                    "http://example.com/a/absolute",
                    "http://example.com/a/absolute/ping",
                    "ftp://a.example.com/",
                    "https://ping.example.com/ping",
                    // Ping first, is parsed href before ping
                    "http://b.example.com/",
                    "https://ping.first.com/",
                    // Ignored anchors but picked pings
                    "http://ping.example.com/mailping",
                    "http://ping.example.com/jsping",
                    "http://ping.example.com/ping",
                    // Multiple ping URLs
                    "http://a.example.com/",
                    "http://ping.example.com/",
                    "http://pong.example.com/",
                    // Multiple ping URLs with tab in the middle
                    "http://a.example.com/",
                    "http://ping.example.com/",
                    // Trailing slash is added on host
                    "http://pong.example.com/"));
}
use of net.htmlparser.jericho.Source in project zaproxy by zaproxy.
the class SpiderHtmlParserUnitTest method shouldFindUrlsInScriptElements.
/**
 * Parses the {@code ScriptElementsSpiderHtmlParser.html} fixture and checks the URLs reported
 * to the listener: their count and the exact sequence, plus the value returned by
 * {@code parseResource}.
 */
@Test
void shouldFindUrlsInScriptElements() {
    // Given
    SpiderHtmlParser parser = new SpiderHtmlParser(new SpiderParam());
    TestSpiderParserListener urlCollector = createTestSpiderParserListener();
    parser.addSpiderParserListener(urlCollector);
    HttpMessage response = createMessageWith("ScriptElementsSpiderHtmlParser.html");
    // When
    boolean parsedCompletely = parser.parseResource(response, createSource(response), BASE_DEPTH);
    // Then
    assertThat(parsedCompletely, is(false));
    assertThat(urlCollector.getNumberOfUrlsFound(), is(7));
    assertThat(
            urlCollector.getUrlsFound(),
            contains(
                    "http://script.example.com/base/scheme",
                    "http://script.example.com:8000/b",
                    "https://script.example.com/c?a=b",
                    "http://example.com/sample/script/relative",
                    "http://example.com/sample/",
                    "http://example.com/script/absolute",
                    "ftp://script.example.com/"));
}
use of net.htmlparser.jericho.Source in project zaproxy by zaproxy.
the class SpiderHtmlParserUnitTest method shouldUseMessageUriIfNoBaseElement.
/**
 * Parses the {@code NoBaseWithAElementSpiderHtmlParser.html} fixture and checks that exactly
 * one URL, {@code http://example.com/relative/no/base}, is reported to the listener, plus the
 * value returned by {@code parseResource}.
 */
@Test
void shouldUseMessageUriIfNoBaseElement() {
    // Given
    SpiderHtmlParser parser = new SpiderHtmlParser(new SpiderParam());
    TestSpiderParserListener urlCollector = createTestSpiderParserListener();
    parser.addSpiderParserListener(urlCollector);
    HttpMessage response = createMessageWith("NoBaseWithAElementSpiderHtmlParser.html");
    // When
    boolean parsedCompletely = parser.parseResource(response, createSource(response), BASE_DEPTH);
    // Then
    assertThat(parsedCompletely, is(false));
    assertThat(urlCollector.getNumberOfUrlsFound(), is(1));
    assertThat(
            urlCollector.getUrlsFound(),
            contains("http://example.com/relative/no/base"));
}
use of net.htmlparser.jericho.Source in project zaproxy by zaproxy.
the class SpiderHtmlParserUnitTest method shouldFindUrlsInLinkElements.
/**
 * Parses the {@code LinkElementsSpiderHtmlParser.html} fixture and checks the URLs reported to
 * the listener: their count and the exact sequence, plus the value returned by
 * {@code parseResource}.
 */
@Test
void shouldFindUrlsInLinkElements() {
    // Given
    SpiderHtmlParser parser = new SpiderHtmlParser(new SpiderParam());
    TestSpiderParserListener urlCollector = createTestSpiderParserListener();
    parser.addSpiderParserListener(urlCollector);
    HttpMessage response = createMessageWith("LinkElementsSpiderHtmlParser.html");
    // When
    boolean parsedCompletely = parser.parseResource(response, createSource(response), BASE_DEPTH);
    // Then
    assertThat(parsedCompletely, is(false));
    assertThat(urlCollector.getNumberOfUrlsFound(), is(7));
    assertThat(
            urlCollector.getUrlsFound(),
            contains(
                    "http://link.example.com/base/scheme",
                    "http://link.example.com:8000/b",
                    "https://link.example.com/c?a=b",
                    "http://example.com/sample/link/relative",
                    "http://example.com/sample/",
                    "http://example.com/link/absolute",
                    "ftp://link.example.com/"));
}
Aggregations