use of nl.basjes.parse.core.ParsedField in project logparser by nielsbasjes.
the class StrfTimeStampDissector method dissect.
/**
 * Fetches the requested input field from the record and passes it straight on to the
 * wrapped {@code timeStampDissector}; this method performs no parsing of its own.
 *
 * @param parsable  the record that holds the input field and receives the results
 * @param inputname the name of the field that must be dissected
 * @throws DissectionFailure declared by the dissector contract
 */
@Override
public void dissect(Parsable<?> parsable, String inputname) throws DissectionFailure {
    timeStampDissector.dissect(parsable.getParsableField(inputType, inputname), parsable, inputname);
}
use of nl.basjes.parse.core.ParsedField in project logparser by nielsbasjes.
the class HttpFirstLineProtocolDissector method dissect.
// --------------------------------------------
/**
 * Splits the input value on its first {@code '/'} into two parts and emits them as
 * {@code HTTP.PROTOCOL} and {@code HTTP.PROTOCOL.VERSION}.
 *
 * <p>A {@code null}, empty or {@code "-"} value produces no output at all. A value
 * without any {@code '/'} produces both outputs with an explicit {@code null} value.
 *
 * @param parsable  the record that holds the input field and receives the results
 * @param inputname the name of the field that must be dissected
 * @throws DissectionFailure declared by the dissector contract
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final String rawProtocol = parsable.getParsableField(INPUT_TYPE, inputname).getValue().getString();

    final boolean nothingToDissect =
        rawProtocol == null || rawProtocol.isEmpty() || "-".equals(rawProtocol);
    if (nothingToDissect) {
        return;
    }

    // At most two pieces: everything before the first '/' and everything after it.
    final String[] pieces = rawProtocol.split("/", 2);

    if (pieces.length != 2) {
        // There was no '/' to split on (the original note here mentions first lines that
        // were cut off by Apache HTTPD), so both outputs are reported as explicit nulls.
        parsable.addDissection(inputname, "HTTP.PROTOCOL", "", (String) null);
        parsable.addDissection(inputname, "HTTP.PROTOCOL.VERSION", "version", (String) null);
        return;
    }

    outputDissection(parsable, inputname, "HTTP.PROTOCOL", "", pieces[0]);
    outputDissection(parsable, inputname, "HTTP.PROTOCOL.VERSION", "version", pieces[1]);
}
use of nl.basjes.parse.core.ParsedField in project yauaa by nielsbasjes.
the class UserAgentDissector method dissect.
/**
 * Parses the input value with {@code userAgentAnalyzer} and emits one dissection for
 * every entry in {@code requestedFieldNames}.
 *
 * <p>When the input value is {@code null} nothing is emitted.
 *
 * @param parsable  the record that holds the input field and receives the results
 * @param inputname the name of the field that must be dissected
 * @throws DissectionFailure declared by the dissector contract
 */
@Override
public void dissect(Parsable<?> parsable, String inputname) throws DissectionFailure {
    final String rawUserAgent = parsable.getParsableField(INPUT_TYPE, inputname).getValue().getString();

    // A missing value is unusual but does occur; there is simply nothing to analyze then.
    if (rawUserAgent != null) {
        final UserAgent parsedAgent = userAgentAnalyzer.parse(rawUserAgent);
        for (String requested : requestedFieldNames) {
            final var outputType = getFieldOutputType(requested);
            final var outputName = fieldNameToDissectionName(requested);
            parsable.addDissection(inputname, outputType, outputName, parsedAgent.getValue(requested));
        }
    }
}
use of nl.basjes.parse.core.ParsedField in project logparser by nielsbasjes.
the class HttpFirstLineDissector method dissect.
// --------------------------------------------
/**
 * Splits an HTTP first line into {@code HTTP.METHOD}, {@code HTTP.URI} and
 * {@code HTTP.PROTOCOL_VERSION}.
 *
 * <p>The value is first matched against {@code firstlineSplitter}. If that does not
 * apply, {@code tooLongFirstlineSplitter} is tried; in that case the method and URI are
 * emitted and the protocol is emitted as an explicit {@code null}. A {@code null}, empty
 * or {@code "-"} value, or a value neither pattern matches, produces no output.
 *
 * @param parsable  the record that holds the input field and receives the results
 * @param inputname the name of the field that must be dissected
 * @throws DissectionFailure declared by the dissector contract
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final String firstLine = parsable.getParsableField(HTTP_FIRSTLINE, inputname).getValue().getString();
    if (firstLine == null || firstLine.isEmpty() || "-".equals(firstLine)) {
        return;
    }

    // The normal case: method, uri and protocol are all present.
    final Matcher completeLine = firstlineSplitter.matcher(firstLine);
    if (completeLine.find() && completeLine.groupCount() == 3) {
        outputDissection(parsable, inputname, "HTTP.METHOD", "method", completeLine, 1);
        outputDissection(parsable, inputname, "HTTP.URI", "uri", completeLine, 2);
        outputDissection(parsable, inputname, "HTTP.PROTOCOL_VERSION", "protocol", completeLine, 3);
        return;
    }

    // When the URI is too long, Apache HTTPD may have cut off the trailing part
    // ("HTTP/1.1"). The second pattern covers those lines so they can still be parsed.
    final Matcher truncatedLine = tooLongFirstlineSplitter.matcher(firstLine);
    if (truncatedLine.find() && truncatedLine.groupCount() == 2) {
        outputDissection(parsable, inputname, "HTTP.METHOD", "method", truncatedLine, 1);
        outputDissection(parsable, inputname, "HTTP.URI", "uri", truncatedLine, 2);
        parsable.addDissection(inputname, "HTTP.PROTOCOL_VERSION", "protocol", (String) null);
    }
}
use of nl.basjes.parse.core.ParsedField in project logparser by nielsbasjes.
the class HttpUriDissector method dissect.
/**
 * Cleans up a (possibly malformed) URI string, parses it with {@link URI} and emits the
 * parts for which the matching {@code want*} flag is set.
 *
 * <p>Query string, path and fragment are emitted for every input. Protocol, userinfo,
 * host and port are only emitted when the cleaned value does not start with {@code '/'};
 * for such path-only values a dummy scheme and host are prepended purely to make the
 * value parseable, and those faked parts are never reported. Parts that are absent or
 * empty (or a port of {@code -1}) are skipped. A {@code null} or empty input produces no
 * output.
 *
 * @param parsable  the record that holds the input field and receives the results
 * @param inputname the name of the field that must be dissected
 * @throws DissectionFailure if the cleaned-up value is still rejected by {@link URI#create(String)}
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);

    String cleaned = field.getValue().getString();
    if (cleaned == null || cleaned.isEmpty()) {
        return;
    }

    // Step 1: escape the characters that make 'garbage' URIs fail needlessly.
    // See: https://stackoverflow.com/questions/11038967/brackets-in-a-request-url-are-legal-but-not-in-a-uri-java
    final byte[] encodedBytes = URLCodec.encodeUrl(BAD_URI_CHARS, cleaned.getBytes(UTF_8));
    cleaned = new String(encodedBytes, US_ASCII);

    // Step 2: turn HTML encoded entities/characters into URL UTF-8 encoded characters.
    cleaned = makeHTMLEncodedInert(cleaned);

    // Step 3: normalise the separators to the form ?&x=x&y=y&z=z so that illegal (yet
    // real-world) variants still parse and the returned query string always starts with '&'.
    final boolean hasSeparator = cleaned.indexOf('?') >= 0 || cleaned.indexOf('&') >= 0;
    if (hasSeparator) {
        cleaned = cleaned.replace('?', '&');
        cleaned = cleaned.replaceFirst("&", "?&");
    }

    // Step 4: a '%' that is NOT followed by two hex characters is not an escape sequence
    // and gets escaped itself. The substitution is deliberately run twice, exactly as the
    // original code does (presumably so adjacent bad escapes are all caught - TODO confirm).
    for (int pass = 0; pass < 2; pass++) {
        cleaned = BAD_EXCAPE_PATTERN.matcher(cleaned).replaceAll("%25$1");
    }

    // Step 5: repair broken encoded characters in URIs such as
    //     /path/?_requestid=1234#x3D;12341234&Referrer=blablabla
    // and then decode the HTML entities.
    cleaned = ALMOST_HTML_ENCODED.matcher(cleaned).replaceAll("$1&$2");
    cleaned = StringEscapeUtils.unescapeHtml4(cleaned);

    // Step 6: handle stray '#' characters as seen in URIs such as
    //     /path/?Referrer=ADV1234#&f=API&subid=#&name=12341234
    cleaned = EQUALS_HASH_PATTERN.matcher(cleaned).replaceAll("=");
    cleaned = HASH_AMP_PATTERN.matcher(cleaned).replaceAll("&");

    // Step 7: as long as multiple '#' remain, replace the extra ones with '~'.
    Matcher extraHashes = DOUBLE_HASH_PATTERN.matcher(cleaned);
    while (extraHashes.find()) {
        cleaned = extraHashes.replaceAll("~$1#");
        extraHashes = DOUBLE_HASH_PATTERN.matcher(cleaned);
    }

    // A value starting with '/' is only a path; it gets a fake scheme and host so the
    // standard parser accepts it. Those faked values are never returned.
    final boolean pathOnly = cleaned.charAt(0) == '/';
    URI uri;
    try {
        uri = URI.create(pathOnly ? "dummy-protocol://dummy.host.name" + cleaned : cleaned);
    } catch (IllegalArgumentException e) {
        throw new DissectionFailure("Failed to parse URI >>" + field.getValue().getString() + "<< because of : " + e.getMessage());
    }

    // Parts that are meaningful for both full URLs and path-only values.
    if (wantQuery) {
        final String query = uri.getRawQuery();
        if (query != null && !query.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", query);
        }
    }
    if (wantPath) {
        final String path = uri.getPath();
        if (path != null && !path.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.PATH", "path", path);
        }
    }
    if (wantRef) {
        final String fragment = uri.getFragment();
        if (fragment != null && !fragment.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.REF", "ref", fragment);
        }
    }

    if (pathOnly) {
        // Scheme and host were faked above, so there is nothing real left to report.
        return;
    }

    // Parts that only exist when the input was a complete URL.
    if (wantProtocol) {
        final String scheme = uri.getScheme();
        if (scheme != null && !scheme.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", scheme);
        }
    }
    if (wantUserinfo) {
        final String userInfo = uri.getUserInfo();
        if (userInfo != null && !userInfo.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", userInfo);
        }
    }
    if (wantHost) {
        final String host = uri.getHost();
        if (host != null && !host.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.HOST", "host", host);
        }
    }
    if (wantPort) {
        final int port = uri.getPort();
        if (port != -1) {
            parsable.addDissection(inputname, "HTTP.PORT", "port", port);
        }
    }
}
Aggregations