use of nl.basjes.parse.core.exceptions.DissectionFailure in project logparser by nielsbasjes.
the class TokenFormatDissector method dissect.
/**
 * Matches the input line against the compiled log format pattern and, on a match,
 * emits one dissection for every output field of the token belonging to each
 * capture group.
 *
 * @param parsable  the record being parsed; supplies the input field and receives the dissections
 * @param inputname the name of the input field that holds the line
 * @throws DissectionFailure if this dissector is flagged as unusable, or if the line
 *                           is absent or does not match the log format
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    if (!isUsable) {
        throw new DissectionFailure("Dissector in unusable state");
    }

    final ParsedField line = parsable.getParsableField(inputType, inputname);
    final String lineValue = line.getValue().getString();

    // A null line can never match; route it to the regular failure below instead of
    // letting Pattern.matcher(null) blow up with a NullPointerException.
    final Matcher matcher = (lineValue == null) ? null : logFormatPattern.matcher(lineValue);

    if (matcher == null || !matcher.find()) {
        throw new DissectionFailure("The input line does not match the specified log format.\n"
            + "Line : " + line.getValue() + "\n"
            + "LogFormat: " + logFormat + "\n"
            + "RegEx : " + logFormatRegEx);
    }

    // Capture groups are 1-based, the token list is 0-based.
    for (int i = 1; i <= matcher.groupCount(); i++) {
        final String matchedStr = matcher.group(i);
        final Token token = logFormatUsedTokens.get(i - 1);
        for (TokenOutputField tokenOutputField : token.getOutputFields()) {
            final String matchedName = tokenOutputField.getName();
            final String matchedType = tokenOutputField.getType();
            parsable.addDissection(inputname, matchedType, matchedName,
                decodeExtractedValue(matchedName, matchedStr));
        }
    }
}
use of nl.basjes.parse.core.exceptions.DissectionFailure in project logparser by nielsbasjes.
the class RequestCookieListDissector method dissect.
/**
 * Splits the cookie list held in the input field into individual cookies and emits
 * an "HTTP.COOKIE" dissection for each cookie that was requested (or for all of
 * them when {@code wantAllCookies} is set).
 *
 * <p>Cookie names are trimmed and lower-cased. A cookie without an '=' is emitted
 * with an empty value; otherwise the trimmed value is URL-decoded before it is emitted.
 *
 * @param parsable  the record being parsed; supplies the input field and receives the dissections
 * @param inputname the name of the input field that holds the cookie list
 * @throws DissectionFailure if decoding a cookie value raises an {@link IllegalArgumentException}
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
    final String fieldValue = field.getValue().getString();
    if (fieldValue == null || fieldValue.isEmpty()) {
        // Nothing to do here
        return;
    }

    final String[] allValues = fieldSeparatorPattern.split(fieldValue);
    for (String value : allValues) {
        final int equalPos = value.indexOf('=');
        if (equalPos == -1) {
            if (!value.isEmpty()) {
                // Just a name, no value
                final String theName = value.trim().toLowerCase();
                if (wantAllCookies || requestedCookies.contains(theName)) {
                    parsable.addDissection(inputname, "HTTP.COOKIE", theName, "");
                }
            }
        } else {
            final String theName = value.substring(0, equalPos).trim().toLowerCase();
            if (wantAllCookies || requestedCookies.contains(theName)) {
                final String theValue = value.substring(equalPos + 1).trim();
                try {
                    parsable.addDissection(inputname, "HTTP.COOKIE", theName, Utils.resilientUrlDecode(theValue));
                } catch (IllegalArgumentException e) {
                    // This usually means that there was invalid encoding in the line.
                    // Keep the original exception as the cause so the stack trace is not lost.
                    throw new DissectionFailure(e.getMessage(), e);
                }
            }
        }
    }
}
use of nl.basjes.parse.core.exceptions.DissectionFailure in project logparser by nielsbasjes.
the class TimeStampDissector method dissect.
/**
 * Parses the timestamp held in {@code field} and emits the requested time related
 * dissections in three groups: timezone independent values, values as they were
 * parsed (local date/time) and values converted to UTC.
 *
 * <p>A null or empty field value is silently skipped.
 *
 * @param field     the field holding the timestamp text
 * @param parsable  the record that receives the dissections
 * @param inputname the name under which the dissections are registered
 * @throws DissectionFailure if the timestamp text cannot be parsed
 */
protected void dissect(ParsedField field, final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final String rawTimestamp = field.getValue().getString();
    if (rawTimestamp == null || rawTimestamp.isEmpty()) {
        // Nothing to do here
        return;
    }

    final ZonedDateTime parsed;
    try {
        parsed = parse(rawTimestamp);
    } catch (DateTimeParseException parseProblem) {
        // The two ruler lines make it easy to locate the error index reported in the message.
        throw new DissectionFailure(
            parseProblem.getMessage()
                + "\n 10 20 30 40 50 60 70 80 90 100 110 120"
                + "\n_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_"
                + "\n" + rawTimestamp
                + "\n\n" + formatter.toString(),
            parseProblem);
    }

    // ---- Timezone independent ----
    if (wantAnyTZIndependent && wantTimezone) {
        final String zoneName = parsed.getZone().getDisplayName(TextStyle.FULL, locale);
        parsable.addDissection(inputname, "TIME.ZONE", "timezone", zoneName);
    }
    if (wantAnyTZIndependent && wantEpoch) {
        parsable.addDissection(inputname, "TIME.EPOCH", "epoch", parsed.toInstant().toEpochMilli());
    }

    // ---- As parsed ----
    if (wantAnyAsParsed) {
        final LocalDateTime asParsed = parsed.toLocalDateTime();
        if (wantDay) {
            parsable.addDissection(inputname, "TIME.DAY", "day", asParsed.getDayOfMonth());
        }
        if (wantMonthname) {
            final String monthName = asParsed.getMonth().getDisplayName(TextStyle.FULL, locale);
            parsable.addDissection(inputname, "TIME.MONTHNAME", "monthname", monthName);
        }
        if (wantMonth) {
            parsable.addDissection(inputname, "TIME.MONTH", "month", asParsed.getMonthValue());
        }
        if (wantWeekOfWeekYear) {
            final WeekFields weekFields = WeekFields.of(locale);
            parsable.addDissection(inputname, "TIME.WEEK", "weekofweekyear", asParsed.get(weekFields.weekOfWeekBasedYear()));
        }
        if (wantWeekYear) {
            final WeekFields weekFields = WeekFields.of(locale);
            parsable.addDissection(inputname, "TIME.YEAR", "weekyear", asParsed.get(weekFields.weekBasedYear()));
        }
        if (wantYear) {
            parsable.addDissection(inputname, "TIME.YEAR", "year", asParsed.getYear());
        }
        if (wantHour) {
            parsable.addDissection(inputname, "TIME.HOUR", "hour", asParsed.getHour());
        }
        if (wantMinute) {
            parsable.addDissection(inputname, "TIME.MINUTE", "minute", asParsed.getMinute());
        }
        if (wantSecond) {
            parsable.addDissection(inputname, "TIME.SECOND", "second", asParsed.getSecond());
        }
        if (wantMillisecond) {
            parsable.addDissection(inputname, "TIME.MILLISECOND", "millisecond", asParsed.getNano() / 1_000_000L);
        }
        if (wantMicrosecond) {
            parsable.addDissection(inputname, "TIME.MICROSECOND", "microsecond", asParsed.getNano() / 1_000L);
        }
        if (wantNanosecond) {
            parsable.addDissection(inputname, "TIME.NANOSECOND", "nanosecond", asParsed.getNano());
        }
        if (wantDate) {
            parsable.addDissection(inputname, "TIME.DATE", "date", asParsed.format(ISO_DATE_FORMATTER));
        }
        if (wantTime) {
            parsable.addDissection(inputname, "TIME.TIME", "time", asParsed.format(ISO_TIME_FORMATTER));
        }
    }

    // ---- In UTC timezone ----
    if (wantAnyUTC) {
        final ZonedDateTime inUtc = parsed.withZoneSameInstant(ZoneOffset.UTC);
        if (wantDayUTC) {
            parsable.addDissection(inputname, "TIME.DAY", "day_utc", inUtc.getDayOfMonth());
        }
        if (wantMonthnameUTC) {
            final String monthName = inUtc.getMonth().getDisplayName(TextStyle.FULL, locale);
            parsable.addDissection(inputname, "TIME.MONTHNAME", "monthname_utc", monthName);
        }
        if (wantMonthUTC) {
            parsable.addDissection(inputname, "TIME.MONTH", "month_utc", inUtc.getMonth().getValue());
        }
        if (wantWeekOfWeekYearUTC) {
            // The UTC variants use the ISO week definition, not the locale specific one.
            parsable.addDissection(inputname, "TIME.WEEK", "weekofweekyear_utc", inUtc.get(WeekFields.ISO.weekOfWeekBasedYear()));
        }
        if (wantWeekYearUTC) {
            parsable.addDissection(inputname, "TIME.YEAR", "weekyear_utc", inUtc.get(WeekFields.ISO.weekBasedYear()));
        }
        if (wantYearUTC) {
            parsable.addDissection(inputname, "TIME.YEAR", "year_utc", inUtc.getYear());
        }
        if (wantHourUTC) {
            parsable.addDissection(inputname, "TIME.HOUR", "hour_utc", inUtc.getHour());
        }
        if (wantMinuteUTC) {
            parsable.addDissection(inputname, "TIME.MINUTE", "minute_utc", inUtc.getMinute());
        }
        if (wantSecondUTC) {
            parsable.addDissection(inputname, "TIME.SECOND", "second_utc", inUtc.getSecond());
        }
        if (wantMillisecondUTC) {
            parsable.addDissection(inputname, "TIME.MILLISECOND", "millisecond_utc", inUtc.getNano() / 1_000_000L);
        }
        if (wantMicrosecondUTC) {
            parsable.addDissection(inputname, "TIME.MICROSECOND", "microsecond_utc", inUtc.getNano() / 1_000L);
        }
        if (wantNanosecondUTC) {
            parsable.addDissection(inputname, "TIME.NANOSECOND", "nanosecond_utc", inUtc.getNano());
        }
        if (wantDateUTC) {
            parsable.addDissection(inputname, "TIME.DATE", "date_utc", inUtc.format(ISO_DATE_FORMATTER));
        }
        if (wantTimeUTC) {
            parsable.addDissection(inputname, "TIME.TIME", "time_utc", inUtc.format(ISO_TIME_FORMATTER));
        }
    }
}
use of nl.basjes.parse.core.exceptions.DissectionFailure in project logparser by nielsbasjes.
the class ApacheHttpdlogDeserializer method deserialize.
/**
 * Parses one log line and copies the requested values into {@code row}.
 *
 * <p>A line that fails dissection is counted as bad and yields {@code null}. Once at
 * least {@code MINIMAL_FAIL_LINES} lines have been seen and the share of bad lines
 * exceeds {@code MINIMAL_FAIL_PERCENTAGE} percent, the failure is escalated instead.
 *
 * @param writable the input record; must be a {@code Text}
 * @return {@code row} filled with the values of this line, or {@code null} when the
 *         line could not be dissected
 * @throws SerDeException if the input is not a {@code Text}, if too many lines are bad,
 *                        or if the dissectors are invalid or missing
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    if (!(writable instanceof Text)) {
        throw new SerDeException("The input MUST be a Text line.");
    }

    linesInput++;
    try {
        currentValue.clear();
        parser.parse(currentValue, writable.toString());
    } catch (DissectionFailure dissectionFailure) {
        linesBad++;
        if (linesInput >= MINIMAL_FAIL_LINES
            && 100 * linesBad > MINIMAL_FAIL_PERCENTAGE * linesInput) {
            // Attach the failure that tipped the balance so the root cause stays visible.
            throw new SerDeException(
                "Too many bad lines: " + linesBad + " of " + linesInput + " are bad.",
                dissectionFailure);
        }
        // Just return that this line is nothing.
        return null;
    } catch (InvalidDissectorException | MissingDissectorsException e) {
        throw new SerDeException("Cannot continue; Fix the Dissectors before retrying", e);
    }

    for (ColumnToGetterMapping ctgm : columnToGetterMappings) {
        switch (ctgm.casts) {
            case STRING:
                row.set(ctgm.index, currentValue.getString(ctgm.fieldValue));
                break;
            case LONG:
                row.set(ctgm.index, currentValue.getLong(ctgm.fieldValue));
                break;
            case DOUBLE:
                row.set(ctgm.index, currentValue.getDouble(ctgm.fieldValue));
                break;
            default:
                // Any other cast leaves the column untouched.
                break;
        }
    }
    return row;
}
use of nl.basjes.parse.core.exceptions.DissectionFailure in project logparser by nielsbasjes.
the class HttpUriDissector method dissect.
/**
 * Cleans up the URI held in the input field, parses it with {@link URI} and emits
 * the requested parts (query, path, ref and, for absolute URLs, protocol, userinfo,
 * host and port). Parts that are null, empty or (for the port) -1 are not emitted.
 *
 * <p>A value starting with '/' is parsed behind a dummy protocol and host; in that
 * case protocol, userinfo, host and port are never emitted.
 *
 * @param parsable  the record being parsed; supplies the input field and receives the dissections
 * @param inputname the name of the input field that holds the URI
 * @throws DissectionFailure if the cleaned up value still cannot be parsed as a URI
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
    String uriString = field.getValue().getString();
    if (uriString == null || uriString.isEmpty()) {
        // Nothing to do here
        return;
    }

    // First we cleanup the URI so we fail less often over 'garbage' URIs.
    // See: https://stackoverflow.com/questions/11038967/brackets-in-a-request-url-are-legal-but-not-in-a-uri-java
    uriString = new String(URLCodec.encodeUrl(BAD_URI_CHARS, uriString.getBytes(UTF_8)), US_ASCII);

    // Now we translate any HTML encoded entities/characters into URL UTF-8 encoded characters
    uriString = makeHTMLEncodedInert(uriString);

    // Before we hand it to the standard parser we hack it around a bit so we can parse
    // nasty edge cases that are illegal yet do occur in real clickstreams.
    // Also we force the query string to start with ?& so the returned query string starts with &
    // Which leads to more consistent output after parsing.
    final int firstQuestionMark = uriString.indexOf('?');
    final int firstAmpersand = uriString.indexOf('&');
    if (firstAmpersand != -1 || firstQuestionMark != -1) {
        // Every '?' becomes '&' and then the first '&' becomes "?&",
        // which yields a query of the form ?&x=x&y=y&z=z
        uriString = uriString.replace('?', '&');
        uriString = uriString.replaceFirst("&", "?&");
    }

    // We find that people muck up the URL by putting % signs in the URLs that are NOT escape sequences
    // So any % that is not followed by a two 'hex' letters is fixed.
    // NOTE(review): the replacement is applied twice on purpose in the original code; presumably
    // a single pass misses bad escapes that overlap with the previous match - confirm.
    uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1");
    uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1");

    // We have URIs with fragments like this:
    // /path/?_requestid=1234#x3D;12341234&Referrer=blablabla
    // So first we repair the broken encoded char
    uriString = ALMOST_HTML_ENCODED.matcher(uriString).replaceAll("$1&$2");
    uriString = StringEscapeUtils.unescapeHtml4(uriString);

    // And we see URIs with this:
    // /path/?Referrer=ADV1234#&f=API&subid=#&name=12341234
    uriString = EQUALS_HASH_PATTERN.matcher(uriString).replaceAll("=");
    uriString = HASH_AMP_PATTERN.matcher(uriString).replaceAll("&");

    // If we still have multiple '#' in here we replace them with something else: '~'
    while (true) {
        final Matcher doubleHashMatcher = DOUBLE_HASH_PATTERN.matcher(uriString);
        if (!doubleHashMatcher.find()) {
            break;
        }
        uriString = doubleHashMatcher.replaceAll("~$1#");
    }

    boolean isUrl = true;
    final URI uri;
    try {
        // startsWith (instead of charAt(0)) stays safe should the cleanup above ever
        // leave an empty string behind.
        if (uriString.startsWith("/")) {
            uri = URI.create("dummy-protocol://dummy.host.name" + uriString);
            // I.e. we do not return the values we just faked.
            isUrl = false;
        } else {
            uri = URI.create(uriString);
        }
    } catch (IllegalArgumentException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new DissectionFailure(
            "Failed to parse URI >>" + field.getValue().getString() + "<< because of : " + e.getMessage(), e);
    }

    if (wantQuery) {
        final String value = uri.getRawQuery();
        if (value != null && !value.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", value);
        }
    }
    if (wantPath) {
        final String value = uri.getPath();
        if (value != null && !value.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.PATH", "path", value);
        }
    }
    if (wantRef) {
        final String value = uri.getFragment();
        if (value != null && !value.isEmpty()) {
            parsable.addDissection(inputname, "HTTP.REF", "ref", value);
        }
    }

    if (isUrl) {
        if (wantProtocol) {
            final String value = uri.getScheme();
            if (value != null && !value.isEmpty()) {
                parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", value);
            }
        }
        if (wantUserinfo) {
            final String value = uri.getUserInfo();
            if (value != null && !value.isEmpty()) {
                parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", value);
            }
        }
        if (wantHost) {
            final String value = uri.getHost();
            if (value != null && !value.isEmpty()) {
                parsable.addDissection(inputname, "HTTP.HOST", "host", value);
            }
        }
        if (wantPort) {
            final int value = uri.getPort();
            if (value != -1) {
                parsable.addDissection(inputname, "HTTP.PORT", "port", value);
            }
        }
    }
}
Aggregations