use of com.maxmind.geoip2.model.Omni in project druid by druid-io.
the class WikipediaIrcDecoder method decodeMessage.
@Override
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg) {
final Map<String, String> dimensions = Maps.newHashMap();
final Map<String, Float> metrics = Maps.newHashMap();
Matcher m = pattern.matcher(msg);
if (!m.matches()) {
throw new IllegalArgumentException("Invalid input format");
}
Matcher shortname = shortnamePattern.matcher(channel);
if (shortname.matches()) {
dimensions.put("language", shortname.group(1));
}
String page = m.group(1);
String pageUrl = page.replaceAll("\\s", "_");
dimensions.put("page", pageUrl);
String user = m.group(4);
Matcher ipMatch = ipPattern.matcher(user);
boolean anonymous = ipMatch.matches();
if (anonymous) {
try {
final InetAddress ip = InetAddress.getByName(ipMatch.group());
final Omni lookup = geoLookup.omni(ip);
dimensions.put("continent", lookup.getContinent().getName());
dimensions.put("country", lookup.getCountry().getName());
dimensions.put("region", lookup.getMostSpecificSubdivision().getName());
dimensions.put("city", lookup.getCity().getName());
} catch (UnknownHostException e) {
log.error(e, "invalid ip [%s]", ipMatch.group());
} catch (IOException e) {
log.error(e, "error looking up geo ip");
} catch (GeoIp2Exception e) {
log.error(e, "error looking up geo ip");
}
}
dimensions.put("user", user);
final String flags = m.group(2);
dimensions.put("unpatrolled", Boolean.toString(flags.contains("!")));
dimensions.put("newPage", Boolean.toString(flags.contains("N")));
dimensions.put("robot", Boolean.toString(flags.contains("B")));
dimensions.put("anonymous", Boolean.toString(anonymous));
String[] parts = page.split(":");
if (parts.length > 1 && !parts[1].startsWith(" ")) {
Map<String, String> channelNamespaces = namespaces.get(channel);
if (channelNamespaces != null && channelNamespaces.containsKey(parts[0])) {
dimensions.put("namespace", channelNamespaces.get(parts[0]));
} else {
dimensions.put("namespace", "wikipedia");
}
} else {
dimensions.put("namespace", "article");
}
float delta = m.group(6) != null ? Float.parseFloat(m.group(6)) : 0;
metrics.put("delta", delta);
metrics.put("added", Math.max(delta, 0));
metrics.put("deleted", Math.min(delta, 0));
return new InputRow() {
@Override
public List<String> getDimensions() {
return dimensionList;
}
@Override
public long getTimestampFromEpoch() {
return timestamp.getMillis();
}
@Override
public DateTime getTimestamp() {
return timestamp;
}
@Override
public List<String> getDimension(String dimension) {
final String value = dimensions.get(dimension);
if (value != null) {
return ImmutableList.of(value);
} else {
return ImmutableList.of();
}
}
@Override
public Object getRaw(String dimension) {
return dimensions.get(dimension);
}
@Override
public float getFloatMetric(String metric) {
return metrics.get(metric);
}
@Override
public long getLongMetric(String metric) {
return new Float(metrics.get(metric)).longValue();
}
@Override
public int compareTo(Row o) {
return timestamp.compareTo(o.getTimestamp());
}
@Override
public String toString() {
return "WikipediaRow{" + "timestamp=" + timestamp + ", dimensions=" + dimensions + ", metrics=" + metrics + '}';
}
};
}
Aggregations