Search in sources :

Example 1 with Omni

Use of com.maxmind.geoip2.model.Omni in the druid project by druid-io.

The method decodeMessage of the class WikipediaIrcDecoder.

/**
 * Decodes one IRC message into an {@link InputRow}.
 *
 * <p>The message is matched against {@code pattern}; capture groups 1, 2, 4 and 6 are read as the
 * page title, the edit flags, the user name and the size delta respectively (the regex itself is
 * declared outside this method). The resulting row exposes these dimensions:
 * <ul>
 *   <li>{@code language} - group 1 of {@code shortnamePattern} applied to the channel, when it matches</li>
 *   <li>{@code page} - the page title with every whitespace character replaced by {@code _}</li>
 *   <li>{@code user} and {@code anonymous} - the user name and whether it matches {@code ipPattern}</li>
 *   <li>{@code continent}, {@code country}, {@code region}, {@code city} - only for anonymous users
 *       and only when the geo lookup succeeds</li>
 *   <li>{@code unpatrolled}, {@code newPage}, {@code robot} - whether the flags contain
 *       {@code !}, {@code N} and {@code B}</li>
 *   <li>{@code namespace} - derived from the text before the first {@code :} in the page title</li>
 * </ul>
 * and these metrics: {@code delta}, {@code added} ({@code max(delta, 0)}) and
 * {@code deleted} ({@code min(delta, 0)}).
 *
 * <p>Metric accessors of the returned row yield {@code 0} for a metric name that was not recorded
 * instead of failing with a {@link NullPointerException} while unboxing.
 *
 * @param timestamp timestamp reported by the returned row
 * @param channel   channel the message came from; used for the language and namespace lookups
 * @param msg       raw message text
 * @return a row backed by the decoded dimensions and metrics
 * @throws IllegalArgumentException if {@code msg} does not match {@code pattern}
 */
@Override
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg) {
    final Map<String, String> dimensions = Maps.newHashMap();
    final Map<String, Float> metrics = Maps.newHashMap();
    Matcher m = pattern.matcher(msg);
    if (!m.matches()) {
        throw new IllegalArgumentException("Invalid input format");
    }
    Matcher shortname = shortnamePattern.matcher(channel);
    if (shortname.matches()) {
        dimensions.put("language", shortname.group(1));
    }
    String page = m.group(1);
    String pageUrl = page.replaceAll("\\s", "_");
    dimensions.put("page", pageUrl);
    String user = m.group(4);
    Matcher ipMatch = ipPattern.matcher(user);
    // A user name that matches ipPattern is treated as an anonymous edit.
    boolean anonymous = ipMatch.matches();
    if (anonymous) {
        // Geo enrichment is best effort: any failure is logged and the row is still produced,
        // just without the geo dimensions.
        try {
            final InetAddress ip = InetAddress.getByName(ipMatch.group());
            final Omni lookup = geoLookup.omni(ip);
            dimensions.put("continent", lookup.getContinent().getName());
            dimensions.put("country", lookup.getCountry().getName());
            dimensions.put("region", lookup.getMostSpecificSubdivision().getName());
            dimensions.put("city", lookup.getCity().getName());
        } catch (UnknownHostException e) {
            // Must stay ahead of the IOException handler: UnknownHostException is a subclass of it.
            log.error(e, "invalid ip [%s]", ipMatch.group());
        } catch (IOException e) {
            log.error(e, "error looking up geo ip");
        } catch (GeoIp2Exception e) {
            log.error(e, "error looking up geo ip");
        }
    }
    dimensions.put("user", user);
    final String flags = m.group(2);
    dimensions.put("unpatrolled", Boolean.toString(flags.contains("!")));
    dimensions.put("newPage", Boolean.toString(flags.contains("N")));
    dimensions.put("robot", Boolean.toString(flags.contains("B")));
    dimensions.put("anonymous", Boolean.toString(anonymous));
    // "Prefix:Title" is treated as namespaced; "Title: subtitle" (space right after the colon) is not.
    String[] parts = page.split(":");
    if (parts.length > 1 && !parts[1].startsWith(" ")) {
        Map<String, String> channelNamespaces = namespaces.get(channel);
        if (channelNamespaces != null && channelNamespaces.containsKey(parts[0])) {
            dimensions.put("namespace", channelNamespaces.get(parts[0]));
        } else {
            // Prefix not registered for this channel: fall back to the generic namespace.
            dimensions.put("namespace", "wikipedia");
        }
    } else {
        dimensions.put("namespace", "article");
    }
    // Group 6 may be absent, in which case the edit is recorded with a delta of 0.
    float delta = m.group(6) != null ? Float.parseFloat(m.group(6)) : 0;
    metrics.put("delta", delta);
    metrics.put("added", Math.max(delta, 0));
    // Note: "deleted" is stored as a non-positive number (min(delta, 0)), not as a magnitude.
    metrics.put("deleted", Math.min(delta, 0));
    return new InputRow() {

        @Override
        public List<String> getDimensions() {
            return dimensionList;
        }

        @Override
        public long getTimestampFromEpoch() {
            return timestamp.getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return timestamp;
        }

        @Override
        public List<String> getDimension(String dimension) {
            final String value = dimensions.get(dimension);
            if (value != null) {
                return ImmutableList.of(value);
            } else {
                return ImmutableList.of();
            }
        }

        @Override
        public Object getRaw(String dimension) {
            return dimensions.get(dimension);
        }

        @Override
        public float getFloatMetric(String metric) {
            // Null check before unboxing: an unknown metric name reads as 0 rather than throwing.
            final Float value = metrics.get(metric);
            return value == null ? 0.0f : value.floatValue();
        }

        @Override
        public long getLongMetric(String metric) {
            // Same null-safety as getFloatMetric; longValue() truncates exactly like the old
            // new Float(...).longValue() did, without the deprecated constructor.
            final Float value = metrics.get(metric);
            return value == null ? 0L : value.longValue();
        }

        @Override
        public int compareTo(Row o) {
            return timestamp.compareTo(o.getTimestamp());
        }

        @Override
        public String toString() {
            return "WikipediaRow{" + "timestamp=" + timestamp + ", dimensions=" + dimensions + ", metrics=" + metrics + '}';
        }
    };
}
Also used: UnknownHostException (java.net.UnknownHostException), Matcher (java.util.regex.Matcher), IOException (java.io.IOException), GeoIp2Exception (com.maxmind.geoip2.exception.GeoIp2Exception), InputRow (io.druid.data.input.InputRow), Row (io.druid.data.input.Row), Omni (com.maxmind.geoip2.model.Omni), InetAddress (java.net.InetAddress)

Aggregations

GeoIp2Exception (com.maxmind.geoip2.exception.GeoIp2Exception): 1, Omni (com.maxmind.geoip2.model.Omni): 1, InputRow (io.druid.data.input.InputRow): 1, Row (io.druid.data.input.Row): 1, IOException (java.io.IOException): 1, InetAddress (java.net.InetAddress): 1, UnknownHostException (java.net.UnknownHostException): 1, Matcher (java.util.regex.Matcher): 1