Search in sources :

Example 6 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class DistributedLogManager method printResult.

private void printResult(Result<Long, Pageview> result) throws Exception {
    // next() moves the cursor forward; the loop stops once it reports no further entry
    for (boolean hasEntry = result.next(); hasEntry; hasEntry = result.next()) {
        // key of the entry the cursor is currently on
        final long currentKey = result.getKey();
        // value object of the same entry
        final Pageview currentPageview = result.get();
        log.info("{} :", currentKey);
        printPageview(currentPageview);
    }
    // after iteration, report the offset exposed by the result as the pageview count
    log.info("Number of pageviews from the query: {}", result.getOffset());
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview)

Example 7 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class DistributedLogManager method get.

/**
   * Looks up one pageview by key in the cache store, requesting only the
   * given fields, and prints the result.
   */
private void get(long key, String[] fields) throws Exception {
    printPageview(cacheStore.get(key, fields));
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview)

Example 8 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class DistributedLogManager method get.

/**
   * Looks up one pageview by key and prints it. The cache store is queried
   * when isCacheEnabled is true; otherwise the data store is queried.
   */
private void get(long key, boolean isCacheEnabled) throws Exception {
    final Pageview fetched;
    if (isCacheEnabled) {
        fetched = cacheStore.get(key);
    } else {
        fetched = dataStore.get(key);
    }
    printPageview(fetched);
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview)

Example 9 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class DistributedLogManager method parseLine.

/**
   * Splits one log line (combined log format) with a StringTokenizer and
   * builds a Pageview from the extracted fields.
   */
private Pageview parseLine(String line) throws ParseException {
    // NOTE: the tokenizer is stateful and its delimiter set changes between
    // calls, so the order of the nextToken calls below must not be altered.
    StringTokenizer tokens = new StringTokenizer(line);

    // first whitespace-delimited token is used as the IP
    String clientIp = tokens.nextToken();
    // the following two tokens are read and thrown away
    tokens.nextToken();
    tokens.nextToken();

    // read up to the next ']' and skip the first two characters before date parsing
    // (presumably the leading " [" of the bracketed timestamp -- confirm against the log format)
    String rawTimestamp = tokens.nextToken("]").substring(2);
    long timestamp = dateFormat.parse(rawTimestamp).getTime();

    // skip to the quoted request, then take the text between the quotes
    tokens.nextToken("\"");
    String requestLine = tokens.nextToken("\"");
    String[] requestFields = requestLine.split(" ");
    String method = requestFields[0];
    String target = requestFields[1];

    // switch back to a space delimiter for the two numeric fields
    tokens.nextToken(" ");
    int statusCode = Integer.parseInt(tokens.nextToken());
    int bytesSent = Integer.parseInt(tokens.nextToken());

    // quoted referrer followed by quoted user agent
    tokens.nextToken("\"");
    String referrerText = tokens.nextToken("\"");
    tokens.nextToken("\"");
    String agentText = tokens.nextToken("\"");

    // populate the pageview record with the parsed values
    Pageview parsed = new Pageview();
    parsed.setIp(new Utf8(clientIp));
    parsed.setTimestamp(timestamp);
    parsed.setHttpMethod(new Utf8(method));
    parsed.setUrl(new Utf8(target));
    parsed.setHttpStatusCode(statusCode);
    parsed.setResponseSize(bytesSent);
    parsed.setReferrer(new Utf8(referrerText));
    parsed.setUserAgent(new Utf8(agentText));
    return parsed;
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) StringTokenizer(java.util.StringTokenizer) Utf8(org.apache.avro.util.Utf8)

Example 10 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class DistributedLogManager method parse.

/**
   * Parses a log file line by line and stores each resulting pageview.
   * An empty file is handled gracefully and results in zero stored lines.
   *
   * @param input the input file location
   * @param isCacheEnabled forwarded unchanged to storePageview for every parsed line
   * @throws Exception if the file cannot be read, or if parsing or storing a line fails
   */
private void parse(String input, boolean isCacheEnabled) throws Exception {
    log.info("Parsing file: {}", input);
    long lineCount = 0;
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(input), Charset.defaultCharset()))) {
        String line;
        // Check for end-of-stream BEFORE parsing: readLine() returns null on an
        // empty file, and handing null to parseLine would raise a NullPointerException.
        while ((line = reader.readLine()) != null) {
            Pageview pageview = parseLine(line);
            if (pageview != null) {
                // store the pageview; the counter doubles as the key and only advances on a store
                storePageview(lineCount++, pageview, isCacheEnabled);
            }
        }
    }
    log.info("finished parsing file. Total number of log lines: {}", lineCount);
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileInputStream(java.io.FileInputStream)

Aggregations

Pageview (org.apache.gora.tutorial.log.generated.Pageview): 12, BufferedReader (java.io.BufferedReader): 2, FileInputStream (java.io.FileInputStream): 2, InputStreamReader (java.io.InputStreamReader): 2, StringTokenizer (java.util.StringTokenizer): 2, Utf8 (org.apache.avro.util.Utf8): 2, MetricDatum (org.apache.gora.tutorial.log.generated.MetricDatum): 2, Configuration (org.apache.hadoop.conf.Configuration): 2, GoraSparkEngine (org.apache.gora.spark.GoraSparkEngine): 1, Job (org.apache.hadoop.mapreduce.Job): 1, SparkConf (org.apache.spark.SparkConf): 1, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 1, Tuple2 (scala.Tuple2): 1