Search in sources :

Example 1 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class LogAnalytics method run.

/**
 * Opens the input and output data stores, runs the analytics job over them
 * and reports the outcome.
 *
 * @param args optional data store class names: {@code args[0]} is used for
 *             the input store and {@code args[1]} for the output store
 *             (falling back to {@code args[0]} when absent); when empty, the
 *             {@code DataStoreFactory} overload without an explicit store
 *             class is used for both stores
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if creating the stores, building the job or running it fails
 */
@Override
public int run(String[] args) throws Exception {
    DataStore<Long, Pageview> inStore;
    DataStore<String, MetricDatum> outStore;
    Configuration conf = new Configuration();
    if (args.length > 0) {
        String dataStoreClass = args[0];
        inStore = DataStoreFactory.getDataStore(dataStoreClass, Long.class, Pageview.class, conf);
        if (args.length > 1) {
            dataStoreClass = args[1];
        }
        outStore = DataStoreFactory.getDataStore(dataStoreClass, String.class, MetricDatum.class, conf);
    } else {
        inStore = DataStoreFactory.getDataStore(Long.class, Pageview.class, conf);
        outStore = DataStoreFactory.getDataStore(String.class, MetricDatum.class, conf);
    }
    boolean success;
    try {
        // NOTE(review): the literal 3 is presumably the number of reducers - confirm against createJob
        Job job = createJob(inStore, outStore, 3);
        success = job.waitForCompletion(true);
    } finally {
        // Release both stores even when the job fails; the nested finally
        // guarantees outStore is closed if closing inStore throws.
        try {
            inStore.close();
        } finally {
            outStore.close();
        }
    }
    log.info("Log completed with {}", (success ? "success" : "failure"));
    return success ? 0 : 1;
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) Configuration(org.apache.hadoop.conf.Configuration) MetricDatum(org.apache.gora.tutorial.log.generated.MetricDatum) Job(org.apache.hadoop.mapreduce.Job)

Example 2 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class LogManager method parse.

/**
 * Parses a log file line by line and stores each resulting pageview in the
 * data store, keyed by a sequential index starting at 0.
 *
 * @param input the input file location
 * @throws Exception if the file cannot be read, or a line cannot be parsed or stored
 */
private void parse(String input) throws Exception {
    log.info("Parsing file: {}", input);
    long lineCount = 0;
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(input), Charset.defaultCharset()))) {
        // Check for end of stream BEFORE parsing: an empty file yields null on
        // the very first read, and parseLine must never be handed null.
        String line;
        while ((line = reader.readLine()) != null) {
            Pageview pageview = parseLine(line);
            if (pageview != null) {
                //store the pageview; the counter only advances for stored entries
                storePageview(lineCount++, pageview);
            }
        }
    }
    log.info("finished parsing file. Total number of log lines: {}", lineCount);
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileInputStream(java.io.FileInputStream)

Example 3 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class LogManager method parseLine.

/**
 * Parses a single log line in combined log format using a StringTokenizer
 * whose delimiter set is switched between fields.
 *
 * @param line one log line; must not be null
 * @return a Pageview populated with the ip, timestamp, HTTP method, URL,
 *         status code, response size, referrer and user agent of the line
 * @throws ParseException if the timestamp field cannot be parsed by dateFormat
 */
private Pageview parseLine(String line) throws ParseException {
    StringTokenizer matcher = new StringTokenizer(line);
    //parse the log line
    String ip = matcher.nextToken();
    //discard the two fields that follow the ip
    matcher.nextToken();
    matcher.nextToken();
    // the token runs up to ']' and still carries the leading " [", hence substring(2)
    long timestamp = dateFormat.parse(matcher.nextToken("]").substring(2)).getTime();
    // skip the text between the timestamp and the opening quote of the request
    matcher.nextToken("\"");
    String request = matcher.nextToken("\"");
    String[] requestParts = request.split(" ");
    String httpMethod = requestParts[0];
    String url = requestParts[1];
    // consume the closing quote of the request and switch back to space-delimited fields
    matcher.nextToken(" ");
    int httpStatusCode = Integer.parseInt(matcher.nextToken());
    // the size field is "-" when no bytes were sent; treat that as 0 instead
    // of failing with a NumberFormatException
    String sizeToken = matcher.nextToken();
    int responseSize = "-".equals(sizeToken) ? 0 : Integer.parseInt(sizeToken);
    // each quoted field is preceded by a separator token that is discarded
    matcher.nextToken("\"");
    String referrer = matcher.nextToken("\"");
    matcher.nextToken("\"");
    String userAgent = matcher.nextToken("\"");
    //construct and return pageview object
    Pageview pageview = new Pageview();
    pageview.setIp(new Utf8(ip));
    pageview.setTimestamp(timestamp);
    pageview.setHttpMethod(new Utf8(httpMethod));
    pageview.setUrl(new Utf8(url));
    pageview.setHttpStatusCode(httpStatusCode);
    pageview.setResponseSize(responseSize);
    pageview.setReferrer(new Utf8(referrer));
    pageview.setUserAgent(new Utf8(userAgent));
    return pageview;
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) StringTokenizer(java.util.StringTokenizer) Utf8(org.apache.avro.util.Utf8)

Example 4 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class LogManager method get.

/**
 * Looks up the pageview stored under the given key and prints it.
 *
 * @param key the key of the pageview to fetch from the data store
 */
private void get(long key) throws Exception {
    printPageview(dataStore.get(key));
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview)

Example 5 with Pageview

use of org.apache.gora.tutorial.log.generated.Pageview in project gora by apache.

the class LogManager method printResult.

/**
 * Logs the key of every entry in the given result and prints its pageview,
 * then logs the result's offset as the number of pageviews.
 *
 * @param result the query result to iterate over
 */
private void printResult(Result<Long, Pageview> result) throws Exception {
    // next() advances the Result object; iteration stops once it returns false
    for (boolean hasEntry = result.next(); hasEntry; hasEntry = result.next()) {
        // current key first, then the value object it maps to
        final long currentKey = result.getKey();
        final Pageview currentPageview = result.get();
        log.info("{} :", currentKey);
        printPageview(currentPageview);
    }
    log.info("Number of pageviews from the query: {}", result.getOffset());
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview)

Aggregations

Pageview (org.apache.gora.tutorial.log.generated.Pageview): 12, BufferedReader (java.io.BufferedReader): 2, FileInputStream (java.io.FileInputStream): 2, InputStreamReader (java.io.InputStreamReader): 2, StringTokenizer (java.util.StringTokenizer): 2, Utf8 (org.apache.avro.util.Utf8): 2, MetricDatum (org.apache.gora.tutorial.log.generated.MetricDatum): 2, Configuration (org.apache.hadoop.conf.Configuration): 2, GoraSparkEngine (org.apache.gora.spark.GoraSparkEngine): 1, Job (org.apache.hadoop.mapreduce.Job): 1, SparkConf (org.apache.spark.SparkConf): 1, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 1, Tuple2 (scala.Tuple2): 1