Search in sources :

Example 1 with MultiWindowDimensionAggregation

use of org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation in project apex-malhar by apache.

the class Application method getMysqlAggregationOper.

/**
 * Adds a {@link MultiWindowDimensionAggregation} operator to the DAG under the
 * given name and configures it.
 *
 * <p>Configuration applied: window size 3, a single dimension combination
 * holding index 0, the {@code TIME_BUCKETS.m} time bucket and the dimension
 * key value {@code "1"}.
 *
 * @param name name under which the operator is registered in the DAG
 * @param dag  DAG the operator is added to
 * @return the configured operator instance returned by {@code dag.addOperator}
 */
private MultiWindowDimensionAggregation getMysqlAggregationOper(String name, DAG dag) {
    final MultiWindowDimensionAggregation aggregation = dag.addOperator(name, MultiWindowDimensionAggregation.class);

    // Exactly one dimension combination is registered: the one made of index 0.
    final List<int[]> combinations = new ArrayList<int[]>();
    combinations.add(new int[] { 0 });

    aggregation.setWindowSize(3);
    aggregation.setDimensionArray(combinations);
    aggregation.setTimeBucket(TIME_BUCKETS.m.name());
    aggregation.setDimensionKeyVal("1");
    return aggregation;
}
Also used : MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) ArrayList(java.util.ArrayList)

Example 2 with MultiWindowDimensionAggregation

use of org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation in project apex-malhar by apache.

the class Application method populateDAG.

/**
 * Builds the "Logstream Application" DAG.
 *
 * <p>The method registers, in order: console output operators, a log score
 * operator, four RabbitMQ log inputs (apache, mysql, syslog, system), JSON
 * converters for each input, a tuple counter and byte-sum operator for the
 * apache stream, a 404 filter, dimension operators, sliding-window
 * aggregations, TopN operators, Redis conversion/output operators, and the
 * final console/websocket output streams. All wiring is done through
 * {@code dag.addOperator} and {@code dag.addStream}.
 *
 * @param dag  DAG to populate with operators and streams
 * @param conf configuration object; not read anywhere in this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // set app name
    dag.setAttribute(DAG.APPLICATION_NAME, "Logstream Application");
    // streaming window size attribute is set to 1000 (milliseconds, per the attribute name)
    dag.setAttribute(DAG.STREAMING_WINDOW_SIZE_MILLIS, 1000);
    ConsoleOutputOperator mysqlConsole = dag.addOperator("MysqlConsole", ConsoleOutputOperator.class);
    ConsoleOutputOperator syslogConsole = dag.addOperator("SyslogConsole", ConsoleOutputOperator.class);
    ConsoleOutputOperator systemConsole = dag.addOperator("SystemlogConsole", ConsoleOutputOperator.class);
    LogScoreOperator logScoreOperator = dag.addOperator("logscore", new LogScoreOperator());
    /*
     * Read log file messages from a messaging system (Redis, RabbitMQ, etc)
     * Typically one message equates to a single line in a log file, but in
     * some cases may be multiple lines such as java stack trace, etc.
     */
    // Get logs from RabbitMQ
    RabbitMQLogsInputOperator apacheLogInput = dag.addOperator("ApacheLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator mysqlLogInput = dag.addOperator("MysqlLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator syslogLogInput = dag.addOperator("SyslogLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator systemLogInput = dag.addOperator("SystemLogInput", RabbitMQLogsInputOperator.class);
    /*
     * Convert incoming JSON structures to flattened map objects
     */
    JsonByteArrayOperator apacheLogJsonToMap = dag.addOperator("ApacheLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("apache_convert_type", apacheLogInput.outputPort, apacheLogJsonToMap.input);
    JsonByteArrayOperator mysqlLogJsonToMap = dag.addOperator("MysqlLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("mysql_convert_type", mysqlLogInput.outputPort, mysqlLogJsonToMap.input);
    JsonByteArrayOperator syslogLogJsonToMap = dag.addOperator("SyslogLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("syslog_convert_type", syslogLogInput.outputPort, syslogLogJsonToMap.input);
    JsonByteArrayOperator systemLogJsonToMap = dag.addOperator("SystemLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("system_convert_type", systemLogInput.outputPort, systemLogJsonToMap.input);
    // operator for tuple counter
    Counter apacheLogCounter = dag.addOperator("ApacheLogCounter", new Counter());
    /*
     * operators for sum of bytes
     */
    SumItemFromMapOperator<String, Object> apacheLogBytesSum = dag.addOperator("ApacheLogBytesSum", new SumItemFromMapOperator<String, Object>());
    apacheLogBytesSum.setSumDimension("bytes");
    /*
     * operator to filter 404 logs
     */
    SelectOperator filter404 = getFilteredMessagesOperator("Filter404", dag);
    /*
     * Explode dimensions based on log types ( apache, mysql, syslog, etc)
     */
    DimensionTimeBucketSumOperator apacheDimensionOperator = getApacheDimensionTimeBucketSumOperator("ApacheLogDimension", dag);
    DimensionTimeBucketSumOperator apacheFilteredDimensionOperator = getFilteredApacheDimensionTimeBucketSumOperator("ApacheFilteredLogDimension", dag);
    // the flattened apache stream fans out to four consumers: dimensions, counter, byte sum and the 404 filter
    dag.addStream("apache_dimension_in", apacheLogJsonToMap.outputFlatMap, apacheDimensionOperator.in, apacheLogCounter.input, apacheLogBytesSum.mapInput, filter404.inport);
    dag.addStream("apache_filtered_dimension_in", filter404.outport, apacheFilteredDimensionOperator.in);
    DimensionTimeBucketSumOperator mysqlDimensionOperator = getMysqlDimensionTimeBucketSumOperator("MysqlLogDimension", dag);
    dag.addStream("mysql_dimension_in", mysqlLogJsonToMap.outputFlatMap, mysqlDimensionOperator.in);
    DimensionTimeBucketSumOperator syslogDimensionOperator = getSyslogDimensionTimeBucketSumOperator("syslogLogDimension", dag);
    dag.addStream("syslog_dimension_in", syslogLogJsonToMap.outputFlatMap, syslogDimensionOperator.in);
    // DimensionTimeBucketSumOperator systemDimensionOperator = getSystemDimensionTimeBucketSumOperator("systemLogDimension", dag);
    // dag.addStream("system_dimension_in", systemLogJsonToMap.outputMap, systemDimensionOperator.in);
    /*
     * Calculate average, min, max, etc from dimensions ( based on log types )
     */
    // aggregating over sliding window
    MultiWindowDimensionAggregation apacheMultiWindowAggCountOpr = getApacheAggregationCountOper("apache_sliding_window_count", dag);
    MultiWindowDimensionAggregation apacheMultiWindowAggSumOpr = getApacheAggregationSumOper("apache_sliding_window_sum", dag);
    dag.addStream("apache_dimension_out", apacheDimensionOperator.out, apacheMultiWindowAggCountOpr.data, apacheMultiWindowAggSumOpr.data);
    MultiWindowDimensionAggregation apacheFilteredMultiWindowAggCountOpr = getFilteredApacheAggregationCountOper("apache_filtered_sliding_window_count", dag);
    dag.addStream("apache_filtered_dimension_out", apacheFilteredDimensionOperator.out, apacheFilteredMultiWindowAggCountOpr.data);
    MultiWindowDimensionAggregation mysqlMultiWindowAggOpr = getMysqlAggregationOper("mysql_sliding_window", dag);
    dag.addStream("mysql_dimension_out", mysqlDimensionOperator.out, mysqlMultiWindowAggOpr.data);
    MultiWindowDimensionAggregation syslogMultiWindowAggOpr = getSyslogAggregationOper("syslog_sliding_window", dag);
    dag.addStream("syslog_dimension_out", syslogDimensionOperator.out, syslogMultiWindowAggOpr.data);
    // MultiWindowDimensionAggregation systemMultiWindowAggOpr = getSystemAggregationOper("system_sliding_window", dag);
    // dag.addStream("system_dimension_out", systemDimensionOperator.out, systemMultiWindowAggOpr.data);
    // adding top N operators (N = 10 for the apache streams, N = 5 for mysql and syslog)
    TopN<String, DimensionObject<String>> apacheTopNCountOpr = dag.addOperator("apache_topN_count", new TopN<String, DimensionObject<String>>());
    apacheTopNCountOpr.setN(10);
    TopN<String, DimensionObject<String>> apacheFilteredTopNCountOpr = dag.addOperator("apache_filtered_topN_count", new TopN<String, DimensionObject<String>>());
    apacheFilteredTopNCountOpr.setN(10);
    TopN<String, DimensionObject<String>> apacheTopNSumOpr = dag.addOperator("apache_topN_sum", new TopN<String, DimensionObject<String>>());
    apacheTopNSumOpr.setN(10);
    TopN<String, DimensionObject<String>> mysqlTopNOpr = dag.addOperator("mysql_topN", new TopN<String, DimensionObject<String>>());
    mysqlTopNOpr.setN(5);
    TopN<String, DimensionObject<String>> syslogTopNOpr = dag.addOperator("syslog_topN", new TopN<String, DimensionObject<String>>());
    syslogTopNOpr.setN(5);
    // TopN<String, DimensionObject<String>> systemTopNOpr = dag.addOperator("system_topN", new TopN<String, DimensionObject<String>>());
    // systemTopNOpr.setN(5);
    /*
     * Analytics Engine
     */
    dag.addStream("ApacheLogScoreCount", apacheMultiWindowAggCountOpr.output, apacheTopNCountOpr.data, logScoreOperator.apacheLogs);
    dag.addStream("ApacheFilteredLogScoreCount", apacheFilteredMultiWindowAggCountOpr.output, apacheFilteredTopNCountOpr.data);
    dag.addStream("ApacheLogScoreSum", apacheMultiWindowAggSumOpr.output, apacheTopNSumOpr.data);
    dag.addStream("MysqlLogScore", mysqlMultiWindowAggOpr.output, mysqlTopNOpr.data, logScoreOperator.mysqlLogs);
    dag.addStream("SyslogLogScore", syslogMultiWindowAggOpr.output, syslogTopNOpr.data, logScoreOperator.syslogLogs);
    // dag.addStream("SystemLogScore", systemLogJsonToMap.outputMap, logScoreOperator.systemLogs);
    /*
     * Alerts
     */
    // TODO
    /*
     * Console output for debugging purposes
     */
    // ConsoleOutputOperator console = dag.addOperator("console", ConsoleOutputOperator.class);
    // dag.addStream("topn_output", apacheTopNOpr.top);entry.getValue()
    /*
     * write to redis for siteops
     */
    // prepare operators to convert output streams to redis output format
    AggregationsToRedisOperator<Integer, Integer> apacheLogCounterToRedis = dag.addOperator("ApacheLogCounterToRedis", new AggregationsToRedisOperator<Integer, Integer>());
    apacheLogCounterToRedis.setKeyIndex(11);
    AggregationsToRedisOperator<String, String> apacheLogBytesSumToRedis = dag.addOperator("ApacheLogBytesSumToRedis", new AggregationsToRedisOperator<String, String>());
    apacheLogBytesSumToRedis.setKeyIndex(9);
    AggregationsToRedisOperator<String, DimensionObject<String>> topNCountToRedis = getApacheTopNToRedisOperatorCountAggregation("topNCountToRedis", dag);
    AggregationsToRedisOperator<String, DimensionObject<String>> filteredTopNCountToRedis = getFilteredApacheTopNToRedisOperatorCountAggregation("filteredTopNCountToRedis", dag);
    AggregationsToRedisOperator<String, DimensionObject<String>> topNSumToRedis = getApacheTopNToRedisOperatorSumAggregation("topNSumToRedis", dag);
    // convert outputs for redis output operator and send output to web socket output
    // NOTE(review): wsOutput(dag, ...) is defined elsewhere; presumably it adds a websocket output operator and returns its input port - confirm
    dag.addStream("apache_log_counter", apacheLogCounter.output, apacheLogCounterToRedis.valueInput);
    dag.addStream("apache_log_bytes_sum", apacheLogBytesSum.output, apacheLogBytesSumToRedis.valueInput);
    dag.addStream("topn_redis_count", apacheTopNCountOpr.top, topNCountToRedis.multiWindowDimensionInput, wsOutput(dag, "apacheTopAggrs"));
    dag.addStream("topn_redis_sum", apacheTopNSumOpr.top, topNSumToRedis.multiWindowDimensionInput, wsOutput(dag, "apacheTopSumAggrs"));
    dag.addStream("filtered_topn_redis_count", apacheFilteredTopNCountOpr.top, filteredTopNCountToRedis.multiWindowDimensionInput);
    // redis output operators; every store below is pointed at db index 15
    RedisKeyValPairOutputOperator<String, String> redisOutTotalCount = dag.addOperator("RedisOutTotalCount", new RedisKeyValPairOutputOperator<String, String>());
    redisOutTotalCount.getStore().setDbIndex(15);
    RedisKeyValPairOutputOperator<String, String> redisOutTotalSumBytes = dag.addOperator("RedisOutTotalSumBytes", new RedisKeyValPairOutputOperator<String, String>());
    redisOutTotalSumBytes.getStore().setDbIndex(15);
    RedisMapOutputOperator<String, String> redisOutTopNCount = dag.addOperator("RedisOutTopNCount", new RedisMapOutputOperator<String, String>());
    redisOutTopNCount.getStore().setDbIndex(15);
    RedisMapOutputOperator<String, String> redisOutTopNSum = dag.addOperator("RedisOutTopNSum", new RedisMapOutputOperator<String, String>());
    redisOutTopNSum.getStore().setDbIndex(15);
    // NOTE(review): this operator is named "...FilteredTopNSum" but below it is fed the filtered top-N *count* stream - naming looks inconsistent
    RedisMapOutputOperator<String, String> redisOutFilteredTopNSum = dag.addOperator("RedisOutFilteredTopNSum", new RedisMapOutputOperator<String, String>());
    redisOutFilteredTopNSum.getStore().setDbIndex(15);
    // redis output streams
    dag.addStream("apache_log_counter_to_redis", apacheLogCounterToRedis.keyValPairOutput, redisOutTotalCount.input);
    dag.addStream("apache_log_bytes_sum_to_redis", apacheLogBytesSumToRedis.keyValPairOutput, redisOutTotalSumBytes.input);
    dag.addStream("apache_log_dimension_counter_to_redis", topNCountToRedis.keyValueMapOutput, redisOutTopNCount.input);
    dag.addStream("apache_log_dimension_sum_to_redis", topNSumToRedis.keyValueMapOutput, redisOutTopNSum.input);
    dag.addStream("apache_log_filtered_dimension_count_to_redis", filteredTopNCountToRedis.keyValueMapOutput, redisOutFilteredTopNSum.input);
    /*
     * Websocket output to UI from calculated aggregations
     */
    dag.addStream("MysqlTopAggregations", mysqlTopNOpr.top, wsOutput(dag, "mysqlTopAggrs"), mysqlConsole.input);
    dag.addStream("SyslogTopAggregations", syslogTopNOpr.top, wsOutput(dag, "syslogTopAggrs"), syslogConsole.input);
    // NOTE(review): the system stream reads outputMap, whereas the apache/mysql/syslog streams read outputFlatMap - confirm this is intentional
    dag.addStream("SystemData", systemLogJsonToMap.outputMap, wsOutput(dag, "systemData"), logScoreOperator.systemLogs, systemConsole.input);
}
Also used : JsonByteArrayOperator(org.apache.apex.malhar.lib.stream.JsonByteArrayOperator) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) ConsoleOutputOperator(org.apache.apex.malhar.lib.io.ConsoleOutputOperator) MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) Counter(org.apache.apex.malhar.lib.stream.Counter) SelectOperator(org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator) DimensionTimeBucketSumOperator(org.apache.apex.malhar.lib.util.DimensionTimeBucketSumOperator) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject)

Example 3 with MultiWindowDimensionAggregation

use of org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation in project apex-malhar by apache.

the class Application method getFilteredApacheAggregationCountOper.

/**
 * Adds a {@link MultiWindowDimensionAggregation} operator to the DAG under the
 * given name and configures it for the filtered apache count aggregation.
 *
 * <p>Configuration applied: window size 2, two single-index dimension
 * combinations ({@code {0}} and {@code {1}}), the {@code TIME_BUCKETS.m} time
 * bucket and the dimension key value {@code "0"}.
 *
 * @param name name under which the operator is registered in the DAG
 * @param dag  DAG the operator is added to
 * @return the configured operator instance returned by {@code dag.addOperator}
 */
private MultiWindowDimensionAggregation getFilteredApacheAggregationCountOper(String name, DAG dag) {
    MultiWindowDimensionAggregation oper = dag.addOperator(name, MultiWindowDimensionAggregation.class);
    // The window size used to be set twice (3, then 2); the second call always
    // won, so only the effective value is kept. The value is a number of
    // windows, not seconds.
    oper.setWindowSize(2);
    List<int[]> dimensionArrayList = new ArrayList<int[]>();
    dimensionArrayList.add(new int[] { 0 });
    dimensionArrayList.add(new int[] { 1 });
    oper.setDimensionArray(dimensionArrayList);
    oper.setTimeBucket(TIME_BUCKETS.m.name());
    // aggregate on count
    oper.setDimensionKeyVal("0");
    return oper;
}
Also used : MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) ArrayList(java.util.ArrayList)

Example 4 with MultiWindowDimensionAggregation

use of org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation in project apex-malhar by apache.

the class Application method getSyslogAggregationOper.

/**
 * Registers a {@link MultiWindowDimensionAggregation} operator in the DAG
 * under {@code name} and applies its settings.
 *
 * <p>Settings: window size 3, one dimension combination consisting of index
 * 0, time bucket {@code TIME_BUCKETS.m}, dimension key value {@code "1"}.
 *
 * @param name operator name used for the DAG registration
 * @param dag  DAG receiving the new operator
 * @return the operator returned by {@code dag.addOperator}, after configuration
 */
private MultiWindowDimensionAggregation getSyslogAggregationOper(String name, DAG dag) {
    MultiWindowDimensionAggregation syslogAggregator = dag.addOperator(name, MultiWindowDimensionAggregation.class);
    syslogAggregator.setWindowSize(3);

    // Single combination made of the dimension at index 0.
    int[] indexZeroOnly = { 0 };
    List<int[]> dimensionCombinations = new ArrayList<int[]>();
    dimensionCombinations.add(indexZeroOnly);
    syslogAggregator.setDimensionArray(dimensionCombinations);

    String timeBucketName = TIME_BUCKETS.m.name();
    syslogAggregator.setTimeBucket(timeBucketName);
    syslogAggregator.setDimensionKeyVal("1");
    return syslogAggregator;
}
Also used : MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) ArrayList(java.util.ArrayList)

Example 5 with MultiWindowDimensionAggregation

use of org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation in project apex-malhar by apache.

the class Application method getApacheAggregationCountOper.

/**
 * Adds a {@link MultiWindowDimensionAggregation} operator to the DAG under the
 * given name and configures it for the apache count aggregation.
 *
 * <p>Configuration applied: window size 2, twelve dimension combinations
 * (listed in the table inside the method, in registration order), the
 * {@code TIME_BUCKETS.m} time bucket and the dimension key value {@code "0"}.
 *
 * @param name name under which the operator is registered in the DAG
 * @param dag  DAG the operator is added to
 * @return the configured operator instance returned by {@code dag.addOperator}
 */
private MultiWindowDimensionAggregation getApacheAggregationCountOper(String name, DAG dag) {
    MultiWindowDimensionAggregation oper = dag.addOperator(name, MultiWindowDimensionAggregation.class);
    // The window size used to be set twice (3, then 2); the second call always
    // won, so only the effective value is kept. The value is a number of
    // windows, not seconds.
    oper.setWindowSize(2);
    // Dimension index combinations to aggregate on; order matches the original
    // sequence of add() calls.
    int[][] dimensionCombinations = {
        { 2 },
        { 1 },
        { 1, 2 },
        { 7, 5, 6 },
        { 2, 5 },
        { 7, 6 },
        { 6 },
        { 7 },
        { 0 },
        { 3, 2 },
        { 3, 0 },
        { 3 }
    };
    List<int[]> dimensionArrayList = new ArrayList<int[]>();
    for (int[] combination : dimensionCombinations) {
        dimensionArrayList.add(combination);
    }
    oper.setDimensionArray(dimensionArrayList);
    oper.setTimeBucket(TIME_BUCKETS.m.name());
    // aggregate on count
    oper.setDimensionKeyVal("0");
    return oper;
}
Also used : MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) ArrayList(java.util.ArrayList)

Aggregations

MultiWindowDimensionAggregation (org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation): 7, ArrayList (java.util.ArrayList): 6, SelectOperator (org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator): 1, ConsoleOutputOperator (org.apache.apex.malhar.lib.io.ConsoleOutputOperator): 1, DimensionObject (org.apache.apex.malhar.lib.logs.DimensionObject): 1, Counter (org.apache.apex.malhar.lib.stream.Counter): 1, JsonByteArrayOperator (org.apache.apex.malhar.lib.stream.JsonByteArrayOperator): 1, DimensionTimeBucketSumOperator (org.apache.apex.malhar.lib.util.DimensionTimeBucketSumOperator): 1