Search in sources :

Example 1 with DimensionObject

use of org.apache.apex.malhar.lib.logs.DimensionObject in project apex-malhar by apache.

the class Application method populateDAG.

/**
 * Wires up the "Logstream Application" DAG.
 *
 * <p>Four RabbitMQ log inputs (apache, mysql, syslog, system) are each converted from JSON
 * byte arrays to maps. The apache, mysql and syslog streams then go through dimension
 * computation, sliding-window aggregation and top-N selection; the results are sent to the
 * log score operator, to Redis (apache only), to the outputs created by {@code wsOutput}
 * and to console operators. The system log stream skips the dimension/aggregation stages
 * (that wiring is commented out) and is forwarded as-is.
 *
 * @param dag the DAG to which all operators and streams are added
 * @param conf the launch configuration; not read anywhere in this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // set app name
    dag.setAttribute(DAG.APPLICATION_NAME, "Logstream Application");
    dag.setAttribute(DAG.STREAMING_WINDOW_SIZE_MILLIS, 1000);
    // console sinks, connected at the very end of this method
    ConsoleOutputOperator mysqlConsole = dag.addOperator("MysqlConsole", ConsoleOutputOperator.class);
    ConsoleOutputOperator syslogConsole = dag.addOperator("SyslogConsole", ConsoleOutputOperator.class);
    ConsoleOutputOperator systemConsole = dag.addOperator("SystemlogConsole", ConsoleOutputOperator.class);
    LogScoreOperator logScoreOperator = dag.addOperator("logscore", new LogScoreOperator());
    /*
     * Read log file messages from a messaging system (Redis, RabbitMQ, etc)
     * Typically one message equates to a single line in a log file, but in
     * some cases may be multiple lines such as java stack trace, etc.
     */
    // Get logs from RabbitMQ
    RabbitMQLogsInputOperator apacheLogInput = dag.addOperator("ApacheLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator mysqlLogInput = dag.addOperator("MysqlLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator syslogLogInput = dag.addOperator("SyslogLogInput", RabbitMQLogsInputOperator.class);
    RabbitMQLogsInputOperator systemLogInput = dag.addOperator("SystemLogInput", RabbitMQLogsInputOperator.class);
    /*
     * Convert incoming JSON structures to flattened map objects
     */
    JsonByteArrayOperator apacheLogJsonToMap = dag.addOperator("ApacheLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("apache_convert_type", apacheLogInput.outputPort, apacheLogJsonToMap.input);
    JsonByteArrayOperator mysqlLogJsonToMap = dag.addOperator("MysqlLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("mysql_convert_type", mysqlLogInput.outputPort, mysqlLogJsonToMap.input);
    JsonByteArrayOperator syslogLogJsonToMap = dag.addOperator("SyslogLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("syslog_convert_type", syslogLogInput.outputPort, syslogLogJsonToMap.input);
    JsonByteArrayOperator systemLogJsonToMap = dag.addOperator("SystemLogJsonToMap", JsonByteArrayOperator.class);
    dag.addStream("system_convert_type", systemLogInput.outputPort, systemLogJsonToMap.input);
    // operator for tuple counter
    Counter apacheLogCounter = dag.addOperator("ApacheLogCounter", new Counter());
    /*
     * operators for sum of bytes
     */
    SumItemFromMapOperator<String, Object> apacheLogBytesSum = dag.addOperator("ApacheLogBytesSum", new SumItemFromMapOperator<String, Object>());
    apacheLogBytesSum.setSumDimension("bytes");
    /*
     * operator to filter 404 logs
     */
    SelectOperator filter404 = getFilteredMessagesOperator("Filter404", dag);
    /*
     * Explode dimensions based on log types ( apache, mysql, syslog, etc)
     */
    DimensionTimeBucketSumOperator apacheDimensionOperator = getApacheDimensionTimeBucketSumOperator("ApacheLogDimension", dag);
    DimensionTimeBucketSumOperator apacheFilteredDimensionOperator = getFilteredApacheDimensionTimeBucketSumOperator("ApacheFilteredLogDimension", dag);
    // the flattened apache map fans out to four consumers: dimensions, counter, byte sum and the 404 filter
    dag.addStream("apache_dimension_in", apacheLogJsonToMap.outputFlatMap, apacheDimensionOperator.in, apacheLogCounter.input, apacheLogBytesSum.mapInput, filter404.inport);
    dag.addStream("apache_filtered_dimension_in", filter404.outport, apacheFilteredDimensionOperator.in);
    DimensionTimeBucketSumOperator mysqlDimensionOperator = getMysqlDimensionTimeBucketSumOperator("MysqlLogDimension", dag);
    dag.addStream("mysql_dimension_in", mysqlLogJsonToMap.outputFlatMap, mysqlDimensionOperator.in);
    DimensionTimeBucketSumOperator syslogDimensionOperator = getSyslogDimensionTimeBucketSumOperator("syslogLogDimension", dag);
    dag.addStream("syslog_dimension_in", syslogLogJsonToMap.outputFlatMap, syslogDimensionOperator.in);
    // system log dimensions are currently disabled
    // DimensionTimeBucketSumOperator systemDimensionOperator = getSystemDimensionTimeBucketSumOperator("systemLogDimension", dag);
    // dag.addStream("system_dimension_in", systemLogJsonToMap.outputMap, systemDimensionOperator.in);
    /*
     * Calculate average, min, max, etc from dimensions ( based on log types )
     */
    // aggregating over sliding window
    MultiWindowDimensionAggregation apacheMultiWindowAggCountOpr = getApacheAggregationCountOper("apache_sliding_window_count", dag);
    MultiWindowDimensionAggregation apacheMultiWindowAggSumOpr = getApacheAggregationSumOper("apache_sliding_window_sum", dag);
    dag.addStream("apache_dimension_out", apacheDimensionOperator.out, apacheMultiWindowAggCountOpr.data, apacheMultiWindowAggSumOpr.data);
    MultiWindowDimensionAggregation apacheFilteredMultiWindowAggCountOpr = getFilteredApacheAggregationCountOper("apache_filtered_sliding_window_count", dag);
    dag.addStream("apache_filtered_dimension_out", apacheFilteredDimensionOperator.out, apacheFilteredMultiWindowAggCountOpr.data);
    MultiWindowDimensionAggregation mysqlMultiWindowAggOpr = getMysqlAggregationOper("mysql_sliding_window", dag);
    dag.addStream("mysql_dimension_out", mysqlDimensionOperator.out, mysqlMultiWindowAggOpr.data);
    MultiWindowDimensionAggregation syslogMultiWindowAggOpr = getSyslogAggregationOper("syslog_sliding_window", dag);
    dag.addStream("syslog_dimension_out", syslogDimensionOperator.out, syslogMultiWindowAggOpr.data);
    // system log aggregation is currently disabled
    // MultiWindowDimensionAggregation systemMultiWindowAggOpr = getSystemAggregationOper("system_sliding_window", dag);
    // dag.addStream("system_dimension_out", systemDimensionOperator.out, systemMultiWindowAggOpr.data);
    // adding top N operator (N = 10 for the apache streams, N = 5 for mysql and syslog)
    TopN<String, DimensionObject<String>> apacheTopNCountOpr = dag.addOperator("apache_topN_count", new TopN<String, DimensionObject<String>>());
    apacheTopNCountOpr.setN(10);
    TopN<String, DimensionObject<String>> apacheFilteredTopNCountOpr = dag.addOperator("apache_filtered_topN_count", new TopN<String, DimensionObject<String>>());
    apacheFilteredTopNCountOpr.setN(10);
    TopN<String, DimensionObject<String>> apacheTopNSumOpr = dag.addOperator("apache_topN_sum", new TopN<String, DimensionObject<String>>());
    apacheTopNSumOpr.setN(10);
    TopN<String, DimensionObject<String>> mysqlTopNOpr = dag.addOperator("mysql_topN", new TopN<String, DimensionObject<String>>());
    mysqlTopNOpr.setN(5);
    TopN<String, DimensionObject<String>> syslogTopNOpr = dag.addOperator("syslog_topN", new TopN<String, DimensionObject<String>>());
    syslogTopNOpr.setN(5);
    // system log top N is currently disabled
    // TopN<String, DimensionObject<String>> systemTopNOpr = dag.addOperator("system_topN", new TopN<String, DimensionObject<String>>());
    // systemTopNOpr.setN(5);
    /*
     * Analytics Engine
     */
    // sliding-window aggregates feed both the top-N operators and (count/mysql/syslog only) the log score operator
    dag.addStream("ApacheLogScoreCount", apacheMultiWindowAggCountOpr.output, apacheTopNCountOpr.data, logScoreOperator.apacheLogs);
    dag.addStream("ApacheFilteredLogScoreCount", apacheFilteredMultiWindowAggCountOpr.output, apacheFilteredTopNCountOpr.data);
    dag.addStream("ApacheLogScoreSum", apacheMultiWindowAggSumOpr.output, apacheTopNSumOpr.data);
    dag.addStream("MysqlLogScore", mysqlMultiWindowAggOpr.output, mysqlTopNOpr.data, logScoreOperator.mysqlLogs);
    dag.addStream("SyslogLogScore", syslogMultiWindowAggOpr.output, syslogTopNOpr.data, logScoreOperator.syslogLogs);
    // logScoreOperator.systemLogs is connected by the "SystemData" stream at the end of this method instead
    // dag.addStream("SystemLogScore", systemLogJsonToMap.outputMap, logScoreOperator.systemLogs);
    /*
     * Alerts
     */
    // TODO
    /*
     * Console output for debugging purposes
     */
    // ConsoleOutputOperator console = dag.addOperator("console", ConsoleOutputOperator.class);
    // dag.addStream("topn_output", apacheTopNOpr.top);
    /*
     * write to redis for siteops
     */
    // prepare operators to convert output streams to redis output format
    AggregationsToRedisOperator<Integer, Integer> apacheLogCounterToRedis = dag.addOperator("ApacheLogCounterToRedis", new AggregationsToRedisOperator<Integer, Integer>());
    apacheLogCounterToRedis.setKeyIndex(11);
    AggregationsToRedisOperator<String, String> apacheLogBytesSumToRedis = dag.addOperator("ApacheLogBytesSumToRedis", new AggregationsToRedisOperator<String, String>());
    apacheLogBytesSumToRedis.setKeyIndex(9);
    AggregationsToRedisOperator<String, DimensionObject<String>> topNCountToRedis = getApacheTopNToRedisOperatorCountAggregation("topNCountToRedis", dag);
    AggregationsToRedisOperator<String, DimensionObject<String>> filteredTopNCountToRedis = getFilteredApacheTopNToRedisOperatorCountAggregation("filteredTopNCountToRedis", dag);
    AggregationsToRedisOperator<String, DimensionObject<String>> topNSumToRedis = getApacheTopNToRedisOperatorSumAggregation("topNSumToRedis", dag);
    // convert outputs for redis output operator and send output to web socket output
    dag.addStream("apache_log_counter", apacheLogCounter.output, apacheLogCounterToRedis.valueInput);
    dag.addStream("apache_log_bytes_sum", apacheLogBytesSum.output, apacheLogBytesSumToRedis.valueInput);
    dag.addStream("topn_redis_count", apacheTopNCountOpr.top, topNCountToRedis.multiWindowDimensionInput, wsOutput(dag, "apacheTopAggrs"));
    dag.addStream("topn_redis_sum", apacheTopNSumOpr.top, topNSumToRedis.multiWindowDimensionInput, wsOutput(dag, "apacheTopSumAggrs"));
    dag.addStream("filtered_topn_redis_count", apacheFilteredTopNCountOpr.top, filteredTopNCountToRedis.multiWindowDimensionInput);
    // redis output operators; every store below is pointed at DB index 15
    RedisKeyValPairOutputOperator<String, String> redisOutTotalCount = dag.addOperator("RedisOutTotalCount", new RedisKeyValPairOutputOperator<String, String>());
    redisOutTotalCount.getStore().setDbIndex(15);
    RedisKeyValPairOutputOperator<String, String> redisOutTotalSumBytes = dag.addOperator("RedisOutTotalSumBytes", new RedisKeyValPairOutputOperator<String, String>());
    redisOutTotalSumBytes.getStore().setDbIndex(15);
    RedisMapOutputOperator<String, String> redisOutTopNCount = dag.addOperator("RedisOutTopNCount", new RedisMapOutputOperator<String, String>());
    redisOutTopNCount.getStore().setDbIndex(15);
    RedisMapOutputOperator<String, String> redisOutTopNSum = dag.addOperator("RedisOutTopNSum", new RedisMapOutputOperator<String, String>());
    redisOutTopNSum.getStore().setDbIndex(15);
    // NOTE(review): despite the "Sum" in its name, this operator is fed the filtered top-N *count* stream below
    RedisMapOutputOperator<String, String> redisOutFilteredTopNSum = dag.addOperator("RedisOutFilteredTopNSum", new RedisMapOutputOperator<String, String>());
    redisOutFilteredTopNSum.getStore().setDbIndex(15);
    // redis output streams
    dag.addStream("apache_log_counter_to_redis", apacheLogCounterToRedis.keyValPairOutput, redisOutTotalCount.input);
    dag.addStream("apache_log_bytes_sum_to_redis", apacheLogBytesSumToRedis.keyValPairOutput, redisOutTotalSumBytes.input);
    dag.addStream("apache_log_dimension_counter_to_redis", topNCountToRedis.keyValueMapOutput, redisOutTopNCount.input);
    dag.addStream("apache_log_dimension_sum_to_redis", topNSumToRedis.keyValueMapOutput, redisOutTopNSum.input);
    dag.addStream("apache_log_filtered_dimension_count_to_redis", filteredTopNCountToRedis.keyValueMapOutput, redisOutFilteredTopNSum.input);
    /*
     * Websocket output to UI from calculated aggregations
     */
    dag.addStream("MysqlTopAggregations", mysqlTopNOpr.top, wsOutput(dag, "mysqlTopAggrs"), mysqlConsole.input);
    dag.addStream("SyslogTopAggregations", syslogTopNOpr.top, wsOutput(dag, "syslogTopAggrs"), syslogConsole.input);
    // NOTE(review): the system path reads outputMap, whereas the other log types read outputFlatMap — confirm this is intended
    dag.addStream("SystemData", systemLogJsonToMap.outputMap, wsOutput(dag, "systemData"), logScoreOperator.systemLogs, systemConsole.input);
}
Also used : JsonByteArrayOperator(org.apache.apex.malhar.lib.stream.JsonByteArrayOperator) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) ConsoleOutputOperator(org.apache.apex.malhar.lib.io.ConsoleOutputOperator) MultiWindowDimensionAggregation(org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation) Counter(org.apache.apex.malhar.lib.stream.Counter) SelectOperator(org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator) DimensionTimeBucketSumOperator(org.apache.apex.malhar.lib.util.DimensionTimeBucketSumOperator) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject)

Example 2 with DimensionObject

use of org.apache.apex.malhar.lib.logs.DimensionObject in project apex-malhar by apache.

the class DimensionOperatorUnifierTest method testOperator.

/**
 * Feeds five partial aggregation tuples through the unifier inside a single window and
 * checks the number of emitted tuples plus a sample of the merged values.
 */
@Test
@SuppressWarnings("unchecked")
public void testOperator() {
    DimensionOperatorUnifier unifierUnderTest = new DimensionOperatorUnifier();
    CollectorTestSink collector = new CollectorTestSink();
    unifierUnderTest.aggregationsOutput.setSink(collector);

    unifierUnderTest.beginWindow(1);
    // Two partial results for dimension "a" under the same key: these are the ones expected to be merged.
    unifierUnderTest.process(aggregationTuple("m|201402121900|0|65537|131074|bytes", "a", 75, 3.0, 225));
    unifierUnderTest.process(aggregationTuple("m|201402121900|0|65537|131074|bytes", "a", 50, 2.0, 100));
    // Same key as above but a different dimension value.
    unifierUnderTest.process(aggregationTuple("m|201402121900|0|65537|131074|bytes", "z", 50, 2.0, 100));
    // Distinct keys, one partial result each.
    unifierUnderTest.process(aggregationTuple("m|201402121900|0|65537|131075|bytes", "b", 14290.5, 2.0, 28581.0));
    unifierUnderTest.process(aggregationTuple("m|201402121900|0|65537|131076|bytes", "c", 290.75, 10.0, 8581.0));
    unifierUnderTest.endWindow();

    List<Map<String, DimensionObject<String>>> emitted = collector.collectedTuples;
    Assert.assertEquals("Tuple Count", 4, emitted.size());

    for (Map<String, DimensionObject<String>> emittedTuple : emitted) {
        for (Entry<String, DimensionObject<String>> aggregate : emittedTuple.entrySet()) {
            String key = aggregate.getKey();
            DimensionObject<String> dimObj = aggregate.getValue();
            if (key.equals("m|201402121900|0|65537|131074|bytes.AVERAGE")) {
                if (dimObj.getVal().equals("a")) {
                    // (225 + 100) / (3 + 2) = 65
                    Assert.assertEquals("average for key " + key + " and dimension key a", new MutableDouble(65), dimObj.getCount());
                }
            }
            if (key.equals("m|201402121900|0|65537|131074|bytes.SUM")) {
                if (dimObj.getVal().equals("z")) {
                    Assert.assertEquals("sum for key " + key + " and dimension key z", new MutableDouble(100), dimObj.getCount());
                }
            }
            if (key.equals("m|201402121900|0|65537|131076|bytes.COUNT")) {
                if (dimObj.getVal().equals("c")) {
                    Assert.assertEquals("count for key " + key + " and dimension key c", new MutableDouble(10), dimObj.getCount());
                }
            }
        }
    }
}

/**
 * Builds one input tuple holding the AVERAGE, COUNT and SUM aggregates (inserted in that
 * order) for a single dimension value.
 */
private static Map<String, DimensionObject<String>> aggregationTuple(String keyPrefix, String dimension, double average, double count, double sum) {
    Map<String, DimensionObject<String>> tuple = new HashMap<String, DimensionObject<String>>();
    tuple.put(keyPrefix + ".AVERAGE", new DimensionObject<String>(new MutableDouble(average), dimension));
    tuple.put(keyPrefix + ".COUNT", new DimensionObject<String>(new MutableDouble(count), dimension));
    tuple.put(keyPrefix + ".SUM", new DimensionObject<String>(new MutableDouble(sum), dimension));
    return tuple;
}
Also used : HashMap(java.util.HashMap) MutableDouble(org.apache.commons.lang.mutable.MutableDouble) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) Map(java.util.Map) HashMap(java.util.HashMap) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 3 with DimensionObject

use of org.apache.apex.malhar.lib.logs.DimensionObject in project apex-malhar by apache.

the class DimensionOperator method endWindow.

/**
 * At the end of a window, emits the cached aggregations of every time bucket that has
 * rolled over and drops those entries from the cache.
 */
@Override
public void endWindow() {
    // No output time buckets are being tracked: nothing to compare against.
    if (outTimeBuckets == null || outTimeBuckets.isEmpty()) {
        return;
    }

    // Time buckets that the current window id falls into.
    List<String> currentBuckets = getTimeBucketList(LogstreamUtil.extractTime(currentWindowId, windowWidth));

    // Any tracked bucket that differs from the current one at the same position is due for
    // emission; the tracked slot is advanced to the current bucket.
    ArrayList<String> bucketsToEmit = new ArrayList<String>();
    int position = 0;
    for (String currentBucket : currentBuckets) {
        String trackedBucket = outTimeBuckets.get(position);
        if (!currentBucket.equals(trackedBucket)) {
            bucketsToEmit.add(trackedBucket);
            outTimeBuckets.set(position, currentBucket);
        }
        position++;
    }
    if (bucketsToEmit.isEmpty()) {
        return;
    }

    // Keys are collected here and removed only after each scan of the cache, not while iterating it.
    ArrayList<String> emittedKeys = new ArrayList<String>();
    for (String bucket : bucketsToEmit) {
        for (Entry<String, Map<String, Map<AggregateOperation, Number>>> cached : cacheObject.entrySet()) {
            String cacheKey = cached.getKey();
            if (!cacheKey.startsWith(bucket)) {
                continue;
            }
            // One output tuple per dimension value, holding all of its aggregate operations.
            for (Entry<String, Map<AggregateOperation, Number>> perDimension : cached.getValue().entrySet()) {
                String dimensionValue = perDimension.getKey();
                HashMap<String, DimensionObject<String>> aggregations = new HashMap<String, DimensionObject<String>>();
                for (Entry<AggregateOperation, Number> aggregate : perDimension.getValue().entrySet()) {
                    String outputKey = cacheKey + "." + aggregate.getKey().name();
                    aggregations.put(outputKey, new DimensionObject<String>((MutableDouble) aggregate.getValue(), dimensionValue));
                }
                aggregationsOutput.emit(aggregations);
            }
            emittedKeys.add(cacheKey);
        }
        for (String emittedKey : emittedKeys) {
            cacheObject.remove(emittedKey);
        }
    }
}
Also used : DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) AggregateOperation(com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)

Example 4 with DimensionObject

use of org.apache.apex.malhar.lib.logs.DimensionObject in project apex-malhar by apache.

the class LogstreamTopNTest method testOperator.

/**
 * Sends six dimension counts for the same key through a top-5 operator within one window
 * and checks that the five largest come back in descending order of count.
 */
@Test
@SuppressWarnings("unchecked")
public void testOperator() {
    final String countKey = "m|201402121900|0|65535|131075|val.COUNT";

    LogstreamTopN topN = new LogstreamTopN();
    LogstreamPropertyRegistry propertyRegistry = new LogstreamPropertyRegistry();
    propertyRegistry.bind(LogstreamUtil.LOG_TYPE, "apache");
    propertyRegistry.bind(LogstreamUtil.FILTER, "default");
    topN.setRegistry(propertyRegistry);
    topN.setN(5);
    CollectorTestSink collector = new CollectorTestSink();
    topN.top.setSink(collector);

    // Inputs in feed order: a=10, b=1, c=5, d=2, e=15, f=4.
    String[] dimensions = { "a", "b", "c", "d", "e", "f" };
    double[] counts = { 10, 1, 5, 2, 15, 4 };

    topN.beginWindow(0);
    // A single map instance is reused; each iteration overwrites the value under the same key.
    Map<String, DimensionObject<String>> reusedTuple = new HashMap<String, DimensionObject<String>>();
    List<DimensionObject<String>> fed = new ArrayList<DimensionObject<String>>();
    for (int i = 0; i < dimensions.length; i++) {
        DimensionObject<String> dimObj = new DimensionObject<String>(new MutableDouble(counts[i]), dimensions[i]);
        reusedTuple.put(countKey, dimObj);
        topN.data.process(reusedTuple);
        fed.add(dimObj);
    }
    topN.endWindow();

    Map<String, List<DimensionObject<String>>> emitted = (Map<String, List<DimensionObject<String>>>) collector.collectedTuples.get(0);
    List<DimensionObject<String>> actualTop = emitted.get(countKey);

    // Expected ranking by count: e(15), a(10), c(5), f(4), d(2); b(1) is cut off.
    int[] expectedFeedIndexes = { 4, 0, 2, 5, 3 };
    List<DimensionObject<String>> expectedTop = new ArrayList<DimensionObject<String>>();
    for (int feedIndex : expectedFeedIndexes) {
        expectedTop.add(fed.get(feedIndex));
    }

    Assert.assertEquals("Size", expectedTop.size(), actualTop.size());
    Assert.assertEquals("compare list", expectedTop, actualTop);
}
Also used : HashMap(java.util.HashMap) MutableDouble(org.apache.commons.lang.mutable.MutableDouble) ArrayList(java.util.ArrayList) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) List(java.util.List) ArrayList(java.util.ArrayList) LogstreamPropertyRegistry(com.datatorrent.apps.logstream.PropertyRegistry.LogstreamPropertyRegistry) Map(java.util.Map) HashMap(java.util.HashMap) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 5 with DimensionObject

use of org.apache.apex.malhar.lib.logs.DimensionObject in project apex-malhar by apache.

the class DimensionOperatorTest method testOperator.

/**
 * Pushes four apache log tuples (three named "abc", one named "xyz", all with value 25 and
 * the same url) through the dimension operator in one window, opens a second window at
 * least one second later, and verifies every emitted COUNT, AVERAGE and SUM aggregate.
 */
@Test
@SuppressWarnings("unchecked")
public void testOperator() {
    DimensionOperator oper = new DimensionOperator();
    LogstreamPropertyRegistry registry = new LogstreamPropertyRegistry();
    registry.bind(LogstreamUtil.LOG_TYPE, "apache");
    registry.bind(LogstreamUtil.FILTER, "ALL");
    oper.setRegistry(registry);
    // user input example::
    // type=apache,timebucket=m,timebucket=h,dimensions=a:b:c,dimensions=b:c,dimensions=b,dimensions=d,values=x.sum:y.sum:y.avg
    oper.addPropertiesFromString(new String[] { "type=apache", "timebucket=s", "dimensions=name", "dimensions=url", "dimensions=name:url", "values=value.sum:value.avg" });

    HashMap<String, Object> inMap1 = newApacheTuple(registry, "abc");
    HashMap<String, Object> inMap2 = newApacheTuple(registry, "xyz");
    HashMap<String, Object> inMap3 = newApacheTuple(registry, "abc");
    HashMap<String, Object> inMap4 = newApacheTuple(registry, "abc");

    CollectorTestSink mapSink = new CollectorTestSink();
    oper.aggregationsOutput.setSink(mapSink);

    // The window id carries the wall-clock second in its upper 32 bits.
    long now = System.currentTimeMillis();
    long currentId = 0L;
    long windowId = (now / 1000) << 32 | currentId;
    oper.beginWindow(windowId);
    oper.in.process(inMap1);
    oper.in.process(inMap2);
    oper.in.process(inMap3);
    oper.in.process(inMap4);
    // Wait a full second so the next window id is built from a later second than the first
    // one (the operator is configured with timebucket=s).
    try {
        Thread.sleep(1000);
    } catch (InterruptedException ex) {
        // Restore the interrupt flag before failing the test.
        Thread.currentThread().interrupt();
        DTThrowable.rethrow(ex);
    }
    oper.endWindow();

    currentId++;
    now = System.currentTimeMillis();
    windowId = (now / 1000) << 32 | currentId;
    oper.beginWindow(windowId);
    oper.endWindow();

    List<Map<String, DimensionObject<String>>> tuples = mapSink.collectedTuples;
    for (Map<String, DimensionObject<String>> map : tuples) {
        for (Entry<String, DimensionObject<String>> entry : map.entrySet()) {
            String key = entry.getKey();
            DimensionObject<String> dimObj = entry.getValue();
            if (key.contains("COUNT")) {
                assertAggregate("Count", key, dimObj, 3, 1, 4);
            } else if (key.contains("AVERAGE")) {
                assertAggregate("Average", key, dimObj, 25, 25, 25);
            } else if (key.contains("SUM")) {
                assertAggregate("Sum", key, dimObj, 75, 25, 100);
            } else {
                Assert.fail("Unexpected key received: " + key);
            }
        }
    }
}

/**
 * Builds an apache log input tuple for the given name; url, value and response are the
 * same for every tuple used by this test.
 */
private static HashMap<String, Object> newApacheTuple(LogstreamPropertyRegistry registry, String name) {
    HashMap<String, Object> tuple = new HashMap<String, Object>();
    tuple.put(LogstreamUtil.LOG_TYPE, registry.getIndex(LogstreamUtil.LOG_TYPE, "apache"));
    tuple.put(LogstreamUtil.FILTER, registry.getIndex(LogstreamUtil.FILTER, "ALL"));
    tuple.put("name", name);
    tuple.put("url", "http://www.t.co");
    tuple.put("value", 25);
    tuple.put("response", "404");
    return tuple;
}

/**
 * Asserts the aggregate carried by {@code dimObj} against the value expected for its
 * dimension. The "name" and "name:url" dimensions share an expectation per name; the
 * url-only dimension covers all four tuples. Any other dimension value fails the test.
 *
 * @param label aggregate name used in the failure message ("Count", "Average" or "Sum")
 * @param key output key the aggregate was emitted under
 * @param dimObj emitted dimension value and aggregate
 * @param expectedForAbc expected aggregate for "abc" and "abc,http://www.t.co"
 * @param expectedForXyz expected aggregate for "xyz" and "xyz,http://www.t.co"
 * @param expectedForUrl expected aggregate for "http://www.t.co"
 */
private static void assertAggregate(String label, String key, DimensionObject<String> dimObj, double expectedForAbc, double expectedForXyz, double expectedForUrl) {
    String dimension = dimObj.getVal();
    double expected;
    if (dimension.equals("abc") || dimension.equals("abc,http://www.t.co")) {
        expected = expectedForAbc;
    } else if (dimension.equals("xyz") || dimension.equals("xyz,http://www.t.co")) {
        expected = expectedForXyz;
    } else if (dimension.equals("http://www.t.co")) {
        expected = expectedForUrl;
    } else {
        Assert.fail("Unexpected dimension object received: " + dimObj + " for key: " + key);
        return;
    }
    Assert.assertEquals(label + " for key " + key, new MutableDouble(expected), dimObj.getCount());
}
Also used : HashMap(java.util.HashMap) MutableDouble(org.apache.commons.lang.mutable.MutableDouble) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) DTThrowable(com.datatorrent.netlet.util.DTThrowable) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) LogstreamPropertyRegistry(com.datatorrent.apps.logstream.PropertyRegistry.LogstreamPropertyRegistry) Map(java.util.Map) HashMap(java.util.HashMap) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Aggregations

DimensionObject (org.apache.apex.malhar.lib.logs.DimensionObject)6 HashMap (java.util.HashMap)4 Map (java.util.Map)4 MutableDouble (org.apache.commons.lang.mutable.MutableDouble)4 CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink)3 Test (org.junit.Test)3 AggregateOperation (com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)2 LogstreamPropertyRegistry (com.datatorrent.apps.logstream.PropertyRegistry.LogstreamPropertyRegistry)2 DTThrowable (com.datatorrent.netlet.util.DTThrowable)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 SelectOperator (org.apache.apex.malhar.contrib.misc.streamquery.SelectOperator)1 ConsoleOutputOperator (org.apache.apex.malhar.lib.io.ConsoleOutputOperator)1 MultiWindowDimensionAggregation (org.apache.apex.malhar.lib.logs.MultiWindowDimensionAggregation)1 Counter (org.apache.apex.malhar.lib.stream.Counter)1 JsonByteArrayOperator (org.apache.apex.malhar.lib.stream.JsonByteArrayOperator)1 DimensionTimeBucketSumOperator (org.apache.apex.malhar.lib.util.DimensionTimeBucketSumOperator)1