Search in sources :

Example 1 with AggregateOperation

use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache.

the class DimensionOperator method doComputations.

/**
 * Folds one value into the cached aggregates of a dimension combination.
 *
 * The aggregates live in {@code cacheObject} under a key built from the time bucket, the
 * record's log type and filter, the dimension combination id and the value key name; below
 * that key they are grouped per dimension value name. Missing cache levels are created on
 * demand, with every operation configured for the value key starting at zero.
 *
 * @param timeBucket time bucket
 * @param dimensionCombinationId id of dimension combination
 * @param dimValueName values of the dimension combination
 * @param valueKeyName name of the value key on which operations are performed
 * @param value value of the value key
 */
private void doComputations(String timeBucket, Integer dimensionCombinationId, String dimValueName, String valueKeyName, Number value) {
    // final key format --> timebucket|type|filter|dimId|val
    // eg: m|201311230108|1|4|10|bytes
    StringBuilder keyBuilder = new StringBuilder();
    keyBuilder.append(timeBucket).append("|");
    keyBuilder.append(recordType.get(LogstreamUtil.LOG_TYPE)).append("|");
    keyBuilder.append(recordType.get(LogstreamUtil.FILTER)).append("|");
    keyBuilder.append(dimensionCombinationId).append("|");
    keyBuilder.append(valueKeyName);
    String key = keyBuilder.toString();

    // first level: all dimension values recorded so far under this key
    boolean keyCached = cacheObject.containsKey(key);
    Map<String, Map<AggregateOperation, Number>> byDimValue;
    if (keyCached) {
        byDimValue = cacheObject.get(key);
    } else {
        byDimValue = new HashMap<String, Map<AggregateOperation, Number>>();
    }

    // second level: the aggregates of this particular dimension value
    Map<AggregateOperation, Number> aggregations;
    if (byDimValue.containsKey(dimValueName)) {
        aggregations = byDimValue.get(dimValueName);
    } else {
        aggregations = new HashMap<AggregateOperation, Number>();
        for (AggregateOperation configuredOperation : valueOperationTypes.get(valueKeyName)) {
            aggregations.put(configuredOperation, new MutableDouble(0));
        }
        byDimValue.put(dimValueName, aggregations);
        if (!keyCached) {
            // a brand new key is published only once its first entry is fully initialised
            cacheObject.put(key, byDimValue);
        }
    }

    if (aggregations.containsKey(AggregateOperation.SUM)) {
        ((MutableDouble) aggregations.get(AggregateOperation.SUM)).add(value);
    }
    if (aggregations.containsKey(AggregateOperation.COUNT)) {
        ((MutableDouble) aggregations.get(AggregateOperation.COUNT)).add(1);
    }
    if (aggregations.containsKey(AggregateOperation.AVERAGE)) {
        // running mean: COUNT was already incremented above, so (count - 1) is the number
        // of samples that the stored average currently represents
        double previousAverage = aggregations.get(AggregateOperation.AVERAGE).doubleValue();
        double sampleCount = aggregations.get(AggregateOperation.COUNT).doubleValue();
        double updatedAverage = ((previousAverage * (sampleCount - 1)) + value.doubleValue()) / sampleCount;
        aggregations.put(AggregateOperation.AVERAGE, new MutableDouble(updatedAverage));
    }
}
Also used : MutableDouble(org.apache.commons.lang.mutable.MutableDouble) AggregateOperation(com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)

Example 2 with AggregateOperation

use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache.

the class DimensionOperator method endWindow.

/**
 * Emits the aggregates of every time bucket that has rolled over in this window and drops
 * them from the cache.
 *
 * The time buckets of the current window are compared position by position with
 * {@code outTimeBuckets}; wherever they differ, the stored bucket is scheduled for emission
 * and replaced by the current one. For each scheduled bucket, every cache key starting with
 * that bucket is emitted as one tuple per dimension value and then removed from the cache.
 */
@Override
public void endWindow() {
    if (outTimeBuckets == null || outTimeBuckets.isEmpty()) {
        return;
    }

    // time buckets that the current window id falls into
    List<String> currentBuckets = getTimeBucketList(LogstreamUtil.extractTime(currentWindowId, windowWidth));

    // collect the buckets that just ended and remember the new ones as next to be emitted
    ArrayList<String> expiredBuckets = new ArrayList<String>();
    int position = 0;
    for (String currentBucket : currentBuckets) {
        String pendingBucket = outTimeBuckets.get(position);
        if (!currentBucket.equals(pendingBucket)) {
            expiredBuckets.add(pendingBucket);
            outTimeBuckets.set(position, currentBucket);
        }
        position++;
    }

    if (expiredBuckets.isEmpty()) {
        return;
    }

    // emit the computations of each expired bucket, then evict them since they are processed
    ArrayList<String> obsoleteKeys = new ArrayList<String>();
    for (String expiredBucket : expiredBuckets) {
        for (Entry<String, Map<String, Map<AggregateOperation, Number>>> cached : cacheObject.entrySet()) {
            String key = cached.getKey();
            if (!key.startsWith(expiredBucket)) {
                continue;
            }
            for (Entry<String, Map<AggregateOperation, Number>> perDimValue : cached.getValue().entrySet()) {
                String dimValueName = perDimValue.getKey();
                // one output tuple per dimension value, holding all of its aggregates
                HashMap<String, DimensionObject<String>> tuple = new HashMap<String, DimensionObject<String>>();
                for (Entry<AggregateOperation, Number> aggregate : perDimValue.getValue().entrySet()) {
                    String outKey = key + "." + aggregate.getKey().name();
                    tuple.put(outKey, new DimensionObject<String>((MutableDouble) aggregate.getValue(), dimValueName));
                }
                aggregationsOutput.emit(tuple);
            }
            // removal is deferred so the cache is not modified while it is being iterated
            obsoleteKeys.add(key);
        }
        for (String obsoleteKey : obsoleteKeys) {
            cacheObject.remove(obsoleteKey);
        }
    }
}
Also used : DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) AggregateOperation(com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)

Example 3 with AggregateOperation

use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache.

the class DimensionOperator method addPropertiesFromString.

/**
 * Supply the properties to the operator.
 * The properties include type, timebucket, dimension combinations, values and operations on them.
 * Each array element is one {@code name=value} pair; property names and metric names are
 * matched case-insensitively. Supported properties:
 * type=logtype // input logtype for which the properties are to be set
 * timebucket= time bucket character //time bucket can be one of the following values s(for second)/m(for minute)/h(for hour)/D(for day)/W(for week)/M(for month)/Y(for year)
 * dimensions=a:b:c //colon separated dimension combination for which computations are expected
 * values=value.metric[:value.metric] //list of dot concatenated value name and metric separated by colon
 * eg: type=apache,timebucket=m,timebucket=h,dimensions=a:b:c,dimensions=b:c,dimensions=b,dimensions=d,values=x.sum:y.sum:y.avg
 *
 * Recognised metrics are sum, avg (or average) and count. Requesting avg also registers
 * COUNT for the same value. Any other metric name is ignored.
 *
 * @param properties the {@code name=value} property strings
 * @throws RuntimeException wrapping any failure, including the ValidationException raised
 *         for an unknown property name
 */
public void addPropertiesFromString(String[] properties) {
    try {
        ArrayList<Integer> dimCombinations = new ArrayList<Integer>();
        HashMap<String, HashSet<AggregateOperation>> valOpTypes = new HashMap<String, HashSet<AggregateOperation>>();
        String type = null;
        // type=apache,timebucket=m,timebucket=h,dimensions=a:b:c,dimensions=b:c,dimensions=b,dimensions=d,values=x.sum:y.sum:y.avg
        for (String inputs : properties) {
            String[] split = inputs.split("=", 2);
            String propertyName = split[0];
            // equalsIgnoreCase keeps the comparison independent of the default locale
            if ("timebucket".equalsIgnoreCase(propertyName)) {
                int timeBucket = LogstreamUtil.extractTimeBucket(split[1]);
                if (timeBucket == 0) {
                    // the placeholder is required for the offending value to appear in the log
                    logger.error("invalid time bucket {}", split[1]);
                }
                timeBucketFlags |= timeBucket;
            } else if ("values".equalsIgnoreCase(propertyName)) {
                for (String value : split[1].split(":")) {
                    String[] valueNames = value.split("\\.");
                    String valueName = valueNames[0];
                    String valueType = valueNames[1];

                    AggregateOperation[] requestedOperations;
                    if ("sum".equalsIgnoreCase(valueType)) {
                        requestedOperations = new AggregateOperation[] {AggregateOperation.SUM};
                    } else if ("avg".equalsIgnoreCase(valueType) || "average".equalsIgnoreCase(valueType)) {
                        // COUNT is registered together with AVERAGE
                        requestedOperations = new AggregateOperation[] {AggregateOperation.AVERAGE, AggregateOperation.COUNT};
                    } else if ("count".equalsIgnoreCase(valueType)) {
                        requestedOperations = new AggregateOperation[] {AggregateOperation.COUNT};
                    } else {
                        // unrecognised metric: ignored, and the value name is not registered
                        continue;
                    }

                    HashSet<AggregateOperation> operations = valOpTypes.get(valueName);
                    if (operations == null) {
                        operations = new HashSet<AggregateOperation>();
                        valOpTypes.put(valueName, operations);
                    }
                    for (AggregateOperation requestedOperation : requestedOperations) {
                        operations.add(requestedOperation);
                    }
                }
            } else if ("type".equalsIgnoreCase(propertyName)) {
                type = split[1];
            } else if ("dimensions".equalsIgnoreCase(propertyName)) {
                // dimensions
                String dimensions = split[1];
                int dim = registry.bind("DIMENSION", dimensions);
                dimCombinations.add(dim);
            } else {
                throw new ValidationException("Invalid input property string " + Arrays.toString(properties));
            }
        }
        dimensionCombinationList.put(registry.getIndex(LogstreamUtil.LOG_TYPE, type), dimCombinations);
        valueOperations.put(registry.getIndex(LogstreamUtil.LOG_TYPE, type), valOpTypes);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : ValidationException(javax.validation.ValidationException) AggregateOperation(com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)

Example 4 with AggregateOperation

use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache.

the class DimensionOperatorUnifier method endWindow.

/**
 * Emits the contents of {@code unifiedCache}: one tuple per cache key and dimension value.
 *
 * Each tuple maps {@code <cache key>.<operation name>} to a DimensionObject that carries a
 * copy of the aggregate value together with the dimension value name.
 */
@Override
public void endWindow() {
    for (Entry<String, Map<String, Map<AggregateOperation, Number>>> cached : unifiedCache.entrySet()) {
        String key = cached.getKey();
        for (Entry<String, Map<AggregateOperation, Number>> perDimValue : cached.getValue().entrySet()) {
            String dimValueName = perDimValue.getKey();
            // a fresh tuple for every dimension value
            Map<String, DimensionObject<String>> tuple = new HashMap<String, DimensionObject<String>>();
            for (Entry<AggregateOperation, Number> aggregate : perDimValue.getValue().entrySet()) {
                String outKey = key + "." + aggregate.getKey().name();
                // the value is copied into a new MutableDouble instead of handing out the cached Number
                tuple.put(outKey, new DimensionObject<String>(new MutableDouble(aggregate.getValue()), dimValueName));
            }
            aggregationsOutput.emit(tuple);
        }
    }
}
Also used : MutableDouble(org.apache.commons.lang.mutable.MutableDouble) DimensionObject(org.apache.apex.malhar.lib.logs.DimensionObject) Map(java.util.Map) HashMap(java.util.HashMap) AggregateOperation(com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)

Aggregations

AggregateOperation (com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation)4 DimensionObject (org.apache.apex.malhar.lib.logs.DimensionObject)2 MutableDouble (org.apache.commons.lang.mutable.MutableDouble)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ValidationException (javax.validation.ValidationException)1