Use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache: class DimensionOperator, method doComputations.
/**
 * Performs the configured aggregate computations for the given dimension combination value on the given value key.
 *
 * @param timeBucket time bucket
 * @param dimensionCombinationId id of the dimension combination
 * @param dimValueName value of the dimension combination
 * @param valueKeyName name of the value key on which the operations are performed
 * @param value value of the value key
 */
private void doComputations(String timeBucket, Integer dimensionCombinationId, String dimValueName, String valueKeyName, Number value) {
  StringBuilder sb = new StringBuilder();
  sb.append(timeBucket).append("|").append(recordType.get(LogstreamUtil.LOG_TYPE)).append("|").append(recordType.get(LogstreamUtil.FILTER)).append("|").append(dimensionCombinationId).append("|").append(valueKeyName);
  // final key format --> timebucket|type|filter|dimId|val
  // eg: m|201311230108|1|4|10|bytes
  String key = sb.toString();
  Map<AggregateOperation, Number> aggregations;
  if (cacheObject.containsKey(key)) {
    Map<String, Map<AggregateOperation, Number>> dimValueNames = cacheObject.get(key);
    if (dimValueNames.containsKey(dimValueName)) {
      aggregations = dimValueNames.get(dimValueName);
    } else {
      aggregations = new HashMap<AggregateOperation, Number>();
      for (AggregateOperation aggregationType : valueOperationTypes.get(valueKeyName)) {
        aggregations.put(aggregationType, new MutableDouble(0));
      }
      dimValueNames.put(dimValueName, aggregations);
    }
  } else {
    Map<String, Map<AggregateOperation, Number>> newDimValueNames = new HashMap<String, Map<AggregateOperation, Number>>();
    aggregations = new HashMap<AggregateOperation, Number>();
    for (AggregateOperation aggregationType : valueOperationTypes.get(valueKeyName)) {
      aggregations.put(aggregationType, new MutableDouble(0));
    }
    newDimValueNames.put(dimValueName, aggregations);
    cacheObject.put(key, newDimValueNames);
  }
  if (aggregations.containsKey(AggregateOperation.SUM)) {
    MutableDouble aggrVal = (MutableDouble) aggregations.get(AggregateOperation.SUM);
    aggrVal.add(value);
  }
  if (aggregations.containsKey(AggregateOperation.COUNT)) {
    MutableDouble aggrVal = (MutableDouble) aggregations.get(AggregateOperation.COUNT);
    aggrVal.add(1);
  }
  if (aggregations.containsKey(AggregateOperation.AVERAGE)) {
    double avgVal = aggregations.get(AggregateOperation.AVERAGE).doubleValue();
    double countVal = aggregations.get(AggregateOperation.COUNT).doubleValue();
    double newAvg = ((avgVal * (countVal - 1)) + value.doubleValue()) / countVal;
    aggregations.put(AggregateOperation.AVERAGE, new MutableDouble(newAvg));
  }
}
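The AVERAGE branch above maintains a running average rather than storing a separate sum: since the COUNT aggregate has already been incremented for the current tuple, the previous average is scaled by (count - 1), the new value is added, and the total is divided by the new count. A minimal, self-contained sketch of the same update (class and variable names here are illustrative, not part of the operator, and it assumes the commons-lang MutableDouble used above; commons-lang3's behaves the same way):

import org.apache.commons.lang.mutable.MutableDouble;

public class RunningAverageSketch {
  public static void main(String[] args) {
    MutableDouble count = new MutableDouble(0);
    MutableDouble average = new MutableDouble(0);
    double[] values = {10, 20, 60};
    for (double value : values) {
      count.add(1); // COUNT is updated before AVERAGE, mirroring doComputations
      double newAvg = ((average.doubleValue() * (count.doubleValue() - 1)) + value) / count.doubleValue();
      average.setValue(newAvg);
    }
    System.out.println(average); // 30.0, i.e. (10 + 20 + 60) / 3
  }
}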
Use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache: class DimensionOperator, method endWindow.
@Override
public void endWindow() {
  if (outTimeBuckets == null || outTimeBuckets.isEmpty()) {
    return;
  }
  long time = LogstreamUtil.extractTime(currentWindowId, windowWidth);
  // get time buckets for current window id
  List<String> timeBucketList = getTimeBucketList(time);
  // get list of timebuckets to be emitted and replace them in outTimeBuckets with next time bucket to be emitted
  ArrayList<String> emitTimeBucketList = new ArrayList<String>();
  for (int i = 0; i < timeBucketList.size(); i++) {
    String timeBucket = timeBucketList.get(i);
    if (!timeBucket.equals(outTimeBuckets.get(i))) {
      emitTimeBucketList.add(outTimeBuckets.get(i));
      outTimeBuckets.set(i, timeBucket);
    }
  }
  // emit the computations for each time bucket in emitTimeBucketList and remove those buckets from the cache since they are now already processed
  if (!emitTimeBucketList.isEmpty()) {
    ArrayList<String> obsoleteKeys = new ArrayList<String>();
    for (String outTimeStr : emitTimeBucketList) {
      HashMap<String, DimensionObject<String>> outputAggregationsObject;
      for (Entry<String, Map<String, Map<AggregateOperation, Number>>> keys : cacheObject.entrySet()) {
        String key = keys.getKey();
        if (key.startsWith(outTimeStr)) {
          Map<String, Map<AggregateOperation, Number>> dimValues = keys.getValue();
          for (Entry<String, Map<AggregateOperation, Number>> dimValue : dimValues.entrySet()) {
            String dimValueName = dimValue.getKey();
            Map<AggregateOperation, Number> operations = dimValue.getValue();
            outputAggregationsObject = new HashMap<String, DimensionObject<String>>();
            for (Entry<AggregateOperation, Number> operation : operations.entrySet()) {
              AggregateOperation aggrOperationType = operation.getKey();
              Number aggr = operation.getValue();
              String outKey = key + "." + aggrOperationType.name();
              DimensionObject<String> outDimObj = new DimensionObject<String>((MutableDouble) aggr, dimValueName);
              outputAggregationsObject.put(outKey, outDimObj);
            }
            aggregationsOutput.emit(outputAggregationsObject);
          }
          // remove emitted key
          obsoleteKeys.add(key);
        }
      }
    }
    for (String key : obsoleteKeys) {
      cacheObject.remove(key);
    }
  }
}
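In this method a time bucket is emitted only after the window time has rolled past it: for each granularity, the bucket derived from the current window time is compared with the previously stored entry in outTimeBuckets, and only when they differ is the old bucket added to emitTimeBucketList and replaced. A sketch of that rotation for a single minute-granularity slot, assuming the "m|yyyyMMddHHmm" bucket format suggested by the key example above (the class and variable names are illustrative):

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;

public class BucketRotationSketch {
  public static void main(String[] args) {
    SimpleDateFormat minuteFormat = new SimpleDateFormat("'m|'yyyyMMddHHmm");
    minuteFormat.setTimeZone(TimeZone.getTimeZone("GMT"));

    List<String> outTimeBuckets = new ArrayList<String>();
    outTimeBuckets.add(minuteFormat.format(new Date(0L))); // bucket computed in the previous window

    long currentWindowTime = 61000L; // one minute later
    String currentBucket = minuteFormat.format(new Date(currentWindowTime));

    List<String> emitTimeBucketList = new ArrayList<String>();
    if (!currentBucket.equals(outTimeBuckets.get(0))) {
      emitTimeBucketList.add(outTimeBuckets.get(0)); // the old bucket is complete, so emit it
      outTimeBuckets.set(0, currentBucket); // track the new bucket from now on
    }
    System.out.println(emitTimeBucketList); // [m|197001010000]
  }
}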
Use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache: class DimensionOperator, method addPropertiesFromString.
/**
 * Supplies the properties to the operator.
 * The properties include the log type, time buckets, dimension combinations, and the values with the operations to be performed on them.
 * The input includes the following properties:
 * type=logtype // input log type for which the properties are to be set
 * timebucket=time bucket character // the time bucket can be one of the following values: s (second), m (minute), h (hour), D (day), W (week), M (month), Y (year)
 * dimensions=a:b:c // colon separated dimension combination for which computations are expected
 * values=value.metric[:value.metric] // list of dot concatenated value name and metric pairs, separated by colons
 * eg: type=apache,timebucket=m,timebucket=h,dimensions=a:b:c,dimensions=b:c,dimensions=b,dimensions=d,values=x.sum:y.sum:y.avg
 *
 * @param properties
 */
public void addPropertiesFromString(String[] properties) {
  try {
    ArrayList<Integer> dimCombinations = new ArrayList<Integer>();
    HashMap<String, HashSet<AggregateOperation>> valOpTypes = new HashMap<String, HashSet<AggregateOperation>>();
    String type = null;
    // type=apache,timebucket=m,timebucket=h,dimensions=a:b:c,dimensions=b:c,dimensions=b,dimensions=d,values=x.sum:y.sum:y.avg
    for (String inputs : properties) {
      String[] split = inputs.split("=", 2);
      if (split[0].toLowerCase().equals("timebucket")) {
        int timeBucket = LogstreamUtil.extractTimeBucket(split[1]);
        if (timeBucket == 0) {
          logger.error("invalid time bucket {}", split[1]);
        }
        timeBucketFlags |= timeBucket;
      } else if (split[0].toLowerCase().equals("values")) {
        String[] values = split[1].split(":");
        for (String value : values) {
          String[] valueNames = value.split("\\.");
          String valueName = valueNames[0];
          String valueType = valueNames[1];
          if (valueType.toLowerCase().equals("sum")) {
            if (valOpTypes.containsKey(valueName)) {
              valOpTypes.get(valueName).add(AggregateOperation.SUM);
            } else {
              HashSet<AggregateOperation> valueTypeList = new HashSet<AggregateOperation>();
              valueTypeList.add(AggregateOperation.SUM);
              valOpTypes.put(valueName, valueTypeList);
            }
          } else if (valueType.equals("avg") || valueType.equals("average")) {
            if (valOpTypes.containsKey(valueName)) {
              valOpTypes.get(valueName).add(AggregateOperation.AVERAGE);
              valOpTypes.get(valueName).add(AggregateOperation.COUNT);
            } else {
              HashSet<AggregateOperation> valueTypeList = new HashSet<AggregateOperation>();
              valueTypeList.add(AggregateOperation.AVERAGE);
              valueTypeList.add(AggregateOperation.COUNT);
              valOpTypes.put(valueName, valueTypeList);
            }
          } else if (valueType.equals("count")) {
            if (valOpTypes.containsKey(valueName)) {
              valOpTypes.get(valueName).add(AggregateOperation.COUNT);
            } else {
              HashSet<AggregateOperation> valueTypeList = new HashSet<AggregateOperation>();
              valueTypeList.add(AggregateOperation.COUNT);
              valOpTypes.put(valueName, valueTypeList);
            }
          }
        }
      } else if (split[0].toLowerCase().equals("type")) {
        type = split[1];
      } else if (split[0].toLowerCase().equals("dimensions")) {
        // dimensions
        String dimensions = split[1];
        int dim = registry.bind("DIMENSION", dimensions);
        dimCombinations.add(dim);
      } else {
        throw new ValidationException("Invalid input property string " + Arrays.toString(properties));
      }
    }
    dimensionCombinationList.put(registry.getIndex(LogstreamUtil.LOG_TYPE, type), dimCombinations);
    valueOperations.put(registry.getIndex(LogstreamUtil.LOG_TYPE, type), valOpTypes);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
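Following the documented format, the operator is configured with one key=value string per array element. A hypothetical configuration call mirroring the javadoc example (it assumes a DimensionOperator instance named oper whose registry has already been set up, since each dimensions entry is bound through registry.bind):

// Hypothetical configuration, not taken from the project; it mirrors the javadoc example above.
DimensionOperator oper = new DimensionOperator();
oper.addPropertiesFromString(new String[] {
  "type=apache",
  "timebucket=m", "timebucket=h",
  "dimensions=a:b:c", "dimensions=b:c", "dimensions=b", "dimensions=d",
  "values=x.sum:y.sum:y.avg"
});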
Use of com.datatorrent.apps.logstream.LogstreamUtil.AggregateOperation in project apex-malhar by apache: class DimensionOperatorUnifier, method endWindow.
@Override
public void endWindow() {
  Map<String, DimensionObject<String>> outputAggregationsObject;
  for (Entry<String, Map<String, Map<AggregateOperation, Number>>> keys : unifiedCache.entrySet()) {
    String key = keys.getKey();
    Map<String, Map<AggregateOperation, Number>> dimValues = keys.getValue();
    for (Entry<String, Map<AggregateOperation, Number>> dimValue : dimValues.entrySet()) {
      String dimValueName = dimValue.getKey();
      Map<AggregateOperation, Number> operations = dimValue.getValue();
      outputAggregationsObject = new HashMap<String, DimensionObject<String>>();
      for (Entry<AggregateOperation, Number> operation : operations.entrySet()) {
        AggregateOperation aggrOperationType = operation.getKey();
        Number aggr = operation.getValue();
        String outKey = key + "." + aggrOperationType.name();
        DimensionObject<String> outDimObj = new DimensionObject<String>(new MutableDouble(aggr), dimValueName);
        outputAggregationsObject.put(outKey, outDimObj);
      }
      aggregationsOutput.emit(outputAggregationsObject);
    }
  }
}
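Each emitted map is keyed by the cache key with the aggregate operation name appended, for example "m|201311230108|1|4|10|bytes.SUM" given the key format shown earlier, and the DimensionObject pairs the aggregated number with the dimension value it belongs to. A hypothetical downstream consumer could split the operation back off the key; the literal key below is only an illustration:

// Hypothetical parsing of one emitted key; assumes value names themselves contain no '.'.
String outKey = "m|201311230108|1|4|10|bytes.SUM"; // cache key + "." + operation name
int dot = outKey.lastIndexOf('.');
String cacheKey = outKey.substring(0, dot); // timebucket|type|filter|dimId|val
AggregateOperation operation = AggregateOperation.valueOf(outKey.substring(dot + 1)); // SUM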