Use of org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator in project apex-malhar by apache.
The class DimensionalConfigurationSchema, method initialize.
/**
 * This is a helper method that initializes the metadata for the {@link DimensionalConfigurationSchema}.
 *
 * @param keys The key objects to use when creating this configuration schema.
 * @param values The value objects to use when creating this configuration schema.
 * @param timeBuckets The time buckets to use when creating this configuration schema.
 * @param dimensionsCombinations The dimensions combinations to use when creating this configuration schema.
 */
private void initialize(List<Key> keys,
    List<Value> values,
    List<TimeBucket> timeBuckets,
    List<DimensionsCombination> dimensionsCombinations)
{
  tags = Lists.newArrayList();

  keyToTags = Maps.newHashMap();
  for (Key key : keys) {
    keyToTags.put(key.getName(), new ArrayList<String>());
  }

  valueToTags = Maps.newHashMap();
  for (Value value : values) {
    valueToTags.put(value.getName(), new ArrayList<String>());
  }

  // time buckets
  this.timeBuckets = timeBuckets;
  this.customTimeBuckets = new ArrayList<>();
  customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
  for (TimeBucket timeBucket : timeBuckets) {
    CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
    customTimeBuckets.add(customTimeBucket);
    customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
  }
  // Input aggregate values
  Map<String, Type> valueFieldToType = Maps.newHashMap();
  for (Value value : values) {
    valueFieldToType.put(value.getName(), value.getType());
  }
  inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);

  // Input keys
  Map<String, Type> keyFieldToType = Maps.newHashMap();
  keysToEnumValuesList = Maps.newHashMap();
  for (Key key : keys) {
    keyFieldToType.put(key.getName(), key.getType());
    keysToEnumValuesList.put(key.getName(), key.getEnumValues());
  }
  keyDescriptor = new FieldsDescriptor(keyFieldToType);

  Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
  keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
  // schemaAllValueToAggregatorToType
  schemaAllValueToAggregatorToType = Maps.newHashMap();
  Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
  Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
  Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
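  // Partition each value's declared aggregators: incremental aggregators are kept as-is,
  // while each on-the-fly (OTF) aggregator is also expanded into the incremental
  // aggregators it is derived from, since only those are physically stored.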
  for (Value value : values) {
    String valueName = value.getName();
    Set<String> aggregators = value.getAggregators();
    Set<String> specificAggregatorSet = Sets.newHashSet();
    Set<String> allAggregatorSet = Sets.newHashSet();
    Set<String> otfAggregators = Sets.newHashSet();
    for (String aggregatorName : aggregators) {
      if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
        specificAggregatorSet.add(aggregatorName);
        allAggregatorSet.add(aggregatorName);
      } else {
        otfAggregators.add(aggregatorName);
        List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
        specificAggregatorSet.addAll(aggregatorNames);
        allAggregatorSet.addAll(aggregatorNames);
        allAggregatorSet.add(aggregatorName);
      }
    }
    specificValueToOTFAggregator.put(valueName, otfAggregators);
    specificValueToAggregator.put(valueName, specificAggregatorSet);
    allSpecificValueToAggregator.put(valueName, allAggregatorSet);
  }
  for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
    String valueName = entry.getKey();
    Type inputType = inputValuesDescriptor.getType(valueName);
    Set<String> aggregators = entry.getValue();
    Map<String, Type> aggregatorToType = Maps.newHashMap();
    for (String aggregatorName : aggregators) {
      if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
        IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
        aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
      } else {
        OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
        aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
      }
    }
    schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
  }
  // ddID: assign a dimensions descriptor ID to each (dimensions combination, time bucket) pair
  dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
  dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
  dimensionsDescriptorToID = Maps.newHashMap();
  dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
  dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
  int ddID = 0;
  for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
    for (TimeBucket timeBucket : timeBuckets) {
      DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
      dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
      dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
      dimensionsDescriptorToID.put(dd, ddID);
      Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
      Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
      Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
      for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
        String value = entry.getKey();
        Set<String> staticAggregatorNames = Sets.newHashSet();
        Set<String> otfAggregatorNames = Sets.newHashSet();
        Set<String> aggregatorNames = entry.getValue();
        for (String aggregatorName : aggregatorNames) {
          if (!aggregatorRegistry.isAggregator(aggregatorName)) {
            throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
          }
          if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
            staticAggregatorNames.add(aggregatorName);
          } else {
            staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
            otfAggregatorNames.add(aggregatorName);
          }
        }
        valueToAggregator.put(value, staticAggregatorNames);
        valueToOTFAggregator.put(value, otfAggregatorNames);
      }
      mergeMaps(valueToAggregator, specificValueToAggregator);
      mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
      dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
      dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
      ddID++;
    }
  }
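  // The two loops below merge the per-value aggregators into every dimensions descriptor,
  // so that aggregators declared on a value itself apply under all combinations, not only
  // the combinations that name them explicitly.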
  for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
    if (specificValueToAggregator.isEmpty()) {
      continue;
    }
    for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
      String valueName = entry.getKey();
      Set<String> aggName = entry.getValue();
      if (aggName.isEmpty()) {
        continue;
      }
      Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
      if (ddAggregatorSet == null) {
        ddAggregatorSet = Sets.newHashSet();
        valueToAggregator.put(valueName, ddAggregatorSet);
      }
      ddAggregatorSet.addAll(aggName);
    }
  }

  for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
    if (specificValueToOTFAggregator.isEmpty()) {
      continue;
    }
    for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
      String valueName = entry.getKey();
      Set<String> aggName = entry.getValue();
      if (aggName.isEmpty()) {
        continue;
      }
      Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
      if (ddAggregatorSet == null) {
        ddAggregatorSet = Sets.newHashSet();
        valueToAggregator.put(valueName, ddAggregatorSet);
      }
      ddAggregatorSet.addAll(aggName);
    }
  }
  // ddIDToAggregatorToAggregateDescriptor
  dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
  dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);

  // combination ID values
  dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
  dimensionsDescriptorIDToKeys = Lists.newArrayList();
  for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
    dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
  }

  // Build keysString
  JSONArray keyArray = new JSONArray();
  for (Key key : keys) {
    JSONObject jo = new JSONObject();
    try {
      jo.put(FIELD_KEYS_NAME, key.getName());
      jo.put(FIELD_KEYS_TYPE, key.getType().getName());
      JSONArray enumArray = new JSONArray();
      for (Object enumVal : key.getEnumValues()) {
        enumArray.put(enumVal);
      }
      jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
    } catch (JSONException ex) {
      throw new RuntimeException(ex);
    }
    keyArray.put(jo);
  }
  keysString = keyArray.toString();

  // Build time buckets
  JSONArray timeBucketArray = new JSONArray();
  for (CustomTimeBucket timeBucket : customTimeBuckets) {
    timeBucketArray.put(timeBucket.getText());
  }
  bucketsString = timeBucketArray.toString();
  // buildDDIDAggID
  // Composite aggregators are not supported in this method; add an empty list to avoid unit test errors.
  dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
  buildDimensionsDescriptorIDAggregatorIDMaps();
}
Use of org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator in project apex-malhar by apache.
The class DimensionalConfigurationSchema, method addNonCompositeAggregator.
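/**
 * Registers a single non-composite (incremental or on-the-fly) aggregator for the given value,
 * recording its name and output type in the supplied maps. Incremental aggregators are recorded
 * directly; OTF aggregators are additionally expanded into their child incremental aggregators.
 * Throws IllegalArgumentException if the name is unknown, or if checkDuplicate is set and the
 * aggregator was already registered for this value.
 */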
protected Object addNonCompositeAggregator(String aggregatorName,
    Map<String, Set<String>> allValueToAggregator,
    Map<String, Set<String>> allValueToOTFAggregator,
    String valueName,
    Set<String> aggregatorSet,
    Map<String, Type> aggregatorToType,
    Type typeT,
    Set<String> aggregatorOTFSet,
    boolean checkDuplicate)
{
  if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
    Set<String> aggregatorNames = allValueToAggregator.get(valueName);
    if (aggregatorNames == null) {
      aggregatorNames = Sets.newHashSet();
      allValueToAggregator.put(valueName, aggregatorNames);
    }
    aggregatorNames.add(aggregatorName);
    if (!aggregatorSet.add(aggregatorName) && checkDuplicate) {
      throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
    }
    IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
    aggregatorToType.put(aggregatorName, aggregator.getOutputType(typeT));
    return aggregator;
  }

  if (aggregatorRegistry.isOTFAggregator(aggregatorName)) {
    // Check for duplicate on-the-fly aggregators
    Set<String> aggregatorNames = allValueToOTFAggregator.get(valueName);
    if (aggregatorNames == null) {
      aggregatorNames = Sets.newHashSet();
      allValueToOTFAggregator.put(valueName, aggregatorNames);
    }
    if (!aggregatorNames.add(aggregatorName) && checkDuplicate) {
      throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
    }
    aggregatorOTFSet.add(aggregatorName);
    // Add child aggregators
    aggregatorNames = allValueToAggregator.get(valueName);
    if (aggregatorNames == null) {
      aggregatorNames = Sets.newHashSet();
      allValueToAggregator.put(valueName, aggregatorNames);
    }
    OTFAggregator aggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
    aggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
    aggregatorSet.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
    aggregatorToType.put(aggregatorName, aggregator.getOutputType());
    LOG.debug("field name {}: adding aggregator names {}", valueName, aggregatorNames);
    return aggregator;
  }

  throw new IllegalArgumentException(aggregatorName + " is not a valid non-composite aggregator.");
}
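The two branches above are easiest to see with a concrete aggregator. A minimal sketch, assuming the default registry and its stock aggregator names (SUM and COUNT as incremental aggregators, AVG as the on-the-fly aggregator derived from them):

import java.util.List;
import org.apache.apex.malhar.lib.dimensions.aggregator.AggregatorRegistry;

public class OTFAggregatorSketch {
  public static void main(String[] args) {
    AggregatorRegistry registry = AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY;
    registry.setup();  // build the name-to-aggregator lookup maps

    // Incremental aggregators are stored and updated directly.
    System.out.println(registry.isIncrementalAggregator("SUM"));  // true

    // OTF aggregators store nothing themselves; they are computed at query time
    // from their child incremental aggregators.
    System.out.println(registry.isOTFAggregator("AVG"));  // true
    List<String> children = registry.getOTFAggregatorToIncrementalAggregators().get("AVG");
    System.out.println(children);  // expected: [SUM, COUNT]
  }
}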