Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in project apex-malhar by apache.
The class DimensionalConfigurationSchema, method fulfillCompositeAggregatorExtraInfo.
/**
 * Fulfills the embedded ddids of composite aggregators:
 * gets the dimensions descriptor of each composite aggregator and generates the field keys of its embedded aggregators.
 */
protected void fulfillCompositeAggregatorExtraInfo() {
  Map<Set<String>, Integer> keysToCombinationId = getKeysToCombinationId();
  final int timeBucketSize = customTimeBuckets.size();
  for (int index = 0; index < dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.size(); ++index) {
    Map<String, FieldsDescriptor> compositeAggregatorNameToDescriptor = dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.get(index);
    for (String compositeAggregatorName : compositeAggregatorNameToDescriptor.keySet()) {
      AbstractTopBottomAggregator compositeAggregator = aggregatorRegistry.getNameToTopBottomAggregator().get(compositeAggregatorName);
      // set the dimensions descriptor id
      compositeAggregator.setDimensionDescriptorID(index);
      // aggregator id
      compositeAggregator.setAggregatorID(aggregatorRegistry.getTopBottomAggregatorNameToID().get(compositeAggregatorName));
      // keys for the embedded aggregators
      Set<String> keys = Sets.newHashSet();
      DimensionsDescriptor dd = dimensionsDescriptorIDToDimensionsDescriptor.get(index);
      keys.addAll(dd.getFields().getFieldsList());
      {
        Set<String> compositeKeys = Sets.newHashSet();
        compositeKeys.addAll(keys);
        compositeAggregator.setFields(compositeKeys);
        compositeAggregator.setAggregateDescriptor(compositeAggregatorNameToDescriptor.get(compositeAggregatorName));
      }
      keys.addAll(compositeAggregator.getSubCombinations());
      Integer combinationId = keysToCombinationId.get(keys);
      if (combinationId == null) {
        throw new RuntimeException("Can't find combination id for keys: " + keys);
      }
      for (int ddid = combinationId * timeBucketSize; ddid < (combinationId + 1) * timeBucketSize; ++ddid) {
        compositeAggregator.addEmbedAggregatorDdId(ddid);
      }
    }
  }
}
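The final loop depends on the physical layout of dimensions descriptor ids: every key combination owns a contiguous block of ddids, one per time bucket. A minimal standalone sketch of that arithmetic (hypothetical class and method names, not part of the schema API):

import java.util.ArrayList;
import java.util.List;

public class DdIdLayoutSketch {
  // ddid = combinationId * timeBucketCount + timeBucketIndex, so the embedded
  // aggregator ddids of one combination form the half-open range
  // [combinationId * timeBucketCount, (combinationId + 1) * timeBucketCount).
  static List<Integer> embeddedDdIds(int combinationId, int timeBucketCount) {
    List<Integer> ddIds = new ArrayList<>();
    for (int ddId = combinationId * timeBucketCount; ddId < (combinationId + 1) * timeBucketCount; ddId++) {
      ddIds.add(ddId);
    }
    return ddIds;
  }

  public static void main(String[] args) {
    // With 5 time buckets, combination 3 owns ddids 15..19.
    System.out.println(embeddedDdIds(3, 5)); // prints [15, 16, 17, 18, 19]
  }
}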
Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in project apex-malhar by apache.
The class DimensionalConfigurationSchema, method initialize.
/**
* This is a helper method which initializes the metadata for the {@link DimensionalConfigurationSchema}.
*
* @param keys The key objects to use when creating this configuration schema.
* @param values The value objects to use when creating this configuration schema.
* @param timeBuckets The time buckets to use when creating this configuration schema.
* @param dimensionsCombinations The dimensionsCombinations to use when creating this configuration schema.
*/
private void initialize(List<Key> keys, List<Value> values, List<TimeBucket> timeBuckets, List<DimensionsCombination> dimensionsCombinations) {
  tags = Lists.newArrayList();
  keyToTags = Maps.newHashMap();
  for (Key key : keys) {
    keyToTags.put(key.getName(), new ArrayList<String>());
  }
  valueToTags = Maps.newHashMap();
  for (Value value : values) {
    valueToTags.put(value.getName(), new ArrayList<String>());
  }
  // time buckets
  this.timeBuckets = timeBuckets;
  this.customTimeBuckets = new ArrayList<>();
  customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
  for (TimeBucket timeBucket : timeBuckets) {
    CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
    customTimeBuckets.add(customTimeBucket);
    customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
  }
  // input aggregate values
  Map<String, Type> valueFieldToType = Maps.newHashMap();
  for (Value value : values) {
    valueFieldToType.put(value.getName(), value.getType());
  }
  inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);
  // input keys
  Map<String, Type> keyFieldToType = Maps.newHashMap();
  keysToEnumValuesList = Maps.newHashMap();
  for (Key key : keys) {
    keyFieldToType.put(key.getName(), key.getType());
    keysToEnumValuesList.put(key.getName(), key.getEnumValues());
  }
  keyDescriptor = new FieldsDescriptor(keyFieldToType);
  Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
  keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
  // schemaAllValueToAggregatorToType
  schemaAllValueToAggregatorToType = Maps.newHashMap();
  Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
  Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
  Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
  for (Value value : values) {
    String valueName = value.getName();
    Set<String> aggregators = value.getAggregators();
    Set<String> specificAggregatorSet = Sets.newHashSet();
    Set<String> allAggregatorSet = Sets.newHashSet();
    Set<String> otfAggregators = Sets.newHashSet();
    for (String aggregatorName : aggregators) {
      if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
        specificAggregatorSet.add(aggregatorName);
        allAggregatorSet.add(aggregatorName);
      } else {
        otfAggregators.add(aggregatorName);
        List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
        specificAggregatorSet.addAll(aggregatorNames);
        allAggregatorSet.addAll(aggregatorNames);
        allAggregatorSet.add(aggregatorName);
      }
    }
    specificValueToOTFAggregator.put(valueName, otfAggregators);
    specificValueToAggregator.put(valueName, specificAggregatorSet);
    allSpecificValueToAggregator.put(valueName, allAggregatorSet);
  }
  for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
    String valueName = entry.getKey();
    Type inputType = inputValuesDescriptor.getType(valueName);
    Set<String> aggregators = entry.getValue();
    Map<String, Type> aggregatorToType = Maps.newHashMap();
    for (String aggregatorName : aggregators) {
      if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
        IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
        aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
      } else {
        OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
        aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
      }
    }
    schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
  }
  // ddID
  dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
  dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
  dimensionsDescriptorToID = Maps.newHashMap();
  dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
  dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
  int ddID = 0;
  for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
    for (TimeBucket timeBucket : timeBuckets) {
      DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
      dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
      dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
      dimensionsDescriptorToID.put(dd, ddID);
      Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
      Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
      Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
      for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
        String value = entry.getKey();
        Set<String> staticAggregatorNames = Sets.newHashSet();
        Set<String> otfAggregatorNames = Sets.newHashSet();
        Set<String> aggregatorNames = entry.getValue();
        for (String aggregatorName : aggregatorNames) {
          if (!aggregatorRegistry.isAggregator(aggregatorName)) {
            throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
          }
          if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
            staticAggregatorNames.add(aggregatorName);
          } else {
            staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
            otfAggregatorNames.add(aggregatorName);
          }
        }
        valueToAggregator.put(value, staticAggregatorNames);
        valueToOTFAggregator.put(value, otfAggregatorNames);
      }
      mergeMaps(valueToAggregator, specificValueToAggregator);
      mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
      dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
      dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
      ddID++;
    }
  }
  for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
    if (specificValueToAggregator.isEmpty()) {
      continue;
    }
    for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
      String valueName = entry.getKey();
      Set<String> aggName = entry.getValue();
      if (aggName.isEmpty()) {
        continue;
      }
      Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
      if (ddAggregatorSet == null) {
        ddAggregatorSet = Sets.newHashSet();
        valueToAggregator.put(valueName, ddAggregatorSet);
      }
      ddAggregatorSet.addAll(aggName);
    }
  }
  for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
    if (specificValueToOTFAggregator.isEmpty()) {
      continue;
    }
    for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
      String valueName = entry.getKey();
      Set<String> aggName = entry.getValue();
      if (aggName.isEmpty()) {
        continue;
      }
      Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
      if (ddAggregatorSet == null) {
        ddAggregatorSet = Sets.newHashSet();
        valueToAggregator.put(valueName, ddAggregatorSet);
      }
      ddAggregatorSet.addAll(aggName);
    }
  }
  // ddIDToAggregatorToAggregateDescriptor
  dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
  dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
  // combination ID values
  dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
  dimensionsDescriptorIDToKeys = Lists.newArrayList();
  for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
    dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
  }
  // build keysString
  JSONArray keyArray = new JSONArray();
  for (Key key : keys) {
    JSONObject jo = new JSONObject();
    try {
      jo.put(FIELD_KEYS_NAME, key.getName());
      jo.put(FIELD_KEYS_TYPE, key.getType().getName());
      JSONArray enumArray = new JSONArray();
      for (Object enumVal : key.getEnumValues()) {
        enumArray.put(enumVal);
      }
      jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
    } catch (JSONException ex) {
      throw new RuntimeException(ex);
    }
    keyArray.put(jo);
  }
  keysString = keyArray.toString();
  // build bucketsString
  JSONArray timeBucketArray = new JSONArray();
  for (CustomTimeBucket timeBucket : customTimeBuckets) {
    timeBucketArray.put(timeBucket.getText());
  }
  bucketsString = timeBucketArray.toString();
  // buildDDIDAggID
  // composite aggregators are not supported in this method; add an empty list to avoid unit test errors.
  dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
  buildDimensionsDescriptorIDAggregatorIDMaps();
}
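The ddID counter above advances through combinations in the outer loop and time buckets in the inner loop; this is the id layout that fulfillCompositeAggregatorExtraInfo and the modulo arithmetic in the tests below rely on. A minimal sketch of that enumeration order, using plain strings as stand-ins for the DimensionsCombination and TimeBucket lists:

import java.util.Arrays;
import java.util.List;

public class DdIdEnumerationSketch {
  public static void main(String[] args) {
    List<String> combinations = Arrays.asList("{}", "{publisher}", "{advertiser}");
    List<String> buckets = Arrays.asList("1m", "1h", "1d");
    int ddID = 0;
    for (String combination : combinations) { // outer loop: combinations
      for (String bucket : buckets) {         // inner loop: time buckets
        // ddID = combinationIndex * buckets.size() + bucketIndex
        System.out.println("ddID " + ddID + " -> " + combination + " @ " + bucket);
        ddID++;
      }
    }
  }
}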
Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in project apex-malhar by apache.
The class DimensionalConfigurationSchemaTest, method testAllCombinationsGeneration.
@Test
public void testAllCombinationsGeneration() {
  DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSimpleAllCombinations.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY);
  Set<DimensionsDescriptor> dimensionsDescriptors = Sets.newHashSet();
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(new HashSet<String>())));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("location"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "location"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser", "location"))));
  dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser", "location"))));
  Set<DimensionsDescriptor> actualDimensionsDescriptors = Sets.newHashSet();
  for (DimensionsDescriptor dimensionsDescriptor : schema.getDimensionsDescriptorToID().keySet()) {
    actualDimensionsDescriptors.add(dimensionsDescriptor);
  }
  List<DimensionsDescriptor> ddList = Lists.newArrayList(dimensionsDescriptors);
  List<DimensionsDescriptor> ddActualList = Lists.newArrayList(actualDimensionsDescriptors);
  Collections.sort(ddList);
  Collections.sort(ddActualList);
  Assert.assertEquals(dimensionsDescriptors, actualDimensionsDescriptors);
}
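The eight expected descriptors are exactly the power set of the three key fields: an "all combinations" schema expands n keys into 2^n dimensions combinations. A quick standalone check of that count with Guava (illustration only, not part of the test):

import com.google.common.collect.Sets;
import java.util.Set;

public class PowerSetSketch {
  public static void main(String[] args) {
    // Power set of {publisher, advertiser, location}, including the empty set.
    Set<Set<String>> combinations = Sets.powerSet(Sets.newHashSet("publisher", "advertiser", "location"));
    System.out.println(combinations.size()); // 8 = 2^3, matching the expected descriptors above
  }
}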
Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in project apex-malhar by apache.
The class DimensionalConfigurationSchemaTest, method testCustomTimeBuckets.
@Test
public void testCustomTimeBuckets() {
  DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaCustomTimeBuckets.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY);
  Assert.assertEquals(3, schema.getTimeBuckets().size());
  Assert.assertEquals(5, schema.getCustomTimeBuckets().size());
  List<CustomTimeBucket> customTimeBuckets = Lists.newArrayList(new CustomTimeBucket(TimeBucket.MINUTE), new CustomTimeBucket(TimeBucket.HOUR), new CustomTimeBucket(TimeBucket.DAY), new CustomTimeBucket(TimeBucket.MINUTE, 5), new CustomTimeBucket(TimeBucket.HOUR, 3));
  Assert.assertEquals(customTimeBuckets, schema.getCustomTimeBuckets());
  Assert.assertEquals(40, schema.getDimensionsDescriptorIDToKeyDescriptor().size());
  JSONArray timeBucketsArray = new JSONArray();
  timeBucketsArray.put("1m").put("1h").put("1d").put("5m").put("3h");
  Assert.assertEquals(timeBucketsArray.toString(), schema.getBucketsString());
  CustomTimeBucket customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.MINUTE.ordinal());
  Assert.assertTrue(customTimeBucket.isUnit());
  Assert.assertEquals(TimeBucket.MINUTE, customTimeBucket.getTimeBucket());
  customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.HOUR.ordinal());
  Assert.assertTrue(customTimeBucket.isUnit());
  Assert.assertEquals(TimeBucket.HOUR, customTimeBucket.getTimeBucket());
  customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.DAY.ordinal());
  Assert.assertTrue(customTimeBucket.isUnit());
  Assert.assertEquals(TimeBucket.DAY, customTimeBucket.getTimeBucket());
  int id5m = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.MINUTE, 5));
  int id3h = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.HOUR, 3));
  Assert.assertEquals(256, id5m);
  Assert.assertEquals(257, id3h);
  for (int ddID = 0; ddID < schema.getDimensionsDescriptorIDToDimensionsDescriptor().size(); ddID++) {
    DimensionsDescriptor dd = schema.getDimensionsDescriptorIDToDimensionsDescriptor().get(ddID);
    Assert.assertEquals(customTimeBuckets.get(ddID % 5), dd.getCustomTimeBucket());
  }
}
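The 40 key descriptors are the 8 dimensions combinations crossed with the 5 time buckets, and the asserted ids 256 and 257 imply the registry's id scheme: unit buckets keep their TimeBucket ordinal as id, while the non-unit custom buckets (5m, 3h) are assigned ids sequentially from STARTING_TIMEBUCKET_ID, which these assertions suggest is 256. A standalone sketch of that assignment, under those assumptions:

import java.util.LinkedHashMap;
import java.util.Map;

public class TimeBucketIdSketch {
  public static void main(String[] args) {
    // Assumption implied by the asserts above: non-unit custom buckets get ids
    // assigned sequentially starting at STARTING_TIMEBUCKET_ID; unit buckets
    // keep their TimeBucket ordinal (ordinal values omitted in this sketch).
    final int startingTimeBucketId = 256; // hypothetical value implied by id5m/id3h
    Map<String, Integer> customBucketToId = new LinkedHashMap<>();
    int nextId = startingTimeBucketId;
    for (String bucket : new String[] {"5m", "3h"}) {
      customBucketToId.put(bucket, nextId++);
    }
    System.out.println(customBucketToId); // {5m=256, 3h=257}
  }
}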
Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in project apex-malhar by apache.
The class DimensionalConfigurationSchema, method addValueToAggregatorToCombination.
/**
 * @param values the value fields to be computed
 * @param allKeys the set of keys that represents a combination
 * @param aggregatorName the name of the aggregator (incremental or OTF) to be added to this combination
 */
protected void addValueToAggregatorToCombination(Set<String> values, Set<String> allKeys, String aggregatorName) {
  Map<String, Set<String>> valueToIncrementalAggregators = Maps.newHashMap();
  Map<String, Set<String>> valueToOTFAggregators = Maps.newHashMap();
  Set<String> incrementalAggregatorNames;
  boolean isOTFAggregator = false;
  if (!isIncrementalAggregator(aggregatorName)) {
    // for an OTF aggregator, also add the incremental aggregators it depends on
    incrementalAggregatorNames = getOTFDependedIncrementalAggregatorNames(aggregatorName);
    isOTFAggregator = true;
  } else {
    incrementalAggregatorNames = Sets.newHashSet();
    incrementalAggregatorNames.add(aggregatorName);
  }
  for (String value : values) {
    valueToIncrementalAggregators.put(value, incrementalAggregatorNames);
    if (isOTFAggregator) {
      valueToOTFAggregators.put(value, Sets.newHashSet(aggregatorName));
    }
  }
  for (CustomTimeBucket customTimeBucket : customTimeBucketsCombination) {
    dimensionsDescriptorIDToValueToAggregator.add(valueToIncrementalAggregators);
    dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregators);
    // add empty information just for alignment
    dimensionsDescriptorIDToValueToCompositeAggregator.add(Collections.<String, Set<String>>emptyMap());
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(Collections.<String, Set<String>>emptyMap());
    // key descriptor
    DimensionsDescriptor dimensionsDescriptor = new DimensionsDescriptor(customTimeBucket, new Fields(allKeys));
    dimensionsDescriptorIDToDimensionsDescriptor.add(dimensionsDescriptor);
    dimensionsDescriptorIDToKeyDescriptor.add(dimensionsDescriptor.createFieldsDescriptor(keyDescriptor));
  }
}
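A sketch of the maps this method builds for one value field. The depended incremental aggregators come from getOTFDependedIncrementalAggregatorNames at runtime; here SUM and COUNT are assumed as the dependencies of an OTF AVG aggregator, hard-coded purely for illustration:

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.util.Map;
import java.util.Set;

public class OtfExpansionSketch {
  public static void main(String[] args) {
    // Hypothetical inputs: the value field "impressions" and the OTF aggregator
    // "AVG", whose depended incremental aggregators are assumed to be SUM and COUNT.
    Set<String> dependedIncremental = Sets.newHashSet("SUM", "COUNT");
    Map<String, Set<String>> valueToIncrementalAggregators = Maps.newHashMap();
    Map<String, Set<String>> valueToOTFAggregators = Maps.newHashMap();
    for (String value : Sets.newHashSet("impressions")) {
      valueToIncrementalAggregators.put(value, dependedIncremental);
      valueToOTFAggregators.put(value, Sets.newHashSet("AVG"));
    }
    // Set iteration order may vary.
    System.out.println(valueToIncrementalAggregators); // {impressions=[SUM, COUNT]}
    System.out.println(valueToOTFAggregators);         // {impressions=[AVG]}
  }
}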