Search in sources :

Example 1 with CustomTimeBucketRegistry

use of org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry in project apex-malhar by apache.

The class DimensionalConfigurationSchema, method initialize.

/**
 * This is a helper method which initializes the metadata for the {@link DimensionalConfigurationSchema}.
 *
 * @param keys                   The key objects to use when creating this configuration schema.
 * @param values                 The value objects to use when creating this configuration schema.
 * @param timeBuckets            The time buckets to use when creating this configuration schema.
 * @param dimensionsCombinations The dimensionsCombinations to use when creating this configuration schema.
 */
private void initialize(List<Key> keys, List<Value> values, List<TimeBucket> timeBuckets, List<DimensionsCombination> dimensionsCombinations) {
    // Tag bookkeeping: every key and value name starts with an empty tag list.
    tags = Lists.newArrayList();
    keyToTags = Maps.newHashMap();
    for (Key key : keys) {
        keyToTags.put(key.getName(), new ArrayList<String>());
    }
    valueToTags = Maps.newHashMap();
    for (Value value : values) {
        valueToTags.put(value.getName(), new ArrayList<String>());
    }
    // Time buckets: wrap each TimeBucket in a CustomTimeBucket and register it
    // in the registry under the enum's ordinal as its bucket id.
    // NOTE(review): using ordinal() as a persistent id is fragile if the enum is
    // ever reordered — confirm TimeBucket's ordering is frozen.
    this.timeBuckets = timeBuckets;
    this.customTimeBuckets = new ArrayList<>();
    customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
    for (TimeBucket timeBucket : timeBuckets) {
        CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
        customTimeBuckets.add(customTimeBucket);
        customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
    }
    // Input aggregate values: field name -> declared type for every value.
    Map<String, Type> valueFieldToType = Maps.newHashMap();
    for (Value value : values) {
        valueFieldToType.put(value.getName(), value.getType());
    }
    inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);
    // Input keys: field name -> type, plus the enum values allowed per key.
    Map<String, Type> keyFieldToType = Maps.newHashMap();
    keysToEnumValuesList = Maps.newHashMap();
    for (Key key : keys) {
        keyFieldToType.put(key.getName(), key.getType());
        keysToEnumValuesList.put(key.getName(), key.getEnumValues());
    }
    keyDescriptor = new FieldsDescriptor(keyFieldToType);
    // Key descriptor augmented with the time field(s) derived from the buckets.
    Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
    keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
    // schemaAllValueToAggregatorToType: value name -> aggregator name -> output type.
    // Incremental aggregators contribute directly; non-incremental (OTF) aggregators
    // are expanded to the incremental aggregators they are computed from, and the
    // OTF name itself is kept only in the "all" set.
    schemaAllValueToAggregatorToType = Maps.newHashMap();
    Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
    Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
    Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
    for (Value value : values) {
        String valueName = value.getName();
        Set<String> aggregators = value.getAggregators();
        Set<String> specificAggregatorSet = Sets.newHashSet();
        Set<String> allAggregatorSet = Sets.newHashSet();
        Set<String> otfAggregators = Sets.newHashSet();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                specificAggregatorSet.add(aggregatorName);
                allAggregatorSet.add(aggregatorName);
            } else {
                // OTF aggregator: record it, and also pull in the incremental
                // aggregators it depends on.
                otfAggregators.add(aggregatorName);
                List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
                specificAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.add(aggregatorName);
            }
        }
        specificValueToOTFAggregator.put(valueName, otfAggregators);
        specificValueToAggregator.put(valueName, specificAggregatorSet);
        allSpecificValueToAggregator.put(valueName, allAggregatorSet);
    }
    // Resolve each (value, aggregator) pair to the aggregator's output type.
    for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
        String valueName = entry.getKey();
        Type inputType = inputValuesDescriptor.getType(valueName);
        Set<String> aggregators = entry.getValue();
        Map<String, Type> aggregatorToType = Maps.newHashMap();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                // Incremental aggregators' output type depends on the input type.
                IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
                aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
            } else {
                // OTF aggregators have a fixed output type.
                OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
                aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
            }
        }
        schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
    }
    // Dimensions descriptor ids (ddID): one descriptor per
    // (dimensions combination x time bucket) pair, assigned sequentially, so the
    // parallel lists below are all indexed by ddID.
    dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
    dimensionsDescriptorToID = Maps.newHashMap();
    dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
    int ddID = 0;
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        for (TimeBucket timeBucket : timeBuckets) {
            DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
            dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
            dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
            dimensionsDescriptorToID.put(dd, ddID);
            // Per-combination aggregator assignments, validated against the registry
            // and split into incremental vs OTF, with OTF expanded to its
            // incremental dependencies.
            Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
            Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
            Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
            for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
                String value = entry.getKey();
                Set<String> staticAggregatorNames = Sets.newHashSet();
                Set<String> otfAggregatorNames = Sets.newHashSet();
                Set<String> aggregatorNames = entry.getValue();
                for (String aggregatorName : aggregatorNames) {
                    if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                        throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
                    }
                    if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                        staticAggregatorNames.add(aggregatorName);
                    } else {
                        staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
                        otfAggregatorNames.add(aggregatorName);
                    }
                }
                valueToAggregator.put(value, staticAggregatorNames);
                valueToOTFAggregator.put(value, otfAggregatorNames);
            }
            // Fold the globally declared (per-value) aggregators into this
            // combination's maps.
            mergeMaps(valueToAggregator, specificValueToAggregator);
            mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
            dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
            dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
            ddID++;
        }
    }
    // NOTE(review): the two loops below appear to repeat the merge already done by
    // mergeMaps(...) above for every ddID entry; also the isEmpty() guard is
    // loop-invariant. Looks redundant but is behavior-preserving — confirm before
    // simplifying.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
        if (specificValueToAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // Same redundancy note applies to the OTF variant below.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
        if (specificValueToOTFAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // ddID -> aggregator -> aggregate FieldsDescriptor, for both aggregator kinds.
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
    dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
    // Per-combination metadata (indexed by combination, not by ddID).
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
    dimensionsDescriptorIDToKeys = Lists.newArrayList();
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
        dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
    }
    // Serialize the key definitions (name, type, enum values) to a JSON string.
    JSONArray keyArray = new JSONArray();
    for (Key key : keys) {
        JSONObject jo = new JSONObject();
        try {
            jo.put(FIELD_KEYS_NAME, key.getName());
            jo.put(FIELD_KEYS_TYPE, key.getType().getName());
            JSONArray enumArray = new JSONArray();
            for (Object enumVal : key.getEnumValues()) {
                enumArray.put(enumVal);
            }
            jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
        } catch (JSONException ex) {
            // Should not happen for programmatically built objects; surface as unchecked.
            throw new RuntimeException(ex);
        }
        keyArray.put(jo);
    }
    keysString = keyArray.toString();
    // Serialize the time buckets to a JSON string of their textual forms.
    JSONArray timeBucketArray = new JSONArray();
    for (CustomTimeBucket timeBucket : customTimeBuckets) {
        timeBucketArray.put(timeBucket.getText());
    }
    bucketsString = timeBucketArray.toString();
    // buildDDIDAggID
    // composite aggregator are not supported in this method. add empty list to avoid unit test error.
    dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
    buildDimensionsDescriptorIDAggregatorIDMaps();
}
Also used : Set(java.util.Set) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) JSONArray(org.codehaus.jettison.json.JSONArray) JSONException(org.codehaus.jettison.json.JSONException) DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONObject(org.codehaus.jettison.json.JSONObject) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 2 with CustomTimeBucketRegistry

use of org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry in project apex-malhar by apache.

The class CustomTimeBucketRegistryTest, method testBuildingRegistry.

@Test
public void testBuildingRegistry() {
    CustomTimeBucketRegistry registry = new CustomTimeBucketRegistry();
    // Register a unit bucket for minute, hour, and day under each enum's ordinal.
    CustomTimeBucket minuteBucket = new CustomTimeBucket(TimeBucket.MINUTE);
    CustomTimeBucket hourBucket = new CustomTimeBucket(TimeBucket.HOUR);
    CustomTimeBucket dayBucket = new CustomTimeBucket(TimeBucket.DAY);
    registry.register(minuteBucket, TimeBucket.MINUTE.ordinal());
    registry.register(hourBucket, TimeBucket.HOUR.ordinal());
    registry.register(dayBucket, TimeBucket.DAY.ordinal());
    // Forward lookup: each registered id resolves to a unit bucket wrapping the
    // original TimeBucket.
    for (TimeBucket expected : new TimeBucket[] { TimeBucket.MINUTE, TimeBucket.HOUR, TimeBucket.DAY }) {
        CustomTimeBucket resolved = registry.getTimeBucket(expected.ordinal());
        Assert.assertTrue(resolved.isUnit());
        Assert.assertEquals(expected, resolved.getTimeBucket());
    }
    // Reverse lookup: each bucket maps back to the id it was registered under.
    Assert.assertEquals(TimeBucket.MINUTE.ordinal(), (int) registry.getTimeBucketId(minuteBucket));
    Assert.assertEquals(TimeBucket.HOUR.ordinal(), (int) registry.getTimeBucketId(hourBucket));
    Assert.assertEquals(TimeBucket.DAY.ordinal(), (int) registry.getTimeBucketId(dayBucket));
}
Also used : CustomTimeBucket(org.apache.apex.malhar.lib.appdata.schemas.CustomTimeBucket) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) Test(org.junit.Test)

Example 3 with CustomTimeBucketRegistry

use of org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry in project apex-malhar by apache.

The class CustomTimeBucketRegistryTest, method testRegister.

@Test
public void testRegister() {
    CustomTimeBucketRegistry registry = new CustomTimeBucketRegistry();
    // Seed the registry with three unit buckets under their enum ordinals.
    CustomTimeBucket minuteBucket = new CustomTimeBucket(TimeBucket.MINUTE);
    CustomTimeBucket hourBucket = new CustomTimeBucket(TimeBucket.HOUR);
    CustomTimeBucket dayBucket = new CustomTimeBucket(TimeBucket.DAY);
    registry.register(minuteBucket, TimeBucket.MINUTE.ordinal());
    registry.register(hourBucket, TimeBucket.HOUR.ordinal());
    registry.register(dayBucket, TimeBucket.DAY.ordinal());
    // Highest id assigned so far; auto-assignment should continue right after it.
    int highestAssignedId = Math.max(TimeBucket.MINUTE.ordinal(),
        Math.max(TimeBucket.HOUR.ordinal(), TimeBucket.DAY.ordinal()));
    // Registering without an explicit id should allocate highestAssignedId + 1.
    CustomTimeBucket fiveMinuteBucket = new CustomTimeBucket(TimeBucket.MINUTE, 5L);
    registry.register(fiveMinuteBucket);
    int assignedId = registry.getTimeBucketId(fiveMinuteBucket);
    Assert.assertEquals(highestAssignedId + 1, assignedId);
}
Also used : CustomTimeBucket(org.apache.apex.malhar.lib.appdata.schemas.CustomTimeBucket) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) Test(org.junit.Test)

Example 4 with CustomTimeBucketRegistry

use of org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry in project apex-malhar by apache.

The class DimensionalConfigurationSchema, method initialize (JSON overload).

/**
 * This is a helper method which initializes the {@link DimensionalConfigurationSchema} with the given
 * JSON values.
 *
 * @param json The json with which to initialize the {@link DimensionalConfigurationSchema}.
 * @throws JSONException
 */
private void initialize(String json) throws JSONException {
    JSONObject jo = new JSONObject(json);
    tags = getTags(jo);
    // Keys
    keysToEnumValuesList = Maps.newHashMap();
    JSONArray keysArray;
    if (jo.has(FIELD_KEYS)) {
        keysArray = jo.getJSONArray(FIELD_KEYS);
    } else {
        keysArray = new JSONArray();
    }
    keysString = keysArray.toString();
    keyToTags = Maps.newHashMap();
    Map<String, Type> fieldToType = Maps.newHashMap();
    for (int keyIndex = 0; keyIndex < keysArray.length(); keyIndex++) {
        JSONObject tempKeyDescriptor = keysArray.getJSONObject(keyIndex);
        SchemaUtils.checkValidKeysEx(tempKeyDescriptor, VALID_KEY_FIELDS);
        String keyName = tempKeyDescriptor.getString(FIELD_KEYS_NAME);
        String typeName = tempKeyDescriptor.getString(FIELD_KEYS_TYPE);
        try {
            String keyExpression = tempKeyDescriptor.getString(FIELD_KEYS_EXPRESSION);
            if (keyExpression != null) {
                keyToExpression.put(keyName, keyExpression);
            }
        } catch (JSONException e) {
        // do nothing
        }
        List<String> keyTags = getTags(tempKeyDescriptor);
        keyToTags.put(keyName, keyTags);
        Type type = Type.getTypeEx(typeName);
        fieldToType.put(keyName, type);
        List<Object> valuesList = Lists.newArrayList();
        keysToEnumValuesList.put(keyName, valuesList);
        if (tempKeyDescriptor.has(FIELD_KEYS_ENUMVALUES)) {
            Type maxType = null;
            JSONArray valArray = tempKeyDescriptor.getJSONArray(FIELD_KEYS_ENUMVALUES);
            // Validate the provided data types
            for (int valIndex = 0; valIndex < valArray.length(); valIndex++) {
                Object val = valArray.get(valIndex);
                valuesList.add(val);
                Preconditions.checkState(!(val instanceof JSONArray || val instanceof JSONObject), "The value must be a primitive.");
                Type currentType = Type.CLASS_TO_TYPE.get(val.getClass());
                if (maxType == null) {
                    maxType = currentType;
                } else if (maxType != currentType) {
                    if (maxType.getHigherTypes().contains(currentType)) {
                        maxType = currentType;
                    } else {
                        Preconditions.checkState(currentType.getHigherTypes().contains(maxType), "Conficting types: " + currentType.getName() + " cannot be converted to " + maxType.getName());
                    }
                }
            }
            // This is not the right thing to do, fix later
            if (!Type.areRelated(maxType, type)) {
                throw new IllegalArgumentException("The type of the values in " + valArray + " is " + maxType.getName() + " while the specified type is " + type.getName());
            }
        }
    }
    // Time Buckets
    timeBuckets = Lists.newArrayList();
    customTimeBuckets = Lists.newArrayList();
    JSONArray timeBucketsJSON;
    if (!jo.has(FIELD_TIME_BUCKETS)) {
        timeBucketsJSON = new JSONArray();
        timeBucketsJSON.put(TimeBucket.ALL.getText());
    } else {
        timeBucketsJSON = jo.getJSONArray(FIELD_TIME_BUCKETS);
        if (timeBucketsJSON.length() == 0) {
            throw new IllegalArgumentException("A time bucket must be specified.");
        }
    }
    customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
    Set<CustomTimeBucket> customTimeBucketsAllSet = Sets.newHashSet();
    List<CustomTimeBucket> customTimeBucketsAll = Lists.newArrayList();
    Set<CustomTimeBucket> customTimeBucketsTotalSet = Sets.newHashSet();
    for (int timeBucketIndex = 0; timeBucketIndex < timeBucketsJSON.length(); timeBucketIndex++) {
        String timeBucketString = timeBucketsJSON.getString(timeBucketIndex);
        CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucketString);
        if (!customTimeBucketsAllSet.add(customTimeBucket)) {
            throw new IllegalArgumentException("The bucket " + customTimeBucket.getText() + " was defined twice.");
        }
        customTimeBucketsAll.add(customTimeBucket);
        customTimeBuckets.add(customTimeBucket);
        if (customTimeBucket.isUnit() || customTimeBucket.getTimeBucket() == TimeBucket.ALL) {
            timeBuckets.add(customTimeBucket.getTimeBucket());
        }
    }
    customTimeBucketsTotalSet.addAll(customTimeBucketsAllSet);
    JSONArray customTimeBucketsJSON = new JSONArray();
    for (CustomTimeBucket customTimeBucket : customTimeBuckets) {
        customTimeBucketsJSON.put(customTimeBucket.toString());
    }
    bucketsString = customTimeBucketsJSON.toString();
    // Key descriptor all
    keyDescriptor = new FieldsDescriptor(fieldToType);
    Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(fieldToType);
    keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
    // Values
    Map<String, Set<String>> allValueToAggregator = Maps.newHashMap();
    Map<String, Set<String>> allValueToOTFAggregator = Maps.newHashMap();
    Map<String, Set<String>> valueToAggregators = Maps.newHashMap();
    Map<String, Set<String>> valueToOTFAggregators = Maps.newHashMap();
    Map<String, Set<String>> allValueToCompositeAggregator = Maps.newHashMap();
    Map<String, Set<String>> valueToCompositeAggregators = Maps.newHashMap();
    Map<String, Type> aggFieldToType = Maps.newHashMap();
    JSONArray valuesArray = jo.getJSONArray(FIELD_VALUES);
    schemaAllValueToAggregatorToType = Maps.newHashMap();
    valueToTags = Maps.newHashMap();
    for (int valueIndex = 0; valueIndex < valuesArray.length(); valueIndex++) {
        JSONObject value = valuesArray.getJSONObject(valueIndex);
        String name = value.getString(FIELD_VALUES_NAME);
        String type = value.getString(FIELD_VALUES_TYPE);
        try {
            String valueExpression = value.getString(FIELD_VALUES_EXPRESSION);
            if (valueExpression != null) {
                valueToExpression.put(name, valueExpression);
            }
        } catch (JSONException e) {
        // Do nothing
        }
        List<String> valueTags = getTags(value);
        valueToTags.put(name, valueTags);
        Type typeT = Type.NAME_TO_TYPE.get(type);
        if (aggFieldToType.containsKey(name)) {
            throw new IllegalArgumentException("Cannot define the value " + name + " twice.");
        }
        Map<String, Type> aggregatorToType = Maps.newHashMap();
        schemaAllValueToAggregatorToType.put(name, aggregatorToType);
        aggFieldToType.put(name, typeT);
        Set<String> aggregatorSet = Sets.newHashSet();
        Set<String> aggregatorOTFSet = Sets.newHashSet();
        Set<String> aggregateCompositeSet = Sets.newHashSet();
        if (value.has(FIELD_VALUES_AGGREGATIONS)) {
            JSONArray aggregators = value.getJSONArray(FIELD_VALUES_AGGREGATIONS);
            if (aggregators.length() == 0) {
                throw new IllegalArgumentException("Empty aggregators array for: " + name);
            }
            for (int aggregatorIndex = 0; aggregatorIndex < aggregators.length(); aggregatorIndex++) {
                // the aggrator is not only has name any more, it could be an object or a String
                // example: {"aggregator":"BOTTOMN","property":"count","value":"20","embededAggregator":"AVG"}
                String aggregatorType = null;
                aggregatorType = aggregators.getString(aggregatorIndex);
                if (isJsonSimpleString(aggregatorType)) {
                    // it's is simple aggregator
                    addNonCompositeAggregator(aggregatorType, allValueToAggregator, allValueToOTFAggregator, name, aggregatorSet, aggregatorToType, typeT, aggregatorOTFSet, true);
                } else {
                    // it is a composite aggragate
                    JSONObject jsonAggregator = aggregators.getJSONObject(aggregatorIndex);
                    aggregatorType = jsonAggregator.getString(FIELD_VALUES_AGGREGATOR);
                    Map<String, Object> propertyNameToValue = getPropertyNameToValue(jsonAggregator, aggregatorType);
                    // the steps following is for composite aggregator.
                    if (isCompositeAggregator(aggregatorType)) {
                        String embededAggregatorName = (String) propertyNameToValue.get(PROPERTY_NAME_EMBEDED_AGGREGATOR);
                        /**
                         * don't add embed aggregator here as the emebed aggregator is with different dimension as this dimension
                         * maybe haven't created yet.
                         */
                        CompositeAggregator aggregator = addCompositeAggregator(aggregatorType, allValueToCompositeAggregator, aggregateCompositeSet, name, embededAggregatorName, propertyNameToValue, aggregatorToType);
                    } else {
                        throw new IllegalArgumentException("Unknow aggregator type: " + aggregatorType + ", please check if it valid.");
                    }
                }
            }
        }
        if (!aggregatorSet.isEmpty()) {
            valueToAggregators.put(name, aggregatorSet);
            valueToOTFAggregators.put(name, aggregatorOTFSet);
        }
        if (!aggregateCompositeSet.isEmpty()) {
            valueToCompositeAggregators.put(name, aggregateCompositeSet);
        }
    }
    LOG.debug("allValueToAggregator {}", allValueToAggregator);
    LOG.debug("valueToAggregators {}", valueToAggregators);
    this.inputValuesDescriptor = new FieldsDescriptor(aggFieldToType);
    // Dimensions
    dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToCompositeAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToKeys = Lists.newArrayList();
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
    JSONArray dimensionsArray;
    if (jo.has(FIELD_DIMENSIONS)) {
        Object dimensionsVal = jo.get(FIELD_DIMENSIONS);
        if (dimensionsVal instanceof String) {
            if (!((String) dimensionsVal).equals(FIELD_DIMENSIONS_ALL_COMBINATIONS)) {
                throw new IllegalArgumentException(dimensionsVal + " is an invalid value for " + FIELD_DIMENSIONS);
            }
            dimensionsArray = new JSONArray();
            LOG.debug("Combinations size {}", fieldToType.keySet().size());
            Set<Set<String>> combinations = buildCombinations(fieldToType.keySet());
            LOG.debug("Combinations size {}", combinations.size());
            List<DimensionsDescriptor> dimensionDescriptors = Lists.newArrayList();
            for (Set<String> combination : combinations) {
                dimensionDescriptors.add(new DimensionsDescriptor(new Fields(combination)));
            }
            Collections.sort(dimensionDescriptors);
            LOG.debug("Dimensions descriptor size {}", dimensionDescriptors.size());
            for (DimensionsDescriptor dimensionsDescriptor : dimensionDescriptors) {
                JSONObject combination = new JSONObject();
                JSONArray combinationKeys = new JSONArray();
                for (String field : dimensionsDescriptor.getFields().getFields()) {
                    combinationKeys.put(field);
                }
                combination.put(FIELD_DIMENSIONS_COMBINATIONS, combinationKeys);
                dimensionsArray.put(combination);
            }
        } else if (dimensionsVal instanceof JSONArray) {
            dimensionsArray = jo.getJSONArray(FIELD_DIMENSIONS);
        } else {
            throw new IllegalArgumentException("The value for " + FIELD_DIMENSIONS + " must be a string or an array.");
        }
    } else {
        dimensionsArray = new JSONArray();
        JSONObject combination = new JSONObject();
        combination.put(FIELD_DIMENSIONS_COMBINATIONS, new JSONArray());
        dimensionsArray.put(combination);
    }
    Set<Fields> dimensionsDescriptorFields = Sets.newHashSet();
    // loop through dimension descriptors
    for (int dimensionsIndex = 0; dimensionsIndex < dimensionsArray.length(); dimensionsIndex++) {
        // Get a dimension descriptor
        JSONObject dimension = dimensionsArray.getJSONObject(dimensionsIndex);
        // Get the key fields of a descriptor
        JSONArray combinationFields = dimension.getJSONArray(FIELD_DIMENSIONS_COMBINATIONS);
        // valueName to IncrementalAggregator
        Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
        // valueName to OTFAggregator
        Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
        // valueName to CompositeAggregator
        Map<String, Set<String>> specificValueToCompositeAggregator = Maps.newHashMap();
        for (Map.Entry<String, Set<String>> entry : valueToAggregators.entrySet()) {
            Set<String> aggregators = Sets.newHashSet();
            aggregators.addAll(entry.getValue());
            specificValueToAggregator.put(entry.getKey(), aggregators);
        }
        for (Map.Entry<String, Set<String>> entry : valueToOTFAggregators.entrySet()) {
            Set<String> aggregators = Sets.newHashSet();
            aggregators.addAll(entry.getValue());
            specificValueToOTFAggregator.put(entry.getKey(), aggregators);
        }
        for (Map.Entry<String, Set<String>> entry : valueToCompositeAggregators.entrySet()) {
            Set<String> aggregators = Sets.newHashSet();
            aggregators.addAll(entry.getValue());
            specificValueToCompositeAggregator.put(entry.getKey(), aggregators);
        }
        List<String> keyNames = Lists.newArrayList();
        // loop through the key fields of a descriptor
        for (int keyIndex = 0; keyIndex < combinationFields.length(); keyIndex++) {
            String keyName = combinationFields.getString(keyIndex);
            keyNames.add(keyName);
        }
        Fields dimensionDescriptorFields = new Fields(keyNames);
        if (!dimensionsDescriptorFields.add(dimensionDescriptorFields)) {
            throw new IllegalArgumentException("Duplicate dimension descriptor: " + dimensionDescriptorFields);
        }
        Map<String, Set<String>> fieldToAggregatorAdditionalValues = Maps.newHashMap();
        dimensionsDescriptorIDToKeys.add(dimensionDescriptorFields);
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(fieldToAggregatorAdditionalValues);
        Set<CustomTimeBucket> customTimeBucketsCombinationSet = Sets.newHashSet(customTimeBucketsAllSet);
        customTimeBucketsCombination = Lists.newArrayList(customTimeBucketsAll);
        if (dimension.has(DimensionalConfigurationSchema.FIELD_DIMENSIONS_TIME_BUCKETS)) {
            JSONArray timeBuckets = dimension.getJSONArray(DimensionalConfigurationSchema.FIELD_DIMENSIONS_TIME_BUCKETS);
            if (timeBuckets.length() == 0) {
                throw new IllegalArgumentException(dimensionDescriptorFields.getFields().toString());
            }
            for (int timeBucketIndex = 0; timeBucketIndex < timeBuckets.length(); timeBucketIndex++) {
                CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBuckets.getString(timeBucketIndex));
                if (!customTimeBucketsCombinationSet.add(customTimeBucket)) {
                    throw new IllegalArgumentException("The time bucket " + customTimeBucket + " is defined twice for the dimensions combination " + dimensionDescriptorFields.getFields().toString());
                }
                customTimeBucketsCombinationSet.add(customTimeBucket);
                customTimeBucketsCombination.add(customTimeBucket);
            }
        }
        customTimeBucketsTotalSet.addAll(customTimeBucketsCombinationSet);
        // Loop through time to generate dimension descriptors
        for (CustomTimeBucket timeBucket : customTimeBucketsCombination) {
            DimensionsDescriptor dimensionsDescriptor = new DimensionsDescriptor(timeBucket, dimensionDescriptorFields);
            dimensionsDescriptorIDToKeyDescriptor.add(dimensionsDescriptor.createFieldsDescriptor(keyDescriptor));
            dimensionsDescriptorIDToDimensionsDescriptor.add(dimensionsDescriptor);
        }
        if (dimension.has(FIELD_DIMENSIONS_ADDITIONAL_VALUES)) {
            JSONArray additionalValues = dimension.getJSONArray(FIELD_DIMENSIONS_ADDITIONAL_VALUES);
            // iterate over additional values
            for (int additionalValueIndex = 0; additionalValueIndex < additionalValues.length(); additionalValueIndex++) {
                String additionalValue = additionalValues.getString(additionalValueIndex);
                if (isJsonSimpleString(additionalValue)) {
                    String[] components = additionalValue.split(ADDITIONAL_VALUE_SEPERATOR);
                    if (components.length != ADDITIONAL_VALUE_NUM_COMPONENTS) {
                        throw new IllegalArgumentException("The number of component values " + "in an additional value must be " + ADDITIONAL_VALUE_NUM_COMPONENTS + " not " + components.length);
                    }
                    String valueName = components[ADDITIONAL_VALUE_VALUE_INDEX];
                    verifyValueDefined(valueName, aggFieldToType.keySet());
                    String aggregatorName = components[ADDITIONAL_VALUE_AGGREGATOR_INDEX];
                    {
                        Set<String> aggregators = fieldToAggregatorAdditionalValues.get(valueName);
                        if (aggregators == null) {
                            aggregators = Sets.newHashSet();
                            fieldToAggregatorAdditionalValues.put(valueName, aggregators);
                        }
                        aggregators.add(aggregatorName);
                    }
                    if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                        throw new IllegalArgumentException(aggregatorName + " is not a valid aggregator.");
                    }
                    if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                        Set<String> aggregatorNames = allValueToAggregator.get(valueName);
                        if (aggregatorNames == null) {
                            aggregatorNames = Sets.newHashSet();
                            allValueToAggregator.put(valueName, aggregatorNames);
                        }
                        aggregatorNames.add(aggregatorName);
                        Set<String> aggregators = specificValueToAggregator.get(valueName);
                        if (aggregators == null) {
                            aggregators = Sets.newHashSet();
                            specificValueToAggregator.put(valueName, aggregators);
                        }
                        if (!aggregators.add(aggregatorName)) {
                            throw new IllegalArgumentException("The aggregator " + aggregatorName + " was already defined in the " + FIELD_VALUES + " section for the value " + valueName);
                        }
                    } else {
                        // Check for duplicate on the fly aggregators
                        Set<String> aggregatorNames = specificValueToOTFAggregator.get(valueName);
                        if (aggregatorNames == null) {
                            aggregatorNames = Sets.newHashSet();
                            specificValueToOTFAggregator.put(valueName, aggregatorNames);
                        }
                        if (!aggregatorNames.add(aggregatorName)) {
                            throw new IllegalArgumentException("The aggregator " + aggregatorName + " cannot be specified twice for the value " + valueName);
                        }
                        aggregatorNames = allValueToOTFAggregator.get(valueName);
                        if (aggregatorNames == null) {
                            aggregatorNames = Sets.newHashSet();
                            allValueToOTFAggregator.put(valueName, aggregatorNames);
                        }
                        if (!aggregatorNames.add(aggregatorName)) {
                            throw new IllegalArgumentException("The aggregator " + aggregatorName + " cannot be specified twice for the value " + valueName);
                        }
                        Set<String> aggregators = specificValueToAggregator.get(valueName);
                        if (aggregators == null) {
                            aggregators = Sets.newHashSet();
                            specificValueToAggregator.put(valueName, aggregators);
                        }
                        if (aggregators == null) {
                            throw new IllegalArgumentException("The additional value " + additionalValue + "Does not have a corresponding value " + valueName + " defined in the " + FIELD_VALUES + " section.");
                        }
                        aggregators.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
                    }
                } else {
                    // it is a composite aggregate
                    JSONObject jsonAddition = additionalValues.getJSONObject(additionalValueIndex);
                    String valueName = (String) jsonAddition.keys().next();
                    verifyValueDefined(valueName, aggFieldToType.keySet());
                    JSONObject jsonAggregator = jsonAddition.getJSONObject(valueName);
                    String aggregatorName = jsonAggregator.getString(FIELD_VALUES_AGGREGATOR);
                    Map<String, Object> propertyNameToValue = getPropertyNameToValue(jsonAggregator, aggregatorName);
                    // the steps following is for composite aggregator.
                    if (isCompositeAggregator(aggregatorName)) {
                        String embededAggregatorName = (String) propertyNameToValue.get(PROPERTY_NAME_EMBEDED_AGGREGATOR);
                        /**
                         * Don't add the embedded aggregator here: the embedded aggregator belongs to a different
                         * dimensions combination than this one, which may not have been created yet.
                         * The subCombination should be part of the combination.
                         */
                        Set<String> compositeAggregators = specificValueToCompositeAggregator.get(valueName);
                        if (compositeAggregators == null) {
                            compositeAggregators = Sets.newHashSet();
                            specificValueToCompositeAggregator.put(valueName, compositeAggregators);
                        }
                        CompositeAggregator aggregator = addCompositeAggregator(aggregatorName, allValueToCompositeAggregator, compositeAggregators, valueName, embededAggregatorName, propertyNameToValue, null);
                    } else {
                        throw new IllegalArgumentException("Unknow aggregator name: " + aggregatorName + ", please check if it valid.");
                    }
                }
            }
        }
        if (specificValueToAggregator.isEmpty() && specificValueToCompositeAggregator.isEmpty()) {
            throw new IllegalArgumentException("No aggregations defined for the " + "following field combination " + combinationFields.toString());
        }
        for (CustomTimeBucket customTimeBucket : customTimeBucketsCombination) {
            dimensionsDescriptorIDToValueToAggregator.add(specificValueToAggregator);
            dimensionsDescriptorIDToValueToOTFAggregator.add(specificValueToOTFAggregator);
            dimensionsDescriptorIDToValueToCompositeAggregator.add(specificValueToCompositeAggregator);
        }
    }
    customTimeBucketsAll.clear();
    customTimeBucketsAll.addAll(customTimeBucketsTotalSet);
    Collections.sort(customTimeBucketsAll);
    for (CustomTimeBucket customTimeBucket : customTimeBucketsAll) {
        if (customTimeBucketRegistry.getTimeBucketId(customTimeBucket) == null) {
            if (customTimeBucket.isUnit() || customTimeBucket.getTimeBucket() == TimeBucket.ALL) {
                customTimeBucketRegistry.register(customTimeBucket, customTimeBucket.getTimeBucket().ordinal());
            } else {
                customTimeBucketRegistry.register(customTimeBucket);
            }
        }
    }
    // compute additional dimensions and aggregators for composite aggregators
    computeAdditionalDimensionForCompositeAggregators();
    // DD ID To Aggregator To Aggregate Descriptor
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
    // DD ID To OTF Aggregator To Aggregator Descriptor
    dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
    dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToCompositeAggregator);
    // Dimensions Descriptor To ID
    dimensionsDescriptorToID = Maps.newHashMap();
    for (int index = 0; index < dimensionsDescriptorIDToDimensionsDescriptor.size(); index++) {
        dimensionsDescriptorToID.put(dimensionsDescriptorIDToDimensionsDescriptor.get(index), index);
    }
    // Build id maps
    buildDimensionsDescriptorIDAggregatorIDMaps();
    aggregatorRegistry.buildTopBottomAggregatorIDToAggregator();
    // fulfill the embed ddids of composite aggregators
    fulfillCompositeAggregatorExtraInfo();
}
Also used : Set(java.util.Set) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) JSONArray(org.codehaus.jettison.json.JSONArray) JSONException(org.codehaus.jettison.json.JSONException) AbstractCompositeAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.AbstractCompositeAggregator) CompositeAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.CompositeAggregator) DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONObject(org.codehaus.jettison.json.JSONObject) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Aggregations

CustomTimeBucketRegistry (org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry)4 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)2 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)2 Map (java.util.Map)2 Set (java.util.Set)2 CustomTimeBucket (org.apache.apex.malhar.lib.appdata.schemas.CustomTimeBucket)2 DimensionsDescriptor (org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor)2 JSONArray (org.codehaus.jettison.json.JSONArray)2 JSONException (org.codehaus.jettison.json.JSONException)2 JSONObject (org.codehaus.jettison.json.JSONObject)2 Test (org.junit.Test)2 AbstractCompositeAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.AbstractCompositeAggregator)1 CompositeAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.CompositeAggregator)1 IncrementalAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)1 OTFAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator)1