Search in sources :

Example 1 with OTFAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator in project apex-malhar by apache.

the class DimensionalConfigurationSchema method initialize.

/**
 * This is a helper method which initializes the metadata for the {@link DimensionalConfigurationSchema}.
 * <p>
 * In order, it populates: the tag maps, the time bucket list and registry, the input value and key
 * descriptors, the per-value aggregator output types, the per-dimensions-descriptor aggregator maps,
 * the aggregate descriptors derived from them, and finally the JSON strings describing the keys and
 * the time buckets. Composite aggregators are not handled here; only an empty placeholder list is
 * created for them.
 *
 * @param keys                   The key objects to use when creating this configuration schema.
 * @param values                 The value objects to use when creating this configuration schema.
 * @param timeBuckets            The time buckets to use when creating this configuration schema.
 * @param dimensionsCombinations The dimensionsCombinations to use when creating this configuration schema.
 * @throws UnsupportedOperationException if an aggregator name attached to a value or to a dimensions
 *                                       combination is not known to the aggregator registry.
 * @throws RuntimeException              wrapping a {@link JSONException} raised while building the key JSON.
 */
private void initialize(List<Key> keys, List<Value> values, List<TimeBucket> timeBuckets, List<DimensionsCombination> dimensionsCombinations) {
    // Tag bookkeeping: every key and every value starts with an empty tag list.
    tags = Lists.newArrayList();
    keyToTags = Maps.newHashMap();
    for (Key key : keys) {
        keyToTags.put(key.getName(), new ArrayList<String>());
    }
    valueToTags = Maps.newHashMap();
    for (Value value : values) {
        valueToTags.put(value.getName(), new ArrayList<String>());
    }
    // time buckets
    this.timeBuckets = timeBuckets;
    this.customTimeBuckets = new ArrayList<>();
    customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
    for (TimeBucket timeBucket : timeBuckets) {
        // Wrap each standard bucket and register it under the enum ordinal of the bucket.
        CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
        customTimeBuckets.add(customTimeBucket);
        customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
    }
    // Input aggregate values
    Map<String, Type> valueFieldToType = Maps.newHashMap();
    for (Value value : values) {
        valueFieldToType.put(value.getName(), value.getType());
    }
    inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);
    // Input keys
    Map<String, Type> keyFieldToType = Maps.newHashMap();
    keysToEnumValuesList = Maps.newHashMap();
    for (Key key : keys) {
        keyFieldToType.put(key.getName(), key.getType());
        keysToEnumValuesList.put(key.getName(), key.getEnumValues());
    }
    keyDescriptor = new FieldsDescriptor(keyFieldToType);
    // A copy of the key types is handed to the helper so keyFieldToType itself is not shared with it.
    Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
    keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
    // schemaAllValueToAggregatorToType
    schemaAllValueToAggregatorToType = Maps.newHashMap();
    // value name -> incremental aggregators needed (declared directly or as children of an OTF aggregator)
    Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
    // value name -> OTF aggregators declared on the value
    Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
    // value name -> union of the two sets above
    Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
    for (Value value : values) {
        String valueName = value.getName();
        Set<String> aggregators = value.getAggregators();
        Set<String> specificAggregatorSet = Sets.newHashSet();
        Set<String> allAggregatorSet = Sets.newHashSet();
        Set<String> otfAggregators = Sets.newHashSet();
        for (String aggregatorName : aggregators) {
            // Reject unknown names here, exactly as the per-combination loop below does. Without this
            // check an unregistered name falls into the OTF branch, the child lookup returns null and
            // addAll fails with an uninformative NullPointerException.
            if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
            }
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                specificAggregatorSet.add(aggregatorName);
                allAggregatorSet.add(aggregatorName);
            } else {
                // OTF aggregator: record it and pull in the incremental aggregators it is computed from.
                otfAggregators.add(aggregatorName);
                List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
                specificAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.add(aggregatorName);
            }
        }
        specificValueToOTFAggregator.put(valueName, otfAggregators);
        specificValueToAggregator.put(valueName, specificAggregatorSet);
        allSpecificValueToAggregator.put(valueName, allAggregatorSet);
    }
    // Resolve the output type of every aggregator of every value.
    for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
        String valueName = entry.getKey();
        Type inputType = inputValuesDescriptor.getType(valueName);
        Set<String> aggregators = entry.getValue();
        Map<String, Type> aggregatorToType = Maps.newHashMap();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                // Incremental aggregators derive their output type from the value's input type.
                IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
                aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
            } else {
                // OTF aggregators report their output type without reference to the input type.
                OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
                aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
            }
        }
        schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
    }
    // ddID
    dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
    dimensionsDescriptorToID = Maps.newHashMap();
    dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
    // One dimensions descriptor is created per (combination, time bucket) pair; the lists above are
    // appended in lockstep so that ddID is the index of the pair in each of them.
    int ddID = 0;
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        for (TimeBucket timeBucket : timeBuckets) {
            DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
            dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
            dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
            dimensionsDescriptorToID.put(dd, ddID);
            Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
            Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
            // Aggregators declared on the combination itself, split into incremental and OTF names.
            Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
            for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
                String value = entry.getKey();
                Set<String> staticAggregatorNames = Sets.newHashSet();
                Set<String> otfAggregatorNames = Sets.newHashSet();
                Set<String> aggregatorNames = entry.getValue();
                for (String aggregatorName : aggregatorNames) {
                    if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                        throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
                    }
                    if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                        staticAggregatorNames.add(aggregatorName);
                    } else {
                        // An OTF aggregator also requires its child incremental aggregators.
                        staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
                        otfAggregatorNames.add(aggregatorName);
                    }
                }
                valueToAggregator.put(value, staticAggregatorNames);
                valueToOTFAggregator.put(value, otfAggregatorNames);
            }
            // NOTE(review): mergeMaps is defined elsewhere; presumably it folds the value-level
            // aggregators into the per-descriptor maps - confirm against its implementation.
            mergeMaps(valueToAggregator, specificValueToAggregator);
            mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
            dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
            dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
            ddID++;
        }
    }
    // Add every non-empty value-level incremental aggregator set to each descriptor's map.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
        if (specificValueToAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // Same as above, for the value-level OTF aggregator sets.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
        if (specificValueToOTFAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // ddIDToAggregatorToAggregateDescriptor
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
    dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
    // combination ID values
    // NOTE(review): these two lists receive one entry per combination, whereas the ddID-indexed lists
    // above receive one entry per (combination, time bucket) pair - confirm the indexing is intended.
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
    dimensionsDescriptorIDToKeys = Lists.newArrayList();
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
        dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
    }
    // Build keyString
    JSONArray keyArray = new JSONArray();
    for (Key key : keys) {
        JSONObject jo = new JSONObject();
        try {
            jo.put(FIELD_KEYS_NAME, key.getName());
            jo.put(FIELD_KEYS_TYPE, key.getType().getName());
            JSONArray enumArray = new JSONArray();
            for (Object enumVal : key.getEnumValues()) {
                enumArray.put(enumVal);
            }
            jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
        } catch (JSONException ex) {
            throw new RuntimeException(ex);
        }
        keyArray.put(jo);
    }
    keysString = keyArray.toString();
    // Build time buckets
    JSONArray timeBucketArray = new JSONArray();
    for (CustomTimeBucket timeBucket : customTimeBuckets) {
        timeBucketArray.put(timeBucket.getText());
    }
    bucketsString = timeBucketArray.toString();
    // buildDDIDAggID
    // Composite aggregators are not supported in this method. Add an empty list to avoid a unit test error.
    dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
    buildDimensionsDescriptorIDAggregatorIDMaps();
}
Also used : Set(java.util.Set) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) JSONArray(org.codehaus.jettison.json.JSONArray) JSONException(org.codehaus.jettison.json.JSONException) DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 2 with OTFAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator in project apex-malhar by apache.

the class DimensionalConfigurationSchema method addNonCompositeAggregator.

/**
 * Records a single incremental or on-the-fly (OTF) aggregator for a value and returns the
 * aggregator instance looked up from the aggregator registry.
 * <p>
 * For an incremental aggregator the name is added to the value's entry in
 * {@code allValueToAggregator} and to {@code aggregatorSet}. For an OTF aggregator the name is added
 * to the value's entry in {@code allValueToOTFAggregator} and to {@code aggregatorOTFSet}, and the
 * incremental aggregators it depends on are added to the value's entry in
 * {@code allValueToAggregator} and to {@code aggregatorSet}. In both cases the aggregator's output
 * type is stored in {@code aggregatorToType} under the aggregator name.
 *
 * @param aggregatorName          name of the aggregator to add
 * @param allValueToAggregator    value name to incremental aggregator names; updated in place
 * @param allValueToOTFAggregator value name to OTF aggregator names; updated in place
 * @param valueName               name of the value the aggregator applies to
 * @param aggregatorSet           incremental aggregator names collected so far; updated in place
 * @param aggregatorToType        aggregator name to output type; updated in place
 * @param typeT                   input type handed to an incremental aggregator to derive its output type
 * @param aggregatorOTFSet        OTF aggregator names collected so far; updated in place
 * @param checkDuplicate          when true, a repeated aggregator name is rejected
 * @return the {@link IncrementalAggregator} or {@link OTFAggregator} registered under the name
 * @throws IllegalArgumentException if the name is neither an incremental nor an OTF aggregator, or
 *                                  if it is a duplicate and {@code checkDuplicate} is set
 */
protected Object addNonCompositeAggregator(String aggregatorName, Map<String, Set<String>> allValueToAggregator, Map<String, Set<String>> allValueToOTFAggregator, String valueName, Set<String> aggregatorSet, Map<String, Type> aggregatorToType, Type typeT, Set<String> aggregatorOTFSet, boolean checkDuplicate) {
    final boolean incremental = aggregatorRegistry.isIncrementalAggregator(aggregatorName);
    // Guard clause: anything that is neither incremental nor OTF is rejected before touching any state.
    if (!incremental && !aggregatorRegistry.isOTFAggregator(aggregatorName)) {
        throw new IllegalArgumentException(aggregatorName + " is not a valid non-composit aggregator.");
    }

    if (incremental) {
        fetchOrCreateNameSet(allValueToAggregator, valueName).add(aggregatorName);
        // The name is always added to aggregatorSet; a repeat is only an error when duplicates are checked.
        final boolean newlyAdded = aggregatorSet.add(aggregatorName);
        if (checkDuplicate && !newlyAdded) {
            throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
        }
        final IncrementalAggregator incrementalAggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
        aggregatorToType.put(aggregatorName, incrementalAggregator.getOutputType(typeT));
        return incrementalAggregator;
    }

    // On-the-fly aggregator: duplicates are detected on the value's OTF name set.
    final boolean newOtfName = fetchOrCreateNameSet(allValueToOTFAggregator, valueName).add(aggregatorName);
    if (checkDuplicate && !newOtfName) {
        throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
    }
    aggregatorOTFSet.add(aggregatorName);

    // The incremental aggregators the OTF aggregator is computed from must be tracked as well.
    final Set<String> incrementalNames = fetchOrCreateNameSet(allValueToAggregator, valueName);
    final OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
    final List<String> childAggregators = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
    incrementalNames.addAll(childAggregators);
    aggregatorSet.addAll(childAggregators);
    aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
    LOG.debug("field name {} and adding aggregator names {}:", valueName, incrementalNames);
    return otfAggregator;
}

/**
 * Returns the name set stored under {@code valueName}, creating and storing an empty set first
 * when the map has no entry (or a null entry) for it.
 */
private static Set<String> fetchOrCreateNameSet(Map<String, Set<String>> valueToNames, String valueName) {
    Set<String> names = valueToNames.get(valueName);
    if (names == null) {
        names = Sets.newHashSet();
        valueToNames.put(valueName, names);
    }
    return names;
}
Also used : OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)

Aggregations

IncrementalAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)2 OTFAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator)2 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 CustomTimeBucketRegistry (org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry)1 DimensionsDescriptor (org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor)1 JSONArray (org.codehaus.jettison.json.JSONArray)1 JSONException (org.codehaus.jettison.json.JSONException)1 JSONObject (org.codehaus.jettison.json.JSONObject)1