Search in sources :

Example 1 with IncrementalAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator in project apex-malhar by apache.

the class DimensionalConfigurationSchema method initialize.

/**
 * This is a helper method which initializes the metadata for the {@link DimensionalConfigurationSchema}.
 * <p>
 * The steps run in this order:
 * <ol>
 *   <li>reset the tag collections (an empty tag list per key name and per value name);</li>
 *   <li>wrap every {@link TimeBucket} in a {@link CustomTimeBucket} and register it;</li>
 *   <li>build the field descriptors for the input values and the keys;</li>
 *   <li>split each value's aggregators into incremental aggregators and the remaining (OTF) ones,
 *       and compute the output type of every aggregator per value;</li>
 *   <li>create one {@link DimensionsDescriptor} per (dimensions combination, time bucket) pair and
 *       record its key descriptor, its ID and its value-to-aggregator maps;</li>
 *   <li>compute the aggregate descriptors, the per-combination key/additional-value lists and the
 *       JSON strings for keys and time buckets.</li>
 * </ol>
 *
 * @param keys                   The key objects to use when creating this configuration schema.
 * @param values                 The value objects to use when creating this configuration schema.
 * @param timeBuckets            The time buckets to use when creating this configuration schema.
 * @param dimensionsCombinations The dimensionsCombinations to use when creating this configuration schema.
 * @throws UnsupportedOperationException if a dimensions combination names an aggregator for which
 *                                       {@code aggregatorRegistry.isAggregator} returns false.
 * @throws RuntimeException              wrapping a {@link JSONException} raised while building the keys JSON.
 */
private void initialize(List<Key> keys, List<Value> values, List<TimeBucket> timeBuckets, List<DimensionsCombination> dimensionsCombinations) {
    // Start with no schema-level tags and an empty tag list for every key name and value name.
    tags = Lists.newArrayList();
    keyToTags = Maps.newHashMap();
    for (Key key : keys) {
        keyToTags.put(key.getName(), new ArrayList<String>());
    }
    valueToTags = Maps.newHashMap();
    for (Value value : values) {
        valueToTags.put(value.getName(), new ArrayList<String>());
    }
    // Time buckets: keep the given list, and mirror it as CustomTimeBuckets registered under
    // each bucket's enum ordinal.
    this.timeBuckets = timeBuckets;
    this.customTimeBuckets = new ArrayList<>();
    customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
    for (TimeBucket timeBucket : timeBuckets) {
        CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
        customTimeBuckets.add(customTimeBucket);
        customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
    }
    // Input aggregate values: value name -> declared input type.
    Map<String, Type> valueFieldToType = Maps.newHashMap();
    for (Value value : values) {
        valueFieldToType.put(value.getName(), value.getType());
    }
    inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);
    // Input keys: key name -> type, plus the enum values declared for each key.
    Map<String, Type> keyFieldToType = Maps.newHashMap();
    keysToEnumValuesList = Maps.newHashMap();
    for (Key key : keys) {
        keyFieldToType.put(key.getName(), key.getType());
        keysToEnumValuesList.put(key.getName(), key.getEnumValues());
    }
    keyDescriptor = new FieldsDescriptor(keyFieldToType);
    // A copy of the key types is handed to keyDescriptorWithTime so keyFieldToType itself is not
    // passed to that helper.
    Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
    keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
    // schemaAllValueToAggregatorToType
    schemaAllValueToAggregatorToType = Maps.newHashMap();
    // Per value name:
    //   specificValueToAggregator    - incremental aggregators, including those an OTF aggregator expands to
    //   specificValueToOTFAggregator - the aggregators that are not incremental
    //   allSpecificValueToAggregator - the union of both
    Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
    Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
    Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
    for (Value value : values) {
        String valueName = value.getName();
        Set<String> aggregators = value.getAggregators();
        Set<String> specificAggregatorSet = Sets.newHashSet();
        Set<String> allAggregatorSet = Sets.newHashSet();
        Set<String> otfAggregators = Sets.newHashSet();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                specificAggregatorSet.add(aggregatorName);
                allAggregatorSet.add(aggregatorName);
            } else {
                // Anything that is not incremental is treated as an OTF aggregator and expanded to
                // the incremental aggregators the registry maps it to.
                // NOTE(review): unlike the per-combination loop further down, the name is not checked
                // with isAggregator first; if the registry has no mapping for it, aggregatorNames is
                // presumably null and addAll would fail - confirm names are validated upstream.
                otfAggregators.add(aggregatorName);
                List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
                specificAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.add(aggregatorName);
            }
        }
        specificValueToOTFAggregator.put(valueName, otfAggregators);
        specificValueToAggregator.put(valueName, specificAggregatorSet);
        allSpecificValueToAggregator.put(valueName, allAggregatorSet);
    }
    // Resolve the output type of every aggregator of every value: incremental aggregators derive it
    // from the value's input type, OTF aggregators report it without an input type.
    for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
        String valueName = entry.getKey();
        Type inputType = inputValuesDescriptor.getType(valueName);
        Set<String> aggregators = entry.getValue();
        Map<String, Type> aggregatorToType = Maps.newHashMap();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
                aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
            } else {
                OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
                aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
            }
        }
        schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
    }
    // ddID: the lists below are filled in lock step, so the index of an entry is its dimensions
    // descriptor ID; dimensionsDescriptorToID holds the reverse lookup.
    dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
    dimensionsDescriptorToID = Maps.newHashMap();
    dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
    int ddID = 0;
    // One dimensions descriptor per (combination, time bucket) pair; IDs are assigned sequentially
    // from 0 with the time bucket varying fastest.
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        for (TimeBucket timeBucket : timeBuckets) {
            DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
            dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
            dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
            dimensionsDescriptorToID.put(dd, ddID);
            Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
            Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
            // Aggregators declared on the combination itself (as opposed to on the value).
            Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
            for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
                String value = entry.getKey();
                Set<String> staticAggregatorNames = Sets.newHashSet();
                Set<String> otfAggregatorNames = Sets.newHashSet();
                Set<String> aggregatorNames = entry.getValue();
                for (String aggregatorName : aggregatorNames) {
                    // Combination-level names are validated against the registry before use.
                    if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                        throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
                    }
                    if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                        staticAggregatorNames.add(aggregatorName);
                    } else {
                        // An OTF aggregator contributes its underlying incremental aggregators to the
                        // static set and its own name to the OTF set.
                        staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
                        otfAggregatorNames.add(aggregatorName);
                    }
                }
                valueToAggregator.put(value, staticAggregatorNames);
                valueToOTFAggregator.put(value, otfAggregatorNames);
            }
            // mergeMaps is defined outside this method; presumably it folds the value-level maps
            // (second argument) into the combination-level maps (first argument) - confirm.
            mergeMaps(valueToAggregator, specificValueToAggregator);
            mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
            dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
            dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
            ddID++;
        }
    }
    // Add every non-empty value-level incremental aggregator set to each descriptor's map, creating
    // the target set when the descriptor has none for that value.
    // NOTE(review): this may overlap with the mergeMaps calls above - confirm before simplifying.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
        if (specificValueToAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // Same propagation for the value-level OTF aggregators.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
        if (specificValueToOTFAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // ddIDToAggregatorToAggregateDescriptor: derived from the finished per-descriptor maps, once for
    // the incremental aggregators and once for the OTF aggregators.
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
    dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
    // combination ID values: these two lists receive one entry per dimensions combination (not one
    // per combination/time-bucket pair like the lists filled in the ddID loop above).
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
    dimensionsDescriptorIDToKeys = Lists.newArrayList();
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
        dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
    }
    // Build keyString: a JSON array with one object per key holding its name, type name and enum values.
    JSONArray keyArray = new JSONArray();
    for (Key key : keys) {
        JSONObject jo = new JSONObject();
        try {
            jo.put(FIELD_KEYS_NAME, key.getName());
            jo.put(FIELD_KEYS_TYPE, key.getType().getName());
            JSONArray enumArray = new JSONArray();
            for (Object enumVal : key.getEnumValues()) {
                enumArray.put(enumVal);
            }
            jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
        } catch (JSONException ex) {
            // Rethrown unchecked because this method does not declare JSONException.
            throw new RuntimeException(ex);
        }
        keyArray.put(jo);
    }
    keysString = keyArray.toString();
    // Build time buckets: a JSON array of the text of each custom time bucket.
    JSONArray timeBucketArray = new JSONArray();
    for (CustomTimeBucket timeBucket : customTimeBuckets) {
        timeBucketArray.put(timeBucket.getText());
    }
    bucketsString = timeBucketArray.toString();
    // buildDDIDAggID
    // Composite aggregators are not supported in this method. An empty list is assigned to avoid unit test errors.
    dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
    buildDimensionsDescriptorIDAggregatorIDMaps();
}
Also used : Set(java.util.Set) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) JSONArray(org.codehaus.jettison.json.JSONArray) JSONException(org.codehaus.jettison.json.JSONException) DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONObject(org.codehaus.jettison.json.JSONObject) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 2 with IncrementalAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator in project apex-malhar by apache.

the class DimensionalSchema method initialize.

/**
 * Initializes the schema JSON and schema metadata.
 * <p>
 * Builds the {@code schema} object from {@code configurationSchema}: the optional schema keys, the schema
 * type and version, the optional response delay and tags, the time section, the keys, one value entry
 * per (value, aggregator) pair and one dimensions entry per combination. The serialized result is stored
 * in {@code schemaJSON}.
 * <p>
 * A key or value for which no tag list is registered is treated as having no tags. An aggregator name
 * that the registry does not recognize is logged at error level before its output type is looked up.
 *
 * @throws JSONException This exception is thrown when there is an
 *                       exception building the schema for the AppData dimensions schema.
 */
private void initialize() throws JSONException {
    schema = new JSONObject();
    if (schemaKeys != null) {
        schema.put(Schema.FIELD_SCHEMA_KEYS, SchemaUtils.createJSONObject(schemaKeys));
    }
    schema.put(SnapshotSchema.FIELD_SCHEMA_TYPE, DimensionalSchema.SCHEMA_TYPE);
    schema.put(SnapshotSchema.FIELD_SCHEMA_VERSION, DimensionalSchema.SCHEMA_VERSION);
    // responseDelayMillis is only published when it is positive.
    if (responseDelayMillis > 0) {
        schema.put(FIELD_RESPONSE_DELAY_MILLS, responseDelayMillis);
    }
    if (!configurationSchema.getTags().isEmpty()) {
        schema.put(FIELD_TAGS, new JSONArray(configurationSchema.getTags()));
    }
    // time
    time = new JSONObject();
    schema.put(FIELD_TIME, time);
    JSONArray bucketsArray = new JSONArray(configurationSchema.getBucketsString());
    time.put(FIELD_TIME_BUCKETS, bucketsArray);
    time.put(FIELD_SLIDING_AGGREGATE_SUPPORTED, true);
    // keys: start from the pre-built keys JSON and attach the tags of each key, if any.
    keys = new JSONArray(configurationSchema.getKeysString());
    for (int keyIndex = 0; keyIndex < keys.length(); keyIndex++) {
        JSONObject keyJo = keys.getJSONObject(keyIndex);
        String keyName = keyJo.getString(DimensionalConfigurationSchema.FIELD_KEYS_NAME);
        List<String> keyTags = configurationSchema.getKeyToTags().get(keyName);
        // A missing tag list is handled like an empty one instead of failing on a null dereference.
        if (keyTags != null && !keyTags.isEmpty()) {
            keyJo.put(FIELD_TAGS, new JSONArray(keyTags));
        }
    }
    schema.put(DimensionalConfigurationSchema.FIELD_KEYS, keys);
    // values: one entry per (value, aggregator) pair, named <value><separator><aggregator>.
    JSONArray values = new JSONArray();
    schema.put(SnapshotSchema.FIELD_VALUES, values);
    FieldsDescriptor inputValuesDescriptor = configurationSchema.getInputValuesDescriptor();
    Map<String, Map<String, Type>> allValueToAggregator = configurationSchema.getSchemaAllValueToAggregatorToType();
    for (Map.Entry<String, Map<String, Type>> entry : allValueToAggregator.entrySet()) {
        String valueName = entry.getKey();
        // The tags depend only on the value name, so they are looked up once per value.
        List<String> valueTags = configurationSchema.getValueToTags().get(valueName);
        for (Map.Entry<String, Type> entryAggType : entry.getValue().entrySet()) {
            String aggregatorName = entryAggType.getKey();
            Type outputValueType = entryAggType.getValue();
            JSONObject value = new JSONObject();
            String combinedName = valueName + DimensionalConfigurationSchema.ADDITIONAL_VALUE_SEPERATOR + aggregatorName;
            value.put(SnapshotSchema.FIELD_VALUES_NAME, combinedName);
            value.put(SnapshotSchema.FIELD_VALUES_TYPE, outputValueType.getName());
            if (valueTags != null && !valueTags.isEmpty()) {
                value.put(FIELD_TAGS, new JSONArray(valueTags));
            }
            values.put(value);
        }
    }
    // dimensions: one entry per combination, listing its key fields and any additional values.
    JSONArray dimensions = new JSONArray();
    for (int combinationID = 0; combinationID < configurationSchema.getDimensionsDescriptorIDToKeys().size(); combinationID++) {
        // TODO: the auto-generated combination for computation of composite aggregator will be added.
        // should remove it.
        Fields fields = configurationSchema.getDimensionsDescriptorIDToKeys().get(combinationID);
        Map<String, Set<String>> fieldToAggregatorAdditionalValues = configurationSchema.getDimensionsDescriptorIDToFieldToAggregatorAdditionalValues().get(combinationID);
        JSONObject combination = new JSONObject();
        JSONArray combinationArray = new JSONArray();
        for (String field : fields.getFields()) {
            combinationArray.put(field);
        }
        combination.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS_COMBINATIONS, combinationArray);
        if (!fieldToAggregatorAdditionalValues.isEmpty()) {
            JSONArray additionalValueArray = new JSONArray();
            for (Map.Entry<String, Set<String>> entry : fieldToAggregatorAdditionalValues.entrySet()) {
                String valueName = entry.getKey();
                for (String aggregatorName : entry.getValue()) {
                    JSONObject additionalValueObject = new JSONObject();
                    String combinedName = valueName + DimensionalConfigurationSchema.ADDITIONAL_VALUE_SEPERATOR + aggregatorName;
                    Type inputValueType = inputValuesDescriptor.getType(valueName);
                    // Report every unrecognized aggregator name. The previous code logged only when
                    // the name was null, so invalid non-null names were never reported.
                    if (!configurationSchema.getAggregatorRegistry().isAggregator(aggregatorName)) {
                        LOG.error("{} is not a valid aggregator.", aggregatorName);
                    }
                    // Incremental aggregators derive their output type from the input type; every
                    // other name is looked up among the OTF aggregators.
                    Type outputValueType;
                    if (configurationSchema.getAggregatorRegistry().isIncrementalAggregator(aggregatorName)) {
                        IncrementalAggregator aggregator = configurationSchema.getAggregatorRegistry().getNameToIncrementalAggregator().get(aggregatorName);
                        outputValueType = aggregator.getOutputType(inputValueType);
                    } else {
                        outputValueType = configurationSchema.getAggregatorRegistry().getNameToOTFAggregators().get(aggregatorName).getOutputType();
                    }
                    additionalValueObject.put(DimensionalConfigurationSchema.FIELD_VALUES_NAME, combinedName);
                    additionalValueObject.put(DimensionalConfigurationSchema.FIELD_VALUES_TYPE, outputValueType.getName());
                    additionalValueArray.put(additionalValueObject);
                }
            }
            combination.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS_ADDITIONAL_VALUES, additionalValueArray);
        }
        dimensions.put(combination);
    }
    schema.put(DimensionalConfigurationSchema.FIELD_DIMENSIONS, dimensions);
    this.schemaJSON = this.schema.toString();
}
Also used : Set(java.util.Set) JSONArray(org.codehaus.jettison.json.JSONArray) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map)

Example 3 with IncrementalAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator in project apex-malhar by apache.

the class DimensionalConfigurationSchema method addNonCompositeAggregator.

/**
 * Registers one non-composite (incremental or OTF) aggregator for a value.
 * <p>
 * For an incremental aggregator the name is added to the value's entry in {@code allValueToAggregator}
 * and to {@code aggregatorSet}, and its output type for {@code typeT} is stored in
 * {@code aggregatorToType}. For an OTF aggregator the name is added to the value's entry in
 * {@code allValueToOTFAggregator} and to {@code aggregatorOTFSet}, the incremental aggregators the
 * registry maps it to are added to the value's entry in {@code allValueToAggregator} and to
 * {@code aggregatorSet}, and its output type is stored in {@code aggregatorToType}. Missing map entries
 * are created on demand.
 *
 * @param aggregatorName          name of the aggregator to register
 * @param allValueToAggregator    value name to incremental aggregator names; updated in place
 * @param allValueToOTFAggregator value name to OTF aggregator names; updated in place
 * @param valueName               name of the value the aggregator applies to
 * @param aggregatorSet           incremental aggregator names collected so far; updated in place
 * @param aggregatorToType        aggregator name to output type; updated in place
 * @param typeT                   input type handed to an incremental aggregator to derive its output type
 * @param aggregatorOTFSet        OTF aggregator names collected so far; updated in place
 * @param checkDuplicate          whether registering an already-present aggregator is an error
 * @return the {@link IncrementalAggregator} or {@link OTFAggregator} registered under the name
 * @throws IllegalArgumentException if the name is neither an incremental nor an OTF aggregator, or if
 *                                  {@code checkDuplicate} is set and the aggregator was already present
 */
protected Object addNonCompositeAggregator(String aggregatorName, Map<String, Set<String>> allValueToAggregator, Map<String, Set<String>> allValueToOTFAggregator, String valueName, Set<String> aggregatorSet, Map<String, Type> aggregatorToType, Type typeT, Set<String> aggregatorOTFSet, boolean checkDuplicate) {
    // Reject unknown names up front; the OTF check only runs when the name is not incremental.
    final boolean incremental = aggregatorRegistry.isIncrementalAggregator(aggregatorName);
    if (!incremental && !aggregatorRegistry.isOTFAggregator(aggregatorName)) {
        throw new IllegalArgumentException(aggregatorName + " is not a valid non-composit aggregator.");
    }

    if (incremental) {
        Set<String> incrementalNames = allValueToAggregator.get(valueName);
        if (incrementalNames == null) {
            incrementalNames = Sets.newHashSet();
            allValueToAggregator.put(valueName, incrementalNames);
        }
        incrementalNames.add(aggregatorName);

        final boolean firstIncrementalUse = aggregatorSet.add(aggregatorName);
        if (checkDuplicate && !firstIncrementalUse) {
            throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
        }

        final IncrementalAggregator incrementalAggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
        aggregatorToType.put(aggregatorName, incrementalAggregator.getOutputType(typeT));
        return incrementalAggregator;
    }

    // On-the-fly aggregator: record the name itself, rejecting duplicates when requested.
    Set<String> otfNames = allValueToOTFAggregator.get(valueName);
    if (otfNames == null) {
        otfNames = Sets.newHashSet();
        allValueToOTFAggregator.put(valueName, otfNames);
    }
    final boolean firstOtfUse = otfNames.add(aggregatorName);
    if (checkDuplicate && !firstOtfUse) {
        throw new IllegalArgumentException("An aggregator " + aggregatorName + " cannot be specified twice for a value");
    }
    aggregatorOTFSet.add(aggregatorName);

    // The incremental aggregators behind the OTF aggregator are registered for the value as well.
    Set<String> childTargets = allValueToAggregator.get(valueName);
    if (childTargets == null) {
        childTargets = Sets.newHashSet();
        allValueToAggregator.put(valueName, childTargets);
    }
    final OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
    final List<String> childAggregators = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
    childTargets.addAll(childAggregators);
    aggregatorSet.addAll(childAggregators);
    aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
    LOG.debug("field name {} and adding aggregator names {}:", valueName, childTargets);
    return otfAggregator;
}
Also used : OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)

Example 4 with IncrementalAggregator

use of org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator in project apex-malhar by apache.

the class DimensionalConfigurationSchema method buildNonCompositeAggregatorIDMap.

/**
 * Records the ID and the input/output descriptors of one incremental aggregator.
 * <p>
 * When the registry has no incremental aggregator under {@code aggregatorName} (OTF aggregates do not
 * need to be built here) the method returns without touching its arguments. Otherwise the aggregator's
 * ID is passed to {@code mergeAggregatorID} together with {@code aggIDList}, {@code inputDescriptor} is
 * stored under that ID in {@code inputMap}, and the descriptor returned by
 * {@code AggregatorUtils.getOutputFieldsDescriptor} is stored under that ID in {@code outputMap}.
 *
 * @param aggregatorName  name of the aggregator to look up
 * @param inputDescriptor descriptor of the aggregator's input fields
 * @param aggIDList       list of aggregator IDs handed to {@code mergeAggregatorID}
 * @param inputMap        aggregator ID to input descriptor; updated in place
 * @param outputMap       aggregator ID to output descriptor; updated in place
 */
protected void buildNonCompositeAggregatorIDMap(String aggregatorName, FieldsDescriptor inputDescriptor, IntArrayList aggIDList, Int2ObjectMap<FieldsDescriptor> inputMap, Int2ObjectMap<FieldsDescriptor> outputMap) {
    final IncrementalAggregator resolved = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
    if (resolved != null) {
        final int id = aggregatorRegistry.getIncrementalAggregatorNameToID().get(aggregatorName);
        mergeAggregatorID(aggIDList, id);
        inputMap.put(id, inputDescriptor);
        final FieldsDescriptor outputDescriptor = AggregatorUtils.getOutputFieldsDescriptor(inputDescriptor, resolved);
        outputMap.put(id, outputDescriptor);
    }
}
Also used : IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)

Aggregations

IncrementalAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)4 Map (java.util.Map)2 Set (java.util.Set)2 OTFAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator)2 JSONArray (org.codehaus.jettison.json.JSONArray)2 JSONObject (org.codehaus.jettison.json.JSONObject)2 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1 CustomTimeBucketRegistry (org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry)1 DimensionsDescriptor (org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor)1 JSONException (org.codehaus.jettison.json.JSONException)1