Search in sources :

Example 1 with DimensionsDescriptor

Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in the apex-malhar project by Apache.

From the class DimensionalConfigurationSchema, method fulfillCompositeAggregatorExtraInfo.

/**
 * Fulfills the embedded ddids of the composite aggregators.
 * For each composite aggregator, takes the dimensions descriptor it belongs to,
 * generates the field keys of its embedded aggregator, and registers one embedded
 * ddid per time bucket of the matching key combination.
 */
protected void fulfillCompositeAggregatorExtraInfo() {
    Map<Set<String>, Integer> keysToCombinationId = getKeysToCombinationId();
    final int timeBucketSize = customTimeBuckets.size();
    for (int index = 0; index < dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.size(); ++index) {
        Map<String, FieldsDescriptor> compositeAggregatorNameToDescriptor = dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.get(index);
        // Iterate over entries instead of keySet() to avoid a second map lookup per aggregator.
        for (Map.Entry<String, FieldsDescriptor> aggregatorEntry : compositeAggregatorNameToDescriptor.entrySet()) {
            String compositeAggregatorName = aggregatorEntry.getKey();
            AbstractTopBottomAggregator compositeAggregator = aggregatorRegistry.getNameToTopBottomAggregator().get(compositeAggregatorName);
            // The list index doubles as the dimensions descriptor id for this aggregator.
            compositeAggregator.setDimensionDescriptorID(index);
            compositeAggregator.setAggregatorID(aggregatorRegistry.getTopBottomAggregatorNameToID().get(compositeAggregatorName));
            // Keys for the embedded aggregator: start from the composite's own key fields.
            Set<String> keys = Sets.newHashSet();
            DimensionsDescriptor dd = dimensionsDescriptorIDToDimensionsDescriptor.get(index);
            keys.addAll(dd.getFields().getFieldsList());
            // Snapshot the composite's keys before the sub-combination fields are added below.
            Set<String> compositeKeys = Sets.newHashSet(keys);
            compositeAggregator.setFields(compositeKeys);
            compositeAggregator.setAggregateDescriptor(aggregatorEntry.getValue());
            // The composite keys plus the sub-combination fields identify the embedded combination.
            keys.addAll(compositeAggregator.getSubCombinations());
            Integer combinationId = keysToCombinationId.get(keys);
            if (combinationId == null) {
                throw new RuntimeException("Can't find combination id for keys: " + keys);
            }
            // Each combination owns a contiguous run of ddids, one per time bucket.
            for (int ddid = combinationId * timeBucketSize; ddid < (combinationId + 1) * timeBucketSize; ++ddid) {
                compositeAggregator.addEmbedAggregatorDdId(ddid);
            }
        }
    }
}
Also used : DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) Set(java.util.Set) AbstractTopBottomAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.AbstractTopBottomAggregator)

Example 2 with DimensionsDescriptor

Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in the apex-malhar project by Apache.

From the class DimensionalConfigurationSchema, method initialize.

/**
 * This is a helper method which initializes the metadata for the {@link DimensionalConfigurationSchema}.
 * <p>
 * The method runs in sequential phases: tag bookkeeping, time-bucket registration,
 * value/key field descriptors, value-to-aggregator maps, per-ddID parallel lists
 * (one entry per combination-and-time-bucket pair), and finally the JSON key and
 * bucket strings. The parallel lists built here are index-aligned by ddID, so the
 * statement order matters.
 *
 * @param keys                   The key objects to use when creating this configuration schema.
 * @param values                 The value objects to use when creating this configuration schema.
 * @param timeBuckets            The time buckets to use when creating this configuration schema.
 * @param dimensionsCombinations The dimensionsCombinations to use when creating this configuration schema.
 */
private void initialize(List<Key> keys, List<Value> values, List<TimeBucket> timeBuckets, List<DimensionsCombination> dimensionsCombinations) {
    // Tag bookkeeping: every key and value starts with an empty tag list.
    tags = Lists.newArrayList();
    keyToTags = Maps.newHashMap();
    for (Key key : keys) {
        keyToTags.put(key.getName(), new ArrayList<String>());
    }
    valueToTags = Maps.newHashMap();
    for (Value value : values) {
        valueToTags.put(value.getName(), new ArrayList<String>());
    }
    // time buckets: wrap each TimeBucket into a CustomTimeBucket and register it
    // under the TimeBucket's ordinal so ordinal-based lookups resolve to it.
    this.timeBuckets = timeBuckets;
    this.customTimeBuckets = new ArrayList<>();
    customTimeBucketRegistry = new CustomTimeBucketRegistry(STARTING_TIMEBUCKET_ID);
    for (TimeBucket timeBucket : timeBuckets) {
        CustomTimeBucket customTimeBucket = new CustomTimeBucket(timeBucket);
        customTimeBuckets.add(customTimeBucket);
        customTimeBucketRegistry.register(customTimeBucket, timeBucket.ordinal());
    }
    // Input aggregate values: field name -> declared type.
    Map<String, Type> valueFieldToType = Maps.newHashMap();
    for (Value value : values) {
        valueFieldToType.put(value.getName(), value.getType());
    }
    inputValuesDescriptor = new FieldsDescriptor(valueFieldToType);
    // Input keys: field name -> type, plus the per-key enum value lists.
    Map<String, Type> keyFieldToType = Maps.newHashMap();
    keysToEnumValuesList = Maps.newHashMap();
    for (Key key : keys) {
        keyFieldToType.put(key.getName(), key.getType());
        keysToEnumValuesList.put(key.getName(), key.getEnumValues());
    }
    keyDescriptor = new FieldsDescriptor(keyFieldToType);
    Map<String, Type> fieldToTypeWithTime = Maps.newHashMap(keyFieldToType);
    keyDescriptorWithTime = keyDescriptorWithTime(fieldToTypeWithTime, customTimeBuckets);
    // schemaAllValueToAggregatorToType: resolve, per value, every aggregator name
    // (incremental directly; OTF expanded to its depended incremental aggregators)
    // and the output type each aggregator produces for that value's input type.
    schemaAllValueToAggregatorToType = Maps.newHashMap();
    Map<String, Set<String>> specificValueToAggregator = Maps.newHashMap();
    Map<String, Set<String>> specificValueToOTFAggregator = Maps.newHashMap();
    Map<String, Set<String>> allSpecificValueToAggregator = Maps.newHashMap();
    for (Value value : values) {
        String valueName = value.getName();
        Set<String> aggregators = value.getAggregators();
        Set<String> specificAggregatorSet = Sets.newHashSet();
        Set<String> allAggregatorSet = Sets.newHashSet();
        Set<String> otfAggregators = Sets.newHashSet();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                specificAggregatorSet.add(aggregatorName);
                allAggregatorSet.add(aggregatorName);
            } else {
                // OTF aggregator: record it and pull in the incremental
                // aggregators it is computed from.
                otfAggregators.add(aggregatorName);
                List<String> aggregatorNames = aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName);
                specificAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.addAll(aggregatorNames);
                allAggregatorSet.add(aggregatorName);
            }
        }
        specificValueToOTFAggregator.put(valueName, otfAggregators);
        specificValueToAggregator.put(valueName, specificAggregatorSet);
        allSpecificValueToAggregator.put(valueName, allAggregatorSet);
    }
    for (Map.Entry<String, Set<String>> entry : allSpecificValueToAggregator.entrySet()) {
        String valueName = entry.getKey();
        Type inputType = inputValuesDescriptor.getType(valueName);
        Set<String> aggregators = entry.getValue();
        Map<String, Type> aggregatorToType = Maps.newHashMap();
        for (String aggregatorName : aggregators) {
            if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                IncrementalAggregator aggregator = aggregatorRegistry.getNameToIncrementalAggregator().get(aggregatorName);
                aggregatorToType.put(aggregatorName, aggregator.getOutputType(inputType));
            } else {
                // OTF output type does not depend on the input type.
                OTFAggregator otfAggregator = aggregatorRegistry.getNameToOTFAggregators().get(aggregatorName);
                aggregatorToType.put(aggregatorName, otfAggregator.getOutputType());
            }
        }
        schemaAllValueToAggregatorToType.put(valueName, aggregatorToType);
    }
    // ddID: one id per (combination, time bucket) pair, assigned in nested-loop
    // order, i.e. ddID = combinationIndex * timeBuckets.size() + bucketIndex.
    // All lists below are indexed by ddID and must stay aligned.
    dimensionsDescriptorIDToDimensionsDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToKeyDescriptor = Lists.newArrayList();
    dimensionsDescriptorToID = Maps.newHashMap();
    dimensionsDescriptorIDToValueToAggregator = Lists.newArrayList();
    dimensionsDescriptorIDToValueToOTFAggregator = Lists.newArrayList();
    int ddID = 0;
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        for (TimeBucket timeBucket : timeBuckets) {
            DimensionsDescriptor dd = new DimensionsDescriptor(timeBucket, dimensionsCombination.getFields());
            dimensionsDescriptorIDToDimensionsDescriptor.add(dd);
            dimensionsDescriptorIDToKeyDescriptor.add(dd.createFieldsDescriptor(keyDescriptor));
            dimensionsDescriptorToID.put(dd, ddID);
            // Per-combination aggregators, validated and split into incremental
            // ("static") and OTF names; OTF names also expand to their depended
            // incremental aggregators.
            Map<String, Set<String>> valueToAggregator = Maps.newHashMap();
            Map<String, Set<String>> valueToOTFAggregator = Maps.newHashMap();
            Map<String, Set<String>> tempValueToAggregator = dimensionsCombination.getValueToAggregators();
            for (Map.Entry<String, Set<String>> entry : tempValueToAggregator.entrySet()) {
                String value = entry.getKey();
                Set<String> staticAggregatorNames = Sets.newHashSet();
                Set<String> otfAggregatorNames = Sets.newHashSet();
                Set<String> aggregatorNames = entry.getValue();
                for (String aggregatorName : aggregatorNames) {
                    if (!aggregatorRegistry.isAggregator(aggregatorName)) {
                        throw new UnsupportedOperationException("The aggregator " + aggregatorName + " is not valid.");
                    }
                    if (aggregatorRegistry.isIncrementalAggregator(aggregatorName)) {
                        staticAggregatorNames.add(aggregatorName);
                    } else {
                        staticAggregatorNames.addAll(aggregatorRegistry.getOTFAggregatorToIncrementalAggregators().get(aggregatorName));
                        otfAggregatorNames.add(aggregatorName);
                    }
                }
                valueToAggregator.put(value, staticAggregatorNames);
                valueToOTFAggregator.put(value, otfAggregatorNames);
            }
            // Merge the schema-wide per-value aggregators into this combination's maps.
            mergeMaps(valueToAggregator, specificValueToAggregator);
            mergeMaps(valueToOTFAggregator, specificValueToOTFAggregator);
            dimensionsDescriptorIDToValueToAggregator.add(valueToAggregator);
            dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregator);
            ddID++;
        }
    }
    // NOTE(review): the two loops below merge the schema-wide aggregators into
    // every ddID's map again, which appears to duplicate the mergeMaps calls
    // above — confirm whether this is intentional belt-and-braces or redundant.
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToAggregator) {
        if (specificValueToAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    for (Map<String, Set<String>> valueToAggregator : dimensionsDescriptorIDToValueToOTFAggregator) {
        if (specificValueToOTFAggregator.isEmpty()) {
            continue;
        }
        for (Map.Entry<String, Set<String>> entry : specificValueToOTFAggregator.entrySet()) {
            String valueName = entry.getKey();
            Set<String> aggName = entry.getValue();
            if (aggName.isEmpty()) {
                continue;
            }
            Set<String> ddAggregatorSet = valueToAggregator.get(valueName);
            if (ddAggregatorSet == null) {
                ddAggregatorSet = Sets.newHashSet();
                valueToAggregator.put(valueName, ddAggregatorSet);
            }
            ddAggregatorSet.addAll(aggName);
        }
    }
    // ddIDToAggregatorToAggregateDescriptor
    dimensionsDescriptorIDToAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToAggregator);
    dimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor = computeAggregatorToAggregateDescriptor(dimensionsDescriptorIDToValueToOTFAggregator);
    // Combination-level (not ddID-level) parallel lists: one entry per combination.
    dimensionsDescriptorIDToFieldToAggregatorAdditionalValues = Lists.newArrayList();
    dimensionsDescriptorIDToKeys = Lists.newArrayList();
    for (DimensionsCombination dimensionsCombination : dimensionsCombinations) {
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(dimensionsCombination.getValueToAggregators());
        dimensionsDescriptorIDToKeys.add(dimensionsCombination.getFields());
    }
    // Build keysString: a JSON array of {name, type, enumValues} objects.
    JSONArray keyArray = new JSONArray();
    for (Key key : keys) {
        JSONObject jo = new JSONObject();
        try {
            jo.put(FIELD_KEYS_NAME, key.getName());
            jo.put(FIELD_KEYS_TYPE, key.getType().getName());
            JSONArray enumArray = new JSONArray();
            for (Object enumVal : key.getEnumValues()) {
                enumArray.put(enumVal);
            }
            jo.put(FIELD_KEYS_ENUMVALUES, enumArray);
        } catch (JSONException ex) {
            // JSONException here indicates a programming error, not bad input.
            throw new RuntimeException(ex);
        }
        keyArray.put(jo);
    }
    keysString = keyArray.toString();
    // Build bucketsString: a JSON array of the bucket texts (e.g. "1m", "1h").
    JSONArray timeBucketArray = new JSONArray();
    for (CustomTimeBucket timeBucket : customTimeBuckets) {
        timeBucketArray.put(timeBucket.getText());
    }
    bucketsString = timeBucketArray.toString();
    // buildDDIDAggID
    // composite aggregators are not supported in this method. add empty list to avoid unit test error.
    dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor = Lists.newArrayList();
    buildDimensionsDescriptorIDAggregatorIDMaps();
}
Also used : Set(java.util.Set) IncrementalAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator) CustomTimeBucketRegistry(org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry) OTFAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator) JSONArray(org.codehaus.jettison.json.JSONArray) JSONException(org.codehaus.jettison.json.JSONException) DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONObject(org.codehaus.jettison.json.JSONObject) JSONObject(org.codehaus.jettison.json.JSONObject) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 3 with DimensionsDescriptor

Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in the apex-malhar project by Apache.

From the class DimensionalConfigurationSchemaTest, method testAllCombinationsGeneration.

@Test
public void testAllCombinationsGeneration() {
    // Build the schema from a spec that (per its file name) requests all key combinations.
    DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSimpleAllCombinations.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY);
    // Expected: the full power set of {publisher, advertiser, location},
    // each paired with the MINUTE time bucket.
    Set<DimensionsDescriptor> expectedDescriptors = Sets.newHashSet();
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(new HashSet<String>())));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("location"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "location"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser", "location"))));
    expectedDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser", "location"))));
    // Fix: the previous version also built and sorted two Lists (ddList /
    // ddActualList) that were never asserted — dead code. Compare the sets directly.
    Set<DimensionsDescriptor> actualDescriptors = Sets.newHashSet(schema.getDimensionsDescriptorToID().keySet());
    Assert.assertEquals(expectedDescriptors, actualDescriptors);
}
Also used : DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) Test(org.junit.Test)

Example 4 with DimensionsDescriptor

Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in the apex-malhar project by Apache.

From the class DimensionalConfigurationSchemaTest, method testCustomTimeBuckets.

@Test
public void testCustomTimeBuckets() {
    // Schema backed by a spec declaring three unit buckets (1m, 1h, 1d) and two
    // custom ones (5m, 3h).
    DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaCustomTimeBuckets.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY);
    Assert.assertEquals(3, schema.getTimeBuckets().size());
    Assert.assertEquals(5, schema.getCustomTimeBuckets().size());
    List<CustomTimeBucket> expectedBuckets = Lists.newArrayList(new CustomTimeBucket(TimeBucket.MINUTE), new CustomTimeBucket(TimeBucket.HOUR), new CustomTimeBucket(TimeBucket.DAY), new CustomTimeBucket(TimeBucket.MINUTE, 5), new CustomTimeBucket(TimeBucket.HOUR, 3));
    Assert.assertEquals(expectedBuckets, schema.getCustomTimeBuckets());
    Assert.assertEquals(40, schema.getDimensionsDescriptorIDToKeyDescriptor().size());
    // The serialized bucket string preserves declaration order.
    JSONArray expectedBucketText = new JSONArray();
    expectedBucketText.put("1m").put("1h").put("1d").put("5m").put("3h");
    Assert.assertEquals(expectedBucketText.toString(), schema.getBucketsString());
    // Each unit bucket is retrievable from the registry by its TimeBucket ordinal.
    for (TimeBucket unit : Lists.newArrayList(TimeBucket.MINUTE, TimeBucket.HOUR, TimeBucket.DAY)) {
        CustomTimeBucket registered = schema.getCustomTimeBucketRegistry().getTimeBucket(unit.ordinal());
        Assert.assertTrue(registered.isUnit());
        Assert.assertEquals(unit, registered.getTimeBucket());
    }
    // The two non-unit buckets receive the ids 256 and 257.
    int fiveMinuteId = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.MINUTE, 5));
    int threeHourId = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.HOUR, 3));
    Assert.assertEquals(256, fiveMinuteId);
    Assert.assertEquals(257, threeHourId);
    // Consecutive ddIDs cycle through the five buckets in declaration order.
    for (int id = 0; id < schema.getDimensionsDescriptorIDToDimensionsDescriptor().size(); id++) {
        DimensionsDescriptor descriptor = schema.getDimensionsDescriptorIDToDimensionsDescriptor().get(id);
        Assert.assertEquals(expectedBuckets.get(id % 5), descriptor.getCustomTimeBucket());
    }
}
Also used : DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) JSONArray(org.codehaus.jettison.json.JSONArray) Test(org.junit.Test)

Example 5 with DimensionsDescriptor

Use of org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor in the apex-malhar project by Apache.

From the class DimensionalConfigurationSchema, method addValueToAggregatorToCombination.

/**
 * Registers one aggregator for a key combination: records the value-to-aggregator
 * mappings and appends one dimensions descriptor (and aligned bookkeeping entries)
 * per custom time bucket.
 *
 * @param values         the value fields to be computed by the aggregator
 * @param allKeys        the set of keys that together represent the combination
 * @param aggregatorName the name of the aggregator (incremental or OTF) to be added to this combination
 */
protected void addValueToAggregatorToCombination(Set<String> values, Set<String> allKeys, String aggregatorName) {
    Map<String, Set<String>> valueToIncrementalAggregators = Maps.newHashMap();
    Map<String, Set<String>> valueToOTFAggregators = Maps.newHashMap();
    Set<String> incrementalAggregatorNames;
    boolean isOTFAggregator = false;
    if (!isIncrementalAggregator(aggregatorName)) {
        // For an OTF aggregator, also add the incremental aggregators it depends on.
        incrementalAggregatorNames = getOTFDependedIncrementalAggregatorNames(aggregatorName);
        isOTFAggregator = true;
    } else {
        incrementalAggregatorNames = Sets.newHashSet();
        incrementalAggregatorNames.add(aggregatorName);
    }
    // NOTE(review): every value shares the same incrementalAggregatorNames set
    // instance, so mutating it through one map entry would affect all entries —
    // confirm this aliasing is intended.
    for (String value : values) {
        valueToIncrementalAggregators.put(value, incrementalAggregatorNames);
        if (isOTFAggregator) {
            valueToOTFAggregators.put(value, Sets.newHashSet(aggregatorName));
        }
    }
    // One entry per custom time bucket; the same maps are appended each iteration
    // so the parallel per-ddID lists stay index-aligned.
    for (CustomTimeBucket customTimeBucket : customTimeBucketsCombination) {
        dimensionsDescriptorIDToValueToAggregator.add(valueToIncrementalAggregators);
        dimensionsDescriptorIDToValueToOTFAggregator.add(valueToOTFAggregators);
        // add empty information just for alignment
        dimensionsDescriptorIDToValueToCompositeAggregator.add(Collections.<String, Set<String>>emptyMap());
        dimensionsDescriptorIDToFieldToAggregatorAdditionalValues.add(Collections.<String, Set<String>>emptyMap());
        // key descriptor
        DimensionsDescriptor dimensionsDescriptor = new DimensionsDescriptor(customTimeBucket, new Fields(allKeys));
        dimensionsDescriptorIDToDimensionsDescriptor.add(dimensionsDescriptor);
        dimensionsDescriptorIDToKeyDescriptor.add(dimensionsDescriptor.createFieldsDescriptor(keyDescriptor));
    }
}
Also used : DimensionsDescriptor(org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor) Set(java.util.Set)

Aggregations

DimensionsDescriptor (org.apache.apex.malhar.lib.dimensions.DimensionsDescriptor)6 Set (java.util.Set)4 JSONArray (org.codehaus.jettison.json.JSONArray)3 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)2 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)2 Map (java.util.Map)2 CustomTimeBucketRegistry (org.apache.apex.malhar.lib.dimensions.CustomTimeBucketRegistry)2 JSONException (org.codehaus.jettison.json.JSONException)2 JSONObject (org.codehaus.jettison.json.JSONObject)2 Test (org.junit.Test)2 AbstractCompositeAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.AbstractCompositeAggregator)1 AbstractTopBottomAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.AbstractTopBottomAggregator)1 CompositeAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.CompositeAggregator)1 IncrementalAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.IncrementalAggregator)1 OTFAggregator (org.apache.apex.malhar.lib.dimensions.aggregator.OTFAggregator)1