Search in sources :

Example 1 with TopKDimensionToMetricsSpec

use of com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec in project pinot by linkedin.

the class ThirdEyeConfigTest method testTopKWhitelistConfig.

/**
 * Checks the threshold, whitelist and topk parts of the {@link TopkWhitelistSpec}
 * exposed by the config under test, and checks how {@code ThirdEyeConfig.fromProperties}
 * reacts when the corresponding properties are made inconsistent or removed.
 *
 * <p>The statements are order-dependent: {@code props} is mutated step by step and
 * each later section relies on the removals done by the earlier ones.
 * NOTE(review): {@code thirdeyeConfig}, {@code props} and {@code config} are fixture
 * fields declared outside this method; presumably they are initialised in a setup
 * method - confirm against the test class.
 */
@Test
public void testTopKWhitelistConfig() throws IllegalArgumentException {
    boolean failed = false;
    TopkWhitelistSpec topKWhitelistSpec = thirdeyeConfig.getTopKWhitelist();

    // thresholds
    Map<String, Double> threshold = topKWhitelistSpec.getThreshold();
    Assert.assertEquals(threshold.size(), 2, "Incorrect metric thresholds size");
    Assert.assertTrue(threshold.get("m1") == 0.02 && threshold.get("m3") == 0.1, "Incorrect metric thresholds config");
    try {
        // A single threshold value no longer matches the number of threshold metric names.
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.1");
        config = ThirdEyeConfig.fromProperties(props);
    } catch (IllegalStateException e) {
        failed = true;
    }
    Assert.assertTrue(failed, "Expected exception due to unequal number of metrics and threshold");
    // With both threshold properties removed, no threshold map is expected.
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString());
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertNull(config.getTopKWhitelist().getThreshold(), "Default threshold config should be null");

    // whitelist
    Map<String, String> whitelist = topKWhitelistSpec.getWhitelist();
    Assert.assertEquals(whitelist.size(), 2, "Incorrect size of whitelist dimensions");
    Assert.assertEquals(whitelist.get("d1"), "x,y", "Incorrect whitelist config");
    Assert.assertEquals(whitelist.get("d2"), "a", "Incorrect whitelist config");
    props.remove(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertNull(config.getTopKWhitelist().getWhitelist(), "Default whitelist config should be null");

    // topk
    List<TopKDimensionToMetricsSpec> topk = topKWhitelistSpec.getTopKDimensionToMetricsSpec();
    Assert.assertEquals(topk.size(), 2, "Incorrect topk dimensions config size");
    TopKDimensionToMetricsSpec topkSpec = topk.get(0);
    Assert.assertTrue(topkSpec.getDimensionName().equals("d2") && topkSpec.getTopk().size() == 2 && topkSpec.getTopk().get("m1") == 20 && topkSpec.getTopk().get("m2") == 30, "Incorrect topk config");
    topkSpec = topk.get(1);
    Assert.assertTrue(topkSpec.getDimensionName().equals("d3") && topkSpec.getTopk().size() == 1 && topkSpec.getTopk().get("m1") == 50, "Incorrect topk config");
    failed = false;
    try {
        // One metric but two k values for dimension d3 must be rejected.
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50,50");
        config = ThirdEyeConfig.fromProperties(props);
    } catch (IllegalStateException e) {
        failed = true;
    }
    Assert.assertTrue(failed, "Expected exception due to unequal number of metrics and kvalues for dimension");
    // Thresholds and whitelist were removed above; dropping the topk dimensions as well
    // leaves nothing configured, so the whole spec is expected to be absent.
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertNull(config.getTopKWhitelist(), "Default topk should be null");
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) Test(org.testng.annotations.Test)

Example 2 with TopKDimensionToMetricsSpec

use of com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec in project pinot by linkedin.

the class DerivedColumnTransformationPhaseJob method newSchema.

/**
 * Builds the Avro output schema for the given ThirdEye config.
 *
 * <p>The record is named after the config's collection and contains, in order:
 * one nullable string field per dimension (immediately followed by an extra nullable
 * string field named {@code dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX} when
 * the dimension has a topk spec), one nullable numeric field per metric, and a
 * non-nullable long field for the time column. The resulting schema is logged.
 *
 * @param thirdeyeConfig config supplying collection name, dimensions, metrics,
 *        time column and the optional topk whitelist spec
 * @return the assembled schema
 */
public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
    // Names of the dimensions that have a topk spec and therefore get an extra column.
    Set<String> topKDimensions = new HashSet<>();
    TopkWhitelistSpec whitelistSpec = thirdeyeConfig.getTopKWhitelist();
    List<TopKDimensionToMetricsSpec> topKSpecs =
        whitelistSpec == null ? null : whitelistSpec.getTopKDimensionToMetricsSpec();
    if (topKSpecs != null) {
        for (TopKDimensionToMetricsSpec spec : topKSpecs) {
            topKDimensions.add(spec.getDimensionName());
        }
    }

    FieldAssembler<Schema> fields = SchemaBuilder.record(thirdeyeConfig.getCollection()).fields();

    // Dimension columns, each optionally followed by its topk companion column.
    for (String dimensionName : thirdeyeConfig.getDimensionNames()) {
        fields = fields.name(dimensionName).type().nullable().stringType().noDefault();
        if (!topKDimensions.contains(dimensionName)) {
            continue;
        }
        String topKColumn = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        fields = fields.name(topKColumn).type().nullable().stringType().noDefault();
    }

    // Metric columns, typed according to the metric's declared type.
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        String metricName = metricSpec.getName();
        MetricType type = metricSpec.getType();
        BaseFieldTypeBuilder<Schema> typeBuilder = fields.name(metricName).type().nullable();
        switch (type) {
            case DOUBLE:
                fields = typeBuilder.doubleType().noDefault();
                break;
            case FLOAT:
                fields = typeBuilder.floatType().noDefault();
                break;
            case SHORT:
            case INT:
                fields = typeBuilder.intType().noDefault();
                break;
            default:
                // LONG and every remaining metric type are stored as long.
                fields = typeBuilder.longType().noDefault();
                break;
        }
    }

    // Time column is the only field that is not nullable.
    fields = fields.name(thirdeyeConfig.getTime().getColumnName()).type().longType().noDefault();

    Schema schema = fields.endRecord();
    LOGGER.info("New schema {}", schema.toString(true));
    return schema;
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) Schema(org.apache.avro.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) HashSet(java.util.HashSet)

Example 3 with TopKDimensionToMetricsSpec

use of com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec in project pinot by linkedin.

the class TopKPhaseConfig method fromThirdEyeConfig.

/**
 * Generates the top k config needed by the TopKPhase job from the ThirdEye config.
 *
 * <p>Metric names/types and dimension names are copied from the config. When the
 * config carries a topk whitelist spec, this method additionally collects:
 * <ul>
 *   <li>metric thresholds - the configured values, with
 *       {@code DEFAULT_METRIC_THRESHOLD} filled in for every metric that has none;</li>
 *   <li>topk specs keyed by dimension name;</li>
 *   <li>whitelist values per dimension, split on {@code FIELD_SEPARATOR} into a set.</li>
 * </ul>
 * Without a topk whitelist spec all three maps are left empty.
 *
 * <p>The thresholds are copied into a fresh map, so filling in defaults never
 * modifies the map held by {@code config}.
 *
 * @param config ThirdEye config to read metrics, dimensions and the topk whitelist spec from
 * @return the TopKPhaseConfig assembled from the values described above
 */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    //metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Double> metricThresholds = new HashMap<>();
    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topk
    if (topKWhitelist != null) {
        // metric thresholds: copy instead of aliasing the spec's map, otherwise the
        // defaults added below would leak back into the ThirdEye config (or fail
        // outright if that map is unmodifiable)
        Map<String, Double> configuredThresholds = topKWhitelist.getThreshold();
        if (configuredThresholds != null) {
            metricThresholds.putAll(configuredThresholds);
        }
        for (String metric : metricNames) {
            if (metricThresholds.get(metric) == null) {
                metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
            }
        }
        // topk
        if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
            for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
                topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
            }
        }
        // whitelist
        if (topKWhitelist.getWhitelist() != null) {
            for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
                String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
                whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
            }
        }
    }
    return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) Set(java.util.Set) HashSet(java.util.HashSet) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) HashMap(java.util.HashMap) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)

Example 4 with TopKDimensionToMetricsSpec

use of com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec in project pinot by linkedin.

the class ThirdEyeConfig method getTopKDimensionToMetricsSpecFromProperties.

/**
 * Reads the per-dimension topk configuration from the given properties.
 *
 * <p>The topk dimension names property is split on {@code FIELD_SEPARATOR}; for each
 * dimension the metric names and k values are looked up under the topk metrics /
 * kvalues property keys suffixed with {@code CONFIG_JOINER + dimension}, and paired
 * up positionally into a metric-to-k map.
 *
 * @param props properties to read from
 * @return one spec per configured topk dimension, or {@code null} when the topk
 *         dimension names property is empty or yields no names
 * @throws IllegalStateException if a dimension has a different number of metric
 *         names and k values
 */
private static List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpecFromProperties(Properties props) {
    String dimensionNamesValue = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), null);
    String[] dimensions = StringUtils.isNotEmpty(dimensionNamesValue)
        ? dimensionNamesValue.split(FIELD_SEPARATOR)
        : new String[0];
    if (dimensions.length == 0) {
        return null;
    }

    List<TopKDimensionToMetricsSpec> specs = new ArrayList<>();
    for (String dimension : dimensions) {
        String metricsKey = ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + CONFIG_JOINER + dimension;
        String[] metrics = getAndCheck(props, metricsKey).split(FIELD_SEPARATOR);
        String kValuesKey = ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + CONFIG_JOINER + dimension;
        String[] kValues = getAndCheck(props, kValuesKey).split(FIELD_SEPARATOR);
        if (metrics.length != kValues.length) {
            throw new IllegalStateException("Number of topk metric names and kvalues should be same for a dimension");
        }

        // Pair the i-th metric with the i-th k value.
        Map<String, Integer> kByMetric = new HashMap<>();
        int index = 0;
        for (String metric : metrics) {
            kByMetric.put(metric, Integer.parseInt(kValues[index]));
            index++;
        }
        specs.add(new TopKDimensionToMetricsSpec(dimension, kByMetric));
    }
    return specs;
}
Also used : TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) HashMap(java.util.HashMap)

Example 5 with TopKDimensionToMetricsSpec

use of com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec in project pinot by linkedin.

the class ThirdEyeConfig method getTopKWhitelistFromProperties.

/**
 * Assembles the topk whitelist spec from the threshold, topk and whitelist
 * sections of the given properties.
 *
 * @param props properties to read from
 * @return a spec holding the three sections (any of which may be {@code null}),
 *         or {@code null} when none of the three sections is configured
 */
private static TopkWhitelistSpec getTopKWhitelistFromProperties(Properties props) {
    Map<String, Double> thresholdSection = getThresholdFromProperties(props);
    List<TopKDimensionToMetricsSpec> topKSection = getTopKDimensionToMetricsSpecFromProperties(props);
    Map<String, String> whitelistSection = getWhitelistFromProperties(props);

    boolean nothingConfigured = thresholdSection == null && topKSection == null && whitelistSection == null;
    if (nothingConfigured) {
        return null;
    }

    TopkWhitelistSpec spec = new TopkWhitelistSpec();
    spec.setThreshold(thresholdSection);
    spec.setTopKDimensionToMetricsSpec(topKSection);
    spec.setWhitelist(whitelistSection);
    return spec;
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)

Aggregations

TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)5 TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec)4 MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec)2 MetricType (com.linkedin.thirdeye.hadoop.config.MetricType)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec)1 ArrayList (java.util.ArrayList)1 Set (java.util.Set)1 Schema (org.apache.avro.Schema)1 Test (org.testng.annotations.Test)1