Search in sources :

Example 1 with TopkWhitelistSpec

use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.

the class DerivedColumnTransformationPhaseConfig method fromThirdEyeConfig.

/**
 * Builds the derived column transformation phase config from a ThirdEye config.
 *
 * <p>Collects metric names and types, dimension names, the time column name and,
 * when a top-k/whitelist spec with a whitelist is present, the whitelisted values
 * for each whitelisted dimension. When there is no whitelist the resulting
 * whitelist map is empty.
 *
 * @param config ThirdEye config to read from
 * @return the phase config assembled from {@code config}
 */
public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // Metric names and types are kept as parallel lists, in config order.
    final int metricCount = config.getMetrics().size();
    List<String> metricNames = new ArrayList<String>(metricCount);
    List<MetricType> metricTypes = new ArrayList<MetricType>(metricCount);
    for (MetricSpec metric : config.getMetrics()) {
        metricNames.add(metric.getName());
        metricTypes.add(metric.getType());
    }

    // Dimension names, in config order.
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimension : config.getDimensions()) {
        dimensionNames.add(dimension.getName());
    }

    // Time column.
    String timeColumnName = config.getTime().getColumnName();

    // Whitelist: each dimension maps to the set of values obtained by splitting
    // its configured string on FIELD_SEPARATOR.
    TopkWhitelistSpec topkSpec = config.getTopKWhitelist();
    Map<String, Set<String>> whitelist = new HashMap<>();
    Map<String, String> rawWhitelist = (topkSpec == null) ? null : topkSpec.getWhitelist();
    if (rawWhitelist != null) {
        for (Entry<String, String> dimensionToValues : rawWhitelist.entrySet()) {
            String[] values = dimensionToValues.getValue().split(FIELD_SEPARATOR);
            Set<String> valueSet = new HashSet<String>(Arrays.asList(values));
            whitelist.put(dimensionToValues.getKey(), valueSet);
        }
    }

    return new DerivedColumnTransformationPhaseConfig(dimensionNames, metricNames, metricTypes, timeColumnName, whitelist);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) Set(java.util.Set) HashSet(java.util.HashSet) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) HashMap(java.util.HashMap) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList)

Example 2 with TopkWhitelistSpec

use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.

the class ThirdEyeConfigTest method testTopKWhitelistConfig.

/**
 * Checks the threshold, whitelist and top-k parts of the top-k/whitelist spec
 * parsed into {@code thirdeyeConfig}, then mutates {@code props} step by step to
 * check the error and default cases. The steps depend on each other's changes to
 * {@code props}, so their order matters.
 */
@Test
public void testTopKWhitelistConfig() throws IllegalArgumentException {
    TopkWhitelistSpec spec = thirdeyeConfig.getTopKWhitelist();

    // ---- thresholds ----
    Map<String, Double> thresholds = spec.getThreshold();
    Assert.assertEquals(thresholds.size(), 2, "Incorrect metric thresholds size");
    boolean thresholdsMatch = thresholds.get("m1") == 0.02 && thresholds.get("m3") == 0.1;
    Assert.assertEquals(thresholdsMatch, true, "Incorrect metric thresholds config");

    // A single threshold value no longer matches the number of threshold metrics.
    boolean threw = false;
    try {
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.1");
        config = ThirdEyeConfig.fromProperties(props);
    } catch (IllegalStateException e) {
        threw = true;
    }
    Assert.assertTrue(threw, "Expected exception due to unequal number of metrics and threshold");

    // With both threshold properties removed the threshold config is null.
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString());
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertEquals(config.getTopKWhitelist().getThreshold(), null, "Default threshold config should be null");

    // ---- whitelist ----
    Map<String, String> whitelistByDimension = spec.getWhitelist();
    Assert.assertEquals(whitelistByDimension.size(), 2, "Incorrect size of whitelist dimensions");
    Assert.assertEquals(whitelistByDimension.get("d1"), "x,y", "Incorrect whitelist config");
    Assert.assertEquals(whitelistByDimension.get("d2"), "a", "Incorrect whitelist config");

    // With the whitelist dimension names removed the whitelist config is null.
    props.remove(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertEquals(config.getTopKWhitelist().getWhitelist(), null, "Default whitelist config should be null");

    // ---- topk ----
    List<TopKDimensionToMetricsSpec> topkSpecs = spec.getTopKDimensionToMetricsSpec();
    Assert.assertEquals(topkSpecs.size(), 2, "Incorrect topk dimensions config size");

    TopKDimensionToMetricsSpec current = topkSpecs.get(0);
    boolean firstMatches = current.getDimensionName().equals("d2")
        && current.getTopk().size() == 2
        && current.getTopk().get("m1") == 20
        && current.getTopk().get("m2") == 30;
    Assert.assertEquals(firstMatches, true, "Incorrect topk config");

    current = topkSpecs.get(1);
    boolean secondMatches = current.getDimensionName().equals("d3")
        && current.getTopk().size() == 1
        && current.getTopk().get("m1") == 50;
    Assert.assertEquals(secondMatches, true, "Incorrect topk config");

    // One metric but two k-values for dimension d3.
    threw = false;
    try {
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50,50");
        config = ThirdEyeConfig.fromProperties(props);
    } catch (IllegalStateException e) {
        threw = true;
    }
    Assert.assertTrue(threw, "Expecetd exception due to inequal number of metrics and kvalues for dimension");

    // With the top-k dimension names removed as well, the whole spec is null.
    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString());
    config = ThirdEyeConfig.fromProperties(props);
    Assert.assertEquals(config.getTopKWhitelist(), null, "Default topk should be null");
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) Test(org.testng.annotations.Test)

Example 3 with TopkWhitelistSpec

use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.

the class DerivedColumnTransformationPhaseJob method newSchema.

/**
 * Builds the Avro record schema for the given ThirdEye config.
 *
 * <p>The record is named after the config's collection and contains, in order:
 * a nullable string field per dimension (followed by an extra nullable string
 * field named {@code dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX} when the
 * dimension has a top-k spec), a nullable numeric field per metric, and a long
 * field for the time column. The resulting schema is logged at info level.
 *
 * @param thirdeyeConfig config supplying collection, dimensions, metrics, time and top-k spec
 * @return the assembled schema
 */
public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
    // Dimensions that have a top-k spec get an additional column below.
    Set<String> topkDimensions = new HashSet<>();
    TopkWhitelistSpec whitelistSpec = thirdeyeConfig.getTopKWhitelist();
    List<TopKDimensionToMetricsSpec> topkSpecs =
        (whitelistSpec == null) ? null : whitelistSpec.getTopKDimensionToMetricsSpec();
    if (topkSpecs != null) {
        for (TopKDimensionToMetricsSpec topkSpec : topkSpecs) {
            topkDimensions.add(topkSpec.getDimensionName());
        }
    }

    FieldAssembler<Schema> fields = SchemaBuilder.record(thirdeyeConfig.getCollection()).fields();

    // Dimension columns, each optionally followed by its top-k companion column.
    for (String dimensionName : thirdeyeConfig.getDimensionNames()) {
        fields = fields.name(dimensionName).type().nullable().stringType().noDefault();
        if (!topkDimensions.contains(dimensionName)) {
            continue;
        }
        String topkColumn = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        fields = fields.name(topkColumn).type().nullable().stringType().noDefault();
    }

    // Metric columns: SHORT and INT map to int, FLOAT to float, DOUBLE to double,
    // LONG and anything else to long.
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        String metricName = metricSpec.getName();
        MetricType metricType = metricSpec.getType();
        BaseFieldTypeBuilder<Schema> nullableType = fields.name(metricName).type().nullable();
        switch (metricType) {
            case DOUBLE:
                fields = nullableType.doubleType().noDefault();
                break;
            case FLOAT:
                fields = nullableType.floatType().noDefault();
                break;
            case SHORT:
            case INT:
                fields = nullableType.intType().noDefault();
                break;
            case LONG:
            default:
                fields = nullableType.longType().noDefault();
                break;
        }
    }

    // Time column is a non-nullable long.
    fields = fields.name(thirdeyeConfig.getTime().getColumnName()).type().longType().noDefault();

    Schema outputSchema = fields.endRecord();
    LOGGER.info("New schema {}", outputSchema.toString(true));
    return outputSchema;
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) Schema(org.apache.avro.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) HashSet(java.util.HashSet)

Example 4 with TopkWhitelistSpec

use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.

the class TopKPhaseConfig method fromThirdEyeConfig.

/**
 * Generates the top-k config needed by the TopKPhase job from a ThirdEye config.
 *
 * <p>Metric names/types and dimension names are always collected. When the config
 * has a top-k/whitelist spec, the result additionally carries:
 * <ul>
 *   <li>a threshold for every metric: the configured one, or
 *       {@code DEFAULT_METRIC_THRESHOLD} when none is configured for that metric;</li>
 *   <li>the top-k specs keyed by dimension name;</li>
 *   <li>the whitelist values per dimension, split on {@code FIELD_SEPARATOR}.</li>
 * </ul>
 * When the config has no top-k/whitelist spec, all three maps are empty.
 *
 * <p>The configured thresholds are copied, not aliased, so filling in defaults
 * here never modifies the map owned by {@code config}.
 *
 * @param config ThirdEye config to read metrics, dimensions and the top-k/whitelist spec from
 * @return the TopKPhase config assembled from {@code config}
 */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Double> metricThresholds = new HashMap<>();
    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topk
    if (topKWhitelist != null) {
        // metric thresholds: copy into the local map so that adding defaults below
        // does not write into the map held by the config's spec
        Map<String, Double> configuredThresholds = topKWhitelist.getThreshold();
        if (configuredThresholds != null) {
            metricThresholds.putAll(configuredThresholds);
        }
        for (String metric : metricNames) {
            if (metricThresholds.get(metric) == null) {
                metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
            }
        }
        // topk
        if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
            for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
                topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
            }
        }
        // whitelist
        if (topKWhitelist.getWhitelist() != null) {
            for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
                String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
                whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
            }
        }
    }
    return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) Set(java.util.Set) HashSet(java.util.HashSet) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) HashMap(java.util.HashMap) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)

Example 5 with TopkWhitelistSpec

use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.

the class ThirdEyeConfig method fromProperties.

/**
 * Assembles a {@link ThirdEyeConfig} from a {@link Properties} object.
 *
 * <p>Each part of the config (collection, dimensions, metrics, input time, time,
 * split and top-k/whitelist spec) is read from {@code props} by its own helper and
 * then handed to the {@code ThirdEyeConfig} constructor.
 *
 * @param props properties to read the configuration from
 * @return the config built from {@code props}
 */
public static ThirdEyeConfig fromProperties(Properties props) {
    final String collectionName = getCollectionFromProperties(props);
    final List<DimensionSpec> dimensionSpecs = getDimensionFromProperties(props);
    final List<MetricSpec> metricSpecs = getMetricsFromProperties(props);
    final TimeSpec inputTimeSpec = getInputTimeFromProperties(props);
    final TimeSpec timeSpec = getTimeFromProperties(props);
    final SplitSpec splitSpec = getSplitFromProperties(props);
    final TopkWhitelistSpec topkWhitelistSpec = getTopKWhitelistFromProperties(props);

    return new ThirdEyeConfig(
        collectionName, dimensionSpecs, metricSpecs, inputTimeSpec, timeSpec, topkWhitelistSpec, splitSpec);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) SplitSpec(com.linkedin.thirdeye.hadoop.config.SplitSpec) TimeSpec(com.linkedin.thirdeye.hadoop.config.TimeSpec)

Aggregations

TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec): 6, MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec): 4, TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec): 4, DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec): 3, MetricType (com.linkedin.thirdeye.hadoop.config.MetricType): 3, HashSet (java.util.HashSet): 3, ArrayList (java.util.ArrayList): 2, HashMap (java.util.HashMap): 2, Set (java.util.Set): 2, SplitSpec (com.linkedin.thirdeye.hadoop.config.SplitSpec): 1, TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec): 1, Schema (org.apache.avro.Schema): 1, Test (org.testng.annotations.Test): 1