Search in sources:

Example 1 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

From the class ThirdEyeConfig, method getMetricsFromProperties.

/**
 * Builds the list of {@link MetricSpec} from the metric-name and metric-type
 * job properties. The two properties are parallel, FIELD_SEPARATOR-delimited
 * lists and must contain the same number of entries.
 *
 * @param props job properties containing THIRDEYE_METRIC_NAMES and THIRDEYE_METRIC_TYPES
 * @return one MetricSpec per configured metric, in property order
 * @throws IllegalStateException if the name and type lists differ in length
 */
private static List<MetricSpec> getMetricsFromProperties(Properties props) {
    String[] metricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR);
    String[] metricTypes = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR);
    if (metricNames.length != metricTypes.length) {
        throw new IllegalStateException("Number of metric names provided " + "should be same as number of metric types");
    }
    // Validate before allocating; presize since the count is known.
    List<MetricSpec> metrics = new ArrayList<>(metricNames.length);
    for (int i = 0; i < metricNames.length; i++) {
        metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i])));
    }
    return metrics;
}
Also used : MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList)

Example 2 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

From the class DerivedColumnTransformationPhaseConfig, method fromThirdEyeConfig.

/**
 * Derives the configuration for the derived-column transformation phase from
 * the overall ThirdEye config: metric names and types, dimension names, the
 * time column, and the per-dimension whitelist values (if any are configured).
 */
public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // Metric names and types are kept as parallel lists.
    int metricCount = config.getMetrics().size();
    List<String> names = new ArrayList<String>(metricCount);
    List<MetricType> types = new ArrayList<MetricType>(metricCount);
    for (MetricSpec metricSpec : config.getMetrics()) {
        names.add(metricSpec.getName());
        types.add(metricSpec.getType());
    }
    // Dimension column names.
    List<String> dimensions = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimension : config.getDimensions()) {
        dimensions.add(dimension.getName());
    }
    // Time column.
    String timeColumn = config.getTime().getColumnName();
    // Whitelist: each configured dimension maps to the set of allowed values,
    // parsed from a FIELD_SEPARATOR-delimited string.
    Map<String, Set<String>> whitelistByDimension = new HashMap<>();
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
        for (Entry<String, String> whitelistEntry : topKWhitelist.getWhitelist().entrySet()) {
            String[] values = whitelistEntry.getValue().split(FIELD_SEPARATOR);
            whitelistByDimension.put(whitelistEntry.getKey(), new HashSet<String>(Arrays.asList(values)));
        }
    }
    return new DerivedColumnTransformationPhaseConfig(dimensions, names, types, timeColumn, whitelistByDimension);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) Set(java.util.Set) HashSet(java.util.HashSet) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) HashMap(java.util.HashMap) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList)

Example 3 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

From the class AggregationPhaseConfig, method fromThirdEyeConfig.

/**
 * Derives the configuration for the aggregation phase from the overall
 * ThirdEye config: metric names and types, dimension names, and the output
 * and input time specs.
 *
 * @param config overall ThirdEye configuration
 * @return the aggregation phase configuration
 * @throws IllegalStateException if no input time spec is configured, since
 *         the aggregation job cannot run without one
 */
public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // Fail fast: input time is mandatory for this phase, so validate it
    // before doing any other work.
    TimeSpec inputTime = config.getInputTime();
    if (inputTime == null) {
        throw new IllegalStateException("Must provide input time configs for aggregation job");
    }
    // metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    // output time
    TimeSpec time = config.getTime();
    return new AggregationPhaseConfig(dimensionNames, metricNames, metricTypes, time, inputTime);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList) TimeSpec(com.linkedin.thirdeye.hadoop.config.TimeSpec)

Example 4 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

From the class DerivedColumnTransformationPhaseJob, method newSchema.

/**
 * Builds the Avro output schema for the derived-column transformation:
 * one nullable string field per dimension (plus an extra "_topk"-suffixed
 * field for each dimension that has a top-k spec), one nullable numeric
 * field per metric typed by its MetricType, and a non-null long time column.
 *
 * @param thirdeyeConfig overall ThirdEye configuration for the collection
 * @return the assembled Avro record schema, named after the collection
 */
public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema outputSchema = null;
    Set<String> topKTransformDimensionSet = new HashSet<>();
    TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
    // gather topk columns
    if (topkWhitelist != null) {
        List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
        if (topKDimensionToMetricsSpecs != null) {
            for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
                topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
            }
        }
    }
    // The record is named after the collection; fields are appended in order.
    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
    // add new column for topk columns
    for (String dimension : thirdeyeConfig.getDimensionNames()) {
        fieldAssembler = fieldAssembler.name(dimension).type().nullable().stringType().noDefault();
        if (topKTransformDimensionSet.contains(dimension)) {
            // Extra column holding the top-k transformed value for this dimension.
            fieldAssembler = fieldAssembler.name(dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
        }
    }
    // One nullable field per metric, mapped to the closest Avro numeric type.
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        String metric = metricSpec.getName();
        MetricType metricType = metricSpec.getType();
        BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
        switch(metricType) {
            case SHORT:
            case INT:
                // Avro has no short type; SHORT is widened to int.
                fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
                break;
            case FLOAT:
                fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
                break;
            case DOUBLE:
                fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
                break;
            case LONG:
            default:
                // LONG and any unrecognized metric type fall back to long.
                fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
        }
    }
    // Time column is a required (non-nullable) long.
    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
    fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
    outputSchema = fieldAssembler.endRecord();
    LOGGER.info("New schema {}", outputSchema.toString(true));
    return outputSchema;
}
Also used : TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) Schema(org.apache.avro.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) HashSet(java.util.HashSet)

Example 5 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

From the class TopKPhaseConfig, method fromThirdEyeConfig.

/**
   * Generates the top-k phase configuration from the overall ThirdEye config:
   * metric names and types, dimension names, per-metric thresholds (falling
   * back to DEFAULT_METRIC_THRESHOLD), the per-dimension top-k specs, and the
   * per-dimension whitelist values.
   *
   * @param config overall ThirdEye configuration
   * @return the top-k phase configuration
   */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    //metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Double> metricThresholds = new HashMap<>();
    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topk
    if (topKWhitelist != null) {
        // metric thresholds: defensive copy so the put() calls below do not
        // mutate the map owned by the TopkWhitelistSpec
        if (topKWhitelist.getThreshold() != null) {
            metricThresholds = new HashMap<>(topKWhitelist.getThreshold());
        }
        for (String metric : metricNames) {
            if (metricThresholds.get(metric) == null) {
                metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
            }
        }
        // topk: index the per-dimension specs by dimension name
        if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
            for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
                topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
            }
        }
        // whitelist: FIELD_SEPARATOR-delimited values per dimension
        if (topKWhitelist.getWhitelist() != null) {
            for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
                String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
                whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
            }
        }
    }
    return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) Set(java.util.Set) HashSet(java.util.HashSet) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) HashMap(java.util.HashMap) MetricType(com.linkedin.thirdeye.hadoop.config.MetricType) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) ArrayList(java.util.ArrayList) TopKDimensionToMetricsSpec(com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)

Aggregations

MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec)7 DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec)5 MetricType (com.linkedin.thirdeye.hadoop.config.MetricType)4 TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec)4 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)3 TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec)2 TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)2 HashMap (java.util.HashMap)2 Set (java.util.Set)2 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)1 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)1 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)1 Schema (com.linkedin.pinot.common.data.Schema)1 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)1 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)1 SplitSpec (com.linkedin.thirdeye.hadoop.config.SplitSpec)1 Schema (org.apache.avro.Schema)1