
Example 1 with DimensionSpec

Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in project pinot by linkedin.

From class ThirdEyeConfig, method getDimensionFromProperties:

private static List<DimensionSpec> getDimensionFromProperties(Properties props) {
    List<DimensionSpec> dimensions = new ArrayList<>();
    String[] dimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()).split(FIELD_SEPARATOR);
    for (String dimension : dimensionNames) {
        dimensions.add(new DimensionSpec(dimension));
    }
    return dimensions;
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), ArrayList (java.util.ArrayList)
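A minimal driver sketch for the helper above, assuming the dimension-names property key resolves to "thirdeye.dimension.names" and FIELD_SEPARATOR is "," (both values are assumptions here; the real constants live in ThirdEyeConfigProperties and ThirdEyeConfig):

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import com.linkedin.thirdeye.hadoop.config.DimensionSpec;

public class DimensionSpecParseSketch {
    public static void main(String[] args) {
        // Hypothetical property key and separator; see the note above.
        Properties props = new Properties();
        props.setProperty("thirdeye.dimension.names", "browser,countryCode,deviceName");
        // Mirror the private helper: split the property value and wrap each
        // dimension name in a DimensionSpec.
        List<DimensionSpec> dimensions = new ArrayList<>();
        for (String name : props.getProperty("thirdeye.dimension.names").split(",")) {
            dimensions.add(new DimensionSpec(name));
        }
        System.out.println(dimensions.size() + " dimensions parsed");
    }
}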

Example 2 with DimensionSpec

Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in project pinot by linkedin.

From class DerivedColumnTransformationPhaseConfig, method fromThirdEyeConfig:

public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    // time
    String timeColumnName = config.getTime().getColumnName();
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topkwhitelist
    if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
        for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
            String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
            whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
        }
    }
    return new DerivedColumnTransformationPhaseConfig(dimensionNames, metricNames, metricTypes, timeColumnName, whitelist);
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), Set (java.util.Set), HashSet (java.util.HashSet), TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec), HashMap (java.util.HashMap), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), ArrayList (java.util.ArrayList)
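The whitelist handling above turns the Map<String, String> returned by TopkWhitelistSpec.getWhitelist() into a Map<String, Set<String>>. A standalone sketch of just that step, with a hard-coded "," standing in for FIELD_SEPARATOR and illustrative dimension values:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class WhitelistFlattenSketch {
    public static void main(String[] args) {
        // Raw whitelist: dimension name -> separator-joined allowed values (illustrative data).
        Map<String, String> rawWhitelist = new HashMap<>();
        rawWhitelist.put("countryCode", "us,in,de");
        // Split each value on the separator and collect the pieces into a set,
        // as fromThirdEyeConfig does above.
        Map<String, Set<String>> whitelist = new HashMap<>();
        for (Map.Entry<String, String> entry : rawWhitelist.entrySet()) {
            whitelist.put(entry.getKey(), new HashSet<>(Arrays.asList(entry.getValue().split(","))));
        }
        System.out.println(whitelist);
    }
}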

Example 3 with DimensionSpec

Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in project pinot by linkedin.

From class AggregationPhaseConfig, method fromThirdEyeConfig:

public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    // time
    TimeSpec time = config.getTime();
    // input time
    TimeSpec inputTime = config.getInputTime();
    if (inputTime == null) {
        throw new IllegalStateException("Must provide input time configs for aggregation job");
    }
    return new AggregationPhaseConfig(dimensionNames, metricNames, metricTypes, time, inputTime);
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), ArrayList (java.util.ArrayList), TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec)
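A hedged call-site sketch for the method above: it is typically driven from Hadoop job setup and fails fast with IllegalStateException when no input time spec is configured. ThirdEyeConfig.fromProperties and the AggregationPhaseConfig package path are assumptions here, not confirmed by the snippet:

import java.util.Properties;

import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseConfig; // package path assumed
import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;

public class AggregationConfigSketch {
    public static AggregationPhaseConfig build(Properties jobProperties) {
        // fromProperties is assumed to be the public entry point that wraps
        // helpers such as getDimensionFromProperties (Example 1).
        ThirdEyeConfig config = ThirdEyeConfig.fromProperties(jobProperties);
        // Throws IllegalStateException if config.getInputTime() is null.
        return AggregationPhaseConfig.fromThirdEyeConfig(config);
    }
}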

Example 4 with DimensionSpec

Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in project pinot by linkedin.

From class TopKPhaseConfig, method fromThirdEyeConfig:

/**
   * Generates the top-k configuration needed by the TopKPhase job from the
   * ThirdEye config.
   * @param config ThirdEye config carrying metrics, dimensions and top-k/whitelist settings
   * @return the derived TopKPhaseConfig
   */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    //metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Double> metricThresholds = new HashMap<>();
    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topk
    if (topKWhitelist != null) {
        // metric thresholds
        if (topKWhitelist.getThreshold() != null) {
            metricThresholds = topKWhitelist.getThreshold();
        }
        for (String metric : metricNames) {
            if (metricThresholds.get(metric) == null) {
                metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
            }
        }
        // topk
        if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
            for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
                topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
            }
        }
        // whitelist
        if (topKWhitelist.getWhitelist() != null) {
            for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
                String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
                whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
            }
        }
    }
    return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), Set (java.util.Set), HashSet (java.util.HashSet), TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec), HashMap (java.util.HashMap), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), ArrayList (java.util.ArrayList), TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)
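The threshold handling above backfills a default for every metric that has no explicit entry. A small sketch of that pattern in isolation, with 0.01 standing in for DEFAULT_METRIC_THRESHOLD (the real default is defined in TopKPhaseConfig and may differ):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ThresholdDefaultSketch {
    public static void main(String[] args) {
        // Thresholds as they would come from TopkWhitelistSpec.getThreshold(); only some metrics are set.
        Map<String, Double> metricThresholds = new HashMap<>();
        metricThresholds.put("impressions", 0.05);
        // Every metric without an explicit threshold falls back to the default.
        List<String> metricNames = Arrays.asList("impressions", "clicks", "cost");
        double defaultThreshold = 0.01; // stand-in for DEFAULT_METRIC_THRESHOLD
        for (String metric : metricNames) {
            metricThresholds.putIfAbsent(metric, defaultThreshold);
        }
        System.out.println(metricThresholds);
    }
}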

Example 5 with DimensionSpec

Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in project pinot by linkedin.

From class ThirdeyePinotSchemaUtils, method createSchema:

/**
   * Transforms the thirdeyeConfig to a Pinot schema.
   * Adds the default __COUNT metric if not already present.
   * Adds an additional column for every dimension that is either specified as
   * topk or whitelist, and hence has an additional transformed column
   * (dimensionName + TOPK_DIMENSION_SUFFIX).
   * @param thirdeyeConfig ThirdEye config to transform
   * @return the generated Pinot schema
   */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema schema = new Schema();
    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
        FieldSpec fieldSpec = new DimensionFieldSpec();
        String dimensionName = dimensionSpec.getName();
        fieldSpec.setName(dimensionName);
        fieldSpec.setDataType(DataType.STRING);
        fieldSpec.setSingleValueField(true);
        schema.addField(dimensionName, fieldSpec);
        if (transformDimensions.contains(dimensionName)) {
            fieldSpec = new DimensionFieldSpec();
            dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
            fieldSpec.setName(dimensionName);
            fieldSpec.setDataType(DataType.STRING);
            fieldSpec.setSingleValueField(true);
            schema.addField(dimensionName, fieldSpec);
        }
    }
    boolean countIncluded = false;
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = metricSpec.getName();
        if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
            countIncluded = true;
        }
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        fieldSpec.setSingleValueField(true);
        schema.addField(metricName, fieldSpec);
    }
    if (!countIncluded) {
        FieldSpec fieldSpec = new MetricFieldSpec();
        String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
        fieldSpec.setName(metricName);
        fieldSpec.setDataType(DataType.LONG);
        fieldSpec.setDefaultNullValue(1);
        schema.addField(metricName, fieldSpec);
    }
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG, thirdeyeConfig.getTime().getTimeGranularity().getSize(), thirdeyeConfig.getTime().getTimeGranularity().getUnit(), thirdeyeConfig.getTime().getTimeFormat(), thirdeyeConfig.getTime().getColumnName());
    schema.addField(thirdeyeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(thirdeyeConfig.getCollection());
    return schema;
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec), Schema (com.linkedin.pinot.common.data.Schema), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec), MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec), DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec), FieldSpec (com.linkedin.pinot.common.data.FieldSpec)
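A hedged usage sketch for createSchema: build the Pinot schema from a populated ThirdEyeConfig and print its name. The ThirdeyePinotSchemaUtils package path and the getSchemaName getter are assumptions inferred from the code above:

import com.linkedin.pinot.common.data.Schema;
import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
import com.linkedin.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils; // package path assumed

public class SchemaGenerationSketch {
    public static Schema generate(ThirdEyeConfig thirdeyeConfig) {
        // Builds dimension, metric (plus default __COUNT) and time fields as shown above.
        Schema schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
        // getSchemaName is assumed as the counterpart of setSchemaName used in createSchema.
        System.out.println("Generated Pinot schema: " + schema.getSchemaName());
        return schema;
    }
}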

Aggregations

DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec) 6
MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec) 5
ArrayList (java.util.ArrayList) 4
MetricType (com.linkedin.thirdeye.hadoop.config.MetricType) 3
TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) 3
TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec) 2
HashMap (java.util.HashMap) 2
HashSet (java.util.HashSet) 2
Set (java.util.Set) 2
DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec) 1
FieldSpec (com.linkedin.pinot.common.data.FieldSpec) 1
MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec) 1
Schema (com.linkedin.pinot.common.data.Schema) 1
TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec) 1
TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec) 1
SplitSpec (com.linkedin.thirdeye.hadoop.config.SplitSpec) 1
TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec) 1