Search in sources :

Example 6 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

the class ThirdeyePinotSchemaUtils method createSchema.

/**
 * Builds the Pinot {@link Schema} that corresponds to a ThirdEye configuration.
 * <p>
 * Every configured dimension becomes a single-value STRING dimension column. A dimension
 * that is also listed in {@code thirdeyeConfig.getTransformDimensions()} (per the original
 * documentation, the dimensions specified as either topk or whitelist) gets one extra
 * STRING column whose name is the dimension name followed by
 * {@code ThirdEyeConstants.TOPK_DIMENSION_SUFFIX}.
 * <p>
 * Every configured metric becomes a single-value metric column whose data type is looked
 * up by the name of the metric's type. If no configured metric is named
 * {@code ThirdEyeConstants.AUTO_METRIC_COUNT}, a LONG metric with that name and a default
 * null value of 1 is appended.
 * <p>
 * Finally a time column is added under the configured time column name, and the schema is
 * named after the configured collection.
 *
 * @param thirdeyeConfig the ThirdEye configuration to translate
 * @return the populated Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema pinotSchema = new Schema();

    // Dimension columns, each optionally followed by its suffixed companion column.
    Set<String> dimensionsWithExtraColumn = thirdeyeConfig.getTransformDimensions();
    for (DimensionSpec dimension : thirdeyeConfig.getDimensions()) {
        String columnName = dimension.getName();

        FieldSpec dimensionField = new DimensionFieldSpec();
        dimensionField.setName(columnName);
        dimensionField.setDataType(DataType.STRING);
        dimensionField.setSingleValueField(true);
        pinotSchema.addField(columnName, dimensionField);

        if (dimensionsWithExtraColumn.contains(columnName)) {
            String suffixedColumnName = columnName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;

            FieldSpec suffixedField = new DimensionFieldSpec();
            suffixedField.setName(suffixedColumnName);
            suffixedField.setDataType(DataType.STRING);
            suffixedField.setSingleValueField(true);
            pinotSchema.addField(suffixedColumnName, suffixedField);
        }
    }

    // Metric columns; remember whether the automatic count metric was configured explicitly.
    boolean hasCountMetric = false;
    for (MetricSpec metric : thirdeyeConfig.getMetrics()) {
        String columnName = metric.getName();
        hasCountMetric |= columnName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT);

        FieldSpec metricField = new MetricFieldSpec();
        metricField.setName(columnName);
        // The Pinot data type is resolved from the textual name of the metric's type.
        metricField.setDataType(DataType.valueOf(metric.getType().toString()));
        metricField.setSingleValueField(true);
        pinotSchema.addField(columnName, metricField);
    }

    // Append the automatic count metric when the configuration did not declare it.
    if (!hasCountMetric) {
        FieldSpec countField = new MetricFieldSpec();
        countField.setName(ThirdEyeConstants.AUTO_METRIC_COUNT);
        countField.setDataType(DataType.LONG);
        countField.setDefaultNullValue(1);
        pinotSchema.addField(ThirdEyeConstants.AUTO_METRIC_COUNT, countField);
    }

    // Time column: incoming and outgoing granularity are built from the same settings.
    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
    TimeGranularitySpec incomingGranularity = new TimeGranularitySpec(
        DataType.LONG,
        thirdeyeConfig.getTime().getTimeGranularity().getSize(),
        thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
        thirdeyeConfig.getTime().getTimeFormat(),
        timeColumnName);
    TimeGranularitySpec outgoingGranularity = new TimeGranularitySpec(
        DataType.LONG,
        thirdeyeConfig.getTime().getTimeGranularity().getSize(),
        thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
        thirdeyeConfig.getTime().getTimeFormat(),
        timeColumnName);
    pinotSchema.addField(timeColumnName, new TimeFieldSpec(incomingGranularity, outgoingGranularity));

    pinotSchema.setSchemaName(thirdeyeConfig.getCollection());
    return pinotSchema;
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Example 7 with MetricSpec

use of com.linkedin.thirdeye.hadoop.config.MetricSpec in project pinot by linkedin.

the class ThirdEyeConfig method fromProperties.

/**
 * Assembles a {@link ThirdEyeConfig} from a {@link Properties} object.
 * <p>
 * Each part of the configuration (collection, dimensions, metrics, input time, time,
 * split and topk/whitelist) is read from {@code props} by its own
 * {@code get...FromProperties} helper, and the results are handed to the
 * {@code ThirdEyeConfig} constructor.
 *
 * @param props the properties to read the configuration from
 * @return the newly constructed configuration
 */
public static ThirdEyeConfig fromProperties(Properties props) {
    // Helpers are invoked in the same order as before so any failure surfaces identically.
    String collectionName = getCollectionFromProperties(props);
    List<DimensionSpec> dimensionSpecs = getDimensionFromProperties(props);
    List<MetricSpec> metricSpecs = getMetricsFromProperties(props);
    TimeSpec inputTimeSpec = getInputTimeFromProperties(props);
    TimeSpec timeSpec = getTimeFromProperties(props);
    SplitSpec splitSpec = getSplitFromProperties(props);
    TopkWhitelistSpec topkWhitelistSpec = getTopKWhitelistFromProperties(props);

    return new ThirdEyeConfig(
        collectionName,
        dimensionSpecs,
        metricSpecs,
        inputTimeSpec,
        timeSpec,
        topkWhitelistSpec,
        splitSpec);
}
Also used : DimensionSpec(com.linkedin.thirdeye.hadoop.config.DimensionSpec) TopkWhitelistSpec(com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec) MetricSpec(com.linkedin.thirdeye.hadoop.config.MetricSpec) SplitSpec(com.linkedin.thirdeye.hadoop.config.SplitSpec) TimeSpec(com.linkedin.thirdeye.hadoop.config.TimeSpec)

Aggregations

MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec)7 DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec)5 MetricType (com.linkedin.thirdeye.hadoop.config.MetricType)4 TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec)4 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)3 TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec)2 TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)2 HashMap (java.util.HashMap)2 Set (java.util.Set)2 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)1 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)1 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)1 Schema (com.linkedin.pinot.common.data.Schema)1 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)1 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)1 SplitSpec (com.linkedin.thirdeye.hadoop.config.SplitSpec)1 Schema (org.apache.avro.Schema)1