Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in the project pinot by LinkedIn.
From the class ThirdEyeConfig, method getDimensionFromProperties.
/**
 * Builds the list of dimension specs from the configured dimension-names property.
 *
 * @param props job properties; must contain {@code THIRDEYE_DIMENSION_NAMES}
 * @return one {@link DimensionSpec} per configured dimension name, in property order
 */
private static List<DimensionSpec> getDimensionFromProperties(Properties props) {
  String namesProperty =
      getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString());
  List<DimensionSpec> dimensions = new ArrayList<>();
  for (String dimensionName : namesProperty.split(FIELD_SEPARATOR)) {
    dimensions.add(new DimensionSpec(dimensionName));
  }
  return dimensions;
}
Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in the project pinot by LinkedIn.
From the class DerivedColumnTransformationPhaseConfig, method fromThirdEyeConfig.
/**
 * Derives the column-transformation phase configuration from the overall ThirdEye config:
 * metric names/types, dimension names, the time column, and the per-dimension whitelist
 * (when a topk/whitelist spec is present).
 *
 * @param config ThirdEye configuration to translate
 * @return configuration for the derived-column transformation phase
 */
public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // Metric names and types, kept as parallel lists in spec order.
  List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
  for (MetricSpec metricSpec : config.getMetrics()) {
    metricNames.add(metricSpec.getName());
    metricTypes.add(metricSpec.getType());
  }

  // Dimension names in spec order.
  List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
  for (DimensionSpec spec : config.getDimensions()) {
    dimensionNames.add(spec.getName());
  }

  // Time column name.
  String timeColumnName = config.getTime().getColumnName();

  // Per-dimension whitelist values, parsed from the separator-delimited strings.
  Map<String, Set<String>> whitelist = new HashMap<>();
  TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
  if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
    for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
      Set<String> values = new HashSet<String>(Arrays.asList(entry.getValue().split(FIELD_SEPARATOR)));
      whitelist.put(entry.getKey(), values);
    }
  }

  return new DerivedColumnTransformationPhaseConfig(
      dimensionNames, metricNames, metricTypes, timeColumnName, whitelist);
}
Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in the project pinot by LinkedIn.
From the class AggregationPhaseConfig, method fromThirdEyeConfig.
/**
 * Derives the aggregation phase configuration from the overall ThirdEye config:
 * metric names/types, dimension names, and both the output and the (required)
 * input time specs.
 *
 * @param config ThirdEye configuration to translate
 * @return configuration for the aggregation phase
 * @throws IllegalStateException if no input time config is provided
 */
public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // Metric names and types, kept as parallel lists in spec order.
  List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
  for (MetricSpec metricSpec : config.getMetrics()) {
    metricNames.add(metricSpec.getName());
    metricTypes.add(metricSpec.getType());
  }

  // Dimension names in spec order.
  List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
  for (DimensionSpec spec : config.getDimensions()) {
    dimensionNames.add(spec.getName());
  }

  // Output time spec.
  TimeSpec time = config.getTime();

  // Input time spec is mandatory for the aggregation job.
  TimeSpec inputTime = config.getInputTime();
  if (inputTime == null) {
    throw new IllegalStateException("Must provide input time configs for aggregation job");
  }

  return new AggregationPhaseConfig(dimensionNames, metricNames, metricTypes, time, inputTime);
}
Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in the project pinot by LinkedIn.
From the class TopKPhaseConfig, method fromThirdEyeConfig.
/**
 * Generates the top-k phase configuration for the TopKPhase job from the
 * overall ThirdEye config.
 *
 * <p>Every metric without an explicitly configured threshold is assigned
 * {@code DEFAULT_METRIC_THRESHOLD}. Top-k specs are keyed by dimension name,
 * and whitelist values are parsed from separator-delimited strings.
 *
 * @param config ThirdEye configuration to translate
 * @return configuration for the top-k phase
 */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  //metrics
  List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
  for (MetricSpec spec : config.getMetrics()) {
    metricNames.add(spec.getName());
    metricTypes.add(spec.getType());
  }
  // dimensions
  List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
  for (DimensionSpec dimensionSpec : config.getDimensions()) {
    dimensionNames.add(dimensionSpec.getName());
  }
  TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
  Map<String, Double> metricThresholds = new HashMap<>();
  Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
  Map<String, Set<String>> whitelist = new HashMap<>();
  // topk
  if (topKWhitelist != null) {
    // metric thresholds: copy (not alias) the configured map, so filling in
    // defaults below does not mutate the map owned by the TopkWhitelistSpec
    if (topKWhitelist.getThreshold() != null) {
      metricThresholds = new HashMap<>(topKWhitelist.getThreshold());
    }
    for (String metric : metricNames) {
      if (metricThresholds.get(metric) == null) {
        metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
      }
    }
    // topk specs keyed by dimension name
    if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
      for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
        topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
      }
    }
    // whitelist values parsed from separator-delimited strings
    if (topKWhitelist.getWhitelist() != null) {
      for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
        String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
        whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
      }
    }
  }
  return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Use of com.linkedin.thirdeye.hadoop.config.DimensionSpec in the project pinot by LinkedIn.
From the class ThirdeyePinotSchemaUtils, method createSchema.
/**
 * Transforms the thirdeyeConfig into a Pinot schema.
 *
 * <p>Adds the default __COUNT metric if not already present. For every
 * dimension that is either specified as topk or whitelist (and therefore has
 * a transformed column), an additional suffixed STRING column is added.
 *
 * @param thirdeyeConfig source ThirdEye configuration
 * @return the generated Pinot schema
 */
public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
  Schema schema = new Schema();

  // Dimensions: each becomes a single-valued STRING column; dimensions that
  // undergo a topk/whitelist transformation also get a suffixed column.
  Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
  for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
    String dimensionName = dimensionSpec.getName();
    FieldSpec dimensionField = new DimensionFieldSpec();
    dimensionField.setName(dimensionName);
    dimensionField.setDataType(DataType.STRING);
    dimensionField.setSingleValueField(true);
    schema.addField(dimensionName, dimensionField);

    if (transformDimensions.contains(dimensionName)) {
      String topkColumnName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
      FieldSpec topkField = new DimensionFieldSpec();
      topkField.setName(topkColumnName);
      topkField.setDataType(DataType.STRING);
      topkField.setSingleValueField(true);
      schema.addField(topkColumnName, topkField);
    }
  }

  // Metrics: single-valued columns typed from the metric spec; track whether
  // the auto count metric was already configured.
  boolean countIncluded = false;
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    String metricName = metricSpec.getName();
    if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
      countIncluded = true;
    }
    FieldSpec metricField = new MetricFieldSpec();
    metricField.setName(metricName);
    metricField.setDataType(DataType.valueOf(metricSpec.getType().toString()));
    metricField.setSingleValueField(true);
    schema.addField(metricName, metricField);
  }

  // Default __COUNT metric: LONG column with a default null value of 1, so
  // each raw record contributes a count of one.
  if (!countIncluded) {
    String countName = ThirdEyeConstants.AUTO_METRIC_COUNT;
    FieldSpec countField = new MetricFieldSpec();
    countField.setName(countName);
    countField.setDataType(DataType.LONG);
    countField.setDefaultNullValue(1);
    schema.addField(countName, countField);
  }

  // Time column: incoming and outgoing granularity specs are identical.
  String timeColumnName = thirdeyeConfig.getTime().getColumnName();
  TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(), timeColumnName);
  TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG,
      thirdeyeConfig.getTime().getTimeGranularity().getSize(),
      thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
      thirdeyeConfig.getTime().getTimeFormat(), timeColumnName);
  schema.addField(timeColumnName, new TimeFieldSpec(incoming, outgoing));

  schema.setSchemaName(thirdeyeConfig.getCollection());
  return schema;
}
Aggregations