Example usage of com.linkedin.thirdeye.hadoop.config.MetricSpec in the LinkedIn pinot project:
class ThirdEyeConfig, method getMetricsFromProperties.
/**
 * Parses the configured metric names and metric types from job properties and
 * pairs them positionally into {@link MetricSpec} instances.
 *
 * @param props job properties containing {@code THIRDEYE_METRIC_NAMES} and
 *              {@code THIRDEYE_METRIC_TYPES} as {@code FIELD_SEPARATOR}-delimited lists
 * @return one {@link MetricSpec} per configured metric, in declaration order
 * @throws IllegalStateException if the name and type lists have different lengths
 * @throws IllegalArgumentException if a metric type is not a valid {@link MetricType} constant
 */
private static List<MetricSpec> getMetricsFromProperties(Properties props) {
  String[] metricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR);
  String[] metricTypes = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR);
  if (metricNames.length != metricTypes.length) {
    // Include the actual counts so a misconfiguration is diagnosable from the message alone.
    throw new IllegalStateException("Number of metric names provided (" + metricNames.length
        + ") should be same as number of metric types (" + metricTypes.length + ")");
  }
  // Presize: the final size is known up front, so avoid intermediate resizes.
  List<MetricSpec> metrics = new ArrayList<>(metricNames.length);
  for (int i = 0; i < metricNames.length; i++) {
    metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i])));
  }
  return metrics;
}
Example usage of com.linkedin.thirdeye.hadoop.config.MetricSpec in the LinkedIn pinot project:
class DerivedColumnTransformationPhaseConfig, method fromThirdEyeConfig.
/**
 * Builds a {@link DerivedColumnTransformationPhaseConfig} from the overall ThirdEye config:
 * copies metric names and types, dimension names, and the time column name, and expands
 * the topk whitelist (dimension name -> separator-delimited values) into a
 * dimension name -> value-set map.
 */
public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // Split each MetricSpec into parallel name/type lists.
  List<MetricSpec> metricSpecs = config.getMetrics();
  List<String> metricNames = new ArrayList<String>(metricSpecs.size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(metricSpecs.size());
  for (MetricSpec metricSpec : metricSpecs) {
    metricNames.add(metricSpec.getName());
    metricTypes.add(metricSpec.getType());
  }
  // Collect dimension column names.
  List<DimensionSpec> dimensionSpecs = config.getDimensions();
  List<String> dimensionNames = new ArrayList<String>(dimensionSpecs.size());
  for (DimensionSpec dimensionSpec : dimensionSpecs) {
    dimensionNames.add(dimensionSpec.getName());
  }
  String timeColumnName = config.getTime().getColumnName();
  // Expand the whitelist, if configured, into per-dimension value sets.
  Map<String, Set<String>> whitelist = new HashMap<>();
  TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
  if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
    for (Entry<String, String> whitelistEntry : topKWhitelist.getWhitelist().entrySet()) {
      Set<String> values = new HashSet<String>(Arrays.asList(whitelistEntry.getValue().split(FIELD_SEPARATOR)));
      whitelist.put(whitelistEntry.getKey(), values);
    }
  }
  return new DerivedColumnTransformationPhaseConfig(dimensionNames, metricNames, metricTypes, timeColumnName, whitelist);
}
Example usage of com.linkedin.thirdeye.hadoop.config.MetricSpec in the LinkedIn pinot project:
class AggregationPhaseConfig, method fromThirdEyeConfig.
/**
 * Builds an {@link AggregationPhaseConfig} from the overall ThirdEye config:
 * copies metric names and types, dimension names, and both the output and
 * input time specifications.
 *
 * @throws IllegalStateException if no input time spec is configured — the
 *         aggregation job cannot run without one
 */
public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // Fail fast: the input time spec is mandatory for the aggregation job.
  TimeSpec inputTime = config.getInputTime();
  if (inputTime == null) {
    throw new IllegalStateException("Must provide input time configs for aggregation job");
  }
  TimeSpec time = config.getTime();
  // Split each MetricSpec into parallel name/type lists.
  List<MetricSpec> metricSpecs = config.getMetrics();
  List<String> metricNames = new ArrayList<String>(metricSpecs.size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(metricSpecs.size());
  for (MetricSpec metricSpec : metricSpecs) {
    metricNames.add(metricSpec.getName());
    metricTypes.add(metricSpec.getType());
  }
  // Collect dimension column names.
  List<DimensionSpec> dimensionSpecs = config.getDimensions();
  List<String> dimensionNames = new ArrayList<String>(dimensionSpecs.size());
  for (DimensionSpec dimensionSpec : dimensionSpecs) {
    dimensionNames.add(dimensionSpec.getName());
  }
  return new AggregationPhaseConfig(dimensionNames, metricNames, metricTypes, time, inputTime);
}
Example usage of com.linkedin.thirdeye.hadoop.config.MetricSpec in the LinkedIn pinot project:
class DerivedColumnTransformationPhaseJob, method newSchema.
/**
 * Builds the Avro output schema for the derived-column transformation phase.
 * The schema contains, in order: one nullable string field per dimension (plus an
 * extra "&lt;dimension&gt;&lt;TOPK_DIMENSION_SUFFIX&gt;" field for each dimension that has a
 * topk spec), one nullable field per metric typed from its {@link MetricType},
 * and finally the non-nullable long time column.
 *
 * @param thirdeyeConfig source of collection name, dimensions, metrics, topk spec, and time spec
 * @return the assembled Avro record schema
 */
public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
Schema outputSchema = null;
Set<String> topKTransformDimensionSet = new HashSet<>();
TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
// gather topk columns
if (topkWhitelist != null) {
List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
if (topKDimensionToMetricsSpecs != null) {
for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
}
}
}
RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
// add new column for topk columns
for (String dimension : thirdeyeConfig.getDimensionNames()) {
fieldAssembler = fieldAssembler.name(dimension).type().nullable().stringType().noDefault();
// Dimensions with a topk spec get a second, suffixed column alongside the original.
if (topKTransformDimensionSet.contains(dimension)) {
fieldAssembler = fieldAssembler.name(dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
}
}
for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
String metric = metricSpec.getName();
MetricType metricType = metricSpec.getType();
BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
switch(metricType) {
// SHORT is deliberately widened to Avro int (Avro has no 16-bit type).
case SHORT:
case INT:
fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
break;
case FLOAT:
fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
break;
case DOUBLE:
fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
break;
// LONG and any future metric type both map to Avro long (intentional fall-through default).
case LONG:
default:
fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
}
}
// The time column is a required (non-nullable) long.
String timeColumnName = thirdeyeConfig.getTime().getColumnName();
fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
outputSchema = fieldAssembler.endRecord();
LOGGER.info("New schema {}", outputSchema.toString(true));
return outputSchema;
}
Example usage of com.linkedin.thirdeye.hadoop.config.MetricSpec in the LinkedIn pinot project:
class TopKPhaseConfig, method fromThirdEyeConfig.
/**
 * Generates the configuration required by the TopKPhase job from the overall
 * ThirdEye config: metric names/types, dimension names, per-metric thresholds
 * (defaulted where unset), per-dimension topk specs, and the expanded whitelist.
 *
 * @param config the overall ThirdEye configuration
 * @return the assembled {@link TopKPhaseConfig}
 */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // Split each MetricSpec into parallel name/type lists.
  List<MetricSpec> metricSpecs = config.getMetrics();
  List<String> metricNames = new ArrayList<String>(metricSpecs.size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(metricSpecs.size());
  for (MetricSpec metricSpec : metricSpecs) {
    metricNames.add(metricSpec.getName());
    metricTypes.add(metricSpec.getType());
  }
  // Collect dimension column names.
  List<DimensionSpec> dimensionSpecs = config.getDimensions();
  List<String> dimensionNames = new ArrayList<String>(dimensionSpecs.size());
  for (DimensionSpec dimensionSpec : dimensionSpecs) {
    dimensionNames.add(dimensionSpec.getName());
  }
  Map<String, Double> metricThresholds = new HashMap<>();
  Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
  Map<String, Set<String>> whitelist = new HashMap<>();
  TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
  // NOTE(review): when no topk/whitelist section is configured, all three maps stay
  // empty — including metricThresholds (no defaults are filled in). Presumably the
  // downstream job tolerates that; confirm before changing.
  if (topKWhitelist != null) {
    // Start from the configured thresholds, then default any metric without one.
    if (topKWhitelist.getThreshold() != null) {
      metricThresholds = topKWhitelist.getThreshold();
    }
    for (String metricName : metricNames) {
      if (metricThresholds.get(metricName) == null) {
        metricThresholds.put(metricName, DEFAULT_METRIC_THRESHOLD);
      }
    }
    // Index topk specs by their dimension name.
    if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
      for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
        topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
      }
    }
    // Expand the whitelist (dimension -> separator-delimited values) into value sets.
    if (topKWhitelist.getWhitelist() != null) {
      for (Entry<String, String> whitelistEntry : topKWhitelist.getWhitelist().entrySet()) {
        Set<String> values = new HashSet<String>(Arrays.asList(whitelistEntry.getValue().split(FIELD_SEPARATOR)));
        whitelist.put(whitelistEntry.getKey(), values);
      }
    }
  }
  return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Aggregations