use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.
the class DerivedColumnTransformationPhaseConfig method fromThirdEyeConfig.
/**
 * Assembles the configuration for the derived column transformation phase from a
 * ThirdEye config: metric names and types, dimension names, the time column name and
 * the per-dimension whitelist values.
 *
 * @param config ThirdEye config to read from
 * @return the phase config built from the values above
 */
public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // metric names and metric types are collected in the same iteration order
  int metricCount = config.getMetrics().size();
  List<String> names = new ArrayList<>(metricCount);
  List<MetricType> types = new ArrayList<>(metricCount);
  for (MetricSpec metric : config.getMetrics()) {
    names.add(metric.getName());
    types.add(metric.getType());
  }

  // dimension names
  List<String> dimensions = new ArrayList<>(config.getDimensions().size());
  for (DimensionSpec dimension : config.getDimensions()) {
    dimensions.add(dimension.getName());
  }

  // time column
  String timeColumn = config.getTime().getColumnName();

  // whitelist: stays empty when there is no topk/whitelist spec or no whitelist in it
  Map<String, Set<String>> whitelistByDimension = new HashMap<>();
  TopkWhitelistSpec topkSpec = config.getTopKWhitelist();
  Map<String, String> rawWhitelist = (topkSpec == null) ? null : topkSpec.getWhitelist();
  if (rawWhitelist != null) {
    for (Entry<String, String> dimensionEntry : rawWhitelist.entrySet()) {
      // each configured value is a FIELD_SEPARATOR-delimited list of whitelisted values
      String[] tokens = dimensionEntry.getValue().split(FIELD_SEPARATOR);
      Set<String> allowedValues = new HashSet<>(Arrays.asList(tokens));
      whitelistByDimension.put(dimensionEntry.getKey(), allowedValues);
    }
  }

  return new DerivedColumnTransformationPhaseConfig(dimensions, names, types, timeColumn, whitelistByDimension);
}
use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.
the class ThirdEyeConfigTest method testTopKWhitelistConfig.
/**
 * Checks the threshold, whitelist and topk sections of the TopkWhitelistSpec taken from
 * thirdeyeConfig, and checks how ThirdEyeConfig.fromProperties reacts when the related
 * properties are overwritten or removed.
 *
 * NOTE(review): thirdeyeConfig, props and config are not declared in this method -
 * presumably fixture fields of the test class; confirm. props is modified in place and
 * not restored here, so each step depends on the ones before it; keep the statement order.
 */
@Test
public void testTopKWhitelistConfig() throws IllegalArgumentException {
boolean failed = false;
// The "expected value" assertions below all read from this spec (from thirdeyeConfig),
// not from the config objects re-parsed after props is modified.
TopkWhitelistSpec topKWhitelistSpec = thirdeyeConfig.getTopKWhitelist();
// thresholds: two entries expected, m1 -> 0.02 and m3 -> 0.1
Map<String, Double> threshold = topKWhitelistSpec.getThreshold();
Assert.assertEquals(threshold.size(), 2, "Incorrect metric thresholds size");
Assert.assertEquals(threshold.get("m1") == 0.02 && threshold.get("m3") == 0.1, true, "Incorrect metric thresholds config");
// Overwrite the threshold values with the single value "0.1"; fromProperties is expected
// to throw IllegalStateException (per the assertion message: count mismatch with metrics).
try {
props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.1");
config = ThirdEyeConfig.fromProperties(props);
} catch (IllegalStateException e) {
failed = true;
}
Assert.assertTrue(failed, "Expected exception due to unequal number of metrics and threshold");
// With both threshold properties removed, the re-parsed config should report a null threshold map.
props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString());
props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString());
config = ThirdEyeConfig.fromProperties(props);
Assert.assertEquals(config.getTopKWhitelist().getThreshold(), null, "Default threshold config should be null");
// whitelist: two dimensions expected, d1 -> "x,y" and d2 -> "a" (values kept as raw strings)
Map<String, String> whitelist = topKWhitelistSpec.getWhitelist();
Assert.assertEquals(whitelist.size(), 2, "Incorrect size of whitelist dimensions");
Assert.assertEquals(whitelist.get("d1"), "x,y", "Incorrect whitelist config");
Assert.assertEquals(whitelist.get("d2"), "a", "Incorrect whitelist config");
// With the whitelist dimension names removed, the re-parsed config should report a null whitelist.
props.remove(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString());
config = ThirdEyeConfig.fromProperties(props);
Assert.assertEquals(config.getTopKWhitelist().getWhitelist(), null, "Default whitelist config should be null");
// topk: two dimension specs expected, in order: d2 {m1=20, m2=30} then d3 {m1=50}
List<TopKDimensionToMetricsSpec> topk = topKWhitelistSpec.getTopKDimensionToMetricsSpec();
Assert.assertEquals(topk.size(), 2, "Incorrect topk dimensions config size");
TopKDimensionToMetricsSpec topkSpec = topk.get(0);
Assert.assertEquals(topkSpec.getDimensionName().equals("d2") && topkSpec.getTopk().size() == 2 && topkSpec.getTopk().get("m1") == 20 && topkSpec.getTopk().get("m2") == 30, true, "Incorrect topk config");
topkSpec = topk.get(1);
Assert.assertEquals(topkSpec.getDimensionName().equals("d3") && topkSpec.getTopk().size() == 1 && topkSpec.getTopk().get("m1") == 50, true, "Incorrect topk config");
// Reset the flag, then give d3 one metric ("m1") but two k values ("50,50");
// fromProperties is expected to throw IllegalStateException.
failed = false;
try {
props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50,50");
config = ThirdEyeConfig.fromProperties(props);
} catch (IllegalStateException e) {
failed = true;
}
Assert.assertTrue(failed, "Expecetd exception due to inequal number of metrics and kvalues for dimension");
// With the topk dimension names removed as well, the whole TopkWhitelistSpec is expected to be null.
props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString());
config = ThirdEyeConfig.fromProperties(props);
Assert.assertEquals(config.getTopKWhitelist(), null, "Default topk should be null");
}
use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.
the class DerivedColumnTransformationPhaseJob method newSchema.
/**
 * Builds the output Avro schema for the given ThirdEye config.
 *
 * The record is named after the collection and contains, in order: one nullable string
 * field per dimension (immediately followed by an extra nullable string field named
 * dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX when the dimension has a topk spec),
 * one nullable numeric field per metric, and a non-nullable long field for the time column.
 * The resulting schema is logged at info level.
 *
 * @param thirdeyeConfig config supplying collection, dimensions, metrics, time and topk spec
 * @return the assembled schema
 */
public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
  // names of the dimensions that have a topk spec
  Set<String> topkDimensions = new HashSet<>();
  TopkWhitelistSpec whitelistSpec = thirdeyeConfig.getTopKWhitelist();
  List<TopKDimensionToMetricsSpec> topkSpecs =
      (whitelistSpec == null) ? null : whitelistSpec.getTopKDimensionToMetricsSpec();
  if (topkSpecs != null) {
    for (TopKDimensionToMetricsSpec topkSpec : topkSpecs) {
      topkDimensions.add(topkSpec.getDimensionName());
    }
  }

  FieldAssembler<Schema> fields = SchemaBuilder.record(thirdeyeConfig.getCollection()).fields();

  // dimension columns, each optionally followed by its topk companion column
  for (String dimensionName : thirdeyeConfig.getDimensionNames()) {
    fields = fields.name(dimensionName).type().nullable().stringType().noDefault();
    if (topkDimensions.contains(dimensionName)) {
      String topkColumn = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
      fields = fields.name(topkColumn).type().nullable().stringType().noDefault();
    }
  }

  // metric columns: SHORT and INT map to int, FLOAT to float, DOUBLE to double,
  // LONG and any other type to long
  for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
    String metricName = metricSpec.getName();
    MetricType type = metricSpec.getType();
    BaseFieldTypeBuilder<Schema> nullableType = fields.name(metricName).type().nullable();
    switch (type) {
      case FLOAT:
        fields = nullableType.floatType().noDefault();
        break;
      case DOUBLE:
        fields = nullableType.doubleType().noDefault();
        break;
      case SHORT:
      case INT:
        fields = nullableType.intType().noDefault();
        break;
      default:
        fields = nullableType.longType().noDefault();
        break;
    }
  }

  // time column is the only field declared without nullable()
  fields = fields.name(thirdeyeConfig.getTime().getColumnName()).type().longType().noDefault();

  Schema schema = fields.endRecord();
  LOGGER.info("New schema {}", schema.toString(true));
  return schema;
}
use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.
the class TopKPhaseConfig method fromThirdEyeConfig.
/**
 * Generates the top k config needed by the TopKPhase job from a ThirdEye config.
 *
 * Metric names/types and dimension names are always collected. The remaining parts are
 * filled only when the config has a topk/whitelist spec; otherwise they are left empty:
 * <ul>
 * <li>metric thresholds: the configured thresholds, plus DEFAULT_METRIC_THRESHOLD for
 * every metric that has none</li>
 * <li>topk specs keyed by dimension name</li>
 * <li>whitelist values per dimension, split on FIELD_SEPARATOR</li>
 * </ul>
 * The threshold map of the given config is copied, never modified.
 *
 * @param config ThirdEye config to read from
 * @return the top k phase config built from the values above
 */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
  // metrics
  List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
  List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
  for (MetricSpec spec : config.getMetrics()) {
    metricNames.add(spec.getName());
    metricTypes.add(spec.getType());
  }

  // dimensions
  List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
  for (DimensionSpec dimensionSpec : config.getDimensions()) {
    dimensionNames.add(dimensionSpec.getName());
  }

  TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
  Map<String, Double> metricThresholds = new HashMap<>();
  Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
  Map<String, Set<String>> whitelist = new HashMap<>();

  if (topKWhitelist != null) {
    // metric thresholds: copy instead of aliasing the spec's map, because defaults are
    // added below and must not leak back into the ThirdEyeConfig (or fail on a
    // read-only map)
    Map<String, Double> configuredThresholds = topKWhitelist.getThreshold();
    if (configuredThresholds != null) {
      metricThresholds.putAll(configuredThresholds);
    }
    for (String metric : metricNames) {
      if (metricThresholds.get(metric) == null) {
        metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
      }
    }

    // topk
    if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
      for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
        topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
      }
    }

    // whitelist
    if (topKWhitelist.getWhitelist() != null) {
      for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
        String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
        whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
      }
    }
  }

  return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
use of com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec in project pinot by linkedin.
the class ThirdEyeConfig method fromProperties.
/**
 * Builds a ThirdEyeConfig by reading each of its parts (collection, dimensions, metrics,
 * input time, time, split and topk/whitelist) from the given Properties.
 *
 * @param props properties to read the configuration from
 * @return the assembled ThirdEyeConfig
 */
public static ThirdEyeConfig fromProperties(Properties props) {
  String collectionName = getCollectionFromProperties(props);
  List<DimensionSpec> dimensionSpecs = getDimensionFromProperties(props);
  List<MetricSpec> metricSpecs = getMetricsFromProperties(props);
  TimeSpec inputTimeSpec = getInputTimeFromProperties(props);
  TimeSpec timeSpec = getTimeFromProperties(props);
  SplitSpec splitSpec = getSplitFromProperties(props);
  TopkWhitelistSpec topkWhitelistSpec = getTopKWhitelistFromProperties(props);

  return new ThirdEyeConfig(
      collectionName,
      dimensionSpecs,
      metricSpecs,
      inputTimeSpec,
      timeSpec,
      topkWhitelistSpec,
      splitSpec);
}
Aggregations