
Example 6 with MetricType

Use of com.linkedin.thirdeye.hadoop.config.MetricType in project pinot by linkedin.

From the class AggregationPhaseMapOutputValue, the method toBytes:

public byte[] toBytes() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    // metric values: int length prefix, then one type-dependent value per metric
    dos.writeInt(metricValues.length);
    for (int i = 0; i < metricValues.length; i++) {
        Number number = metricValues[i];
        MetricType metricType = metricTypes.get(i);
        switch(metricType) {
            case SHORT:
                dos.writeShort(number.intValue());
                break;
            case LONG:
                dos.writeLong(number.longValue());
                break;
            case INT:
                dos.writeInt(number.intValue());
                break;
            case FLOAT:
                dos.writeFloat(number.floatValue());
                break;
            case DOUBLE:
                dos.writeDouble(number.doubleValue());
                break;
        }
    }
    // closing the DataOutputStream also closes the underlying ByteArrayOutputStream
    dos.close();
    return baos.toByteArray();
}
Also used: DataOutputStream (java.io.DataOutputStream), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), ByteArrayOutputStream (java.io.ByteArrayOutputStream)
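
The layout written by toBytes (an int length prefix, then one type-dependent value per metric) implies a symmetric read path. The sketch below is not the project's own fromBytes; it is only a minimal illustration of how such a reader could look, assuming the same List<MetricType> used for writing is available on the read side.

import com.linkedin.thirdeye.hadoop.config.MetricType;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.List;

// Sketch of a reader that mirrors toBytes(); this is not the project's own fromBytes.
public static Number[] readMetricValues(byte[] bytes, List<MetricType> metricTypes) throws IOException {
    try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes))) {
        // the int length prefix written first by toBytes()
        int length = dis.readInt();
        Number[] metricValues = new Number[length];
        for (int i = 0; i < length; i++) {
            switch (metricTypes.get(i)) {
                case SHORT:
                    metricValues[i] = dis.readShort();
                    break;
                case LONG:
                    metricValues[i] = dis.readLong();
                    break;
                case INT:
                    metricValues[i] = dis.readInt();
                    break;
                case FLOAT:
                    metricValues[i] = dis.readFloat();
                    break;
                case DOUBLE:
                    metricValues[i] = dis.readDouble();
                    break;
            }
        }
        return metricValues;
    }
}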

Example 7 with MetricType

Use of com.linkedin.thirdeye.hadoop.config.MetricType in project pinot by linkedin.

From the class DerivedColumnTransformationPhaseJob, the method newSchema:

public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
    Schema outputSchema = null;
    Set<String> topKTransformDimensionSet = new HashSet<>();
    TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
    // gather the dimensions that have a topk spec configured
    if (topkWhitelist != null) {
        List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
        if (topKDimensionToMetricsSpecs != null) {
            for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
                topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
            }
        }
    }
    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
    // add a string column per dimension, plus a companion column for each topk dimension
    for (String dimension : thirdeyeConfig.getDimensionNames()) {
        fieldAssembler = fieldAssembler.name(dimension).type().nullable().stringType().noDefault();
        if (topKTransformDimensionSet.contains(dimension)) {
            fieldAssembler = fieldAssembler.name(dimension + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
        }
    }
    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
        String metric = metricSpec.getName();
        MetricType metricType = metricSpec.getType();
        BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
        switch(metricType) {
            case SHORT:
            case INT:
                fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
                break;
            case FLOAT:
                fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
                break;
            case DOUBLE:
                fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
                break;
            case LONG:
            default:
                fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
        }
    }
    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
    fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
    outputSchema = fieldAssembler.endRecord();
    LOGGER.info("New schema {}", outputSchema.toString(true));
    return outputSchema;
}
Also used: TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec), TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec), Schema (org.apache.avro.Schema), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), HashSet (java.util.HashSet)
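
For context on how the generated schema is consumed, here is a minimal sketch of populating an Avro GenericRecord against it. This is not project code: the dimension name "country", the metric name "impressions" and the values are hypothetical, and the sketch assumes a config with one topk-enabled dimension and one INT metric.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

// Illustrative sketch, not project code. The field names and values are hypothetical;
// DerivedColumnTransformationPhaseJob, ThirdEyeConfig and ThirdEyeConstants are the
// classes already used in the example above.
public GenericRecord buildExampleRecord(DerivedColumnTransformationPhaseJob job, ThirdEyeConfig thirdeyeConfig) {
    Schema schema = job.newSchema(thirdeyeConfig);
    GenericRecord record = new GenericData.Record(schema);
    // dimensions are nullable strings; a topk dimension also gets a companion suffixed column
    record.put("country", "US");
    record.put("country" + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX, "US");
    // SHORT and INT metrics map to Avro int, FLOAT to float, DOUBLE to double, LONG (and the default) to long
    record.put("impressions", 42);
    // the time column is always a non-nullable long
    record.put(thirdeyeConfig.getTime().getColumnName(), 1476230400L);
    return record;
}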

Example 8 with MetricType

Use of com.linkedin.thirdeye.hadoop.config.MetricType in project pinot by linkedin.

From the class TopKPhaseConfig, the method fromThirdEyeConfig:

/**
   * Generates the top-k configuration needed by the TopKPhase job from the
   * ThirdEye config.
   * @param config the ThirdEyeConfig from which metrics, dimensions, thresholds, top-k specs and whitelists are read
   * @return the resulting TopKPhaseConfig
   */
public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
    // metrics
    List<String> metricNames = new ArrayList<String>(config.getMetrics().size());
    List<MetricType> metricTypes = new ArrayList<MetricType>(config.getMetrics().size());
    for (MetricSpec spec : config.getMetrics()) {
        metricNames.add(spec.getName());
        metricTypes.add(spec.getType());
    }
    // dimensions
    List<String> dimensionNames = new ArrayList<String>(config.getDimensions().size());
    for (DimensionSpec dimensionSpec : config.getDimensions()) {
        dimensionNames.add(dimensionSpec.getName());
    }
    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
    Map<String, Double> metricThresholds = new HashMap<>();
    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
    Map<String, Set<String>> whitelist = new HashMap<>();
    // topk
    if (topKWhitelist != null) {
        // metric thresholds
        if (topKWhitelist.getThreshold() != null) {
            metricThresholds = topKWhitelist.getThreshold();
        }
        for (String metric : metricNames) {
            if (metricThresholds.get(metric) == null) {
                metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
            }
        }
        // topk
        if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
            for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
                topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
            }
        }
        // whitelist
        if (topKWhitelist.getWhitelist() != null) {
            for (Entry<String, String> entry : topKWhitelist.getWhitelist().entrySet()) {
                String[] whitelistValues = entry.getValue().split(FIELD_SEPARATOR);
                whitelist.put(entry.getKey(), new HashSet<String>(Arrays.asList(whitelistValues)));
            }
        }
    }
    return new TopKPhaseConfig(dimensionNames, metricNames, metricTypes, metricThresholds, topKDimensionToMetricsSpec, whitelist);
}
Also used: DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec), Set (java.util.Set), HashSet (java.util.HashSet), TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec), HashMap (java.util.HashMap), MetricType (com.linkedin.thirdeye.hadoop.config.MetricType), MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec), ArrayList (java.util.ArrayList), TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec)
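
To make the whitelist handling concrete: each whitelist entry maps a dimension name to a FIELD_SEPARATOR-delimited string of values, which the method splits into a Set so the TopK phase can do constant-time membership checks. The snippet below is only an illustration; it assumes the separator is a comma and uses made-up values, while the real FIELD_SEPARATOR constant is defined in TopKPhaseConfig.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class WhitelistParseSketch {
    public static void main(String[] args) {
        // Illustrative only: assumes the separator is "," and uses made-up values
        // for a dimension named "country"; the real FIELD_SEPARATOR lives in TopKPhaseConfig.
        String rawWhitelistEntry = "US,CA,MX";
        Set<String> allowed = new HashSet<>(Arrays.asList(rawWhitelistEntry.split(",")));
        // prints [US, CA, MX] (order not guaranteed); membership checks are then constant time
        System.out.println(allowed);
    }
}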

Aggregations

MetricType (com.linkedin.thirdeye.hadoop.config.MetricType): 8 usages
MetricSpec (com.linkedin.thirdeye.hadoop.config.MetricSpec): 4 usages
DimensionSpec (com.linkedin.thirdeye.hadoop.config.DimensionSpec): 3 usages
TopkWhitelistSpec (com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec): 3 usages
ArrayList (java.util.ArrayList): 3 usages
HashSet (java.util.HashSet): 3 usages
TopKDimensionToMetricsSpec (com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec): 2 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 2 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2 usages
DataInputStream (java.io.DataInputStream): 2 usages
DataOutputStream (java.io.DataOutputStream): 2 usages
HashMap (java.util.HashMap): 2 usages
Set (java.util.Set): 2 usages
TimeSpec (com.linkedin.thirdeye.hadoop.config.TimeSpec): 1 usage
Schema (org.apache.avro.Schema): 1 usage