Search in sources :

Example 6 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

In the class ThirdEyeUtils, method createSchema.

/**
 * Builds a Pinot {@link Schema} from a ThirdEye {@link CollectionSchema}.
 *
 * <p>Every dimension is registered as a single-value {@code STRING} dimension field and every
 * metric as a single-value metric field whose data type is looked up by the name of the metric's
 * declared type. The time column is registered with identical incoming and outgoing granularity
 * specs (data type {@code LONG}), and the schema is named after the collection.
 *
 * @param collectionSchema source of the dimensions, metrics, time spec and collection name
 * @return the populated schema
 */
public static Schema createSchema(CollectionSchema collectionSchema) {
    Schema pinotSchema = new Schema();

    // Dimensions: always single-value STRING columns.
    for (DimensionSpec dimension : collectionSchema.getDimensions()) {
        FieldSpec dimensionField = new DimensionFieldSpec();
        String name = dimension.getName();
        dimensionField.setName(name);
        dimensionField.setDataType(DataType.STRING);
        dimensionField.setSingleValueField(true);
        pinotSchema.addField(name, dimensionField);
    }

    // Metrics: single-value columns; the data type is resolved from the metric type's name.
    for (MetricSpec metric : collectionSchema.getMetrics()) {
        FieldSpec metricField = new MetricFieldSpec();
        String name = metric.getName();
        metricField.setName(name);
        String typeName = metric.getType().toString();
        metricField.setDataType(DataType.valueOf(typeName));
        metricField.setSingleValueField(true);
        pinotSchema.addField(name, metricField);
    }

    // Time column: "sinceEpoch" maps to EPOCH; any other format is passed through
    // as "<SIMPLE_DATE_FORMAT>:<format>".
    TimeSpec timeSpec = collectionSchema.getTime();
    String declaredFormat = timeSpec.getFormat();
    String timeFormat;
    if (declaredFormat.equals("sinceEpoch")) {
        timeFormat = TimeFormat.EPOCH.toString();
    } else {
        timeFormat = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + declaredFormat;
    }

    TimeGranularitySpec incomingSpec = new TimeGranularitySpec(
        DataType.LONG,
        timeSpec.getDataGranularity().getSize(),
        timeSpec.getDataGranularity().getUnit(),
        timeFormat,
        timeSpec.getColumnName());
    TimeGranularitySpec outgoingSpec = new TimeGranularitySpec(
        DataType.LONG,
        timeSpec.getDataGranularity().getSize(),
        timeSpec.getDataGranularity().getUnit(),
        timeFormat,
        timeSpec.getColumnName());
    TimeFieldSpec timeField = new TimeFieldSpec(incomingSpec, outgoingSpec);
    pinotSchema.addField(timeSpec.getColumnName(), timeField);

    pinotSchema.setSchemaName(collectionSchema.getCollection());
    return pinotSchema;
}
Also used : DimensionSpec(com.linkedin.thirdeye.api.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) CollectionSchema(com.linkedin.thirdeye.api.CollectionSchema) MetricSpec(com.linkedin.thirdeye.api.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)

Example 7 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

In the class AutoLoadPinotMetricsService, method checkMetricChanges.

/**
 * Creates metric configs for metrics that appear in the Pinot schema but are not yet configured
 * for the dataset, then appends the ids of the newly saved configs to the dataset's default
 * dashboard.
 *
 * @param dataset name of the dataset being checked
 * @param datasetConfig dataset configuration (not read by this method)
 * @param schema Pinot schema whose metric field specs are compared against existing configs
 */
private void checkMetricChanges(String dataset, DatasetConfigDTO datasetConfig, Schema schema) {
    LOG.info("Checking for metric changes in {}", dataset);

    List<MetricFieldSpec> metricsInSchema = schema.getMetricFieldSpecs();
    List<MetricConfigDTO> configuredMetrics = DAO_REGISTRY.getMetricConfigDAO().findByDataset(dataset);

    // Names of the metrics that already have a config for this dataset.
    List<String> configuredNames = new ArrayList<>();
    for (MetricConfigDTO configured : configuredMetrics) {
        configuredNames.add(configured.getName());
    }

    // Metrics which are new in the pinot schema: create them and remember the saved ids.
    List<Long> newMetricIds = new ArrayList<>();
    for (MetricFieldSpec schemaMetric : metricsInSchema) {
        String name = schemaMetric.getName();
        if (configuredNames.contains(name)) {
            continue;
        }
        MetricConfigDTO newConfig = ConfigGenerator.generateMetricConfig(schemaMetric, dataset);
        LOG.info("Creating metric {} for {}", name, dataset);
        Long savedId = DAO_REGISTRY.getMetricConfigDAO().save(newConfig);
        newMetricIds.add(savedId);
    }

    // Nothing was created, so the default dashboard stays untouched.
    if (!CollectionUtils.isNotEmpty(newMetricIds)) {
        return;
    }

    // Add the new metric ids to the default dashboard.
    LOG.info("Metrics to add {}", newMetricIds);
    String defaultDashboard = ThirdEyeUtils.getDefaultDashboardName(dataset);
    DashboardConfigDTO dashboard = DAO_REGISTRY.getDashboardConfigDAO().findByName(defaultDashboard);
    dashboard.getMetricIds().addAll(newMetricIds);
    DAO_REGISTRY.getDashboardConfigDAO().update(dashboard);

    // TODO: write a tool, which given a metric id, erases all traces of that metric from the database
    // This will include:
    // 1) delete the metric from metricConfigs
    // 2) remove any derived metrics which use the deleted metric
    // 3) remove the metric, and derived metrics from all dashboards
    // 4) remove any anomaly functions associated with the metric
    // 5) remove any alerts associated with these anomaly functions
}
Also used : MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) ArrayList(java.util.ArrayList) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DashboardConfigDTO(com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)

Example 8 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

In the class AutoLoadPinotMetricsService, method checkMetricAsDimensionDataset.

/**
 * Reconciles the stored configuration of a dataset that uses one column for metric names and
 * another for metric values.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>drop the metric-names column from the dataset's dimensions, if listed there;</li>
 *   <li>delete the metric config of the metric-values column, if one exists, and remove its id
 *       from the default dashboard;</li>
 *   <li>when auto-discovery is enabled, create a metric config for every distinct metric name
 *       that has none yet and set the default dashboard's metric ids to all metric ids of the
 *       dataset.</li>
 * </ol>
 *
 * @param datasetConfigDTO dataset configuration to reconcile
 * @param schema Pinot schema used to look up the field spec of the metric-values column
 */
private void checkMetricAsDimensionDataset(DatasetConfigDTO datasetConfigDTO, Schema schema) {
    String dataset = datasetConfigDTO.getDataset();
    String namesColumn = datasetConfigDTO.getMetricNamesColumn();
    String valuesColumn = datasetConfigDTO.getMetricValuesColumn();
    FieldSpec valuesColumnSpec = schema.getFieldSpecFor(valuesColumn);
    String defaultDashboard = ThirdEyeUtils.getDefaultDashboardName(dataset);

    // Remove the metric-names column from the dimensions if it is listed there.
    List<String> datasetDimensions = datasetConfigDTO.getDimensions();
    if (datasetDimensions.contains(namesColumn)) {
        datasetDimensions.removeAll(Lists.newArrayList(namesColumn));
        datasetConfigDTO.setDimensions(datasetDimensions);
        DAO_REGISTRY.getDatasetConfigDAO().update(datasetConfigDTO);
    }

    // Remove the metric-values column from the metrics if it has a config.
    MetricConfigDTO valuesColumnConfig =
        DAO_REGISTRY.getMetricConfigDAO().findByMetricAndDataset(valuesColumn, dataset);
    if (valuesColumnConfig != null) {
        Long staleMetricId = valuesColumnConfig.getId();
        DAO_REGISTRY.getMetricConfigDAO().delete(valuesColumnConfig);

        // Also drop its id from the default dashboard.
        DashboardConfigDTO dashboard = DAO_REGISTRY.getDashboardConfigDAO().findByName(defaultDashboard);
        dashboard.getMetricIds().removeAll(Lists.newArrayList(staleMetricId));
        LOG.info("Updating dashboard config for {}", defaultDashboard);
        DAO_REGISTRY.getDashboardConfigDAO().update(dashboard);
    }

    if (!datasetConfigDTO.isAutoDiscoverMetrics()) {
        return;
    }

    // Query pinot for the distinct values of the metric-names column.
    List<String> discoveredNames = fetchMetricAsADimensionMetrics(dataset, namesColumn);

    // Keep only the names that do not have a metric config yet.
    List<String> alreadyConfigured = Lists.newArrayList();
    for (MetricConfigDTO existing : DAO_REGISTRY.getMetricConfigDAO().findByDataset(dataset)) {
        alreadyConfigured.add(existing.getName());
    }
    discoveredNames.removeAll(alreadyConfigured);

    // Each new metric takes the data type of the metric-values column.
    for (String newName : discoveredNames) {
        LOG.info("Creating metric config for {}", newName);
        MetricFieldSpec newSpec = new MetricFieldSpec(newName, valuesColumnSpec.getDataType());
        DAO_REGISTRY.getMetricConfigDAO().save(ConfigGenerator.generateMetricConfig(newSpec, dataset));
    }

    // Point the default dashboard at every metric id now configured for the dataset.
    List<MetricConfigDTO> allConfigs = DAO_REGISTRY.getMetricConfigDAO().findByDataset(dataset);
    List<Long> allIds = ConfigGenerator.getMetricIdsFromMetricConfigs(allConfigs);
    DashboardConfigDTO dashboard = DAO_REGISTRY.getDashboardConfigDAO().findByName(defaultDashboard);
    dashboard.setMetricIds(allIds);
    LOG.info("Creating dashboard config for {}", defaultDashboard);
    DAO_REGISTRY.getDashboardConfigDAO().update(dashboard);
}
Also used : MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DashboardConfigDTO(com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec)

Example 9 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

In the class TestOffheapStarTreeBuilder, method testRandom.

/**
 * Smoke test: appends 100 randomly generated rows to an {@link OffHeapStarTreeBuilder} and
 * builds the tree. There are no assertions on the output; the test passes when nothing throws.
 *
 * <p>The schema has six {@code INT} dimensions ({@code d1..d6}, all in the split order), an
 * {@code INT} time column {@code daysSinceEpoch} in days, and six {@code INT} metrics
 * ({@code n1..n6}).
 *
 * @throws Exception if building the tree or cleaning up the output directory fails
 */
@Test
public void testRandom() throws Exception {
    int ROWS = 100;
    int numDimensions = 6;
    int numMetrics = 6;
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.INT, true);
        schema.addField(dimensionFieldSpec);
        builderConfig.dimensionsSplitOrder.add(dimName);
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "n" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    // Everything that may touch outDir runs inside try/finally so the directory is removed even
    // when init/append/build throws; otherwise a failed run leaves stale files for the next one.
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        Random r = new Random();
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < ROWS; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // Earlier dimensions draw from a larger range of values than later ones.
                map.put(dimName, dimName + "-v" + r.nextInt((numDimensions - i + 2)));
            }
            //time
            map.put("daysSinceEpoch", r.nextInt(1000));
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                // The bound is derived from numDimensions (not numMetrics); it stays positive
                // for the counts used here.
                map.put(metName, r.nextInt((numDimensions - i + 2)));
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
    } finally {
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) Random(java.util.Random) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Example 10 with MetricFieldSpec

use of com.linkedin.pinot.common.data.MetricFieldSpec in project pinot by linkedin.

In the class TestOffheapStarTreeBuilder, method testSimpleCore.

/**
 * Builds a star tree from deterministic rows and verifies that, in every aggregate document,
 * each dimension marked as skip-materialization has the value {@code "ALL"}.
 *
 * <p>The last {@code numSkipMaterializationDimensions} dimensions are registered as
 * skip-materialization; the others form the split order. {@code factorial(numDimensions)} rows
 * are appended, each with time value 1 and every metric set to 1.
 *
 * @param numDimensions number of {@code STRING} dimensions ({@code d1..dN}) in the schema
 * @param numMetrics number of {@code INT} metrics ({@code m1..mN}) in the schema
 * @param numSkipMaterializationDimensions how many trailing dimensions skip materialization
 * @throws Exception if building the tree or cleaning up the output directory fails
 * @throws AssertionError if a skip-materialization dimension is not {@code "ALL"} in an
 *     aggregate document
 */
private void testSimpleCore(int numDimensions, int numMetrics, int numSkipMaterializationDimensions) throws Exception {
    int ROWS = (int) MathUtils.factorial(numDimensions);
    StarTreeBuilderConfig builderConfig = new StarTreeBuilderConfig();
    Schema schema = new Schema();
    builderConfig.dimensionsSplitOrder = new ArrayList<>();
    builderConfig.setSkipMaterializationForDimensions(new HashSet<String>());
    Set<String> skipMaterializationForDimensions = builderConfig.getSkipMaterializationForDimensions();
    for (int i = 0; i < numDimensions; i++) {
        String dimName = "d" + (i + 1);
        DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(dimName, DataType.STRING, true);
        schema.addField(dimensionFieldSpec);
        if (i < (numDimensions - numSkipMaterializationDimensions)) {
            builderConfig.dimensionsSplitOrder.add(dimName);
        } else {
            builderConfig.getSkipMaterializationForDimensions().add(dimName);
        }
    }
    schema.setTimeFieldSpec(new TimeFieldSpec("daysSinceEpoch", DataType.INT, TimeUnit.DAYS));
    for (int i = 0; i < numMetrics; i++) {
        String metricName = "m" + (i + 1);
        MetricFieldSpec metricFieldSpec = new MetricFieldSpec(metricName, DataType.INT);
        schema.addField(metricFieldSpec);
    }
    builderConfig.maxLeafRecords = 10;
    builderConfig.schema = schema;
    builderConfig.outDir = new File("/tmp/startree");
    // Everything that may touch outDir runs inside try/finally so the directory is removed even
    // when the build or the verification fails.
    try {
        OffHeapStarTreeBuilder builder = new OffHeapStarTreeBuilder();
        builder.init(builderConfig);
        HashMap<String, Object> map = new HashMap<>();
        for (int row = 0; row < ROWS; row++) {
            for (int i = 0; i < numDimensions; i++) {
                String dimName = schema.getDimensionFieldSpecs().get(i).getName();
                // Dimension i cycles through (numDimensions - i) distinct values.
                map.put(dimName, dimName + "-v" + row % (numDimensions - i));
            }
            //time
            map.put("daysSinceEpoch", 1);
            for (int i = 0; i < numMetrics; i++) {
                String metName = schema.getMetricFieldSpecs().get(i).getName();
                map.put(metName, 1);
            }
            GenericRow genericRow = new GenericRow();
            genericRow.init(map);
            builder.append(genericRow);
        }
        builder.build();
        int totalDocs = builder.getTotalRawDocumentCount() + builder.getTotalAggregateDocumentCount();

        // Walk every document once to check that iteration over the full range completes.
        Iterator<GenericRow> iterator = builder.iterator(0, totalDocs);
        while (iterator.hasNext()) {
            iterator.next();
        }

        // Documents from the raw-document count onward are checked for the "ALL" placeholder.
        iterator = builder.iterator(builder.getTotalRawDocumentCount(), totalDocs);
        while (iterator.hasNext()) {
            GenericRow row = iterator.next();
            for (String skipDimension : skipMaterializationForDimensions) {
                String rowValue = (String) row.getValue(skipDimension);
                // Explicit check instead of the `assert` keyword, which is skipped when the JVM
                // runs without -ea.
                if (!"ALL".equals(rowValue)) {
                    throw new AssertionError(
                        "Expected ALL for skip-materialization dimension " + skipDimension + " but was " + rowValue);
                }
            }
        }
    } finally {
        FileUtils.deleteDirectory(builderConfig.outDir);
    }
}
Also used : HashMap(java.util.HashMap) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) GenericRow(com.linkedin.pinot.core.data.GenericRow) File(java.io.File) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec)

Aggregations

MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 25, DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 16, TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 14, Schema (com.linkedin.pinot.common.data.Schema): 13, FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 11, TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 6, DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO): 4, MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO): 4, File (java.io.File): 4, FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType): 3, GenericRow (com.linkedin.pinot.core.data.GenericRow): 3, HashMap (java.util.HashMap): 3, Field (org.apache.avro.Schema.Field): 3, Test (org.testng.annotations.Test): 3, DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 2, DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO): 2, FileInputStream (java.io.FileInputStream): 2, ByteBuffer (java.nio.ByteBuffer): 2, ArrayList (java.util.ArrayList): 2, DataFileStream (org.apache.avro.file.DataFileStream): 2